LCOV - code coverage report
Current view: top level - src/backend/backup - basebackup_incremental.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 197 223 88.3 %
Date: 2025-01-18 04:15:08 Functions: 15 15 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * basebackup_incremental.c
       4             :  *    code for incremental backup support
       5             :  *
       6             :  * This code isn't actually in charge of taking an incremental backup;
       7             :  * the actual construction of the incremental backup happens in
       8             :  * basebackup.c. Here, we're concerned with providing the necessary
       9             :  * supports for that operation. In particular, we need to parse the
      10             :  * backup manifest supplied by the user taking the incremental backup
      11             :  * and extract the required information from it.
      12             :  *
      13             :  * Portions Copyright (c) 2010-2025, PostgreSQL Global Development Group
      14             :  *
      15             :  * IDENTIFICATION
      16             :  *    src/backend/backup/basebackup_incremental.c
      17             :  *
      18             :  *-------------------------------------------------------------------------
      19             :  */
      20             : #include "postgres.h"
      21             : 
      22             : #include "access/timeline.h"
      23             : #include "access/xlog.h"
      24             : #include "backup/basebackup_incremental.h"
      25             : #include "backup/walsummary.h"
      26             : #include "common/blkreftable.h"
      27             : #include "common/hashfn.h"
      28             : #include "common/int.h"
      29             : #include "common/parse_manifest.h"
      30             : #include "postmaster/walsummarizer.h"
      31             : 
      32             : #define BLOCKS_PER_READ         512
      33             : 
      34             : /*
      35             :  * We expect to find the last lines of the manifest, including the checksum,
      36             :  * in the last MIN_CHUNK bytes of the manifest. We trigger an incremental
      37             :  * parse step if we are about to overflow MAX_CHUNK bytes.
      38             :  */
      39             : #define MIN_CHUNK  1024
      40             : #define MAX_CHUNK (128 *  1024)
      41             : 
      42             : /*
      43             :  * Details extracted from the WAL ranges present in the supplied backup manifest.
      44             :  */
      45             : typedef struct
      46             : {
      47             :     TimeLineID  tli;
      48             :     XLogRecPtr  start_lsn;
      49             :     XLogRecPtr  end_lsn;
      50             : } backup_wal_range;
      51             : 
      52             : /*
      53             :  * Details extracted from the file list present in the supplied backup manifest.
      54             :  */
      55             : typedef struct
      56             : {
      57             :     uint32      status;
      58             :     const char *path;
      59             :     uint64      size;
      60             : } backup_file_entry;
      61             : 
      62             : static uint32 hash_string_pointer(const char *s);
      63             : #define SH_PREFIX               backup_file
      64             : #define SH_ELEMENT_TYPE         backup_file_entry
      65             : #define SH_KEY_TYPE             const char *
      66             : #define SH_KEY                  path
      67             : #define SH_HASH_KEY(tb, key)    hash_string_pointer(key)
      68             : #define SH_EQUAL(tb, a, b)      (strcmp(a, b) == 0)
      69             : #define SH_SCOPE                static inline
      70             : #define SH_DECLARE
      71             : #define SH_DEFINE
      72             : #include "lib/simplehash.h"
      73             : 
      74             : struct IncrementalBackupInfo
      75             : {
      76             :     /* Memory context for this object and its subsidiary objects. */
      77             :     MemoryContext mcxt;
      78             : 
      79             :     /* Temporary buffer for storing the manifest while parsing it. */
      80             :     StringInfoData buf;
      81             : 
      82             :     /* WAL ranges extracted from the backup manifest. */
      83             :     List       *manifest_wal_ranges;
      84             : 
      85             :     /*
      86             :      * Files extracted from the backup manifest.
      87             :      *
      88             :      * We don't really need this information, because we use WAL summaries to
      89             :      * figure out what's changed. It would be unsafe to just rely on the list
      90             :      * of files that existed before, because it's possible for a file to be
      91             :      * removed and a new one created with the same name and different
      92             :      * contents. In such cases, the whole file must still be sent. We can tell
      93             :      * from the WAL summaries whether that happened, but not from the file
      94             :      * list.
      95             :      *
      96             :      * Nonetheless, this data is useful for sanity checking. If a file that we
      97             :      * think we shouldn't need to send is not present in the manifest for the
      98             :      * prior backup, something has gone terribly wrong. We retain the file
      99             :      * names and sizes, but not the checksums or last modified times, for
     100             :      * which we have no use.
     101             :      *
     102             :      * One significant downside of storing this data is that it consumes
     103             :      * memory. If that turns out to be a problem, we might have to decide not
     104             :      * to retain this information, or to make it optional.
     105             :      */
     106             :     backup_file_hash *manifest_files;
     107             : 
     108             :     /*
     109             :      * Block-reference table for the incremental backup.
     110             :      *
     111             :      * It's possible that storing the entire block-reference table in memory
     112             :      * will be a problem for some users. The in-memory format that we're using
     113             :      * here is pretty efficient, converging to little more than 1 bit per
     114             :      * block for relation forks with large numbers of modified blocks. It's
     115             :      * possible, however, that if you try to perform an incremental backup of
     116             :      * a database with a sufficiently large number of relations on a
     117             :      * sufficiently small machine, you could run out of memory here. If that
     118             :      * turns out to be a problem in practice, we'll need to be more clever.
     119             :      */
     120             :     BlockRefTable *brtab;
     121             : 
     122             :     /*
     123             :      * State object for incremental JSON parsing
     124             :      */
     125             :     JsonManifestParseIncrementalState *inc_state;
     126             : };
     127             : 
     128             : static void manifest_process_version(JsonManifestParseContext *context,
     129             :                                      int manifest_version);
     130             : static void manifest_process_system_identifier(JsonManifestParseContext *context,
     131             :                                                uint64 manifest_system_identifier);
     132             : static void manifest_process_file(JsonManifestParseContext *context,
     133             :                                   const char *pathname,
     134             :                                   uint64 size,
     135             :                                   pg_checksum_type checksum_type,
     136             :                                   int checksum_length,
     137             :                                   uint8 *checksum_payload);
     138             : static void manifest_process_wal_range(JsonManifestParseContext *context,
     139             :                                        TimeLineID tli,
     140             :                                        XLogRecPtr start_lsn,
     141             :                                        XLogRecPtr end_lsn);
     142             : static void manifest_report_error(JsonManifestParseContext *context,
     143             :                                   const char *fmt,...)
     144             :             pg_attribute_printf(2, 3) pg_attribute_noreturn();
     145             : static int  compare_block_numbers(const void *a, const void *b);
     146             : 
     147             : /*
     148             :  * Create a new object for storing information extracted from the manifest
     149             :  * supplied when creating an incremental backup.
     150             :  */
     151             : IncrementalBackupInfo *
     152          20 : CreateIncrementalBackupInfo(MemoryContext mcxt)
     153             : {
     154             :     IncrementalBackupInfo *ib;
     155             :     MemoryContext oldcontext;
     156             :     JsonManifestParseContext *context;
     157             : 
     158          20 :     oldcontext = MemoryContextSwitchTo(mcxt);
     159             : 
     160          20 :     ib = palloc0(sizeof(IncrementalBackupInfo));
     161          20 :     ib->mcxt = mcxt;
     162          20 :     initStringInfo(&ib->buf);
     163             : 
     164             :     /*
     165             :      * It's hard to guess how many files a "typical" installation will have in
     166             :      * the data directory, but a fresh initdb creates almost 1000 files as of
     167             :      * this writing, so it seems to make sense for our estimate to
     168             :      * substantially higher.
     169             :      */
     170          20 :     ib->manifest_files = backup_file_create(mcxt, 10000, NULL);
     171             : 
     172          20 :     context = palloc0(sizeof(JsonManifestParseContext));
     173             :     /* Parse the manifest. */
     174          20 :     context->private_data = ib;
     175          20 :     context->version_cb = manifest_process_version;
     176          20 :     context->system_identifier_cb = manifest_process_system_identifier;
     177          20 :     context->per_file_cb = manifest_process_file;
     178          20 :     context->per_wal_range_cb = manifest_process_wal_range;
     179          20 :     context->error_cb = manifest_report_error;
     180             : 
     181          20 :     ib->inc_state = json_parse_manifest_incremental_init(context);
     182             : 
     183          20 :     MemoryContextSwitchTo(oldcontext);
     184             : 
     185          20 :     return ib;
     186             : }
     187             : 
     188             : /*
     189             :  * Before taking an incremental backup, the caller must supply the backup
     190             :  * manifest from a prior backup. Each chunk of manifest data received
     191             :  * from the client should be passed to this function.
     192             :  */
     193             : void
     194          60 : AppendIncrementalManifestData(IncrementalBackupInfo *ib, const char *data,
     195             :                               int len)
     196             : {
     197             :     MemoryContext oldcontext;
     198             : 
     199             :     /* Switch to our memory context. */
     200          60 :     oldcontext = MemoryContextSwitchTo(ib->mcxt);
     201             : 
     202          60 :     if (ib->buf.len > MIN_CHUNK && ib->buf.len + len > MAX_CHUNK)
     203             :     {
     204             :         /*
     205             :          * time for an incremental parse. We'll do all but the last MIN_CHUNK
     206             :          * so that we have enough left for the final piece.
     207             :          */
     208          20 :         json_parse_manifest_incremental_chunk(ib->inc_state, ib->buf.data,
     209          20 :                                               ib->buf.len - MIN_CHUNK, false);
     210             :         /* now remove what we just parsed  */
     211          18 :         memmove(ib->buf.data, ib->buf.data + (ib->buf.len - MIN_CHUNK),
     212             :                 MIN_CHUNK + 1);
     213          18 :         ib->buf.len = MIN_CHUNK;
     214             :     }
     215             : 
     216          58 :     appendBinaryStringInfo(&ib->buf, data, len);
     217             : 
     218             :     /* Switch back to previous memory context. */
     219          58 :     MemoryContextSwitchTo(oldcontext);
     220          58 : }
     221             : 
     222             : /*
     223             :  * Finalize an IncrementalBackupInfo object after all manifest data has
     224             :  * been supplied via calls to AppendIncrementalManifestData.
     225             :  */
     226             : void
     227          18 : FinalizeIncrementalManifest(IncrementalBackupInfo *ib)
     228             : {
     229             :     MemoryContext oldcontext;
     230             : 
     231             :     /* Switch to our memory context. */
     232          18 :     oldcontext = MemoryContextSwitchTo(ib->mcxt);
     233             : 
     234             :     /* Parse the last chunk of the manifest */
     235          18 :     json_parse_manifest_incremental_chunk(ib->inc_state, ib->buf.data,
     236          18 :                                           ib->buf.len, true);
     237             : 
     238             :     /* Done with the buffer, so release memory. */
     239          18 :     pfree(ib->buf.data);
     240          18 :     ib->buf.data = NULL;
     241             : 
     242             :     /* Done with inc_state, so release that memory too */
     243          18 :     json_parse_manifest_incremental_shutdown(ib->inc_state);
     244             : 
     245             :     /* Switch back to previous memory context. */
     246          18 :     MemoryContextSwitchTo(oldcontext);
     247          18 : }
     248             : 
     249             : /*
     250             :  * Prepare to take an incremental backup.
     251             :  *
     252             :  * Before this function is called, AppendIncrementalManifestData and
     253             :  * FinalizeIncrementalManifest should have already been called to pass all
     254             :  * the manifest data to this object.
     255             :  *
     256             :  * This function performs sanity checks on the data extracted from the
     257             :  * manifest and figures out for which WAL ranges we need summaries, and
     258             :  * whether those summaries are available. Then, it reads and combines the
     259             :  * data from those summary files. It also updates the backup_state with the
     260             :  * reference TLI and LSN for the prior backup.
     261             :  */
     262             : void
     263          18 : PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
     264             :                             BackupState *backup_state)
     265             : {
     266             :     MemoryContext oldcontext;
     267             :     List       *expectedTLEs;
     268             :     List       *all_wslist,
     269          18 :                *required_wslist = NIL;
     270             :     ListCell   *lc;
     271             :     TimeLineHistoryEntry **tlep;
     272             :     int         num_wal_ranges;
     273             :     int         i;
     274          18 :     bool        found_backup_start_tli = false;
     275          18 :     TimeLineID  earliest_wal_range_tli = 0;
     276          18 :     XLogRecPtr  earliest_wal_range_start_lsn = InvalidXLogRecPtr;
     277          18 :     TimeLineID  latest_wal_range_tli = 0;
     278             : 
     279             :     Assert(ib->buf.data == NULL);
     280             : 
     281             :     /* Switch to our memory context. */
     282          18 :     oldcontext = MemoryContextSwitchTo(ib->mcxt);
     283             : 
     284             :     /*
     285             :      * A valid backup manifest must always contain at least one WAL range
     286             :      * (usually exactly one, unless the backup spanned a timeline switch).
     287             :      */
     288          18 :     num_wal_ranges = list_length(ib->manifest_wal_ranges);
     289          18 :     if (num_wal_ranges == 0)
     290           0 :         ereport(ERROR,
     291             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     292             :                  errmsg("manifest contains no required WAL ranges")));
     293             : 
     294             :     /*
     295             :      * Match up the TLIs that appear in the WAL ranges of the backup manifest
     296             :      * with those that appear in this server's timeline history. We expect
     297             :      * every backup_wal_range to match to a TimeLineHistoryEntry; if it does
     298             :      * not, that's an error.
     299             :      *
     300             :      * This loop also decides which of the WAL ranges is the manifest is most
     301             :      * ancient and which one is the newest, according to the timeline history
     302             :      * of this server, and stores TLIs of those WAL ranges into
     303             :      * earliest_wal_range_tli and latest_wal_range_tli. It also updates
     304             :      * earliest_wal_range_start_lsn to the start LSN of the WAL range for
     305             :      * earliest_wal_range_tli.
     306             :      *
     307             :      * Note that the return value of readTimeLineHistory puts the latest
     308             :      * timeline at the beginning of the list, not the end. Hence, the earliest
     309             :      * TLI is the one that occurs nearest the end of the list returned by
     310             :      * readTimeLineHistory, and the latest TLI is the one that occurs closest
     311             :      * to the beginning.
     312             :      */
     313          18 :     expectedTLEs = readTimeLineHistory(backup_state->starttli);
     314          18 :     tlep = palloc0(num_wal_ranges * sizeof(TimeLineHistoryEntry *));
     315          36 :     for (i = 0; i < num_wal_ranges; ++i)
     316             :     {
     317          18 :         backup_wal_range *range = list_nth(ib->manifest_wal_ranges, i);
     318          18 :         bool        saw_earliest_wal_range_tli = false;
     319          18 :         bool        saw_latest_wal_range_tli = false;
     320             : 
     321             :         /* Search this server's history for this WAL range's TLI. */
     322          20 :         foreach(lc, expectedTLEs)
     323             :         {
     324          20 :             TimeLineHistoryEntry *tle = lfirst(lc);
     325             : 
     326          20 :             if (tle->tli == range->tli)
     327             :             {
     328          18 :                 tlep[i] = tle;
     329          18 :                 break;
     330             :             }
     331             : 
     332           2 :             if (tle->tli == earliest_wal_range_tli)
     333           0 :                 saw_earliest_wal_range_tli = true;
     334           2 :             if (tle->tli == latest_wal_range_tli)
     335           0 :                 saw_latest_wal_range_tli = true;
     336             :         }
     337             : 
     338             :         /*
     339             :          * An incremental backup can only be taken relative to a backup that
     340             :          * represents a previous state of this server. If the backup requires
     341             :          * WAL from a timeline that's not in our history, that definitely
     342             :          * isn't the case.
     343             :          */
     344          18 :         if (tlep[i] == NULL)
     345           0 :             ereport(ERROR,
     346             :                     (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     347             :                      errmsg("timeline %u found in manifest, but not in this server's history",
     348             :                             range->tli)));
     349             : 
     350             :         /*
     351             :          * If we found this TLI in the server's history before encountering
     352             :          * the latest TLI seen so far in the server's history, then this TLI
     353             :          * is the latest one seen so far.
     354             :          *
     355             :          * If on the other hand we saw the earliest TLI seen so far before
     356             :          * finding this TLI, this TLI is earlier than the earliest one seen so
     357             :          * far. And if this is the first TLI for which we've searched, it's
     358             :          * also the earliest one seen so far.
     359             :          *
     360             :          * On the first loop iteration, both things should necessarily be
     361             :          * true.
     362             :          */
     363          18 :         if (!saw_latest_wal_range_tli)
     364          18 :             latest_wal_range_tli = range->tli;
     365          18 :         if (earliest_wal_range_tli == 0 || saw_earliest_wal_range_tli)
     366             :         {
     367          18 :             earliest_wal_range_tli = range->tli;
     368          18 :             earliest_wal_range_start_lsn = range->start_lsn;
     369             :         }
     370             :     }
     371             : 
     372             :     /*
     373             :      * Propagate information about the prior backup into the backup_label that
     374             :      * will be generated for this backup.
     375             :      */
     376          18 :     backup_state->istartpoint = earliest_wal_range_start_lsn;
     377          18 :     backup_state->istarttli = earliest_wal_range_tli;
     378             : 
     379             :     /*
     380             :      * Sanity check start and end LSNs for the WAL ranges in the manifest.
     381             :      *
     382             :      * Commonly, there won't be any timeline switches during the prior backup
     383             :      * at all, but if there are, they should happen at the same LSNs that this
     384             :      * server switched timelines.
     385             :      *
     386             :      * Whether there are any timeline switches during the prior backup or not,
     387             :      * the prior backup shouldn't require any WAL from a timeline prior to the
     388             :      * start of that timeline. It also shouldn't require any WAL from later
     389             :      * than the start of this backup.
     390             :      *
     391             :      * If any of these sanity checks fail, one possible explanation is that
     392             :      * the user has generated WAL on the same timeline with the same LSNs more
     393             :      * than once. For instance, if two standbys running on timeline 1 were
     394             :      * both promoted and (due to a broken archiving setup) both selected new
     395             :      * timeline ID 2, then it's possible that one of these checks might trip.
     396             :      *
     397             :      * Note that there are lots of ways for the user to do something very bad
     398             :      * without tripping any of these checks, and they are not intended to be
     399             :      * comprehensive. It's pretty hard to see how we could be certain of
     400             :      * anything here. However, if there's a problem staring us right in the
     401             :      * face, it's best to report it, so we do.
     402             :      */
     403          36 :     for (i = 0; i < num_wal_ranges; ++i)
     404             :     {
     405          18 :         backup_wal_range *range = list_nth(ib->manifest_wal_ranges, i);
     406             : 
     407          18 :         if (range->tli == earliest_wal_range_tli)
     408             :         {
     409          18 :             if (range->start_lsn < tlep[i]->begin)
     410           0 :                 ereport(ERROR,
     411             :                         (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     412             :                          errmsg("manifest requires WAL from initial timeline %u starting at %X/%X, but that timeline begins at %X/%X",
     413             :                                 range->tli,
     414             :                                 LSN_FORMAT_ARGS(range->start_lsn),
     415             :                                 LSN_FORMAT_ARGS(tlep[i]->begin))));
     416             :         }
     417             :         else
     418             :         {
     419           0 :             if (range->start_lsn != tlep[i]->begin)
     420           0 :                 ereport(ERROR,
     421             :                         (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     422             :                          errmsg("manifest requires WAL from continuation timeline %u starting at %X/%X, but that timeline begins at %X/%X",
     423             :                                 range->tli,
     424             :                                 LSN_FORMAT_ARGS(range->start_lsn),
     425             :                                 LSN_FORMAT_ARGS(tlep[i]->begin))));
     426             :         }
     427             : 
     428          18 :         if (range->tli == latest_wal_range_tli)
     429             :         {
     430          18 :             if (range->end_lsn > backup_state->startpoint)
     431           0 :                 ereport(ERROR,
     432             :                         (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     433             :                          errmsg("manifest requires WAL from final timeline %u ending at %X/%X, but this backup starts at %X/%X",
     434             :                                 range->tli,
     435             :                                 LSN_FORMAT_ARGS(range->end_lsn),
     436             :                                 LSN_FORMAT_ARGS(backup_state->startpoint)),
     437             :                          errhint("This can happen for incremental backups on a standby if there was little activity since the previous backup.")));
     438             :         }
     439             :         else
     440             :         {
     441           0 :             if (range->end_lsn != tlep[i]->end)
     442           0 :                 ereport(ERROR,
     443             :                         (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     444             :                          errmsg("manifest requires WAL from non-final timeline %u ending at %X/%X, but this server switched timelines at %X/%X",
     445             :                                 range->tli,
     446             :                                 LSN_FORMAT_ARGS(range->end_lsn),
     447             :                                 LSN_FORMAT_ARGS(tlep[i]->end))));
     448             :         }
     449             : 
     450             :     }
     451             : 
     452             :     /*
     453             :      * Wait for WAL summarization to catch up to the backup start LSN. This
     454             :      * will throw an error if the WAL summarizer appears to be stuck. If WAL
     455             :      * summarization gets disabled while we're waiting, this will return
     456             :      * immediately, and we'll error out further down if the WAL summaries are
     457             :      * incomplete.
     458             :      */
     459          18 :     WaitForWalSummarization(backup_state->startpoint);
     460             : 
     461             :     /*
     462             :      * Retrieve a list of all WAL summaries on any timeline that overlap with
     463             :      * the LSN range of interest. We could instead call GetWalSummaries() once
     464             :      * per timeline in the loop that follows, but that would involve reading
     465             :      * the directory multiple times. It should be mildly faster - and perhaps
     466             :      * a bit safer - to do it just once.
     467             :      */
     468          18 :     all_wslist = GetWalSummaries(0, earliest_wal_range_start_lsn,
     469             :                                  backup_state->startpoint);
     470             : 
     471             :     /*
     472             :      * We need WAL summaries for everything that happened during the prior
     473             :      * backup and everything that happened afterward up until the point where
     474             :      * the current backup started.
     475             :      */
     476          20 :     foreach(lc, expectedTLEs)
     477             :     {
     478          20 :         TimeLineHistoryEntry *tle = lfirst(lc);
     479          20 :         XLogRecPtr  tli_start_lsn = tle->begin;
     480          20 :         XLogRecPtr  tli_end_lsn = tle->end;
     481          20 :         XLogRecPtr  tli_missing_lsn = InvalidXLogRecPtr;
     482             :         List       *tli_wslist;
     483             : 
     484             :         /*
     485             :          * Working through the history of this server from the current
     486             :          * timeline backwards, we skip everything until we find the timeline
     487             :          * where this backup started. Most of the time, this means we won't
     488             :          * skip anything at all, as it's unlikely that the timeline has
     489             :          * changed since the beginning of the backup moments ago.
     490             :          */
     491          20 :         if (tle->tli == backup_state->starttli)
     492             :         {
     493          18 :             found_backup_start_tli = true;
     494          18 :             tli_end_lsn = backup_state->startpoint;
     495             :         }
     496           2 :         else if (!found_backup_start_tli)
     497           0 :             continue;
     498             : 
     499             :         /*
     500             :          * Find the summaries that overlap the LSN range of interest for this
     501             :          * timeline. If this is the earliest timeline involved, the range of
     502             :          * interest begins with the start LSN of the prior backup; otherwise,
     503             :          * it begins at the LSN at which this timeline came into existence. If
     504             :          * this is the latest TLI involved, the range of interest ends at the
     505             :          * start LSN of the current backup; otherwise, it ends at the point
     506             :          * where we switched from this timeline to the next one.
     507             :          */
     508          20 :         if (tle->tli == earliest_wal_range_tli)
     509          18 :             tli_start_lsn = earliest_wal_range_start_lsn;
     510          20 :         tli_wslist = FilterWalSummaries(all_wslist, tle->tli,
     511             :                                         tli_start_lsn, tli_end_lsn);
     512             : 
     513             :         /*
     514             :          * There is no guarantee that the WAL summaries we found cover the
     515             :          * entire range of LSNs for which summaries are required, or indeed
     516             :          * that we found any WAL summaries at all. Check whether we have a
     517             :          * problem of that sort.
     518             :          */
     519          20 :         if (!WalSummariesAreComplete(tli_wslist, tli_start_lsn, tli_end_lsn,
     520             :                                      &tli_missing_lsn))
     521             :         {
     522           0 :             if (XLogRecPtrIsInvalid(tli_missing_lsn))
     523           0 :                 ereport(ERROR,
     524             :                         (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     525             :                          errmsg("WAL summaries are required on timeline %u from %X/%X to %X/%X, but no summaries for that timeline and LSN range exist",
     526             :                                 tle->tli,
     527             :                                 LSN_FORMAT_ARGS(tli_start_lsn),
     528             :                                 LSN_FORMAT_ARGS(tli_end_lsn))));
     529             :             else
     530           0 :                 ereport(ERROR,
     531             :                         (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     532             :                          errmsg("WAL summaries are required on timeline %u from %X/%X to %X/%X, but the summaries for that timeline and LSN range are incomplete",
     533             :                                 tle->tli,
     534             :                                 LSN_FORMAT_ARGS(tli_start_lsn),
     535             :                                 LSN_FORMAT_ARGS(tli_end_lsn)),
     536             :                          errdetail("The first unsummarized LSN in this range is %X/%X.",
     537             :                                    LSN_FORMAT_ARGS(tli_missing_lsn))));
     538             :         }
     539             : 
     540             :         /*
     541             :          * Remember that we need to read these summaries.
     542             :          *
     543             :          * Technically, it's possible that this could read more files than
     544             :          * required, since tli_wslist in theory could contain redundant
     545             :          * summaries. For instance, if we have a summary from 0/10000000 to
     546             :          * 0/20000000 and also one from 0/00000000 to 0/30000000, then the
     547             :          * latter subsumes the former and the former could be ignored.
     548             :          *
     549             :          * We ignore this possibility because the WAL summarizer only tries to
     550             :          * generate summaries that do not overlap. If somehow they exist,
     551             :          * we'll do a bit of extra work but the results should still be
     552             :          * correct.
     553             :          */
     554          20 :         required_wslist = list_concat(required_wslist, tli_wslist);
     555             : 
     556             :         /*
     557             :          * Timelines earlier than the one in which the prior backup began are
     558             :          * not relevant.
     559             :          */
     560          20 :         if (tle->tli == earliest_wal_range_tli)
     561          18 :             break;
     562             :     }
     563             : 
     564             :     /*
     565             :      * Read all of the required block reference table files and merge all of
     566             :      * the data into a single in-memory block reference table.
     567             :      *
     568             :      * See the comments for struct IncrementalBackupInfo for some thoughts on
     569             :      * memory usage.
     570             :      */
     571          18 :     ib->brtab = CreateEmptyBlockRefTable();
     572          52 :     foreach(lc, required_wslist)
     573             :     {
     574          34 :         WalSummaryFile *ws = lfirst(lc);
     575             :         WalSummaryIO wsio;
     576             :         BlockRefTableReader *reader;
     577             :         RelFileLocator rlocator;
     578             :         ForkNumber  forknum;
     579             :         BlockNumber limit_block;
     580             :         BlockNumber blocks[BLOCKS_PER_READ];
     581             : 
     582          34 :         wsio.file = OpenWalSummaryFile(ws, false);
     583          34 :         wsio.filepos = 0;
     584          34 :         ereport(DEBUG1,
     585             :                 (errmsg_internal("reading WAL summary file \"%s\"",
     586             :                                  FilePathName(wsio.file))));
     587          34 :         reader = CreateBlockRefTableReader(ReadWalSummary, &wsio,
     588             :                                            FilePathName(wsio.file),
     589             :                                            ReportWalSummaryError, NULL);
     590         732 :         while (BlockRefTableReaderNextRelation(reader, &rlocator, &forknum,
     591             :                                                &limit_block))
     592             :         {
     593         698 :             BlockRefTableSetLimitBlock(ib->brtab, &rlocator,
     594             :                                        forknum, limit_block);
     595             : 
     596             :             while (1)
     597         504 :             {
     598             :                 unsigned    nblocks;
     599             :                 unsigned    i;
     600             : 
     601        1202 :                 nblocks = BlockRefTableReaderGetBlocks(reader, blocks,
     602             :                                                        BLOCKS_PER_READ);
     603        1202 :                 if (nblocks == 0)
     604         698 :                     break;
     605             : 
     606        2322 :                 for (i = 0; i < nblocks; ++i)
     607        1818 :                     BlockRefTableMarkBlockModified(ib->brtab, &rlocator,
     608             :                                                    forknum, blocks[i]);
     609             :             }
     610             :         }
     611          34 :         DestroyBlockRefTableReader(reader);
     612          34 :         FileClose(wsio.file);
     613             :     }
     614             : 
     615             :     /* Switch back to previous memory context. */
     616          18 :     MemoryContextSwitchTo(oldcontext);
     617          18 : }
     618             : 
     619             : /*
     620             :  * Get the pathname that should be used when a file is sent incrementally.
     621             :  *
     622             :  * The result is a palloc'd string.
     623             :  */
     624             : char *
     625        3214 : GetIncrementalFilePath(Oid dboid, Oid spcoid, RelFileNumber relfilenumber,
     626             :                        ForkNumber forknum, unsigned segno)
     627             : {
     628             :     char       *path;
     629             :     char       *lastslash;
     630             :     char       *ipath;
     631             : 
     632        3214 :     path = GetRelationPath(dboid, spcoid, relfilenumber, INVALID_PROC_NUMBER,
     633             :                            forknum);
     634             : 
     635        3214 :     lastslash = strrchr(path, '/');
     636             :     Assert(lastslash != NULL);
     637        3214 :     *lastslash = '\0';
     638             : 
     639        3214 :     if (segno > 0)
     640           0 :         ipath = psprintf("%s/INCREMENTAL.%s.%u", path, lastslash + 1, segno);
     641             :     else
     642        3214 :         ipath = psprintf("%s/INCREMENTAL.%s", path, lastslash + 1);
     643             : 
     644        3214 :     pfree(path);
     645             : 
     646        3214 :     return ipath;
     647             : }
     648             : 
     649             : /*
     650             :  * How should we back up a particular file as part of an incremental backup?
     651             :  *
     652             :  * If the return value is BACK_UP_FILE_FULLY, caller should back up the whole
     653             :  * file just as if this were not an incremental backup.  The contents of the
     654             :  * relative_block_numbers array are unspecified in this case.
     655             :  *
     656             :  * If the return value is BACK_UP_FILE_INCREMENTALLY, caller should include
     657             :  * an incremental file in the backup instead of the entire file. On return,
     658             :  * *num_blocks_required will be set to the number of blocks that need to be
     659             :  * sent, and the actual block numbers will have been stored in
     660             :  * relative_block_numbers, which should be an array of at least RELSEG_SIZE.
     661             :  * In addition, *truncation_block_length will be set to the value that should
     662             :  * be included in the incremental file.
     663             :  */
     664             : FileBackupMethod
     665       18406 : GetFileBackupMethod(IncrementalBackupInfo *ib, const char *path,
     666             :                     Oid dboid, Oid spcoid,
     667             :                     RelFileNumber relfilenumber, ForkNumber forknum,
     668             :                     unsigned segno, size_t size,
     669             :                     unsigned *num_blocks_required,
     670             :                     BlockNumber *relative_block_numbers,
     671             :                     unsigned *truncation_block_length)
     672             : {
     673             :     BlockNumber limit_block;
     674             :     BlockNumber start_blkno;
     675             :     BlockNumber stop_blkno;
     676             :     RelFileLocator rlocator;
     677             :     BlockRefTableEntry *brtentry;
     678             :     unsigned    i;
     679             :     unsigned    nblocks;
     680             : 
     681             :     /* Should only be called after PrepareForIncrementalBackup. */
     682             :     Assert(ib->buf.data == NULL);
     683             : 
     684             :     /*
     685             :      * dboid could be InvalidOid if shared rel, but spcoid and relfilenumber
     686             :      * should have legal values.
     687             :      */
     688             :     Assert(OidIsValid(spcoid));
     689             :     Assert(RelFileNumberIsValid(relfilenumber));
     690             : 
     691             :     /*
     692             :      * If the file size is too large or not a multiple of BLCKSZ, then
     693             :      * something weird is happening, so give up and send the whole file.
     694             :      */
     695       18406 :     if ((size % BLCKSZ) != 0 || size / BLCKSZ > RELSEG_SIZE)
     696           0 :         return BACK_UP_FILE_FULLY;
     697             : 
     698             :     /*
     699             :      * The free-space map fork is not properly WAL-logged, so we need to
     700             :      * backup the entire file every time.
     701             :      */
     702       18406 :     if (forknum == FSM_FORKNUM)
     703        2240 :         return BACK_UP_FILE_FULLY;
     704             : 
     705             :     /*
     706             :      * If this file was not part of the prior backup, back it up fully.
     707             :      *
     708             :      * If this file was created after the prior backup and before the start of
     709             :      * the current backup, then the WAL summary information will tell us to
     710             :      * back up the whole file. However, if this file was created after the
     711             :      * start of the current backup, then the WAL summary won't know anything
     712             :      * about it. Without this logic, we would erroneously conclude that it was
     713             :      * OK to send it incrementally.
     714             :      *
     715             :      * Note that the file could have existed at the time of the prior backup,
     716             :      * gotten deleted, and then a new file with the same name could have been
     717             :      * created.  In that case, this logic won't prevent the file from being
     718             :      * backed up incrementally. But, if the deletion happened before the start
     719             :      * of the current backup, the limit block will be 0, inducing a full
     720             :      * backup. If the deletion happened after the start of the current backup,
     721             :      * reconstruction will erroneously combine blocks from the current
     722             :      * lifespan of the file with blocks from the previous lifespan -- but in
     723             :      * this type of case, WAL replay to reach backup consistency should remove
     724             :      * and recreate the file anyway, so the initial bogus contents should not
     725             :      * matter.
     726             :      */
     727       16166 :     if (backup_file_lookup(ib->manifest_files, path) == NULL)
     728             :     {
     729             :         char       *ipath;
     730             : 
     731        3214 :         ipath = GetIncrementalFilePath(dboid, spcoid, relfilenumber,
     732             :                                        forknum, segno);
     733        3214 :         if (backup_file_lookup(ib->manifest_files, ipath) == NULL)
     734         548 :             return BACK_UP_FILE_FULLY;
     735             :     }
     736             : 
     737             :     /*
     738             :      * Look up the special block reference table entry for the database as a
     739             :      * whole.
     740             :      */
     741       15618 :     rlocator.spcOid = spcoid;
     742       15618 :     rlocator.dbOid = dboid;
     743       15618 :     rlocator.relNumber = 0;
     744       15618 :     if (BlockRefTableGetEntry(ib->brtab, &rlocator, MAIN_FORKNUM,
     745             :                               &limit_block) != NULL)
     746             :     {
     747             :         /*
     748             :          * According to the WAL summary, this database OID/tablespace OID
     749             :          * pairing has been created since the previous backup. So, everything
     750             :          * in it must be backed up fully.
     751             :          */
     752         522 :         return BACK_UP_FILE_FULLY;
     753             :     }
     754             : 
     755             :     /* Look up the block reference table entry for this relfilenode. */
     756       15096 :     rlocator.relNumber = relfilenumber;
     757       15096 :     brtentry = BlockRefTableGetEntry(ib->brtab, &rlocator, forknum,
     758             :                                      &limit_block);
     759             : 
     760             :     /*
     761             :      * If there is no entry, then there have been no WAL-logged changes to the
     762             :      * relation since the predecessor backup was taken, so we can back it up
     763             :      * incrementally and need not include any modified blocks.
     764             :      *
     765             :      * However, if the file is zero-length, we should do a full backup,
     766             :      * because an incremental file is always more than zero length, and it's
     767             :      * silly to take an incremental backup when a full backup would be
     768             :      * smaller.
     769             :      */
     770       15096 :     if (brtentry == NULL)
     771             :     {
     772       15032 :         if (size == 0)
     773        3076 :             return BACK_UP_FILE_FULLY;
     774       11956 :         *num_blocks_required = 0;
     775       11956 :         *truncation_block_length = size / BLCKSZ;
     776       11956 :         return BACK_UP_FILE_INCREMENTALLY;
     777             :     }
     778             : 
     779             :     /*
     780             :      * If the limit_block is less than or equal to the point where this
     781             :      * segment starts, send the whole file.
     782             :      */
     783          64 :     if (limit_block <= segno * RELSEG_SIZE)
     784           0 :         return BACK_UP_FILE_FULLY;
     785             : 
     786             :     /*
     787             :      * Get relevant entries from the block reference table entry.
     788             :      *
     789             :      * We shouldn't overflow computing the start or stop block numbers, but if
     790             :      * it manages to happen somehow, detect it and throw an error.
     791             :      */
     792          64 :     start_blkno = segno * RELSEG_SIZE;
     793          64 :     stop_blkno = start_blkno + (size / BLCKSZ);
     794          64 :     if (start_blkno / RELSEG_SIZE != segno || stop_blkno < start_blkno)
     795           0 :         ereport(ERROR,
     796             :                 errcode(ERRCODE_INTERNAL_ERROR),
     797             :                 errmsg_internal("overflow computing block number bounds for segment %u with size %zu",
     798             :                                 segno, size));
     799             : 
     800             :     /*
     801             :      * This will write *absolute* block numbers into the output array, but
     802             :      * we'll transpose them below.
     803             :      */
     804          64 :     nblocks = BlockRefTableEntryGetBlocks(brtentry, start_blkno, stop_blkno,
     805             :                                           relative_block_numbers, RELSEG_SIZE);
     806             :     Assert(nblocks <= RELSEG_SIZE);
     807             : 
     808             :     /*
     809             :      * If we're going to have to send nearly all of the blocks, then just send
     810             :      * the whole file, because that won't require much extra storage or
     811             :      * transfer and will speed up and simplify backup restoration. It's not
     812             :      * clear what threshold is most appropriate here and perhaps it ought to
     813             :      * be configurable, but for now we're just going to say that if we'd need
     814             :      * to send 90% of the blocks anyway, give up and send the whole file.
     815             :      *
     816             :      * NB: If you change the threshold here, at least make sure to back up the
     817             :      * file fully when every single block must be sent, because there's
     818             :      * nothing good about sending an incremental file in that case.
     819             :      */
     820          64 :     if (nblocks * BLCKSZ > size * 0.9)
     821          18 :         return BACK_UP_FILE_FULLY;
     822             : 
     823             :     /*
     824             :      * Looks like we can send an incremental file, so sort the block numbers
     825             :      * and then transpose them from absolute block numbers to relative block
     826             :      * numbers if necessary.
     827             :      *
     828             :      * NB: If the block reference table was using the bitmap representation
     829             :      * for a given chunk, the block numbers in that chunk will already be
     830             :      * sorted, but when the array-of-offsets representation is used, we can
     831             :      * receive block numbers here out of order.
     832             :      */
     833          46 :     qsort(relative_block_numbers, nblocks, sizeof(BlockNumber),
     834             :           compare_block_numbers);
     835          46 :     if (start_blkno != 0)
     836             :     {
     837           0 :         for (i = 0; i < nblocks; ++i)
     838           0 :             relative_block_numbers[i] -= start_blkno;
     839             :     }
     840          46 :     *num_blocks_required = nblocks;
     841             : 
     842             :     /*
     843             :      * The truncation block length is the minimum length of the reconstructed
     844             :      * file. Any block numbers below this threshold that are not present in
     845             :      * the backup need to be fetched from the prior backup. At or above this
     846             :      * threshold, blocks should only be included in the result if they are
     847             :      * present in the backup. (This may require inserting zero blocks if the
     848             :      * blocks included in the backup are non-consecutive.)
     849             :      */
     850          46 :     *truncation_block_length = size / BLCKSZ;
     851          46 :     if (BlockNumberIsValid(limit_block))
     852             :     {
     853           0 :         unsigned    relative_limit = limit_block - segno * RELSEG_SIZE;
     854             : 
     855           0 :         if (*truncation_block_length < relative_limit)
     856           0 :             *truncation_block_length = relative_limit;
     857             :     }
     858             : 
     859             :     /* Send it incrementally. */
     860          46 :     return BACK_UP_FILE_INCREMENTALLY;
     861             : }
     862             : 
     863             : /*
     864             :  * Compute the size for a header of an incremental file containing a given
     865             :  * number of blocks. The header is rounded to a multiple of BLCKSZ, but
     866             :  * only if the file will store some block data.
     867             :  */
     868             : size_t
     869       12002 : GetIncrementalHeaderSize(unsigned num_blocks_required)
     870             : {
     871             :     size_t      result;
     872             : 
     873             :     /* Make sure we're not going to overflow. */
     874             :     Assert(num_blocks_required <= RELSEG_SIZE);
     875             : 
     876             :     /*
     877             :      * Three four byte quantities (magic number, truncation block length,
     878             :      * block count) followed by block numbers.
     879             :      */
     880       12002 :     result = 3 * sizeof(uint32) + (sizeof(BlockNumber) * num_blocks_required);
     881             : 
     882             :     /*
     883             :      * Round the header size to a multiple of BLCKSZ - when not a multiple of
     884             :      * BLCKSZ, add the missing fraction of a block. But do this only if the
     885             :      * file will store data for some blocks, otherwise keep it small.
     886             :      */
     887       12002 :     if ((num_blocks_required > 0) && (result % BLCKSZ != 0))
     888          46 :         result += BLCKSZ - (result % BLCKSZ);
     889             : 
     890       12002 :     return result;
     891             : }
     892             : 
     893             : /*
     894             :  * Compute the size for an incremental file containing a given number of blocks.
     895             :  */
     896             : size_t
     897       12002 : GetIncrementalFileSize(unsigned num_blocks_required)
     898             : {
     899             :     size_t      result;
     900             : 
     901             :     /* Make sure we're not going to overflow. */
     902             :     Assert(num_blocks_required <= RELSEG_SIZE);
     903             : 
     904             :     /*
     905             :      * Header with three four byte quantities (magic number, truncation block
     906             :      * length, block count) followed by block numbers, rounded to a multiple
     907             :      * of BLCKSZ (for files with block data), followed by block contents.
     908             :      */
     909       12002 :     result = GetIncrementalHeaderSize(num_blocks_required);
     910       12002 :     result += BLCKSZ * num_blocks_required;
     911             : 
     912       12002 :     return result;
     913             : }
     914             : 
     915             : /*
     916             :  * Helper function for filemap hash table.
     917             :  */
     918             : static uint32
     919       38726 : hash_string_pointer(const char *s)
     920             : {
     921       38726 :     unsigned char *ss = (unsigned char *) s;
     922             : 
     923       38726 :     return hash_bytes(ss, strlen(s));
     924             : }
     925             : 
     926             : /*
     927             :  * This callback to validate the manifest version for incremental backup.
     928             :  */
     929             : static void
     930          20 : manifest_process_version(JsonManifestParseContext *context,
     931             :                          int manifest_version)
     932             : {
     933             :     /* Incremental backups don't work with manifest version 1 */
     934          20 :     if (manifest_version == 1)
     935           0 :         context->error_cb(context,
     936             :                           "backup manifest version 1 does not support incremental backup");
     937          20 : }
     938             : 
     939             : /*
     940             :  * This callback to validate the manifest system identifier against the current
     941             :  * database server.
     942             :  */
     943             : static void
     944          20 : manifest_process_system_identifier(JsonManifestParseContext *context,
     945             :                                    uint64 manifest_system_identifier)
     946             : {
     947             :     uint64      system_identifier;
     948             : 
     949             :     /* Get system identifier of current system */
     950          20 :     system_identifier = GetSystemIdentifier();
     951             : 
     952          20 :     if (manifest_system_identifier != system_identifier)
     953           2 :         context->error_cb(context,
     954             :                           "system identifier in backup manifest is %llu, but database system identifier is %llu",
     955             :                           (unsigned long long) manifest_system_identifier,
     956             :                           (unsigned long long) system_identifier);
     957          18 : }
     958             : 
     959             : /*
     960             :  * This callback is invoked for each file mentioned in the backup manifest.
     961             :  *
     962             :  * We store the path to each file and the size of each file for sanity-checking
     963             :  * purposes. For further details, see comments for IncrementalBackupInfo.
     964             :  */
     965             : static void
     966       18738 : manifest_process_file(JsonManifestParseContext *context,
     967             :                       const char *pathname, uint64 size,
     968             :                       pg_checksum_type checksum_type,
     969             :                       int checksum_length,
     970             :                       uint8 *checksum_payload)
     971             : {
     972       18738 :     IncrementalBackupInfo *ib = context->private_data;
     973             :     backup_file_entry *entry;
     974             :     bool        found;
     975             : 
     976       18738 :     entry = backup_file_insert(ib->manifest_files, pathname, &found);
     977       18738 :     if (!found)
     978             :     {
     979       18738 :         entry->path = MemoryContextStrdup(ib->manifest_files->ctx,
     980             :                                           pathname);
     981       18738 :         entry->size = size;
     982             :     }
     983       18738 : }
     984             : 
     985             : /*
     986             :  * This callback is invoked for each WAL range mentioned in the backup
     987             :  * manifest.
     988             :  *
     989             :  * We're just interested in learning the oldest LSN and the corresponding TLI
     990             :  * that appear in any WAL range.
     991             :  */
     992             : static void
     993          18 : manifest_process_wal_range(JsonManifestParseContext *context,
     994             :                            TimeLineID tli, XLogRecPtr start_lsn,
     995             :                            XLogRecPtr end_lsn)
     996             : {
     997          18 :     IncrementalBackupInfo *ib = context->private_data;
     998          18 :     backup_wal_range *range = palloc(sizeof(backup_wal_range));
     999             : 
    1000          18 :     range->tli = tli;
    1001          18 :     range->start_lsn = start_lsn;
    1002          18 :     range->end_lsn = end_lsn;
    1003          18 :     ib->manifest_wal_ranges = lappend(ib->manifest_wal_ranges, range);
    1004          18 : }
    1005             : 
    1006             : /*
    1007             :  * This callback is invoked if an error occurs while parsing the backup
    1008             :  * manifest.
    1009             :  */
    1010             : static void
    1011           2 : manifest_report_error(JsonManifestParseContext *context, const char *fmt,...)
    1012             : {
    1013             :     StringInfoData errbuf;
    1014             : 
    1015           2 :     initStringInfo(&errbuf);
    1016             : 
    1017             :     for (;;)
    1018           0 :     {
    1019             :         va_list     ap;
    1020             :         int         needed;
    1021             : 
    1022           2 :         va_start(ap, fmt);
    1023           2 :         needed = appendStringInfoVA(&errbuf, fmt, ap);
    1024           2 :         va_end(ap);
    1025           2 :         if (needed == 0)
    1026           2 :             break;
    1027           0 :         enlargeStringInfo(&errbuf, needed);
    1028             :     }
    1029             : 
    1030           2 :     ereport(ERROR,
    1031             :             errmsg_internal("%s", errbuf.data));
    1032             : }
    1033             : 
    1034             : /*
    1035             :  * Quicksort comparator for block numbers.
    1036             :  */
    1037             : static int
    1038          46 : compare_block_numbers(const void *a, const void *b)
    1039             : {
    1040          46 :     BlockNumber aa = *(BlockNumber *) a;
    1041          46 :     BlockNumber bb = *(BlockNumber *) b;
    1042             : 
    1043          46 :     return pg_cmp_u32(aa, bb);
    1044             : }

Generated by: LCOV version 1.14