LCOV - PostgreSQL 19devel - src/backend/backup/basebackup

LCOV - code coverage report

Current view:	top level - src/backend/backup - basebackup_incremental.c (source / functions)		Hit	Total	Coverage
Test:	PostgreSQL 19devel	Lines:	196	222	88.3 %
Date:	2025-08-09 11:18:03	Functions:	15	15	100.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * basebackup_incremental.c
       4             :  *    code for incremental backup support
       5             :  *
       6             :  * This code isn't actually in charge of taking an incremental backup;
       7             :  * the actual construction of the incremental backup happens in
       8             :  * basebackup.c. Here, we're concerned with providing the necessary
       9             :  * support for that operation. In particular, we need to parse the
      10             :  * backup manifest supplied by the user taking the incremental backup
      11             :  * and extract the required information from it.
      12             :  *
      13             :  * Portions Copyright (c) 2010-2025, PostgreSQL Global Development Group
      14             :  *
      15             :  * IDENTIFICATION
      16             :  *    src/backend/backup/basebackup_incremental.c
      17             :  *
      18             :  *-------------------------------------------------------------------------
      19             :  */
      20             : #include "postgres.h"
      21             : 
      22             : #include "access/timeline.h"
      23             : #include "access/xlog.h"
      24             : #include "backup/basebackup_incremental.h"
      25             : #include "backup/walsummary.h"
      26             : #include "common/blkreftable.h"
      27             : #include "common/hashfn.h"
      28             : #include "common/int.h"
      29             : #include "common/parse_manifest.h"
      30             : #include "postmaster/walsummarizer.h"
      31             : 
      32             : #define BLOCKS_PER_READ         512
      33             : 
      34             : /*
      35             :  * We expect to find the last lines of the manifest, including the checksum,
      36             :  * in the last MIN_CHUNK bytes of the manifest. We trigger an incremental
      37             :  * parse step if we are about to overflow MAX_CHUNK bytes.
      38             :  */
      39             : #define MIN_CHUNK  1024
      40             : #define MAX_CHUNK (128 *  1024)
      41             : 
      42             : /*
      43             :  * Details extracted from the WAL ranges present in the supplied backup manifest.
      44             :  */
      45             : typedef struct
      46             : {
      47             :     TimeLineID  tli;
      48             :     XLogRecPtr  start_lsn;
      49             :     XLogRecPtr  end_lsn;
      50             : } backup_wal_range;
      51             : 
      52             : /*
      53             :  * Details extracted from the file list present in the supplied backup manifest.
      54             :  */
      55             : typedef struct
      56             : {
      57             :     uint32      status;
      58             :     const char *path;
      59             :     uint64      size;
      60             : } backup_file_entry;
      61             : 
      62             : static uint32 hash_string_pointer(const char *s);
      63             : #define SH_PREFIX               backup_file
      64             : #define SH_ELEMENT_TYPE         backup_file_entry
      65             : #define SH_KEY_TYPE             const char *
      66             : #define SH_KEY                  path
      67             : #define SH_HASH_KEY(tb, key)    hash_string_pointer(key)
      68             : #define SH_EQUAL(tb, a, b)      (strcmp(a, b) == 0)
      69             : #define SH_SCOPE                static inline
      70             : #define SH_DECLARE
      71             : #define SH_DEFINE
      72             : #include "lib/simplehash.h"
      73             : 
      74             : struct IncrementalBackupInfo
      75             : {
      76             :     /* Memory context for this object and its subsidiary objects. */
      77             :     MemoryContext mcxt;
      78             : 
      79             :     /* Temporary buffer for storing the manifest while parsing it. */
      80             :     StringInfoData buf;
      81             : 
      82             :     /* WAL ranges extracted from the backup manifest. */
      83             :     List       *manifest_wal_ranges;
      84             : 
      85             :     /*
      86             :      * Files extracted from the backup manifest.
      87             :      *
      88             :      * We don't really need this information, because we use WAL summaries to
      89             :      * figure out what's changed. It would be unsafe to just rely on the list
      90             :      * of files that existed before, because it's possible for a file to be
      91             :      * removed and a new one created with the same name and different
      92             :      * contents. In such cases, the whole file must still be sent. We can tell
      93             :      * from the WAL summaries whether that happened, but not from the file
      94             :      * list.
      95             :      *
      96             :      * Nonetheless, this data is useful for sanity checking. If a file that we
      97             :      * think we shouldn't need to send is not present in the manifest for the
      98             :      * prior backup, something has gone terribly wrong. We retain the file
      99             :      * names and sizes, but not the checksums or last modified times, for
     100             :      * which we have no use.
     101             :      *
     102             :      * One significant downside of storing this data is that it consumes
     103             :      * memory. If that turns out to be a problem, we might have to decide not
     104             :      * to retain this information, or to make it optional.
     105             :      */
     106             :     backup_file_hash *manifest_files;
     107             : 
     108             :     /*
     109             :      * Block-reference table for the incremental backup.
     110             :      *
     111             :      * It's possible that storing the entire block-reference table in memory
     112             :      * will be a problem for some users. The in-memory format that we're using
     113             :      * here is pretty efficient, converging to little more than 1 bit per
     114             :      * block for relation forks with large numbers of modified blocks. It's
     115             :      * possible, however, that if you try to perform an incremental backup of
     116             :      * a database with a sufficiently large number of relations on a
     117             :      * sufficiently small machine, you could run out of memory here. If that
     118             :      * turns out to be a problem in practice, we'll need to be more clever.
     119             :      */
     120             :     BlockRefTable *brtab;
     121             : 
     122             :     /*
     123             :      * State object for incremental JSON parsing
     124             :      */
     125             :     JsonManifestParseIncrementalState *inc_state;
     126             : };
     127             : 
     128             : static void manifest_process_version(JsonManifestParseContext *context,
     129             :                                      int manifest_version);
     130             : static void manifest_process_system_identifier(JsonManifestParseContext *context,
     131             :                                                uint64 manifest_system_identifier);
     132             : static void manifest_process_file(JsonManifestParseContext *context,
     133             :                                   const char *pathname,
     134             :                                   uint64 size,
     135             :                                   pg_checksum_type checksum_type,
     136             :                                   int checksum_length,
     137             :                                   uint8 *checksum_payload);
     138             : static void manifest_process_wal_range(JsonManifestParseContext *context,
     139             :                                        TimeLineID tli,
     140             :                                        XLogRecPtr start_lsn,
     141             :                                        XLogRecPtr end_lsn);
     142             : pg_noreturn static void manifest_report_error(JsonManifestParseContext *context,
     143             :                                               const char *fmt,...)
     144             :             pg_attribute_printf(2, 3);
     145             : static int  compare_block_numbers(const void *a, const void *b);
     146             : 
     147             : /*
     148             :  * Create a new object for storing information extracted from the manifest
     149             :  * supplied when creating an incremental backup.
     150             :  */
     151             : IncrementalBackupInfo *
     152          22 : CreateIncrementalBackupInfo(MemoryContext mcxt)
     153             : {
     154             :     IncrementalBackupInfo *ib;
     155             :     MemoryContext oldcontext;
     156             :     JsonManifestParseContext *context;
     157             : 
     158          22 :     oldcontext = MemoryContextSwitchTo(mcxt);
     159             : 
     160          22 :     ib = palloc0(sizeof(IncrementalBackupInfo));
     161          22 :     ib->mcxt = mcxt;
     162          22 :     initStringInfo(&ib->buf);
     163             : 
     164             :     /*
     165             :      * It's hard to guess how many files a "typical" installation will have in
     166             :      * the data directory, but a fresh initdb creates almost 1000 files as of
     167             :      * this writing, so it seems to make sense for our estimate to be
     168             :      * substantially higher.
     169             :      */
     170          22 :     ib->manifest_files = backup_file_create(mcxt, 10000, NULL);
     171             : 
     172          22 :     context = palloc0(sizeof(JsonManifestParseContext));
     173             :     /* Set up the callbacks that will be invoked while parsing the manifest. */
     174          22 :     context->private_data = ib;
     175          22 :     context->version_cb = manifest_process_version;
     176          22 :     context->system_identifier_cb = manifest_process_system_identifier;
     177          22 :     context->per_file_cb = manifest_process_file;
     178          22 :     context->per_wal_range_cb = manifest_process_wal_range;
     179          22 :     context->error_cb = manifest_report_error;
     180             : 
     181          22 :     ib->inc_state = json_parse_manifest_incremental_init(context);
     182             : 
     183          22 :     MemoryContextSwitchTo(oldcontext);
     184             : 
     185          22 :     return ib;
     186             : }
     187             : 
     188             : /*
     189             :  * Before taking an incremental backup, the caller must supply the backup
     190             :  * manifest from a prior backup. Each chunk of manifest data received
     191             :  * from the client should be passed to this function.
     192             :  */
     193             : void
     194          66 : AppendIncrementalManifestData(IncrementalBackupInfo *ib, const char *data,
     195             :                               int len)
     196             : {
     197             :     MemoryContext oldcontext;
     198             : 
     199             :     /* Switch to our memory context. */
     200          66 :     oldcontext = MemoryContextSwitchTo(ib->mcxt);
     201             : 
     202          66 :     if (ib->buf.len > MIN_CHUNK && ib->buf.len + len > MAX_CHUNK)
     203             :     {
     204             :         /*
     205             :          * Time for an incremental parse. We'll do all but the last MIN_CHUNK
     206             :          * bytes so that we have enough left for the final piece.
     207             :          */
     208          22 :         json_parse_manifest_incremental_chunk(ib->inc_state, ib->buf.data,
     209          22 :                                               ib->buf.len - MIN_CHUNK, false);
     210             :         /* now remove what we just parsed  */
     211          20 :         memmove(ib->buf.data, ib->buf.data + (ib->buf.len - MIN_CHUNK),
     212             :                 MIN_CHUNK + 1);
     213          20 :         ib->buf.len = MIN_CHUNK;
     214             :     }
     215             : 
     216          64 :     appendBinaryStringInfo(&ib->buf, data, len);
     217             : 
     218             :     /* Switch back to previous memory context. */
     219          64 :     MemoryContextSwitchTo(oldcontext);
     220          64 : }
     221             : 
     222             : /*
     223             :  * Finalize an IncrementalBackupInfo object after all manifest data has
     224             :  * been supplied via calls to AppendIncrementalManifestData.
     225             :  */
     226             : void
     227          20 : FinalizeIncrementalManifest(IncrementalBackupInfo *ib)
     228             : {
     229             :     MemoryContext oldcontext;
     230             : 
     231             :     /* Switch to our memory context. */
     232          20 :     oldcontext = MemoryContextSwitchTo(ib->mcxt);
     233             : 
     234             :     /* Parse the last chunk of the manifest */
     235          20 :     json_parse_manifest_incremental_chunk(ib->inc_state, ib->buf.data,
     236          20 :                                           ib->buf.len, true);
     237             : 
     238             :     /* Done with the buffer, so release memory. */
     239          20 :     pfree(ib->buf.data);
     240          20 :     ib->buf.data = NULL;
     241             : 
     242             :     /* Done with inc_state, so release that memory too */
     243          20 :     json_parse_manifest_incremental_shutdown(ib->inc_state);
     244             : 
     245             :     /* Switch back to previous memory context. */
     246          20 :     MemoryContextSwitchTo(oldcontext);
     247          20 : }
     248             : 
     249             : /*
     250             :  * Prepare to take an incremental backup.
     251             :  *
     252             :  * Before this function is called, AppendIncrementalManifestData and
     253             :  * FinalizeIncrementalManifest should have already been called to pass all
     254             :  * the manifest data to this object.
     255             :  *
     256             :  * This function performs sanity checks on the data extracted from the
     257             :  * manifest and figures out for which WAL ranges we need summaries, and
     258             :  * whether those summaries are available. Then, it reads and combines the
     259             :  * data from those summary files. It also updates the backup_state with the
     260             :  * reference TLI and LSN for the prior backup.
     261             :  */
     262             : void
     263          20 : PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
     264             :                             BackupState *backup_state)
     265             : {
     266             :     MemoryContext oldcontext;
     267             :     List       *expectedTLEs;
     268             :     List       *all_wslist,
     269          20 :                *required_wslist = NIL;
     270             :     ListCell   *lc;
     271             :     TimeLineHistoryEntry **tlep;
     272             :     int         num_wal_ranges;
     273             :     int         i;
     274          20 :     bool        found_backup_start_tli = false;
     275          20 :     TimeLineID  earliest_wal_range_tli = 0;
     276          20 :     XLogRecPtr  earliest_wal_range_start_lsn = InvalidXLogRecPtr;
     277          20 :     TimeLineID  latest_wal_range_tli = 0;
     278             : 
     279             :     Assert(ib->buf.data == NULL);
     280             : 
     281             :     /* Switch to our memory context. */
     282          20 :     oldcontext = MemoryContextSwitchTo(ib->mcxt);
     283             : 
     284             :     /*
     285             :      * A valid backup manifest must always contain at least one WAL range
     286             :      * (usually exactly one, unless the backup spanned a timeline switch).
     287             :      */
     288          20 :     num_wal_ranges = list_length(ib->manifest_wal_ranges);
     289          20 :     if (num_wal_ranges == 0)
     290           0 :         ereport(ERROR,
     291             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     292             :                  errmsg("manifest contains no required WAL ranges")));
     293             : 
     294             :     /*
     295             :      * Match up the TLIs that appear in the WAL ranges of the backup manifest
     296             :      * with those that appear in this server's timeline history. We expect
     297             :      * every backup_wal_range to match to a TimeLineHistoryEntry; if it does
     298             :      * not, that's an error.
     299             :      *
     300             :      * This loop also decides which of the WAL ranges in the manifest is most
     301             :      * ancient and which one is the newest, according to the timeline history
     302             :      * of this server, and stores TLIs of those WAL ranges into
     303             :      * earliest_wal_range_tli and latest_wal_range_tli. It also updates
     304             :      * earliest_wal_range_start_lsn to the start LSN of the WAL range for
     305             :      * earliest_wal_range_tli.
     306             :      *
     307             :      * Note that the return value of readTimeLineHistory puts the latest
     308             :      * timeline at the beginning of the list, not the end. Hence, the earliest
     309             :      * TLI is the one that occurs nearest the end of the list returned by
     310             :      * readTimeLineHistory, and the latest TLI is the one that occurs closest
     311             :      * to the beginning.
     312             :      */
     313          20 :     expectedTLEs = readTimeLineHistory(backup_state->starttli);
     314          20 :     tlep = palloc0(num_wal_ranges * sizeof(TimeLineHistoryEntry *));
     315          40 :     for (i = 0; i < num_wal_ranges; ++i)
     316             :     {
     317          20 :         backup_wal_range *range = list_nth(ib->manifest_wal_ranges, i);
     318          20 :         bool        saw_earliest_wal_range_tli = false;
     319          20 :         bool        saw_latest_wal_range_tli = false;
     320             : 
     321             :         /* Search this server's history for this WAL range's TLI. */
     322          22 :         foreach(lc, expectedTLEs)
     323             :         {
     324          22 :             TimeLineHistoryEntry *tle = lfirst(lc);
     325             : 
     326          22 :             if (tle->tli == range->tli)
     327             :             {
     328          20 :                 tlep[i] = tle;
     329          20 :                 break;
     330             :             }
     331             : 
     332           2 :             if (tle->tli == earliest_wal_range_tli)
     333           0 :                 saw_earliest_wal_range_tli = true;
     334           2 :             if (tle->tli == latest_wal_range_tli)
     335           0 :                 saw_latest_wal_range_tli = true;
     336             :         }
     337             : 
     338             :         /*
     339             :          * An incremental backup can only be taken relative to a backup that
     340             :          * represents a previous state of this server. If the backup requires
     341             :          * WAL from a timeline that's not in our history, that definitely
     342             :          * isn't the case.
     343             :          */
     344          20 :         if (tlep[i] == NULL)
     345           0 :             ereport(ERROR,
     346             :                     (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     347             :                      errmsg("timeline %u found in manifest, but not in this server's history",
     348             :                             range->tli)));
     349             : 
     350             :         /*
     351             :          * If we found this TLI in the server's history before encountering
     352             :          * the latest TLI seen so far in the server's history, then this TLI
     353             :          * is the latest one seen so far.
     354             :          *
     355             :          * If on the other hand we saw the earliest TLI seen so far before
     356             :          * finding this TLI, this TLI is earlier than the earliest one seen so
     357             :          * far. And if this is the first TLI for which we've searched, it's
     358             :          * also the earliest one seen so far.
     359             :          *
     360             :          * On the first loop iteration, both things should necessarily be
     361             :          * true.
     362             :          */
     363          20 :         if (!saw_latest_wal_range_tli)
     364          20 :             latest_wal_range_tli = range->tli;
     365          20 :         if (earliest_wal_range_tli == 0 || saw_earliest_wal_range_tli)
     366             :         {
     367          20 :             earliest_wal_range_tli = range->tli;
     368          20 :             earliest_wal_range_start_lsn = range->start_lsn;
     369             :         }
     370             :     }
     371             : 
     372             :     /*
     373             :      * Propagate information about the prior backup into the backup_label that
     374             :      * will be generated for this backup.
     375             :      */
     376          20 :     backup_state->istartpoint = earliest_wal_range_start_lsn;
     377          20 :     backup_state->istarttli = earliest_wal_range_tli;
     378             : 
     379             :     /*
     380             :      * Sanity check start and end LSNs for the WAL ranges in the manifest.
     381             :      *
     382             :      * Commonly, there won't be any timeline switches during the prior backup
     383             :      * at all, but if there are, they should happen at the same LSNs that this
     384             :      * server switched timelines.
     385             :      *
     386             :      * Whether there are any timeline switches during the prior backup or not,
     387             :      * the prior backup shouldn't require any WAL from a timeline prior to the
     388             :      * start of that timeline. It also shouldn't require any WAL from later
     389             :      * than the start of this backup.
     390             :      *
     391             :      * If any of these sanity checks fail, one possible explanation is that
     392             :      * the user has generated WAL on the same timeline with the same LSNs more
     393             :      * than once. For instance, if two standbys running on timeline 1 were
     394             :      * both promoted and (due to a broken archiving setup) both selected new
     395             :      * timeline ID 2, then it's possible that one of these checks might trip.
     396             :      *
     397             :      * Note that there are lots of ways for the user to do something very bad
     398             :      * without tripping any of these checks, and they are not intended to be
     399             :      * comprehensive. It's pretty hard to see how we could be certain of
     400             :      * anything here. However, if there's a problem staring us right in the
     401             :      * face, it's best to report it, so we do.
     402             :      */
     403          40 :     for (i = 0; i < num_wal_ranges; ++i)
     404             :     {
     405          20 :         backup_wal_range *range = list_nth(ib->manifest_wal_ranges, i);
     406             : 
     407          20 :         if (range->tli == earliest_wal_range_tli)
     408             :         {
     409          20 :             if (range->start_lsn < tlep[i]->begin)
     410           0 :                 ereport(ERROR,
     411             :                         (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     412             :                          errmsg("manifest requires WAL from initial timeline %u starting at %X/%08X, but that timeline begins at %X/%08X",
     413             :                                 range->tli,
     414             :                                 LSN_FORMAT_ARGS(range->start_lsn),
     415             :                                 LSN_FORMAT_ARGS(tlep[i]->begin))));
     416             :         }
     417             :         else
     418             :         {
     419           0 :             if (range->start_lsn != tlep[i]->begin)
     420           0 :                 ereport(ERROR,
     421             :                         (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     422             :                          errmsg("manifest requires WAL from continuation timeline %u starting at %X/%08X, but that timeline begins at %X/%08X",
     423             :                                 range->tli,
     424             :                                 LSN_FORMAT_ARGS(range->start_lsn),
     425             :                                 LSN_FORMAT_ARGS(tlep[i]->begin))));
     426             :         }
     427             : 
     428          20 :         if (range->tli == latest_wal_range_tli)
     429             :         {
     430          20 :             if (range->end_lsn > backup_state->startpoint)
     431           0 :                 ereport(ERROR,
     432             :                         (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     433             :                          errmsg("manifest requires WAL from final timeline %u ending at %X/%08X, but this backup starts at %X/%08X",
     434             :                                 range->tli,
     435             :                                 LSN_FORMAT_ARGS(range->end_lsn),
     436             :                                 LSN_FORMAT_ARGS(backup_state->startpoint)),
     437             :                          errhint("This can happen for incremental backups on a standby if there was little activity since the previous backup.")));
     438             :         }
     439             :         else
     440             :         {
     441           0 :             if (range->end_lsn != tlep[i]->end)
     442           0 :                 ereport(ERROR,
     443             :                         (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     444             :                          errmsg("manifest requires WAL from non-final timeline %u ending at %X/%08X, but this server switched timelines at %X/%08X",
     445             :                                 range->tli,
     446             :                                 LSN_FORMAT_ARGS(range->end_lsn),
     447             :                                 LSN_FORMAT_ARGS(tlep[i]->end))));
     448             :         }
     449             : 
     450             :     }
     451             : 
     452             :     /*
     453             :      * Wait for WAL summarization to catch up to the backup start LSN. This
     454             :      * will throw an error if the WAL summarizer appears to be stuck. If WAL
     455             :      * summarization gets disabled while we're waiting, this will return
     456             :      * immediately, and we'll error out further down if the WAL summaries are
     457             :      * incomplete.
     458             :      */
     459          20 :     WaitForWalSummarization(backup_state->startpoint);
     460             : 
     461             :     /*
     462             :      * Retrieve a list of all WAL summaries on any timeline that overlap with
     463             :      * the LSN range of interest. We could instead call GetWalSummaries() once
     464             :      * per timeline in the loop that follows, but that would involve reading
     465             :      * the directory multiple times. It should be mildly faster - and perhaps
     466             :      * a bit safer - to do it just once.
     467             :      */
     468          20 :     all_wslist = GetWalSummaries(0, earliest_wal_range_start_lsn,
     469             :                                  backup_state->startpoint);
     470             : 
     471             :     /*
     472             :      * We need WAL summaries for everything that happened during the prior
     473             :      * backup and everything that happened afterward up until the point where
     474             :      * the current backup started.
     475             :      */
     476          22 :     foreach(lc, expectedTLEs)
     477             :     {
     478          22 :         TimeLineHistoryEntry *tle = lfirst(lc);
     479          22 :         XLogRecPtr  tli_start_lsn = tle->begin;
     480          22 :         XLogRecPtr  tli_end_lsn = tle->end;
     481          22 :         XLogRecPtr  tli_missing_lsn = InvalidXLogRecPtr;
     482             :         List       *tli_wslist;
     483             : 
     484             :         /*
     485             :          * Working through the history of this server from the current
     486             :          * timeline backwards, we skip everything until we find the timeline
     487             :          * where this backup started. Most of the time, this means we won't
     488             :          * skip anything at all, as it's unlikely that the timeline has
     489             :          * changed since the beginning of the backup moments ago.
     490             :          */
     491          22 :         if (tle->tli == backup_state->starttli)
     492             :         {
     493          20 :             found_backup_start_tli = true;
     494          20 :             tli_end_lsn = backup_state->startpoint;
     495             :         }
     496           2 :         else if (!found_backup_start_tli)
     497           0 :             continue;
     498             : 
     499             :         /*
     500             :          * Find the summaries that overlap the LSN range of interest for this
     501             :          * timeline. If this is the earliest timeline involved, the range of
     502             :          * interest begins with the start LSN of the prior backup; otherwise,
     503             :          * it begins at the LSN at which this timeline came into existence. If
     504             :          * this is the latest TLI involved, the range of interest ends at the
     505             :          * start LSN of the current backup; otherwise, it ends at the point
     506             :          * where we switched from this timeline to the next one.
     507             :          */
     508          22 :         if (tle->tli == earliest_wal_range_tli)
     509          20 :             tli_start_lsn = earliest_wal_range_start_lsn;
     510          22 :         tli_wslist = FilterWalSummaries(all_wslist, tle->tli,
     511             :                                         tli_start_lsn, tli_end_lsn);
     512             : 
     513             :         /*
     514             :          * There is no guarantee that the WAL summaries we found cover the
     515             :          * entire range of LSNs for which summaries are required, or indeed
     516             :          * that we found any WAL summaries at all. Check whether we have a
     517             :          * problem of that sort.
     518             :          */
     519          22 :         if (!WalSummariesAreComplete(tli_wslist, tli_start_lsn, tli_end_lsn,
     520             :                                      &tli_missing_lsn))
     521             :         {
     522           0 :             if (XLogRecPtrIsInvalid(tli_missing_lsn))
     523           0 :                 ereport(ERROR,
     524             :                         (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     525             :                          errmsg("WAL summaries are required on timeline %u from %X/%08X to %X/%08X, but no summaries for that timeline and LSN range exist",
     526             :                                 tle->tli,
     527             :                                 LSN_FORMAT_ARGS(tli_start_lsn),
     528             :                                 LSN_FORMAT_ARGS(tli_end_lsn))));
     529             :             else
     530           0 :                 ereport(ERROR,
     531             :                         (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     532             :                          errmsg("WAL summaries are required on timeline %u from %X/%08X to %X/%08X, but the summaries for that timeline and LSN range are incomplete",
     533             :                                 tle->tli,
     534             :                                 LSN_FORMAT_ARGS(tli_start_lsn),
     535             :                                 LSN_FORMAT_ARGS(tli_end_lsn)),
     536             :                          errdetail("The first unsummarized LSN in this range is %X/%08X.",
     537             :                                    LSN_FORMAT_ARGS(tli_missing_lsn))));
     538             :         }
     539             : 
     540             :         /*
     541             :          * Remember that we need to read these summaries.
     542             :          *
     543             :          * Technically, it's possible that this could read more files than
     544             :          * required, since tli_wslist in theory could contain redundant
     545             :          * summaries. For instance, if we have a summary from 0/10000000 to
     546             :          * 0/20000000 and also one from 0/00000000 to 0/30000000, then the
     547             :          * latter subsumes the former and the former could be ignored.
     548             :          *
     549             :          * We ignore this possibility because the WAL summarizer only tries to
     550             :          * generate summaries that do not overlap. If somehow they exist,
     551             :          * we'll do a bit of extra work but the results should still be
     552             :          * correct.
     553             :          */
     554          22 :         required_wslist = list_concat(required_wslist, tli_wslist);
     555             : 
     556             :         /*
     557             :          * Timelines earlier than the one in which the prior backup began are
     558             :          * not relevant.
     559             :          */
     560          22 :         if (tle->tli == earliest_wal_range_tli)
     561          20 :             break;
     562             :     }
     563             : 
     564             :     /*
     565             :      * Read all of the required block reference table files and merge all of
     566             :      * the data into a single in-memory block reference table.
     567             :      *
     568             :      * See the comments for struct IncrementalBackupInfo for some thoughts on
     569             :      * memory usage.
     570             :      */
     571          20 :     ib->brtab = CreateEmptyBlockRefTable();
     572          56 :     foreach(lc, required_wslist)
     573             :     {
     574          36 :         WalSummaryFile *ws = lfirst(lc);
     575             :         WalSummaryIO wsio;
     576             :         BlockRefTableReader *reader;
     577             :         RelFileLocator rlocator;
     578             :         ForkNumber  forknum;
     579             :         BlockNumber limit_block;
     580             :         BlockNumber blocks[BLOCKS_PER_READ];
     581             : 
     582          36 :         wsio.file = OpenWalSummaryFile(ws, false);
     583          36 :         wsio.filepos = 0;
     584          36 :         ereport(DEBUG1,
     585             :                 (errmsg_internal("reading WAL summary file \"%s\"",
     586             :                                  FilePathName(wsio.file))));
     587          36 :         reader = CreateBlockRefTableReader(ReadWalSummary, &wsio,
     588             :                                            FilePathName(wsio.file),
     589             :                                            ReportWalSummaryError, NULL);
     590         738 :         while (BlockRefTableReaderNextRelation(reader, &rlocator, &forknum,
     591             :                                                &limit_block))
     592             :         {
     593         702 :             BlockRefTableSetLimitBlock(ib->brtab, &rlocator,
     594             :                                        forknum, limit_block);
     595             : 
     596             :             while (1)
     597         508 :             {
     598             :                 unsigned    nblocks;
     599             :                 unsigned    i;
     600             : 
     601        1210 :                 nblocks = BlockRefTableReaderGetBlocks(reader, blocks,
     602             :                                                        BLOCKS_PER_READ);
     603        1210 :                 if (nblocks == 0)
     604         702 :                     break;
     605             : 
     606        2350 :                 for (i = 0; i < nblocks; ++i)
     607        1842 :                     BlockRefTableMarkBlockModified(ib->brtab, &rlocator,
     608             :                                                    forknum, blocks[i]);
     609             :             }
     610             :         }
     611          36 :         DestroyBlockRefTableReader(reader);
     612          36 :         FileClose(wsio.file);
     613             :     }
     614             : 
     615             :     /* Switch back to previous memory context. */
     616          20 :     MemoryContextSwitchTo(oldcontext);
     617          20 : }
     618             : 
     619             : /*
     620             :  * Get the pathname that should be used when a file is sent incrementally.
     621             :  *
     622             :  * The result is a palloc'd string.
     623             :  */
     624             : char *
     625        3210 : GetIncrementalFilePath(Oid dboid, Oid spcoid, RelFileNumber relfilenumber,
     626             :                        ForkNumber forknum, unsigned segno)
     627             : {
     628             :     RelPathStr  path;
     629             :     char       *lastslash;
     630             :     char       *ipath;
     631             : 
     632        3210 :     path = GetRelationPath(dboid, spcoid, relfilenumber, INVALID_PROC_NUMBER,
     633             :                            forknum);
     634             : 
     635        3210 :     lastslash = strrchr(path.str, '/');
     636             :     Assert(lastslash != NULL);
     637        3210 :     *lastslash = '\0';
     638             : 
     639        3210 :     if (segno > 0)
     640           0 :         ipath = psprintf("%s/INCREMENTAL.%s.%u", path.str, lastslash + 1, segno);
     641             :     else
     642        3210 :         ipath = psprintf("%s/INCREMENTAL.%s", path.str, lastslash + 1);
     643             : 
     644        3210 :     return ipath;
     645             : }
     646             : 
     647             : /*
     648             :  * How should we back up a particular file as part of an incremental backup?
     649             :  *
     650             :  * If the return value is BACK_UP_FILE_FULLY, caller should back up the whole
     651             :  * file just as if this were not an incremental backup.  The contents of the
     652             :  * relative_block_numbers array are unspecified in this case.
     653             :  *
     654             :  * If the return value is BACK_UP_FILE_INCREMENTALLY, caller should include
     655             :  * an incremental file in the backup instead of the entire file. On return,
     656             :  * *num_blocks_required will be set to the number of blocks that need to be
     657             :  * sent, and the actual block numbers will have been stored in
     658             :  * relative_block_numbers, which should be an array of at least RELSEG_SIZE.
     659             :  * In addition, *truncation_block_length will be set to the value that should
     660             :  * be included in the incremental file.
     661             :  */
     662             : FileBackupMethod
     663       20284 : GetFileBackupMethod(IncrementalBackupInfo *ib, const char *path,
     664             :                     Oid dboid, Oid spcoid,
     665             :                     RelFileNumber relfilenumber, ForkNumber forknum,
     666             :                     unsigned segno, size_t size,
     667             :                     unsigned *num_blocks_required,
     668             :                     BlockNumber *relative_block_numbers,
     669             :                     unsigned *truncation_block_length)
     670             : {
     671             :     BlockNumber limit_block;
     672             :     BlockNumber start_blkno;
     673             :     BlockNumber stop_blkno;
     674             :     RelFileLocator rlocator;
     675             :     BlockRefTableEntry *brtentry;
     676             :     unsigned    i;
     677             :     unsigned    nblocks;
     678             : 
     679             :     /* Should only be called after PrepareForIncrementalBackup. */
     680             :     Assert(ib->buf.data == NULL);
     681             : 
     682             :     /*
     683             :      * dboid could be InvalidOid if shared rel, but spcoid and relfilenumber
     684             :      * should have legal values.
     685             :      */
     686             :     Assert(OidIsValid(spcoid));
     687             :     Assert(RelFileNumberIsValid(relfilenumber));
     688             : 
     689             :     /*
     690             :      * If the file size is too large or not a multiple of BLCKSZ, then
     691             :      * something weird is happening, so give up and send the whole file.
     692             :      */
     693       20284 :     if ((size % BLCKSZ) != 0 || size / BLCKSZ > RELSEG_SIZE)
     694           0 :         return BACK_UP_FILE_FULLY;
     695             : 
     696             :     /*
     697             :      * The free-space map fork is not properly WAL-logged, so we need to
     698             :      * backup the entire file every time.
     699             :      */
     700       20284 :     if (forknum == FSM_FORKNUM)
     701        2474 :         return BACK_UP_FILE_FULLY;
     702             : 
     703             :     /*
     704             :      * If this file was not part of the prior backup, back it up fully.
     705             :      *
     706             :      * If this file was created after the prior backup and before the start of
     707             :      * the current backup, then the WAL summary information will tell us to
     708             :      * back up the whole file. However, if this file was created after the
     709             :      * start of the current backup, then the WAL summary won't know anything
     710             :      * about it. Without this logic, we would erroneously conclude that it was
     711             :      * OK to send it incrementally.
     712             :      *
     713             :      * Note that the file could have existed at the time of the prior backup,
     714             :      * gotten deleted, and then a new file with the same name could have been
     715             :      * created.  In that case, this logic won't prevent the file from being
     716             :      * backed up incrementally. But, if the deletion happened before the start
     717             :      * of the current backup, the limit block will be 0, inducing a full
     718             :      * backup. If the deletion happened after the start of the current backup,
     719             :      * reconstruction will erroneously combine blocks from the current
     720             :      * lifespan of the file with blocks from the previous lifespan -- but in
     721             :      * this type of case, WAL replay to reach backup consistency should remove
     722             :      * and recreate the file anyway, so the initial bogus contents should not
     723             :      * matter.
     724             :      */
     725       17810 :     if (backup_file_lookup(ib->manifest_files, path) == NULL)
     726             :     {
     727             :         char       *ipath;
     728             : 
     729        3210 :         ipath = GetIncrementalFilePath(dboid, spcoid, relfilenumber,
     730             :                                        forknum, segno);
     731        3210 :         if (backup_file_lookup(ib->manifest_files, ipath) == NULL)
     732         548 :             return BACK_UP_FILE_FULLY;
     733             :     }
     734             : 
     735             :     /*
     736             :      * Look up the special block reference table entry for the database as a
     737             :      * whole.
     738             :      */
     739       17262 :     rlocator.spcOid = spcoid;
     740       17262 :     rlocator.dbOid = dboid;
     741       17262 :     rlocator.relNumber = 0;
     742       17262 :     if (BlockRefTableGetEntry(ib->brtab, &rlocator, MAIN_FORKNUM,
     743             :                               &limit_block) != NULL)
     744             :     {
     745             :         /*
     746             :          * According to the WAL summary, this database OID/tablespace OID
     747             :          * pairing has been created since the previous backup. So, everything
     748             :          * in it must be backed up fully.
     749             :          */
     750         522 :         return BACK_UP_FILE_FULLY;
     751             :     }
     752             : 
     753             :     /* Look up the block reference table entry for this relfilenode. */
     754       16740 :     rlocator.relNumber = relfilenumber;
     755       16740 :     brtentry = BlockRefTableGetEntry(ib->brtab, &rlocator, forknum,
     756             :                                      &limit_block);
     757             : 
     758             :     /*
     759             :      * If there is no entry, then there have been no WAL-logged changes to the
     760             :      * relation since the predecessor backup was taken, so we can back it up
     761             :      * incrementally and need not include any modified blocks.
     762             :      *
     763             :      * However, if the file is zero-length, we should do a full backup,
     764             :      * because an incremental file is always more than zero length, and it's
     765             :      * silly to take an incremental backup when a full backup would be
     766             :      * smaller.
     767             :      */
     768       16740 :     if (brtentry == NULL)
     769             :     {
     770       16672 :         if (size == 0)
     771        3400 :             return BACK_UP_FILE_FULLY;
     772       13272 :         *num_blocks_required = 0;
     773       13272 :         *truncation_block_length = size / BLCKSZ;
     774       13272 :         return BACK_UP_FILE_INCREMENTALLY;
     775             :     }
     776             : 
     777             :     /*
     778             :      * If the limit_block is less than or equal to the point where this
     779             :      * segment starts, send the whole file.
     780             :      */
     781          68 :     if (limit_block <= segno * RELSEG_SIZE)
     782           0 :         return BACK_UP_FILE_FULLY;
     783             : 
     784             :     /*
     785             :      * Get relevant entries from the block reference table entry.
     786             :      *
     787             :      * We shouldn't overflow computing the start or stop block numbers, but if
     788             :      * it manages to happen somehow, detect it and throw an error.
     789             :      */
     790          68 :     start_blkno = segno * RELSEG_SIZE;
     791          68 :     stop_blkno = start_blkno + (size / BLCKSZ);
     792          68 :     if (start_blkno / RELSEG_SIZE != segno || stop_blkno < start_blkno)
     793           0 :         ereport(ERROR,
     794             :                 errcode(ERRCODE_INTERNAL_ERROR),
     795             :                 errmsg_internal("overflow computing block number bounds for segment %u with size %zu",
     796             :                                 segno, size));
     797             : 
     798             :     /*
     799             :      * This will write *absolute* block numbers into the output array, but
     800             :      * we'll transpose them below.
     801             :      */
     802          68 :     nblocks = BlockRefTableEntryGetBlocks(brtentry, start_blkno, stop_blkno,
     803             :                                           relative_block_numbers, RELSEG_SIZE);
     804             :     Assert(nblocks <= RELSEG_SIZE);
     805             : 
     806             :     /*
     807             :      * If we're going to have to send nearly all of the blocks, then just send
     808             :      * the whole file, because that won't require much extra storage or
     809             :      * transfer and will speed up and simplify backup restoration. It's not
     810             :      * clear what threshold is most appropriate here and perhaps it ought to
     811             :      * be configurable, but for now we're just going to say that if we'd need
     812             :      * to send 90% of the blocks anyway, give up and send the whole file.
     813             :      *
     814             :      * NB: If you change the threshold here, at least make sure to back up the
     815             :      * file fully when every single block must be sent, because there's
     816             :      * nothing good about sending an incremental file in that case.
     817             :      */
     818          68 :     if (nblocks * BLCKSZ > size * 0.9)
     819          18 :         return BACK_UP_FILE_FULLY;
     820             : 
     821             :     /*
     822             :      * Looks like we can send an incremental file, so sort the block numbers
     823             :      * and then transpose them from absolute block numbers to relative block
     824             :      * numbers if necessary.
     825             :      *
     826             :      * NB: If the block reference table was using the bitmap representation
     827             :      * for a given chunk, the block numbers in that chunk will already be
     828             :      * sorted, but when the array-of-offsets representation is used, we can
     829             :      * receive block numbers here out of order.
     830             :      */
     831          50 :     qsort(relative_block_numbers, nblocks, sizeof(BlockNumber),
     832             :           compare_block_numbers);
     833          50 :     if (start_blkno != 0)
     834             :     {
     835           0 :         for (i = 0; i < nblocks; ++i)
     836           0 :             relative_block_numbers[i] -= start_blkno;
     837             :     }
     838          50 :     *num_blocks_required = nblocks;
     839             : 
     840             :     /*
     841             :      * The truncation block length is the minimum length of the reconstructed
     842             :      * file. Any block numbers below this threshold that are not present in
     843             :      * the backup need to be fetched from the prior backup. At or above this
     844             :      * threshold, blocks should only be included in the result if they are
     845             :      * present in the backup. (This may require inserting zero blocks if the
     846             :      * blocks included in the backup are non-consecutive.)
     847             :      */
     848          50 :     *truncation_block_length = size / BLCKSZ;
     849          50 :     if (BlockNumberIsValid(limit_block))
     850             :     {
     851           0 :         unsigned    relative_limit = limit_block - segno * RELSEG_SIZE;
     852             : 
     853           0 :         if (*truncation_block_length < relative_limit)
     854           0 :             *truncation_block_length = relative_limit;
     855             :     }
     856             : 
     857             :     /* Send it incrementally. */
     858          50 :     return BACK_UP_FILE_INCREMENTALLY;
     859             : }
     860             : 
     861             : /*
     862             :  * Compute the size for a header of an incremental file containing a given
     863             :  * number of blocks. The header is rounded to a multiple of BLCKSZ, but
     864             :  * only if the file will store some block data.
     865             :  */
     866             : size_t
     867       13322 : GetIncrementalHeaderSize(unsigned num_blocks_required)
     868             : {
     869             :     size_t      result;
     870             : 
     871             :     /* Make sure we're not going to overflow. */
     872             :     Assert(num_blocks_required <= RELSEG_SIZE);
     873             : 
     874             :     /*
     875             :      * Three four byte quantities (magic number, truncation block length,
     876             :      * block count) followed by block numbers.
     877             :      */
     878       13322 :     result = 3 * sizeof(uint32) + (sizeof(BlockNumber) * num_blocks_required);
     879             : 
     880             :     /*
     881             :      * Round the header size to a multiple of BLCKSZ - when not a multiple of
     882             :      * BLCKSZ, add the missing fraction of a block. But do this only if the
     883             :      * file will store data for some blocks, otherwise keep it small.
     884             :      */
     885       13322 :     if ((num_blocks_required > 0) && (result % BLCKSZ != 0))
     886          50 :         result += BLCKSZ - (result % BLCKSZ);
     887             : 
     888       13322 :     return result;
     889             : }
     890             : 
     891             : /*
     892             :  * Compute the size for an incremental file containing a given number of blocks.
     893             :  */
     894             : size_t
     895       13322 : GetIncrementalFileSize(unsigned num_blocks_required)
     896             : {
     897             :     size_t      result;
     898             : 
     899             :     /* Make sure we're not going to overflow. */
     900             :     Assert(num_blocks_required <= RELSEG_SIZE);
     901             : 
     902             :     /*
     903             :      * Header with three four byte quantities (magic number, truncation block
     904             :      * length, block count) followed by block numbers, rounded to a multiple
     905             :      * of BLCKSZ (for files with block data), followed by block contents.
     906             :      */
     907       13322 :     result = GetIncrementalHeaderSize(num_blocks_required);
     908       13322 :     result += BLCKSZ * num_blocks_required;
     909             : 
     910       13322 :     return result;
     911             : }
     912             : 
     913             : /*
     914             :  * Helper function for filemap hash table.
     915             :  */
     916             : static uint32
     917       42348 : hash_string_pointer(const char *s)
     918             : {
     919       42348 :     unsigned char *ss = (unsigned char *) s;
     920             : 
     921       42348 :     return hash_bytes(ss, strlen(s));
     922             : }
     923             : 
     924             : /*
     925             :  * This callback to validate the manifest version for incremental backup.
     926             :  */
     927             : static void
     928          22 : manifest_process_version(JsonManifestParseContext *context,
     929             :                          int manifest_version)
     930             : {
     931             :     /* Incremental backups don't work with manifest version 1 */
     932          22 :     if (manifest_version == 1)
     933           0 :         context->error_cb(context,
     934             :                           "backup manifest version 1 does not support incremental backup");
     935          22 : }
     936             : 
     937             : /*
     938             :  * This callback to validate the manifest system identifier against the current
     939             :  * database server.
     940             :  */
     941             : static void
     942          22 : manifest_process_system_identifier(JsonManifestParseContext *context,
     943             :                                    uint64 manifest_system_identifier)
     944             : {
     945             :     uint64      system_identifier;
     946             : 
     947             :     /* Get system identifier of current system */
     948          22 :     system_identifier = GetSystemIdentifier();
     949             : 
     950          22 :     if (manifest_system_identifier != system_identifier)
     951           2 :         context->error_cb(context,
     952             :                           "system identifier in backup manifest is %" PRIu64 ", but database system identifier is %" PRIu64,
     953             :                           manifest_system_identifier,
     954             :                           system_identifier);
     955          20 : }
     956             : 
     957             : /*
     958             :  * This callback is invoked for each file mentioned in the backup manifest.
     959             :  *
     960             :  * We store the path to each file and the size of each file for sanity-checking
     961             :  * purposes. For further details, see comments for IncrementalBackupInfo.
     962             :  */
     963             : static void
     964       20650 : manifest_process_file(JsonManifestParseContext *context,
     965             :                       const char *pathname, uint64 size,
     966             :                       pg_checksum_type checksum_type,
     967             :                       int checksum_length,
     968             :                       uint8 *checksum_payload)
     969             : {
     970       20650 :     IncrementalBackupInfo *ib = context->private_data;
     971             :     backup_file_entry *entry;
     972             :     bool        found;
     973             : 
     974       20650 :     entry = backup_file_insert(ib->manifest_files, pathname, &found);
     975       20650 :     if (!found)
     976             :     {
     977       20650 :         entry->path = MemoryContextStrdup(ib->manifest_files->ctx,
     978             :                                           pathname);
     979       20650 :         entry->size = size;
     980             :     }
     981       20650 : }
     982             : 
     983             : /*
     984             :  * This callback is invoked for each WAL range mentioned in the backup
     985             :  * manifest.
     986             :  *
     987             :  * We're just interested in learning the oldest LSN and the corresponding TLI
     988             :  * that appear in any WAL range.
     989             :  */
     990             : static void
     991          20 : manifest_process_wal_range(JsonManifestParseContext *context,
     992             :                            TimeLineID tli, XLogRecPtr start_lsn,
     993             :                            XLogRecPtr end_lsn)
     994             : {
     995          20 :     IncrementalBackupInfo *ib = context->private_data;
     996          20 :     backup_wal_range *range = palloc(sizeof(backup_wal_range));
     997             : 
     998          20 :     range->tli = tli;
     999          20 :     range->start_lsn = start_lsn;
    1000          20 :     range->end_lsn = end_lsn;
    1001          20 :     ib->manifest_wal_ranges = lappend(ib->manifest_wal_ranges, range);
    1002          20 : }
    1003             : 
    1004             : /*
    1005             :  * This callback is invoked if an error occurs while parsing the backup
    1006             :  * manifest.
    1007             :  */
    1008             : static void
    1009           2 : manifest_report_error(JsonManifestParseContext *context, const char *fmt,...)
    1010             : {
    1011             :     StringInfoData errbuf;
    1012             : 
    1013           2 :     initStringInfo(&errbuf);
    1014             : 
    1015             :     for (;;)
    1016           0 :     {
    1017             :         va_list     ap;
    1018             :         int         needed;
    1019             : 
    1020           2 :         va_start(ap, fmt);
    1021           2 :         needed = appendStringInfoVA(&errbuf, fmt, ap);
    1022           2 :         va_end(ap);
    1023           2 :         if (needed == 0)
    1024           2 :             break;
    1025           0 :         enlargeStringInfo(&errbuf, needed);
    1026             :     }
    1027             : 
    1028           2 :     ereport(ERROR,
    1029             :             errmsg_internal("%s", errbuf.data));
    1030             : }
    1031             : 
    1032             : /*
    1033             :  * Quicksort comparator for block numbers.
    1034             :  */
    1035             : static int
    1036          40 : compare_block_numbers(const void *a, const void *b)
    1037             : {
    1038          40 :     BlockNumber aa = *(BlockNumber *) a;
    1039          40 :     BlockNumber bb = *(BlockNumber *) b;
    1040             : 
    1041          40 :     return pg_cmp_u32(aa, bb);
    1042             : }

Generated by: LCOV version 1.16