LCOV - code coverage report
Current view: top level - src/backend/backup - basebackup.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 479 577 83.0 %
Date: 2023-10-02 07:10:39 Functions: 12 13 92.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * basebackup.c
       4             :  *    code for taking a base backup and streaming it to a standby
       5             :  *
       6             :  * Portions Copyright (c) 2010-2023, PostgreSQL Global Development Group
       7             :  *
       8             :  * IDENTIFICATION
       9             :  *    src/backend/backup/basebackup.c
      10             :  *
      11             :  *-------------------------------------------------------------------------
      12             :  */
      13             : #include "postgres.h"
      14             : 
      15             : #include <sys/stat.h>
      16             : #include <unistd.h>
      17             : #include <time.h>
      18             : 
      19             : #include "access/xlog_internal.h"
      20             : #include "access/xlogbackup.h"
      21             : #include "backup/backup_manifest.h"
      22             : #include "backup/basebackup.h"
      23             : #include "backup/basebackup_sink.h"
      24             : #include "backup/basebackup_target.h"
      25             : #include "commands/defrem.h"
      26             : #include "common/compression.h"
      27             : #include "common/file_perm.h"
      28             : #include "common/file_utils.h"
      29             : #include "lib/stringinfo.h"
      30             : #include "miscadmin.h"
      31             : #include "nodes/pg_list.h"
      32             : #include "pgstat.h"
      33             : #include "pgtar.h"
      34             : #include "port.h"
      35             : #include "postmaster/syslogger.h"
      36             : #include "replication/walsender.h"
      37             : #include "replication/walsender_private.h"
      38             : #include "storage/bufpage.h"
      39             : #include "storage/checksum.h"
      40             : #include "storage/dsm_impl.h"
      41             : #include "storage/ipc.h"
      42             : #include "storage/reinit.h"
      43             : #include "utils/builtins.h"
      44             : #include "utils/guc.h"
      45             : #include "utils/ps_status.h"
      46             : #include "utils/relcache.h"
      47             : #include "utils/resowner.h"
      48             : #include "utils/timestamp.h"
      49             : 
      50             : /*
      51             :  * How much data do we want to send in one CopyData message? Note that
      52             :  * this may also result in reading the underlying files in chunks of this
      53             :  * size.
      54             :  *
      55             :  * NB: The buffer size is required to be a multiple of the system block
      56             :  * size, so use that value instead if it's bigger than our preference.
      57             :  */
      58             : #define SINK_BUFFER_LENGTH          Max(32768, BLCKSZ)
      59             : 
      60             : typedef struct                 /* options for one base backup, see perform_base_backup() */
      61             : {
      62             :     const char *label;          /* label handed to do_pg_backup_start() */
      63             :     bool        progress;       /* estimate the total backup size up front? */
      64             :     bool        fastcheckpoint; /* handed to do_pg_backup_start() */
      65             :     bool        nowait;         /* negation is handed to do_pg_backup_stop() */
      66             :     bool        includewal;     /* append the required WAL files to the last archive? */
      67             :     uint32      maxrate;
      68             :     bool        sendtblspcmapfile;  /* include the tablespace_map file in base.tar? */
      69             :     bool        send_to_client;
      70             :     bool        use_copytblspc;
      71             :     BaseBackupTargetHandle *target_handle;
      72             :     backup_manifest_option manifest;    /* handed to InitializeBackupManifest() */
      73             :     pg_compress_algorithm compression;
      74             :     pg_compress_specification compression_specification;
      75             :     pg_checksum_type manifest_checksum_type;    /* handed to InitializeBackupManifest() */
      76             : } basebackup_options;
      77             : /* Prototypes for the static functions of this file. */
      78             : static int64 sendTablespace(bbsink *sink, char *path, char *spcoid, bool sizeonly,
      79             :                             struct backup_manifest_info *manifest);
      80             : static int64 sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
      81             :                      List *tablespaces, bool sendtblspclinks,
      82             :                      backup_manifest_info *manifest, const char *spcoid);
      83             : static bool sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
      84             :                      struct stat *statbuf, bool missing_ok, Oid dboid,
      85             :                      backup_manifest_info *manifest, const char *spcoid);
      86             : static void sendFileWithContent(bbsink *sink, const char *filename,
      87             :                                 const char *content,
      88             :                                 backup_manifest_info *manifest);
      89             : static int64 _tarWriteHeader(bbsink *sink, const char *filename,
      90             :                              const char *linktarget, struct stat *statbuf,
      91             :                              bool sizeonly);
      92             : static void _tarWritePadding(bbsink *sink, int len);
      93             : static void convert_link_to_directory(const char *pathbuf, struct stat *statbuf);
      94             : static void perform_base_backup(basebackup_options *opt, bbsink *sink);
      95             : static void parse_basebackup_options(List *options, basebackup_options *opt);
      96             : static int  compareWalFileNames(const ListCell *a, const ListCell *b);
      97             : static bool is_checksummed_file(const char *fullpath, const char *filename);
      98             : static int  basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset,
      99             :                                  const char *filename, bool partial_read_ok);
     100             : 
     101             : /* Was the in-progress backup started in recovery?  Set in perform_base_backup(). */
     102             : static bool backup_started_in_recovery = false;
     103             : 
     104             : /* Total checksum failures during base backup; zeroed in perform_base_backup(). */
     105             : static long long int total_checksum_failures;
     106             : 
     107             : /* Do not verify checksums. */
     108             : static bool noverify_checksums = false;
     109             : 
     110             : /*
     111             :  * One element of an exclusion list; such lists name the paths left out of
     112             :  * checksum validation or of base backups.  "name" is the file or path to
     113             :  * check for exclusion.  If "match_prefix" is true, any item whose name
     114             :  * starts with "name" is excluded.
     115             :  */
     116             : struct exclude_list_item
     117             : {
     118             :     const char *name;           /* file or path name to exclude */
     119             :     bool        match_prefix;   /* treat "name" as a prefix? */
     120             : };
     121             : 
     122             : /*
     123             :  * Directories whose contents are removed or recreated during server start,
     124             :  * so the contents are not included in backups.  The directories themselves
     125             :  * are kept and included as empty to preserve access permissions.  The array
     126             :  * is terminated by a NULL entry.
     127             :  *
     128             :  * Note: keep this list in sync with the filter lists in pg_rewind's filemap.c.
     129             :  */
     130             : static const char *const excludeDirContents[] =
     131             : {
     132             :     /*
     133             :      * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped
     134             :      * because extensions like pg_stat_statements store data there.
     135             :      */
     136             :     PG_STAT_TMP_DIR,
     137             : 
     138             :     /*
     139             :      * It is generally not useful to back up the contents of this directory
     140             :      * even if the intention is to restore to another primary. See backup.sgml
     141             :      * for a more detailed description.
     142             :      */
     143             :     "pg_replslot",
     144             : 
     145             :     /* Contents removed on startup, see dsm_cleanup_for_mmap(). */
     146             :     PG_DYNSHMEM_DIR,
     147             : 
     148             :     /* Contents removed on startup, see AsyncShmemInit(). */
     149             :     "pg_notify",
     150             : 
     151             :     /*
     152             :      * Old contents are loaded for possible debugging but are not required for
     153             :      * normal operation, see SerialInit().
     154             :      */
     155             :     "pg_serial",
     156             : 
     157             :     /* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
     158             :     "pg_snapshots",
     159             : 
     160             :     /* Contents zeroed on startup, see StartupSUBTRANS(). */
     161             :     "pg_subtrans",
     162             : 
     163             :     /* end of list */
     164             :     NULL
     165             : };
     166             : 
     167             : /*
     168             :  * List of files excluded from backups, terminated by an entry with a NULL name.
     169             :  */
     170             : static const struct exclude_list_item excludeFiles[] =
     171             : {
     172             :     /* Skip auto conf temporary file. */
     173             :     {PG_AUTOCONF_FILENAME ".tmp", false},
     174             : 
     175             :     /* Skip current log file temporary file */
     176             :     {LOG_METAINFO_DATAFILE_TMP, false},
     177             : 
     178             :     /*
     179             :      * Skip relation cache because it is rebuilt on startup.  This includes
     180             :      * temporary files, hence the prefix match.
     181             :      */
     182             :     {RELCACHE_INIT_FILENAME, true},
     183             : 
     184             :     /*
     185             :      * backup_label and tablespace_map should not exist in a running cluster
     186             :      * capable of doing an online backup, but exclude them just in case.
     187             :      */
     188             :     {BACKUP_LABEL_FILE, false},
     189             :     {TABLESPACE_MAP, false},
     190             : 
     191             :     /*
     192             :      * If there's a backup_manifest, it belongs to a backup that was used to
     193             :      * start this server. It is *not* correct for this backup. Our
     194             :      * backup_manifest is injected into the backup separately if users want
     195             :      * it.
     196             :      */
     197             :     {"backup_manifest", false},
     198             : 
     199             :     {"postmaster.pid", false},
     200             :     {"postmaster.opts", false},
     201             : 
     202             :     /* end of list */
     203             :     {NULL, false}
     204             : };
     205             : 
     206             : /*
     207             :  * List of files excluded from checksum validation, terminated by an entry
     208             :  * with a NULL name.
     209             :  *
     210             :  * Note: keep this list in sync with what pg_checksums.c includes.
     211             :  */
     212             : static const struct exclude_list_item noChecksumFiles[] = {
     213             :     {"pg_control", false},
     214             :     {"pg_filenode.map", false},
     215             :     {"pg_internal.init", true},     /* prefix match */
     216             :     {"PG_VERSION", false},
     217             : #ifdef EXEC_BACKEND
     218             :     {"config_exec_params", true},   /* EXEC_BACKEND builds only; prefix match */
     219             : #endif
     220             :     {NULL, false}                   /* end of list */
     221             : };
     222             : 
     223             : /*
     224             :  * Actually do a base backup for the specified tablespaces.
     225             :  *
     226             :  * This is split out mainly to avoid complaints about "variable might be
     227             :  * clobbered by longjmp" from stupider versions of gcc.
     228             :  */
     229             : static void
     230         236 : perform_base_backup(basebackup_options *opt, bbsink *sink)
     231             : {
     232             :     bbsink_state state;
     233             :     XLogRecPtr  endptr;
     234             :     TimeLineID  endtli;
     235             :     backup_manifest_info manifest;
     236             :     BackupState *backup_state;
     237             :     StringInfo  tablespace_map;
     238             : 
     239             :     /* Initial backup state, insofar as we know it now. */
     240         236 :     state.tablespaces = NIL;
     241         236 :     state.tablespace_num = 0;
     242         236 :     state.bytes_done = 0;
     243         236 :     state.bytes_total = 0;
     244         236 :     state.bytes_total_is_valid = false;
     245             : 
     246             :     /* we're going to use a BufFile, so we need a ResourceOwner */
     247             :     Assert(CurrentResourceOwner == NULL);
     248         236 :     CurrentResourceOwner = ResourceOwnerCreate(NULL, "base backup");
     249             : 
     250         236 :     backup_started_in_recovery = RecoveryInProgress();
     251             : 
     252         236 :     InitializeBackupManifest(&manifest, opt->manifest,
     253             :                              opt->manifest_checksum_type);
     254             : 
     255         236 :     total_checksum_failures = 0;
     256             : 
     257             :     /* Allocate backup related variables. */
     258         236 :     backup_state = (BackupState *) palloc0(sizeof(BackupState));
     259         236 :     tablespace_map = makeStringInfo();
     260             : 
     261         236 :     basebackup_progress_wait_checkpoint();
     262         236 :     do_pg_backup_start(opt->label, opt->fastcheckpoint, &state.tablespaces,
     263             :                        backup_state, tablespace_map);
     264             : 
     265         236 :     state.startptr = backup_state->startpoint;
     266         236 :     state.starttli = backup_state->starttli;
     267             : 
     268             :     /*
     269             :      * Once do_pg_backup_start has been called, ensure that any failure causes
     270             :      * us to abort the backup so we don't "leak" a backup counter. For this
     271             :      * reason, *all* functionality between do_pg_backup_start() and the end of
     272             :      * do_pg_backup_stop() should be inside the error cleanup block!
     273             :      */
     274             : 
     275         236 :     PG_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(false));
     276             :     {
     277             :         ListCell   *lc;
     278             :         tablespaceinfo *newti;
     279             : 
     280             :         /* Add a node for the base directory at the end */
     281         236 :         newti = palloc0(sizeof(tablespaceinfo));
     282         236 :         newti->size = -1;
     283         236 :         state.tablespaces = lappend(state.tablespaces, newti);
     284             : 
     285             :         /*
     286             :          * Calculate the total backup size by summing up the size of each
     287             :          * tablespace
     288             :          */
     289         236 :         if (opt->progress)
     290             :         {
     291         236 :             basebackup_progress_estimate_backup_size();
     292             : 
     293         534 :             foreach(lc, state.tablespaces)
     294             :             {
     295         298 :                 tablespaceinfo *tmp = (tablespaceinfo *) lfirst(lc);
     296             : 
     297         298 :                 if (tmp->path == NULL)
     298         236 :                     tmp->size = sendDir(sink, ".", 1, true, state.tablespaces,
     299             :                                         true, NULL, NULL);
     300             :                 else
     301          62 :                     tmp->size = sendTablespace(sink, tmp->path, tmp->oid, true,
     302             :                                                NULL);
     303         298 :                 state.bytes_total += tmp->size;
     304             :             }
     305         236 :             state.bytes_total_is_valid = true;
     306             :         }
     307             : 
     308             :         /* notify basebackup sink about start of backup */
     309         236 :         bbsink_begin_backup(sink, &state, SINK_BUFFER_LENGTH);
     310             : 
     311             :         /* Send off our tablespaces one by one */
     312         522 :         foreach(lc, state.tablespaces)
     313             :         {
     314         298 :             tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
     315             : 
     316         298 :             if (ti->path == NULL)
     317             :             {
     318             :                 struct stat statbuf;
     319         236 :                 bool        sendtblspclinks = true;
     320             :                 char       *backup_label;
     321             : 
     322         236 :                 bbsink_begin_archive(sink, "base.tar");
     323             : 
     324             :                 /* In the main tar, include the backup_label first... */
     325         236 :                 backup_label = build_backup_content(backup_state, false);
     326         236 :                 sendFileWithContent(sink, BACKUP_LABEL_FILE,
     327             :                                     backup_label, &manifest);
     328         236 :                 pfree(backup_label);
     329             : 
     330             :                 /* Then the tablespace_map file, if required... */
     331         236 :                 if (opt->sendtblspcmapfile)
     332             :                 {
     333          36 :                     sendFileWithContent(sink, TABLESPACE_MAP,
     334          36 :                                         tablespace_map->data, &manifest);
     335          36 :                     sendtblspclinks = false;
     336             :                 }
     337             : 
     338             :                 /* Then the bulk of the files... */
     339         236 :                 sendDir(sink, ".", 1, false, state.tablespaces,
     340             :                         sendtblspclinks, &manifest, NULL);
     341             : 
     342             :                 /* ... and pg_control after everything else. */
     343         224 :                 if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
     344           0 :                     ereport(ERROR,
     345             :                             (errcode_for_file_access(),
     346             :                              errmsg("could not stat file \"%s\": %m",
     347             :                                     XLOG_CONTROL_FILE)));
     348         224 :                 sendFile(sink, XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf,
     349             :                          false, InvalidOid, &manifest, NULL);
     350             :             }
     351             :             else
     352             :             {
     353          62 :                 char       *archive_name = psprintf("%s.tar", ti->oid);
     354             : 
     355          62 :                 bbsink_begin_archive(sink, archive_name);
     356             : 
     357          62 :                 sendTablespace(sink, ti->path, ti->oid, false, &manifest);
     358             :             }
     359             : 
     360             :             /*
     361             :              * If we're including WAL, and this is the main data directory we
     362             :              * don't treat this as the end of the tablespace. Instead, we will
     363             :              * include the xlog files below and stop afterwards. This is safe
     364             :              * since the main data directory is always sent *last*.
     365             :              */
     366         286 :             if (opt->includewal && ti->path == NULL)
     367             :             {
     368             :                 Assert(lnext(state.tablespaces, lc) == NULL);
     369             :             }
     370             :             else
     371             :             {
     372             :                 /* Properly terminate the tarfile. */
     373             :                 StaticAssertDecl(2 * TAR_BLOCK_SIZE <= BLCKSZ,
     374             :                                  "BLCKSZ too small for 2 tar blocks");
     375         264 :                 memset(sink->bbs_buffer, 0, 2 * TAR_BLOCK_SIZE);
     376         264 :                 bbsink_archive_contents(sink, 2 * TAR_BLOCK_SIZE);
     377             : 
     378             :                 /* OK, that's the end of the archive. */
     379         264 :                 bbsink_end_archive(sink);
     380             :             }
     381             :         }
     382             : 
     383         224 :         basebackup_progress_wait_wal_archive(&state);
     384         224 :         do_pg_backup_stop(backup_state, !opt->nowait);
     385             : 
     386         224 :         endptr = backup_state->stoppoint;
     387         224 :         endtli = backup_state->stoptli;
     388             : 
     389             :         /* Deallocate backup-related variables. */
     390         224 :         pfree(tablespace_map->data);
     391         224 :         pfree(tablespace_map);
     392         224 :         pfree(backup_state);
     393             :     }
     394         226 :     PG_END_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(false));
     395             : 
     396             : 
     397         224 :     if (opt->includewal)
     398             :     {
     399             :         /*
     400             :          * We've left the last tar file "open", so we can now append the
     401             :          * required WAL files to it.
     402             :          */
     403             :         char        pathbuf[MAXPGPATH];
     404             :         XLogSegNo   segno;
     405             :         XLogSegNo   startsegno;
     406             :         XLogSegNo   endsegno;
     407             :         struct stat statbuf;
     408          22 :         List       *historyFileList = NIL;
     409          22 :         List       *walFileList = NIL;
     410             :         char        firstoff[MAXFNAMELEN];
     411             :         char        lastoff[MAXFNAMELEN];
     412             :         DIR        *dir;
     413             :         struct dirent *de;
     414             :         ListCell   *lc;
     415             :         TimeLineID  tli;
     416             : 
     417          22 :         basebackup_progress_transfer_wal();
     418             : 
     419             :         /*
     420             :          * I'd rather not worry about timelines here, so scan pg_wal and
     421             :          * include all WAL files in the range between 'startptr' and 'endptr',
     422             :          * regardless of the timeline the file is stamped with. If there are
     423             :          * some spurious WAL files belonging to timelines that don't belong in
     424             :          * this server's history, they will be included too. Normally there
     425             :          * shouldn't be such files, but if there are, there's little harm in
     426             :          * including them.
     427             :          */
     428          22 :         XLByteToSeg(state.startptr, startsegno, wal_segment_size);
     429          22 :         XLogFileName(firstoff, state.starttli, startsegno, wal_segment_size);
     430          22 :         XLByteToPrevSeg(endptr, endsegno, wal_segment_size);
     431          22 :         XLogFileName(lastoff, endtli, endsegno, wal_segment_size);
     432             : 
     433          22 :         dir = AllocateDir("pg_wal");
     434         138 :         while ((de = ReadDir(dir, "pg_wal")) != NULL)
     435             :         {
     436             :             /* Does it look like a WAL segment, and is it in the range? */
     437         116 :             if (IsXLogFileName(de->d_name) &&
     438          50 :                 strcmp(de->d_name + 8, firstoff + 8) >= 0 &&
     439          50 :                 strcmp(de->d_name + 8, lastoff + 8) <= 0)
     440             :             {
     441          22 :                 walFileList = lappend(walFileList, pstrdup(de->d_name));
     442             :             }
     443             :             /* Does it look like a timeline history file? */
     444          94 :             else if (IsTLHistoryFileName(de->d_name))
     445             :             {
     446           0 :                 historyFileList = lappend(historyFileList, pstrdup(de->d_name));
     447             :             }
     448             :         }
     449          22 :         FreeDir(dir);
     450             : 
     451             :         /*
     452             :          * Before we go any further, check that none of the WAL segments we
     453             :          * need were removed.
     454             :          */
     455          22 :         CheckXLogRemoved(startsegno, state.starttli);
     456             : 
     457             :         /*
     458             :          * Sort the WAL filenames.  We want to send the files in order from
     459             :          * oldest to newest, to reduce the chance that a file is recycled
     460             :          * before we get a chance to send it over.
     461             :          */
     462          22 :         list_sort(walFileList, compareWalFileNames);
     463             : 
     464             :         /*
     465             :          * There must be at least one xlog file in the pg_wal directory, since
     466             :          * we are doing backup-including-xlog.
     467             :          */
     468          22 :         if (walFileList == NIL)
     469           0 :             ereport(ERROR,
     470             :                     (errmsg("could not find any WAL files")));
     471             : 
     472             :         /*
     473             :          * Sanity check: the first and last segment should cover startptr and
     474             :          * endptr, with no gaps in between.
     475             :          */
     476          22 :         XLogFromFileName((char *) linitial(walFileList),
     477             :                          &tli, &segno, wal_segment_size);
     478          22 :         if (segno != startsegno)
     479             :         {
     480             :             char        startfname[MAXFNAMELEN];
     481             : 
     482           0 :             XLogFileName(startfname, state.starttli, startsegno,
     483             :                          wal_segment_size);
     484           0 :             ereport(ERROR,
     485             :                     (errmsg("could not find WAL file \"%s\"", startfname)));
     486             :         }
     487          44 :         foreach(lc, walFileList)
     488             :         {
     489          22 :             char       *walFileName = (char *) lfirst(lc);
     490          22 :             XLogSegNo   currsegno = segno;
     491          22 :             XLogSegNo   nextsegno = segno + 1;
     492             : 
     493          22 :             XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
     494          22 :             if (!(nextsegno == segno || currsegno == segno))
     495             :             {
     496             :                 char        nextfname[MAXFNAMELEN];
     497             : 
     498           0 :                 XLogFileName(nextfname, tli, nextsegno, wal_segment_size);
     499           0 :                 ereport(ERROR,
     500             :                         (errmsg("could not find WAL file \"%s\"", nextfname)));
     501             :             }
     502             :         }
     503          22 :         if (segno != endsegno)
     504             :         {
     505             :             char        endfname[MAXFNAMELEN];
     506             : 
     507           0 :             XLogFileName(endfname, endtli, endsegno, wal_segment_size);
     508           0 :             ereport(ERROR,
     509             :                     (errmsg("could not find WAL file \"%s\"", endfname)));
     510             :         }
     511             : 
     512             :         /* Ok, we have everything we need. Send the WAL files. */
     513          44 :         foreach(lc, walFileList)
     514             :         {
     515          22 :             char       *walFileName = (char *) lfirst(lc);
     516             :             int         fd;
     517             :             size_t      cnt;
     518          22 :             pgoff_t     len = 0;
     519             : 
     520          22 :             snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFileName);
     521          22 :             XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
     522             : 
     523          22 :             fd = OpenTransientFile(pathbuf, O_RDONLY | PG_BINARY);
     524          22 :             if (fd < 0)
     525             :             {
     526           0 :                 int         save_errno = errno;
     527             : 
     528             :                 /*
     529             :                  * Most likely reason for this is that the file was already
     530             :                  * removed by a checkpoint, so check for that to get a better
     531             :                  * error message.
     532             :                  */
     533           0 :                 CheckXLogRemoved(segno, tli);
     534             : 
     535           0 :                 errno = save_errno;
     536           0 :                 ereport(ERROR,
     537             :                         (errcode_for_file_access(),
     538             :                          errmsg("could not open file \"%s\": %m", pathbuf)));
     539             :             }
     540             : 
     541          22 :             if (fstat(fd, &statbuf) != 0)
     542           0 :                 ereport(ERROR,
     543             :                         (errcode_for_file_access(),
     544             :                          errmsg("could not stat file \"%s\": %m",
     545             :                                 pathbuf)));
     546          22 :             if (statbuf.st_size != wal_segment_size)
     547             :             {
     548           0 :                 CheckXLogRemoved(segno, tli);
     549           0 :                 ereport(ERROR,
     550             :                         (errcode_for_file_access(),
     551             :                          errmsg("unexpected WAL file size \"%s\"", walFileName)));
     552             :             }
     553             : 
     554             :             /* send the WAL file itself */
     555          22 :             _tarWriteHeader(sink, pathbuf, NULL, &statbuf, false);
     556             : 
     557       11286 :             while ((cnt = basebackup_read_file(fd, sink->bbs_buffer,
     558       11264 :                                                Min(sink->bbs_buffer_length,
     559             :                                                    wal_segment_size - len),
     560             :                                                len, pathbuf, true)) > 0)
     561             :             {
     562       11264 :                 CheckXLogRemoved(segno, tli);
     563       11264 :                 bbsink_archive_contents(sink, cnt);
     564             : 
     565       11264 :                 len += cnt;
     566             : 
     567       11264 :                 if (len == wal_segment_size)
     568          22 :                     break;
     569             :             }
     570             : 
     571          22 :             if (len != wal_segment_size)
     572             :             {
     573           0 :                 CheckXLogRemoved(segno, tli);
     574           0 :                 ereport(ERROR,
     575             :                         (errcode_for_file_access(),
     576             :                          errmsg("unexpected WAL file size \"%s\"", walFileName)));
     577             :             }
     578             : 
     579             :             /*
     580             :              * wal_segment_size is a multiple of TAR_BLOCK_SIZE, so no need
     581             :              * for padding.
     582             :              */
     583             :             Assert(wal_segment_size % TAR_BLOCK_SIZE == 0);
     584             : 
     585          22 :             CloseTransientFile(fd);
     586             : 
     587             :             /*
     588             :              * Mark file as archived, otherwise files can get archived again
     589             :              * after promotion of a new node. This is in line with
     590             :              * walreceiver.c always doing an XLogArchiveForceDone() after a
     591             :              * complete segment.
     592             :              */
     593          22 :             StatusFilePath(pathbuf, walFileName, ".done");
     594          22 :             sendFileWithContent(sink, pathbuf, "", &manifest);
     595             :         }
     596             : 
     597             :         /*
     598             :          * Send timeline history files too. Only the latest timeline history
     599             :          * file is required for recovery, and even that only if there happens
     600             :          * to be a timeline switch in the first WAL segment that contains the
     601             :          * checkpoint record, or if we're taking a base backup from a standby
     602             :          * server and the target timeline changes while the backup is taken.
     603             :          * But they are small and highly useful for debugging purposes, so
     604             :          * better include them all, always.
     605             :          */
     606          22 :         foreach(lc, historyFileList)
     607             :         {
     608           0 :             char       *fname = lfirst(lc);
     609             : 
     610           0 :             snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", fname);
     611             : 
     612           0 :             if (lstat(pathbuf, &statbuf) != 0)
     613           0 :                 ereport(ERROR,
     614             :                         (errcode_for_file_access(),
     615             :                          errmsg("could not stat file \"%s\": %m", pathbuf)));
     616             : 
     617           0 :             sendFile(sink, pathbuf, pathbuf, &statbuf, false, InvalidOid,
     618             :                      &manifest, NULL);
     619             : 
     620             :             /* unconditionally mark file as archived */
     621           0 :             StatusFilePath(pathbuf, fname, ".done");
     622           0 :             sendFileWithContent(sink, pathbuf, "", &manifest);
     623             :         }
     624             : 
     625             :         /* Properly terminate the tar file. */
     626             :         StaticAssertStmt(2 * TAR_BLOCK_SIZE <= BLCKSZ,
     627             :                          "BLCKSZ too small for 2 tar blocks");
     628          22 :         memset(sink->bbs_buffer, 0, 2 * TAR_BLOCK_SIZE);
     629          22 :         bbsink_archive_contents(sink, 2 * TAR_BLOCK_SIZE);
     630             : 
     631             :         /* OK, that's the end of the archive. */
     632          22 :         bbsink_end_archive(sink);
     633             :     }
     634             : 
     635         224 :     AddWALInfoToBackupManifest(&manifest, state.startptr, state.starttli,
     636             :                                endptr, endtli);
     637             : 
     638         224 :     SendBackupManifest(&manifest, sink);
     639             : 
     640         224 :     bbsink_end_backup(sink, endptr, endtli);
     641             : 
     642         224 :     if (total_checksum_failures)
     643             :     {
     644           6 :         if (total_checksum_failures > 1)
     645           4 :             ereport(WARNING,
     646             :                     (errmsg_plural("%lld total checksum verification failure",
     647             :                                    "%lld total checksum verification failures",
     648             :                                    total_checksum_failures,
     649             :                                    total_checksum_failures)));
     650             : 
     651           6 :         ereport(ERROR,
     652             :                 (errcode(ERRCODE_DATA_CORRUPTED),
     653             :                  errmsg("checksum verification failure during base backup")));
     654             :     }
     655             : 
     656             :     /*
     657             :      * Make sure to free the manifest before the resource owners as manifests
     658             :      * use cryptohash contexts that may depend on resource owners (like
     659             :      * OpenSSL).
     660             :      */
     661         218 :     FreeBackupManifest(&manifest);
     662             : 
     663             :     /* clean up the resource owner we created */
     664         218 :     WalSndResourceCleanup(true);
     665             : 
     666         218 :     basebackup_progress_done();
     667         218 : }
     668             : 
     669             : /*
     670             :  * list_sort comparison function, to compare log/seg portion of WAL segment
     671             :  * filenames, ignoring the timeline portion.
                     :  *
                     :  * a, b: list cells whose values are the file name strings to compare.
                     :  *
                     :  * Returns the strcmp() result of the two names after skipping the first
                     :  * eight characters of each, so names that differ only in those leading
                     :  * characters compare as equal.
     672             :  */
     673             : static int
     674           0 : compareWalFileNames(const ListCell *a, const ListCell *b)
     675             : {
     676           0 :     char       *fna = (char *) lfirst(a);
     677           0 :     char       *fnb = (char *) lfirst(b);
     678             : 
     679           0 :     return strcmp(fna + 8, fnb + 8);	/* + 8 steps over the timeline portion */
     680             : }
     681             : 
     682             : /*
     683             :  * Parse the base backup options passed down by the parser
                     :  *
                     :  * options: list of DefElem nodes, one per option that was given.
                     :  * opt: output struct; it is zeroed and given defaults here, then filled in
                     :  *      from the options.
                     :  *
                     :  * Raises ERROR for a duplicate or unrecognized option, for an option value
                     :  * that cannot be parsed or is out of range, and for inconsistent
                     :  * combinations (manifest checksums without a manifest, target detail
                     :  * without a target or with target "client", compression detail without
                     :  * compression, invalid compression specification).
                     :  *
                     :  * NOTE(review): "verify_checksums" is stored in noverify_checksums, which
                     :  * is not declared in this function, rather than in *opt.
     684             :  */
     685             : static void
     686         268 : parse_basebackup_options(List *options, basebackup_options *opt)
     687             : {
     688             :     ListCell   *lopt;
     689         268 :     bool        o_label = false;	/* o_* flags: option already seen */
     690         268 :     bool        o_progress = false;
     691         268 :     bool        o_checkpoint = false;
     692         268 :     bool        o_nowait = false;
     693         268 :     bool        o_wal = false;
     694         268 :     bool        o_maxrate = false;
     695         268 :     bool        o_tablespace_map = false;
     696         268 :     bool        o_noverify_checksums = false;
     697         268 :     bool        o_manifest = false;
     698         268 :     bool        o_manifest_checksums = false;
     699         268 :     bool        o_target = false;
     700         268 :     bool        o_target_detail = false;
     701         268 :     char       *target_str = NULL;
     702         268 :     char       *target_detail_str = NULL;
     703         268 :     bool        o_compression = false;
     704         268 :     bool        o_compression_detail = false;
     705         268 :     char       *compression_detail_str = NULL;
     706             : 
     707        2948 :     MemSet(opt, 0, sizeof(*opt));	/* clear everything, then set explicit defaults */
     708         268 :     opt->manifest = MANIFEST_OPTION_NO;
     709         268 :     opt->manifest_checksum_type = CHECKSUM_TYPE_CRC32C;
     710         268 :     opt->compression = PG_COMPRESSION_NONE;
     711         268 :     opt->compression_specification.algorithm = PG_COMPRESSION_NONE;
     712             : 
     713        2008 :     foreach(lopt, options)
     714             :     {
     715        1744 :         DefElem    *defel = (DefElem *) lfirst(lopt);
     716             : 
     717        1744 :         if (strcmp(defel->defname, "label") == 0)
     718             :         {
     719         268 :             if (o_label)
     720           0 :                 ereport(ERROR,
     721             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     722             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     723         268 :             opt->label = defGetString(defel);
     724         268 :             o_label = true;
     725             :         }
     726        1476 :         else if (strcmp(defel->defname, "progress") == 0)
     727             :         {
     728         268 :             if (o_progress)
     729           0 :                 ereport(ERROR,
     730             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     731             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     732         268 :             opt->progress = defGetBoolean(defel);
     733         268 :             o_progress = true;
     734             :         }
     735        1208 :         else if (strcmp(defel->defname, "checkpoint") == 0)
     736             :         {
     737         248 :             char       *optval = defGetString(defel);
     738             : 
     739         248 :             if (o_checkpoint)
     740           0 :                 ereport(ERROR,
     741             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     742             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     743         248 :             if (pg_strcasecmp(optval, "fast") == 0)	/* value is matched case-insensitively */
     744         248 :                 opt->fastcheckpoint = true;
     745           0 :             else if (pg_strcasecmp(optval, "spread") == 0)
     746           0 :                 opt->fastcheckpoint = false;
     747             :             else
     748           0 :                 ereport(ERROR,
     749             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     750             :                          errmsg("unrecognized checkpoint type: \"%s\"",
     751             :                                 optval)));
     752         248 :             o_checkpoint = true;
     753             :         }
     754         960 :         else if (strcmp(defel->defname, "wait") == 0)
     755             :         {
     756         252 :             if (o_nowait)
     757           0 :                 ereport(ERROR,
     758             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     759             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     760         252 :             opt->nowait = !defGetBoolean(defel);	/* option is "wait"; the field stores its negation */
     761         252 :             o_nowait = true;
     762             :         }
     763         708 :         else if (strcmp(defel->defname, "wal") == 0)
     764             :         {
     765          30 :             if (o_wal)
     766           0 :                 ereport(ERROR,
     767             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     768             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     769          30 :             opt->includewal = defGetBoolean(defel);
     770          30 :             o_wal = true;
     771             :         }
     772         678 :         else if (strcmp(defel->defname, "max_rate") == 0)
     773             :         {
     774             :             int64       maxrate;
     775             : 
     776           2 :             if (o_maxrate)
     777           0 :                 ereport(ERROR,
     778             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     779             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     780             : 
     781           2 :             maxrate = defGetInt64(defel);
     782           2 :             if (maxrate < MAX_RATE_LOWER || maxrate > MAX_RATE_UPPER)	/* NOTE(review): the message below prints (int) maxrate, which may not match an int64 value outside int range */
     783           0 :                 ereport(ERROR,
     784             :                         (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
     785             :                          errmsg("%d is outside the valid range for parameter \"%s\" (%d .. %d)",
     786             :                                 (int) maxrate, "MAX_RATE", MAX_RATE_LOWER, MAX_RATE_UPPER)));
     787             : 
     788           2 :             opt->maxrate = (uint32) maxrate;	/* already range-checked above */
     789           2 :             o_maxrate = true;
     790             :         }
     791         676 :         else if (strcmp(defel->defname, "tablespace_map") == 0)
     792             :         {
     793          46 :             if (o_tablespace_map)
     794           0 :                 ereport(ERROR,
     795             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     796             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     797          46 :             opt->sendtblspcmapfile = defGetBoolean(defel);
     798          46 :             o_tablespace_map = true;
     799             :         }
     800         630 :         else if (strcmp(defel->defname, "verify_checksums") == 0)
     801             :         {
     802           2 :             if (o_noverify_checksums)
     803           0 :                 ereport(ERROR,
     804             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     805             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     806           2 :             noverify_checksums = !defGetBoolean(defel);	/* negated; stored outside *opt */
     807           2 :             o_noverify_checksums = true;
     808             :         }
     809         628 :         else if (strcmp(defel->defname, "manifest") == 0)
     810             :         {
     811         266 :             char       *optval = defGetString(defel);
     812             :             bool        manifest_bool;
     813             : 
     814         266 :             if (o_manifest)
     815           0 :                 ereport(ERROR,
     816             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     817             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     818         266 :             if (parse_bool(optval, &manifest_bool))	/* boolean spellings are tried first, then "force-encode" */
     819             :             {
     820         264 :                 if (manifest_bool)
     821         264 :                     opt->manifest = MANIFEST_OPTION_YES;
     822             :                 else
     823           0 :                     opt->manifest = MANIFEST_OPTION_NO;
     824             :             }
     825           2 :             else if (pg_strcasecmp(optval, "force-encode") == 0)
     826           2 :                 opt->manifest = MANIFEST_OPTION_FORCE_ENCODE;
     827             :             else
     828           0 :                 ereport(ERROR,
     829             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     830             :                          errmsg("unrecognized manifest option: \"%s\"",
     831             :                                 optval)));
     832         266 :             o_manifest = true;
     833             :         }
     834         362 :         else if (strcmp(defel->defname, "manifest_checksums") == 0)
     835             :         {
     836          14 :             char       *optval = defGetString(defel);
     837             : 
     838          14 :             if (o_manifest_checksums)
     839           0 :                 ereport(ERROR,
     840             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     841             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     842          14 :             if (!pg_checksum_parse_type(optval,
     843             :                                         &opt->manifest_checksum_type))
     844           2 :                 ereport(ERROR,
     845             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     846             :                          errmsg("unrecognized checksum algorithm: \"%s\"",
     847             :                                 optval)));
     848          12 :             o_manifest_checksums = true;
     849             :         }
     850         348 :         else if (strcmp(defel->defname, "target") == 0)
     851             :         {
     852         266 :             if (o_target)
     853           0 :                 ereport(ERROR,
     854             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     855             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     856         266 :             target_str = defGetString(defel);	/* only recorded here; interpreted after the loop */
     857         266 :             o_target = true;
     858             :         }
     859          82 :         else if (strcmp(defel->defname, "target_detail") == 0)
     860             :         {
     861          12 :             char       *optval = defGetString(defel);
     862             : 
     863          12 :             if (o_target_detail)
     864           0 :                 ereport(ERROR,
     865             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     866             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     867          12 :             target_detail_str = optval;
     868          12 :             o_target_detail = true;
     869             :         }
     870          70 :         else if (strcmp(defel->defname, "compression") == 0)
     871             :         {
     872          48 :             char       *optval = defGetString(defel);
     873             : 
     874          48 :             if (o_compression)
     875           0 :                 ereport(ERROR,
     876             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     877             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     878          48 :             if (!parse_compress_algorithm(optval, &opt->compression))
     879           2 :                 ereport(ERROR,
     880             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     881             :                          errmsg("unrecognized compression algorithm: \"%s\"",
     882             :                                 optval)));
     883          46 :             o_compression = true;
     884             :         }
     885          22 :         else if (strcmp(defel->defname, "compression_detail") == 0)
     886             :         {
     887          22 :             if (o_compression_detail)
     888           0 :                 ereport(ERROR,
     889             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     890             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     891          22 :             compression_detail_str = defGetString(defel);	/* only recorded here; parsed after the loop */
     892          22 :             o_compression_detail = true;
     893             :         }
     894             :         else
     895           0 :             ereport(ERROR,
     896             :                     (errcode(ERRCODE_SYNTAX_ERROR),
     897             :                      errmsg("unrecognized base backup option: \"%s\"",
     898             :                             defel->defname)));
     899             :     }
     900             : 
     901         264 :     if (opt->label == NULL)	/* no "label" option was given */
     902           0 :         opt->label = "base backup";
     903         264 :     if (opt->manifest == MANIFEST_OPTION_NO)
     904             :     {
     905           2 :         if (o_manifest_checksums)
     906           0 :             ereport(ERROR,
     907             :                     (errcode(ERRCODE_SYNTAX_ERROR),
     908             :                      errmsg("manifest checksums require a backup manifest")));
     909           2 :         opt->manifest_checksum_type = CHECKSUM_TYPE_NONE;	/* overrides the CRC32C default set above */
     910             :     }
     911             : 
     912         264 :     if (target_str == NULL)	/* no "target" option was given */
     913             :     {
     914           0 :         if (target_detail_str != NULL)
     915           0 :             ereport(ERROR,
     916             :                     (errcode(ERRCODE_SYNTAX_ERROR),
     917             :                      errmsg("target detail cannot be used without target")));
     918           0 :         opt->use_copytblspc = true;
     919           0 :         opt->send_to_client = true;
     920             :     }
     921         264 :     else if (strcmp(target_str, "client") == 0)
     922             :     {
     923         240 :         if (target_detail_str != NULL)
     924           0 :             ereport(ERROR,
     925             :                     (errcode(ERRCODE_SYNTAX_ERROR),
     926             :                      errmsg("target \"%s\" does not accept a target detail",
     927             :                             target_str)));
     928         240 :         opt->send_to_client = true;
     929             :     }
     930             :     else	/* any other target name is handed to BaseBackupGetTargetHandle() */
     931          20 :         opt->target_handle =
     932          24 :             BaseBackupGetTargetHandle(target_str, target_detail_str);
     933             : 
     934         260 :     if (o_compression_detail && !o_compression)
     935           0 :         ereport(ERROR,
     936             :                 (errcode(ERRCODE_SYNTAX_ERROR),
     937             :                  errmsg("compression detail cannot be specified unless compression is enabled")));
     938             : 
     939         260 :     if (o_compression)
     940             :     {
     941             :         char       *error_detail;
     942             : 
     943          42 :         parse_compress_specification(opt->compression, compression_detail_str,	/* detail string is NULL if none was given */
     944             :                                      &opt->compression_specification);
     945             :         error_detail =
     946          42 :             validate_compress_specification(&opt->compression_specification);
     947          42 :         if (error_detail != NULL)
     948          18 :             ereport(ERROR,
     949             :                     errcode(ERRCODE_SYNTAX_ERROR),
     950             :                     errmsg("invalid compression specification: %s",
     951             :                            error_detail));
     952             :     }
     953         242 : }
     954             : 
     955             : 
     956             : /*
     957             :  * SendBaseBackup() - send a complete base backup.
     958             :  *
     959             :  * The function will put the system into backup mode like pg_backup_start()
     960             :  * does, so that the backup is consistent even though we read directly from
     961             :  * the filesystem, bypassing the buffer cache.
                     :  *
                     :  * cmd: the parsed command; only cmd->options is read here.
                     :  *
                     :  * Raises ERROR if a backup is already in progress in this session.  Errors
                     :  * from parse_basebackup_options() are raised before the walsender state is
                     :  * changed and before any sink is created.
     962             :  */
     963             : void
     964         270 : SendBaseBackup(BaseBackupCmd *cmd)
     965             : {
     966             :     basebackup_options opt;
     967             :     bbsink     *sink;
     968         270 :     SessionBackupState status = get_backup_status();
     969             : 
     970         270 :     if (status == SESSION_BACKUP_RUNNING)
     971           2 :         ereport(ERROR,
     972             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     973             :                  errmsg("a backup is already in progress in this session")));
     974             : 
     975         268 :     parse_basebackup_options(cmd->options, &opt);
     976             : 
     977         242 :     WalSndSetState(WALSNDSTATE_BACKUP);
     978             : 
     979         242 :     if (update_process_title)
     980             :     {
     981             :         char        activitymsg[50];	/* snprintf below truncates longer labels to fit */
     982             : 
     983         242 :         snprintf(activitymsg, sizeof(activitymsg), "sending backup \"%s\"",
     984             :                  opt.label);
     985         242 :         set_ps_display(activitymsg);
     986             :     }
     987             : 
     988             :     /*
     989             :      * If the target is specifically 'client' then set up to stream the backup
     990             :      * to the client; otherwise, it's being sent someplace else and should not
     991             :      * be sent to the client. BaseBackupGetSink has the job of setting up a
     992             :      * sink to send the backup data wherever it needs to go.
     993             :      */
     994         242 :     sink = bbsink_copystream_new(opt.send_to_client);
     995         242 :     if (opt.target_handle != NULL)
     996          20 :         sink = BaseBackupGetSink(opt.target_handle, sink);	/* from here on, each constructor takes the current sink and returns the one to use next */
     997             : 
     998             :     /* Set up network throttling, if client requested it */
     999         236 :     if (opt.maxrate > 0)
    1000           2 :         sink = bbsink_throttle_new(sink, opt.maxrate);
    1001             : 
    1002             :     /* Set up server-side compression, if client requested it */
    1003         236 :     if (opt.compression == PG_COMPRESSION_GZIP)
    1004           4 :         sink = bbsink_gzip_new(sink, &opt.compression_specification);
    1005         232 :     else if (opt.compression == PG_COMPRESSION_LZ4)
    1006           2 :         sink = bbsink_lz4_new(sink, &opt.compression_specification);
    1007         230 :     else if (opt.compression == PG_COMPRESSION_ZSTD)
    1008           0 :         sink = bbsink_zstd_new(sink, &opt.compression_specification);
    1009             : 
    1010             :     /* Set up progress reporting. */
    1011         236 :     sink = bbsink_progress_new(sink, opt.progress);	/* added unconditionally; opt.progress is passed through */
    1012             : 
    1013             :     /*
    1014             :      * Perform the base backup, but make sure we clean up the bbsink even if
    1015             :      * an error occurs.
    1016             :      */
    1017         236 :     PG_TRY();
    1018             :     {
    1019         236 :         perform_base_backup(&opt, sink);
    1020             :     }
    1021           8 :     PG_FINALLY();
    1022             :     {
    1023         226 :         bbsink_cleanup(sink);	/* called on the last sink created above */
    1024             :     }
    1025         226 :     PG_END_TRY();
    1026         218 : }
    1027             : 
    1028             : /*
    1029             :  * Inject a file with given name and content in the output tar stream.
                     :  *
                     :  * sink: destination for the tar header, file data and padding.
                     :  * filename: name recorded in the tar header and in the manifest.
                     :  * content: NUL-terminated string; its strlen() is the file size, and an
                     :  *          empty string produces a header with no data.
                     :  * manifest: the file is added to it together with a checksum of content,
                     :  *           computed using manifest->checksum_type.
                     :  *
                     :  * Raises ERROR if the checksum cannot be initialized or updated.
    1030             :  */
    1031             : static void
    1032         294 : sendFileWithContent(bbsink *sink, const char *filename, const char *content,
    1033             :                     backup_manifest_info *manifest)
    1034             : {
    1035             :     struct stat statbuf;
    1036         294 :     int         bytes_done = 0,
    1037             :                 len;
    1038             :     pg_checksum_context checksum_ctx;
    1039             : 
    1040         294 :     if (pg_checksum_init(&checksum_ctx, manifest->checksum_type) < 0)
    1041           0 :         elog(ERROR, "could not initialize checksum of file \"%s\"",
    1042             :              filename);
    1043             : 
    1044         294 :     len = strlen(content);	/* NOTE(review): stored in an int; assumes content is shorter than INT_MAX */
    1045             : 
    1046             :     /*
    1047             :      * Construct a stat struct for the file we're injecting in the tar.  The
    1048             :      * content comes from memory, so the fields are set by hand, not stat().
    1049             :      */
    1050             :     /* Windows doesn't have the concept of uid and gid */
    1051             : #ifdef WIN32
    1052             :     statbuf.st_uid = 0;
    1053             :     statbuf.st_gid = 0;
    1054             : #else
    1055         294 :     statbuf.st_uid = geteuid();
    1056         294 :     statbuf.st_gid = getegid();
    1057             : #endif
    1058         294 :     statbuf.st_mtime = time(NULL);	/* current time; also recorded in the manifest below */
    1059         294 :     statbuf.st_mode = pg_file_create_mode;
    1060         294 :     statbuf.st_size = len;
    1061             : 
    1062         294 :     _tarWriteHeader(sink, filename, NULL, &statbuf, false);
    1063             : 
    1064         294 :     if (pg_checksum_update(&checksum_ctx, (uint8 *) content, len) < 0)	/* whole content is checksummed before content is advanced below */
    1065           0 :         elog(ERROR, "could not update checksum of file \"%s\"",
    1066             :              filename);
    1067             : 
    1068         534 :     while (bytes_done < len)	/* send the content in pieces of at most bbs_buffer_length bytes */
    1069             :     {
    1070         240 :         size_t      remaining = len - bytes_done;
    1071         240 :         size_t      nbytes = Min(sink->bbs_buffer_length, remaining);
    1072             : 
    1073         240 :         memcpy(sink->bbs_buffer, content, nbytes);
    1074         240 :         bbsink_archive_contents(sink, nbytes);
    1075         240 :         bytes_done += nbytes;
    1076         240 :         content += nbytes;	/* advance to the part not yet sent */
    1077             :     }
    1078             : 
    1079         294 :     _tarWritePadding(sink, len);
    1080             : 
    1081         294 :     AddFileToBackupManifest(manifest, NULL, filename, len,
    1082         294 :                             (pg_time_t) statbuf.st_mtime, &checksum_ctx);
    1083         294 : }
    1084             : 
    1085             : /*
    1086             :  * Include the tablespace directory pointed to by 'path' in the output tar
    1087             :  * stream.  If 'sizeonly' is true, we just calculate a total length and return
    1088             :  * it, without actually sending anything.
    1089             :  *
    1090             :  * Only used to send auxiliary tablespaces, not PGDATA.
                     :  *
                     :  * 'spcoid' and 'manifest' are not examined here; they are passed through
                     :  * unchanged to sendDir().
                     :  *
                     :  * Returns the size reported by _tarWriteHeader() for the version directory
                     :  * entry plus the total returned by sendDir() for its contents.  Returns 0
                     :  * if the version directory does not exist (lstat() fails with ENOENT); any
                     :  * other lstat() failure is reported as an ERROR.
    1091             :  */
    1092             : static int64
    1093         124 : sendTablespace(bbsink *sink, char *path, char *spcoid, bool sizeonly,
    1094             :                backup_manifest_info *manifest)
    1095             : {
    1096             :     int64       size;
    1097             :     char        pathbuf[MAXPGPATH];
    1098             :     struct stat statbuf;
    1099             : 
    1100             :     /*
    1101             :      * 'path' points to the tablespace location, but we only want to include
    1102             :      * the version directory in it that belongs to us.
    1103             :      */
    1104         124 :     snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path,
    1105             :              TABLESPACE_VERSION_DIRECTORY);
    1106             : 
    1107             :     /*
    1108             :      * Store a directory entry in the tar file so we get the permissions
    1109             :      * right.
    1110             :      */
    1111         124 :     if (lstat(pathbuf, &statbuf) != 0)
    1112             :     {
    1113           0 :         if (errno != ENOENT)
    1114           0 :             ereport(ERROR,
    1115             :                     (errcode_for_file_access(),
    1116             :                      errmsg("could not stat file or directory \"%s\": %m",
    1117             :                             pathbuf)));
    1118             : 
    1119             :         /* If the tablespace went away while scanning, it's no error. */
    1120           0 :         return 0;
    1121             :     }
    1122             : 
    1123         124 :     size = _tarWriteHeader(sink, TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf,
    1124             :                            sizeonly);
    1125             : 
    1126             :     /*
                     :      * Send all the files in the tablespace version directory.  strlen(path)
                     :      * is passed as the base path length, so sendDir() forms tar member
                     :      * names from the part of each path that follows "<path>/", i.e. names
                     :      * that begin with the version directory.
                     :      */
    1127         124 :     size += sendDir(sink, pathbuf, strlen(path), sizeonly, NIL, true, manifest,
    1128             :                     spcoid);
    1129             : 
    1130         124 :     return size;
    1131             : }
    1132             : 
    1133             : /*
    1134             :  * Include all files from the given directory in the output tar stream. If
    1135             :  * 'sizeonly' is true, we just calculate a total length and return it, without
    1136             :  * actually sending anything.
    1137             :  *
    1138             :  * Omit any directory in the tablespaces list, to avoid backing up
    1139             :  * tablespaces twice when they were created inside PGDATA.
    1140             :  *
    1141             :  * If sendtblspclinks is true, we need to include symlink
    1142             :  * information in the tar file. If not, we can skip that
    1143             :  * as it will be sent separately in the tablespace_map file.
                     :  *
                     :  * 'basepathlen' is the length of the leading part of 'path' that is left
                     :  * out of tar member names: each name is taken from the full path starting
                     :  * at offset basepathlen + 1.
                     :  *
                     :  * 'manifest' and 'spcoid' are passed to sendFile() for every regular file
                     :  * and to the recursive calls made for subdirectories.  For regular files
                     :  * found in a database directory, the directory name is converted with
                     :  * atooid() and passed to sendFile() as the database OID; otherwise
                     :  * InvalidOid is passed.
                     :  *
                     :  * Returns the accumulated size: the values returned by _tarWriteHeader()
                     :  * for directory and symlink entries, the totals returned by recursive
                     :  * calls, and, for each regular file that was sent (or for every regular
                     :  * file when 'sizeonly' is true), st_size plus tar padding plus one
                     :  * TAR_BLOCK_SIZE header.
    1144             :  */
    1145             : static int64
    1146        8146 : sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
    1147             :         List *tablespaces, bool sendtblspclinks, backup_manifest_info *manifest,
    1148             :         const char *spcoid)
    1149             : {
    1150             :     DIR        *dir;
    1151             :     struct dirent *de;
    1152             :     char        pathbuf[MAXPGPATH * 2];
    1153             :     struct stat statbuf;
    1154        8146 :     int64       size = 0;
    1155             :     const char *lastDir;        /* Split last dir from parent path. */
    1156        8146 :     bool        isDbDir = false;    /* Does this directory contain relations? */
    1157             : 
    1158             :     /*
    1159             :      * Determine if the current path is a database directory that can contain
    1160             :      * relations.
    1161             :      *
    1162             :      * Start by finding the location of the delimiter between the parent path
    1163             :      * and the current path.
    1164             :      */
    1165        8146 :     lastDir = last_dir_separator(path);
    1166             : 
    1167             :     /* Does this path look like a database path (i.e. all digits)? */
    1168        8146 :     if (lastDir != NULL &&
    1169        7674 :         strspn(lastDir + 1, "0123456789") == strlen(lastDir + 1))
    1170             :     {
    1171             :         /* Part of path that contains the parent directory. */
    1172        1486 :         int         parentPathLen = lastDir - path;
    1173             : 
    1174             :         /*
    1175             :          * Mark path as a database directory if the parent path is either
    1176             :          * $PGDATA/base or a tablespace version path.
                     :          *
                     :          * NOTE(review): the first strncmp() compares only parentPathLen
                     :          * characters, so a parent path that is merely a prefix of "./base"
                     :          * (e.g. "." for an all-digit directory directly under the data
                     :          * directory) also passes this test -- confirm whether that is
                     :          * intended.
    1177             :          */
    1178        1486 :         if (strncmp(path, "./base", parentPathLen) == 0 ||
    1179          76 :             (parentPathLen >= (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) &&
    1180          76 :              strncmp(lastDir - (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1),
    1181             :                      TABLESPACE_VERSION_DIRECTORY,
    1182             :                      sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) == 0))
    1183        1486 :             isDbDir = true;
    1184             :     }
    1185             : 
    1186        8146 :     dir = AllocateDir(path);
    1187      493166 :     while ((de = ReadDir(dir, path)) != NULL)
    1188             :     {
    1189             :         int         excludeIdx;
    1190             :         bool        excludeFound;
    1191             :         ForkNumber  relForkNum; /* Type of fork if file is a relation */
    1192             :         int         relnumchars;    /* Chars in filename that are the
    1193             :                                      * relnumber */
    1194             : 
    1195             :         /* Skip special stuff */
    1196      485042 :         if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
    1197       23048 :             continue;
    1198             : 
    1199             :         /* Skip temporary files */
    1200      468782 :         if (strncmp(de->d_name,
    1201             :                     PG_TEMP_FILE_PREFIX,
    1202             :                     strlen(PG_TEMP_FILE_PREFIX)) == 0)
    1203         460 :             continue;
    1204             : 
    1205             :         /*
    1206             :          * Check if the postmaster has signaled us to exit, and abort with an
    1207             :          * error in that case. The error handler further up will call
    1208             :          * do_pg_abort_backup() for us. Also check that if the backup was
    1209             :          * started while still in recovery, the server wasn't promoted.
    1210             :          * do_pg_backup_stop() will check that too, but it's better to stop
    1211             :          * the backup early than continue to the end and fail there.
    1212             :          */
    1213      468322 :         CHECK_FOR_INTERRUPTS();
    1214      468312 :         if (RecoveryInProgress() != backup_started_in_recovery)
    1215           0 :             ereport(ERROR,
    1216             :                     (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    1217             :                      errmsg("the standby was promoted during online backup"),
    1218             :                      errhint("This means that the backup being taken is corrupt "
    1219             :                              "and should not be used. "
    1220             :                              "Try taking another online backup.")));
    1221             : 
    1222             :         /*
                     :          * Scan for files that should be excluded.  For entries without
                     :          * match_prefix the comparison length is extended by one so that the
                     :          * terminating NUL is compared as well, i.e. the name has to match
                     :          * exactly rather than merely as a prefix.
                     :          */
    1223      468312 :         excludeFound = false;
    1224     4207016 :         for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
    1225             :         {
    1226     3740646 :             int         cmplen = strlen(excludeFiles[excludeIdx].name);
    1227             : 
    1228     3740646 :             if (!excludeFiles[excludeIdx].match_prefix)
    1229     3272594 :                 cmplen++;
    1230     3740646 :             if (strncmp(de->d_name, excludeFiles[excludeIdx].name, cmplen) == 0)
    1231             :             {
    1232        1942 :                 elog(DEBUG1, "file \"%s\" excluded from backup", de->d_name);
    1233        1942 :                 excludeFound = true;
    1234        1942 :                 break;
    1235             :             }
    1236             :         }
    1237             : 
    1238      468312 :         if (excludeFound)
    1239        1942 :             continue;
    1240             : 
    1241             :         /* Exclude all forks for unlogged tables except the init fork */
    1242      888168 :         if (isDbDir &&
    1243      421798 :             parse_filename_for_nontemp_relation(de->d_name, &relnumchars,
    1244             :                                                 &relForkNum))
    1245             :         {
    1246             :             /* Never exclude init forks */
    1247      418902 :             if (relForkNum != INIT_FORKNUM)
    1248             :             {
    1249             :                 char        initForkFile[MAXPGPATH];
    1250             :                 char        relNumber[OIDCHARS + 1];
    1251             : 
    1252             :                 /*
    1253             :                  * If any other type of fork, check if there is an init fork
    1254             :                  * with the same RelFileNumber. If so, the file can be
    1255             :                  * excluded.
    1256             :                  */
    1257      418766 :                 memcpy(relNumber, de->d_name, relnumchars);
    1258      418766 :                 relNumber[relnumchars] = '\0';
    1259      418766 :                 snprintf(initForkFile, sizeof(initForkFile), "%s/%s_init",
    1260             :                          path, relNumber);
    1261             : 
    1262      418766 :                 if (lstat(initForkFile, &statbuf) == 0)
    1263             :                 {
    1264         136 :                     elog(DEBUG2,
    1265             :                          "unlogged relation file \"%s\" excluded from backup",
    1266             :                          de->d_name);
    1267             : 
    1268         136 :                     continue;
    1269             :                 }
    1270             :             }
    1271             :         }
    1272             : 
    1273             :         /* Exclude temporary relations */
    1274      466234 :         if (isDbDir && looks_like_temp_rel_name(de->d_name))
    1275             :         {
    1276          72 :             elog(DEBUG2,
    1277             :                  "temporary relation file \"%s\" excluded from backup",
    1278             :                  de->d_name);
    1279             : 
    1280          72 :             continue;
    1281             :         }
    1282             : 
    1283      466162 :         snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path, de->d_name);
    1284             : 
    1285             :         /* Skip pg_control here so that it can be backed up last */
    1286      466162 :         if (strcmp(pathbuf, "./global/pg_control") == 0)
    1287         464 :             continue;
    1288             : 
    1289      465698 :         if (lstat(pathbuf, &statbuf) != 0)
    1290             :         {
    1291           0 :             if (errno != ENOENT)
    1292           0 :                 ereport(ERROR,
    1293             :                         (errcode_for_file_access(),
    1294             :                          errmsg("could not stat file or directory \"%s\": %m",
    1295             :                                 pathbuf)));
    1296             : 
    1297             :             /* If the file went away while scanning, it's not an error. */
    1298           0 :             continue;
    1299             :         }
    1300             : 
    1301             :         /*
                     :          * Scan for directories whose contents should be excluded.  A tar
                     :          * header is still written for the directory itself; only its
                     :          * contents are left out, because we do not recurse into it.
                     :          */
    1302      465698 :         excludeFound = false;
    1303     3712548 :         for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
    1304             :         {
    1305     3250102 :             if (strcmp(de->d_name, excludeDirContents[excludeIdx]) == 0)
    1306             :             {
    1307        3252 :                 elog(DEBUG1, "contents of directory \"%s\" excluded from backup", de->d_name);
    1308        3252 :                 convert_link_to_directory(pathbuf, &statbuf);
    1309        3252 :                 size += _tarWriteHeader(sink, pathbuf + basepathlen + 1, NULL,
    1310             :                                         &statbuf, sizeonly);
    1311        3252 :                 excludeFound = true;
    1312        3252 :                 break;
    1313             :             }
    1314             :         }
    1315             : 
    1316      465698 :         if (excludeFound)
    1317        3252 :             continue;
    1318             : 
    1319             :         /*
    1320             :          * We can skip pg_wal, the WAL segments need to be fetched from the
    1321             :          * WAL archive anyway. But include it as an empty directory anyway, so
    1322             :          * we get permissions right.
    1323             :          */
    1324      462446 :         if (strcmp(pathbuf, "./pg_wal") == 0)
    1325             :         {
    1326             :             /* If pg_wal is a symlink, write it as a directory anyway */
    1327         462 :             convert_link_to_directory(pathbuf, &statbuf);
    1328         462 :             size += _tarWriteHeader(sink, pathbuf + basepathlen + 1, NULL,
    1329             :                                     &statbuf, sizeonly);
    1330             : 
    1331             :             /*
    1332             :              * Also send archive_status directory (by hackishly reusing
    1333             :              * statbuf from above ...).
    1334             :              */
    1335         462 :             size += _tarWriteHeader(sink, "./pg_wal/archive_status", NULL,
    1336             :                                     &statbuf, sizeonly);
    1337             : 
    1338         462 :             continue;           /* don't recurse into pg_wal */
    1339             :         }
    1340             : 
    1341             :         /* Allow symbolic links in pg_tblspc only */
    1342      461984 :         if (strcmp(path, "./pg_tblspc") == 0 && S_ISLNK(statbuf.st_mode))
    1343          58 :         {
    1344             :             char        linkpath[MAXPGPATH];
    1345             :             int         rllen;
    1346             : 
    1347          58 :             rllen = readlink(pathbuf, linkpath, sizeof(linkpath));
    1348          58 :             if (rllen < 0)
    1349           0 :                 ereport(ERROR,
    1350             :                         (errcode_for_file_access(),
    1351             :                          errmsg("could not read symbolic link \"%s\": %m",
    1352             :                                 pathbuf)));
    1353          58 :             if (rllen >= sizeof(linkpath))
    1354           0 :                 ereport(ERROR,
    1355             :                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
    1356             :                          errmsg("symbolic link \"%s\" target is too long",
    1357             :                                 pathbuf)));
    1358          58 :             linkpath[rllen] = '\0';
    1359             : 
    1360          58 :             size += _tarWriteHeader(sink, pathbuf + basepathlen + 1, linkpath,
    1361             :                                     &statbuf, sizeonly);
    1362             :         }
    1363      461926 :         else if (S_ISDIR(statbuf.st_mode))
    1364             :         {
    1365        7642 :             bool        skip_this_dir = false;
    1366             :             ListCell   *lc;
    1367             : 
    1368             :             /*
    1369             :              * Store a directory entry in the tar file so we can get the
    1370             :              * permissions right.
    1371             :              */
    1372        7642 :             size += _tarWriteHeader(sink, pathbuf + basepathlen + 1, NULL, &statbuf,
    1373             :                                     sizeonly);
    1374             : 
    1375             :             /*
    1376             :              * Call ourselves recursively for a directory, unless it happens
    1377             :              * to be a separate tablespace located within PGDATA.
    1378             :              */
    1379       17192 :             foreach(lc, tablespaces)
    1380             :             {
    1381        9608 :                 tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
    1382             : 
    1383             :                 /*
    1384             :                  * ti->rpath is the tablespace relative path within PGDATA, or
    1385             :                  * NULL if the tablespace has been properly located somewhere
    1386             :                  * else.
    1387             :                  *
    1388             :                  * Skip past the leading "./" in pathbuf when comparing.
    1389             :                  */
    1390        9608 :                 if (ti->rpath && strcmp(ti->rpath, pathbuf + 2) == 0)
    1391             :                 {
    1392          58 :                     skip_this_dir = true;
    1393          58 :                     break;
    1394             :                 }
    1395             :             }
    1396             : 
    1397             :             /*
    1398             :              * skip sending directories inside pg_tblspc, if not required.
    1399             :              */
    1400        7642 :             if (strcmp(pathbuf, "./pg_tblspc") == 0 && !sendtblspclinks)
    1401          34 :                 skip_this_dir = true;
    1402             : 
    1403        7642 :             if (!skip_this_dir)
    1404        7550 :                 size += sendDir(sink, pathbuf, basepathlen, sizeonly, tablespaces,
    1405             :                                 sendtblspclinks, manifest, spcoid);
    1406             :         }
    1407      454284 :         else if (S_ISREG(statbuf.st_mode))
    1408             :         {
    1409      454284 :             bool        sent = false;
    1410             : 
    1411      454284 :             if (!sizeonly)
    1412      427134 :                 sent = sendFile(sink, pathbuf, pathbuf + basepathlen + 1, &statbuf,
    1413      205530 :                                 true, isDbDir ? atooid(lastDir + 1) : InvalidOid,
    1414             :                                 manifest, spcoid);
    1415             : 
    1416      454282 :             if (sent || sizeonly)
    1417             :             {
    1418             :                 /* Add size. */
    1419      454282 :                 size += statbuf.st_size;
    1420             : 
    1421             :                 /* Pad to a multiple of the tar block size. */
    1422      454282 :                 size += tarPaddingBytesRequired(statbuf.st_size);
    1423             : 
    1424             :                 /* Size of the header for the file. */
    1425      454282 :                 size += TAR_BLOCK_SIZE;
    1426             :             }
    1427             :         }
    1428             :         else
    1429           0 :             ereport(WARNING,
    1430             :                     (errmsg("skipping special file \"%s\"", pathbuf)));
    1431             :     }
    1432        8124 :     FreeDir(dir);
    1433        8124 :     return size;
    1434             : }
    1435             : 
    1436             : /*
    1437             :  * Check if a file should have its checksum validated.
    1438             :  * We validate checksums on files in regular tablespaces
    1439             :  * (including global and default) only, and in those there
    1440             :  * are some files that are explicitly excluded.
                     :  *
                     :  * 'fullpath' is used only for the location test: it has to start with
                     :  * "./global/", "./base/" or "/".  'filename' is the name that is compared
                     :  * against the noChecksumFiles list.
                     :  *
                     :  * Returns true if the location test passes and 'filename' matches no
                     :  * entry in noChecksumFiles; returns false otherwise.
    1441             :  */
    1442             : static bool
    1443       54688 : is_checksummed_file(const char *fullpath, const char *filename)
    1444             : {
    1445             :     /*
                     :      * Check that the file is in a tablespace.  (A path starting with "/"
                     :      * is presumably a tablespace located outside the data directory --
                     :      * confirm against the callers.)
                     :      */
    1446       54688 :     if (strncmp(fullpath, "./global/", 9) == 0 ||
    1447       51040 :         strncmp(fullpath, "./base/", 7) == 0 ||
    1448         694 :         strncmp(fullpath, "/", 1) == 0)
    1449             :     {
    1450             :         int         excludeIdx;
    1451             : 
    1452             :         /*
                     :          * Compare file against noChecksumFiles skip list.  For entries
                     :          * without match_prefix the comparison length is extended by one so
                     :          * that the terminating NUL is compared as well, which makes the
                     :          * comparison an exact match instead of a prefix match.
                     :          */
    1453      269174 :         for (excludeIdx = 0; noChecksumFiles[excludeIdx].name != NULL; excludeIdx++)
    1454             :         {
    1455      215564 :             int         cmplen = strlen(noChecksumFiles[excludeIdx].name);
    1456             : 
    1457      215564 :             if (!noChecksumFiles[excludeIdx].match_prefix)
    1458      161786 :                 cmplen++;
    1459      215564 :             if (strncmp(filename, noChecksumFiles[excludeIdx].name,
    1460             :                         cmplen) == 0)
    1461         394 :                 return false;
    1462             :         }
    1463             : 
    1464       53610 :         return true;
    1465             :     }
    1466             :     else
    1467         684 :         return false;
    1468             : }
    1469             : 
    1470             : /*
    1471             :  * Given the member, write the TAR header & send the file.
    1472             :  *
    1473             :  * If 'missing_ok' is true, will not throw an error if the file is not found.
    1474             :  *
    1475             :  * If dboid is anything other than InvalidOid then any checksum failures
    1476             :  * detected will get reported to the cumulative stats system.
    1477             :  *
    1478             :  * Returns true if the file was successfully sent, false if 'missing_ok',
    1479             :  * and the file did not exist.
    1480             :  */
    1481             : static bool
    1482      221828 : sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
    1483             :          struct stat *statbuf, bool missing_ok, Oid dboid,
    1484             :          backup_manifest_info *manifest, const char *spcoid)
    1485             : {
    1486             :     int         fd;
    1487      221828 :     BlockNumber blkno = 0;
    1488      221828 :     bool        block_retry = false;
    1489             :     uint16      checksum;
    1490      221828 :     int         checksum_failures = 0;
    1491             :     off_t       cnt;
    1492             :     int         i;
    1493      221828 :     pgoff_t     len = 0;
    1494             :     char       *page;
    1495             :     PageHeader  phdr;
    1496      221828 :     int         segmentno = 0;
    1497             :     char       *segmentpath;
    1498      221828 :     bool        verify_checksum = false;
    1499             :     pg_checksum_context checksum_ctx;
    1500             : 
    1501      221828 :     if (pg_checksum_init(&checksum_ctx, manifest->checksum_type) < 0)
    1502           0 :         elog(ERROR, "could not initialize checksum of file \"%s\"",
    1503             :              readfilename);
    1504             : 
    1505      221828 :     fd = OpenTransientFile(readfilename, O_RDONLY | PG_BINARY);
    1506      221828 :     if (fd < 0)
    1507             :     {
    1508           0 :         if (errno == ENOENT && missing_ok)
    1509           0 :             return false;
    1510           0 :         ereport(ERROR,
    1511             :                 (errcode_for_file_access(),
    1512             :                  errmsg("could not open file \"%s\": %m", readfilename)));
    1513             :     }
    1514             : 
    1515      221828 :     _tarWriteHeader(sink, tarfilename, NULL, statbuf, false);
    1516             : 
    1517      221826 :     if (!noverify_checksums && DataChecksumsEnabled())
    1518             :     {
    1519             :         char       *filename;
    1520             : 
    1521             :         /*
    1522             :          * Get the filename (excluding path).  As last_dir_separator()
    1523             :          * includes the last directory separator, we chop that off by
    1524             :          * incrementing the pointer.
    1525             :          */
    1526       54688 :         filename = last_dir_separator(readfilename) + 1;
    1527             : 
    1528       54688 :         if (is_checksummed_file(readfilename, filename))
    1529             :         {
    1530       53610 :             verify_checksum = true;
    1531             : 
    1532             :             /*
    1533             :              * Cut off at the segment boundary (".") to get the segment number
    1534             :              * in order to mix it into the checksum.
    1535             :              */
    1536       53610 :             segmentpath = strstr(filename, ".");
    1537       53610 :             if (segmentpath != NULL)
    1538             :             {
    1539           0 :                 segmentno = atoi(segmentpath + 1);
    1540           0 :                 if (segmentno == 0)
    1541           0 :                     ereport(ERROR,
    1542             :                             (errmsg("invalid segment number %d in file \"%s\"",
    1543             :                                     segmentno, filename)));
    1544             :             }
    1545             :         }
    1546             :     }
    1547             : 
    1548             :     /*
    1549             :      * Loop until we read the amount of data the caller told us to expect. The
    1550             :      * file could be longer, if it was extended while we were sending it, but
    1551             :      * for a base backup we can ignore such extended data. It will be restored
    1552             :      * from WAL.
    1553             :      */
    1554      489832 :     while (len < statbuf->st_size)
    1555             :     {
    1556      268006 :         size_t      remaining = statbuf->st_size - len;
    1557             : 
    1558             :         /* Try to read some more data. */
    1559      536012 :         cnt = basebackup_read_file(fd, sink->bbs_buffer,
    1560      268006 :                                    Min(sink->bbs_buffer_length, remaining),
    1561             :                                    len, readfilename, true);
    1562             : 
    1563             :         /*
    1564             :          * The checksums are verified at block level, so we iterate over the
    1565             :          * buffer in chunks of BLCKSZ, after making sure that
    1566             :          * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of
    1567             :          * BLCKSZ bytes.
    1568             :          */
    1569             :         Assert((sink->bbs_buffer_length % BLCKSZ) == 0);
    1570             : 
    1571      268006 :         if (verify_checksum && (cnt % BLCKSZ != 0))
    1572             :         {
    1573           0 :             ereport(WARNING,
    1574             :                     (errmsg("could not verify checksum in file \"%s\", block "
    1575             :                             "%u: read buffer size %d and page size %d "
    1576             :                             "differ",
    1577             :                             readfilename, blkno, (int) cnt, BLCKSZ)));
    1578           0 :             verify_checksum = false;
    1579             :         }
    1580             : 
    1581      268006 :         if (verify_checksum)
    1582             :         {
    1583      224032 :             for (i = 0; i < cnt / BLCKSZ; i++)
    1584             :             {
    1585      159150 :                 page = sink->bbs_buffer + BLCKSZ * i;
    1586             : 
    1587             :                 /*
    1588             :                  * Only check pages which have not been modified since the
    1589             :                  * start of the base backup. Otherwise, they might have been
    1590             :                  * written only halfway and the checksum would not be valid.
    1591             :                  * However, replaying WAL would reinstate the correct page in
    1592             :                  * this case. We also skip completely new pages, since they
    1593             :                  * don't have a checksum yet.
    1594             :                  */
    1595      159150 :                 if (!PageIsNew(page) && PageGetLSN(page) < sink->bbs_state->startptr)
    1596             :                 {
    1597      158998 :                     checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE);
    1598      158998 :                     phdr = (PageHeader) page;
    1599      158998 :                     if (phdr->pd_checksum != checksum)
    1600             :                     {
    1601             :                         /*
    1602             :                          * Retry the block on the first failure.  It's
    1603             :                          * possible that we read the first 4K page of the
    1604             :                          * block just before postgres updated the entire block
    1605             :                          * so it ends up looking torn to us. If, before we
    1606             :                          * retry the read, the concurrent write of the block
    1607             :                          * finishes, the page LSN will be updated and we'll
    1608             :                          * realize that we should ignore this block.
    1609             :                          *
    1610             :                          * There's no guarantee that this will actually
    1611             :                          * happen, though: the torn write could take an
    1612             :                          * arbitrarily long time to complete. Retrying
    1613             :                          * multiple times wouldn't fix this problem, either,
    1614             :                          * though it would reduce the chances of it happening
    1615             :                          * in practice. The only real fix here seems to be to
    1616             :                          * have some kind of interlock that allows us to wait
    1617             :                          * until we can be certain that no write to the block
    1618             :                          * is in progress. Since we don't have any such thing
    1619             :                          * right now, we just do this and hope for the best.
    1620             :                          */
    1621          56 :                         if (block_retry == false)
    1622             :                         {
    1623             :                             int         reread_cnt;
    1624             : 
    1625             :                             /* Reread the failed block */
    1626             :                             reread_cnt =
    1627          28 :                                 basebackup_read_file(fd,
    1628          28 :                                                      sink->bbs_buffer + BLCKSZ * i,
    1629          28 :                                                      BLCKSZ, len + BLCKSZ * i,
    1630             :                                                      readfilename,
    1631             :                                                      false);
    1632          28 :                             if (reread_cnt == 0)
    1633             :                             {
    1634             :                                 /*
    1635             :                                  * If we hit end-of-file, a concurrent
    1636             :                                  * truncation must have occurred, so break out
    1637             :                                  * of this loop just as if the initial fread()
    1638             :                                  * returned 0. We'll drop through to the same
    1639             :                                  * code that handles that case. (We must fix
    1640             :                                  * up cnt first, though.)
    1641             :                                  */
    1642           0 :                                 cnt = BLCKSZ * i;
    1643           0 :                                 break;
    1644             :                             }
    1645             : 
    1646             :                             /* Set flag so we know a retry was attempted */
    1647          28 :                             block_retry = true;
    1648             : 
    1649             :                             /* Reset loop to validate the block again */
    1650          28 :                             i--;
    1651          28 :                             continue;
    1652             :                         }
    1653             : 
    1654          28 :                         checksum_failures++;
    1655             : 
    1656          28 :                         if (checksum_failures <= 5)
    1657          24 :                             ereport(WARNING,
    1658             :                                     (errmsg("checksum verification failed in "
    1659             :                                             "file \"%s\", block %u: calculated "
    1660             :                                             "%X but expected %X",
    1661             :                                             readfilename, blkno, checksum,
    1662             :                                             phdr->pd_checksum)));
    1663          28 :                         if (checksum_failures == 5)
    1664           4 :                             ereport(WARNING,
    1665             :                                     (errmsg("further checksum verification "
    1666             :                                             "failures in file \"%s\" will not "
    1667             :                                             "be reported", readfilename)));
    1668             :                     }
    1669             :                 }
    1670      159122 :                 block_retry = false;
    1671      159122 :                 blkno++;
    1672             :             }
    1673             :         }
    1674             : 
    1675             :         /*
    1676             :          * If we hit end-of-file, a concurrent truncation must have occurred.
    1677             :          * That's not an error condition, because WAL replay will fix things
    1678             :          * up.
    1679             :          */
    1680      268006 :         if (cnt == 0)
    1681           0 :             break;
    1682             : 
    1683             :         /* Archive the data we just read. */
    1684      268006 :         bbsink_archive_contents(sink, cnt);
    1685             : 
    1686             :         /* Also feed it to the checksum machinery. */
    1687      268006 :         if (pg_checksum_update(&checksum_ctx,
    1688      268006 :                                (uint8 *) sink->bbs_buffer, cnt) < 0)
    1689           0 :             elog(ERROR, "could not update checksum of base backup");
    1690             : 
    1691      268006 :         len += cnt;
    1692             :     }
    1693             : 
    1694             :     /* If the file was truncated while we were sending it, pad it with zeros */
    1695      221826 :     while (len < statbuf->st_size)
    1696             :     {
    1697           0 :         size_t      remaining = statbuf->st_size - len;
    1698           0 :         size_t      nbytes = Min(sink->bbs_buffer_length, remaining);
    1699             : 
    1700           0 :         MemSet(sink->bbs_buffer, 0, nbytes);
    1701           0 :         if (pg_checksum_update(&checksum_ctx,
    1702           0 :                                (uint8 *) sink->bbs_buffer,
    1703             :                                nbytes) < 0)
    1704           0 :             elog(ERROR, "could not update checksum of base backup");
    1705           0 :         bbsink_archive_contents(sink, nbytes);
    1706           0 :         len += nbytes;
    1707             :     }
    1708             : 
    1709             :     /*
    1710             :      * Pad to a block boundary, per tar format requirements. (This small piece
    1711             :      * of data is probably not worth throttling, and is not checksummed
    1712             :      * because it's not actually part of the file.)
    1713             :      */
    1714      221826 :     _tarWritePadding(sink, len);
    1715             : 
    1716      221826 :     CloseTransientFile(fd);
    1717             : 
    1718      221826 :     if (checksum_failures > 1)
    1719             :     {
    1720           4 :         ereport(WARNING,
    1721             :                 (errmsg_plural("file \"%s\" has a total of %d checksum verification failure",
    1722             :                                "file \"%s\" has a total of %d checksum verification failures",
    1723             :                                checksum_failures,
    1724             :                                readfilename, checksum_failures)));
    1725             : 
    1726           4 :         pgstat_report_checksum_failures_in_db(dboid, checksum_failures);
    1727             :     }
    1728             : 
    1729      221826 :     total_checksum_failures += checksum_failures;
    1730             : 
    1731      221826 :     AddFileToBackupManifest(manifest, spcoid, tarfilename, statbuf->st_size,
    1732      221826 :                             (pg_time_t) statbuf->st_mtime, &checksum_ctx);
    1733             : 
    1734      221826 :     return true;
    1735             : }
    1736             : /*
                     :  * Emit the tar header for one archive member.
                     :  *
                     :  * If sizeonly is false, the header is built by tarCreateHeader() in
                     :  * sink->bbs_buffer from filename, linktarget and the st_size, st_mode,
                     :  * st_uid, st_gid and st_mtime fields of statbuf, and TAR_BLOCK_SIZE bytes
                     :  * of that buffer are then handed to bbsink_archive_contents().  A file
                     :  * name or symbolic link target that tarCreateHeader() reports as too long
                     :  * is raised as an ERROR, as is any result code not handled in the switch.
                     :  *
                     :  * If sizeonly is true, nothing is built or sent, and none of the other
                     :  * arguments are examined.
                     :  *
                     :  * Returns TAR_BLOCK_SIZE in both cases.
                     :  */
    1737             : static int64
    1738      234144 : _tarWriteHeader(bbsink *sink, const char *filename, const char *linktarget,
    1739             :                 struct stat *statbuf, bool sizeonly)
    1740             : {
    1741             :     enum tarError rc;
    1742             : 
    1743      234144 :     if (!sizeonly)
    1744             :     {
    1745             :         /*
    1746             :          * As of this writing, the smallest supported block size is 1kB, which
    1747             :          * is twice TAR_BLOCK_SIZE. Since the buffer size is required to be a
    1748             :          * multiple of BLCKSZ, it should be safe to assume that the buffer is
    1749             :          * large enough to fit an entire tar block. We double-check by means
    1750             :          * of these assertions.
    1751             :          */
    1752             :         StaticAssertDecl(TAR_BLOCK_SIZE <= BLCKSZ,
    1753             :                          "BLCKSZ too small for tar block");
    1754             :         Assert(sink->bbs_buffer_length >= TAR_BLOCK_SIZE);
    1755             : 
    1756      228060 :         rc = tarCreateHeader(sink->bbs_buffer, filename, linktarget,
    1757             :                              statbuf->st_size, statbuf->st_mode,
    1758             :                              statbuf->st_uid, statbuf->st_gid,
    1759             :                              statbuf->st_mtime);
    1760             : 
    1761      228060 :         switch (rc)
    1762             :         {
    1763      228058 :             case TAR_OK:
    1764      228058 :                 break;
    1765           2 :             case TAR_NAME_TOO_LONG:
    1766           2 :                 ereport(ERROR,
    1767             :                         (errmsg("file name too long for tar format: \"%s\"",
    1768             :                                 filename)));
    1769             :                 break;
    1770           0 :             case TAR_SYMLINK_TOO_LONG:
    1771           0 :                 ereport(ERROR,
    1772             :                         (errmsg("symbolic link target too long for tar format: "
    1773             :                                 "file name \"%s\", target \"%s\"",
    1774             :                                 filename, linktarget)));
    1775             :                 break;
    1776           0 :             default:
    1777           0 :                 elog(ERROR, "unrecognized tar error: %d", rc);
    1778             :         }
    1779             : 
    1780      228058 :         bbsink_archive_contents(sink, TAR_BLOCK_SIZE);
    1781             :     }
    1782             : 
    1783      234142 :     return TAR_BLOCK_SIZE;  /* same value whether or not a header was emitted */
    1784             : }
    1785             : 
    1786             : /*
    1787             :  * Pad with zero bytes out to a multiple of TAR_BLOCK_SIZE.
                     :  *
                     :  * The number of padding bytes is whatever tarPaddingBytesRequired(len)
                     :  * returns.  If that is greater than zero, the first 'pad' bytes of
                     :  * sink->bbs_buffer are overwritten with zeros and passed to
                     :  * bbsink_archive_contents(); otherwise nothing is sent and the buffer is
                     :  * left alone.
                     :  *
                     :  * NOTE(review): len is declared int; confirm that no caller passes a byte
                     :  * count too large for it.
    1788             :  */
    1789             : static void
    1790      222120 : _tarWritePadding(bbsink *sink, int len)
    1791             : {
    1792      222120 :     int         pad = tarPaddingBytesRequired(len);
    1793             : 
    1794             :     /*
    1795             :      * As in _tarWriteHeader, it should be safe to assume that the buffer is
    1796             :      * large enough that we don't need to do this in multiple chunks.
    1797             :      */
    1798             :     Assert(sink->bbs_buffer_length >= TAR_BLOCK_SIZE);
    1799             :     Assert(pad <= TAR_BLOCK_SIZE);
    1800             : 
    1801      222120 :     if (pad > 0)
    1802             :     {
    1803       43200 :         MemSet(sink->bbs_buffer, 0, pad);
    1804        3312 :         bbsink_archive_contents(sink, pad);
    1805             :     }
    1806      222120 : }
    1807             : 
    1808             : /*
    1809             :  * If the entry in statbuf is a link, then adjust statbuf to make it look like a
    1810             :  * directory, so that it will be written that way.
                     :  *
                     :  * Only statbuf->st_mode is changed: for a symbolic link it is replaced
                     :  * with S_IFDIR | pg_dir_create_mode.  Entries that are not symbolic links
                     :  * are left untouched.  pathbuf is not referenced by the function body.
    1811             :  */
    1812             : static void
    1813        3714 : convert_link_to_directory(const char *pathbuf, struct stat *statbuf)
    1814             : {
    1815             :     /* If symlink, write it as a directory anyway */
    1816        3714 :     if (S_ISLNK(statbuf->st_mode))
    1817         132 :         statbuf->st_mode = S_IFDIR | pg_dir_create_mode;
    1818        3714 : }
    1819             : 
    1820             : /*
    1821             :  * Read some data from a file, setting a wait event and reporting any error
    1822             :  * encountered.
    1823             :  *
    1824             :  * If partial_read_ok is false, also report an error if the number of bytes
    1825             :  * read is not equal to the number of bytes requested.
    1826             :  *
    1827             :  * Returns the number of bytes read.
                     :  *
                     :  * The read is a single pg_pread() call for nbytes bytes into buf at the
                     :  * given offset, bracketed by the WAIT_EVENT_BASEBACKUP_READ wait event.
                     :  * filename is used only in error messages.
                     :  *
                     :  * A result of zero bytes is never reported as an error here, even when
                     :  * partial_read_ok is false; callers have to check for it themselves.
                     :  *
                     :  * NOTE(review): the pg_pread() result is kept in an int, and the
                     :  * rc != nbytes test compares it against a size_t; the preceding rc > 0
                     :  * test keeps a negative value out of that comparison.  Presumably reads
                     :  * never approach INT_MAX bytes -- confirm against callers.
    1828             :  */
    1829             : static int
    1830      279298 : basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset,
    1831             :                      const char *filename, bool partial_read_ok)
    1832             : {
    1833             :     int         rc;
    1834             : 
    1835      279298 :     pgstat_report_wait_start(WAIT_EVENT_BASEBACKUP_READ);
    1836      279298 :     rc = pg_pread(fd, buf, nbytes, offset);
    1837      279298 :     pgstat_report_wait_end();
    1838             : 
    1839      279298 :     if (rc < 0)
    1840           0 :         ereport(ERROR,
    1841             :                 (errcode_for_file_access(),
    1842             :                  errmsg("could not read file \"%s\": %m", filename)));
    1843      279298 :     if (!partial_read_ok && rc > 0 && rc != nbytes)
    1844           0 :         ereport(ERROR,
    1845             :                 (errcode_for_file_access(),
    1846             :                  errmsg("could not read file \"%s\": read %d of %zu",
    1847             :                         filename, rc, nbytes)));
    1848             : 
    1849      279298 :     return rc;
    1850             : }

Generated by: LCOV version 1.14