LCOV - code coverage report
Current view: top level - src/backend/replication - basebackup.c (source / functions) Hit Total Coverage
Test: PostgreSQL 14devel Lines: 502 624 80.4 %
Date: 2020-11-27 11:06:40 Functions: 16 17 94.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * basebackup.c
       4             :  *    code for taking a base backup and streaming it to a standby
       5             :  *
       6             :  * Portions Copyright (c) 2010-2020, PostgreSQL Global Development Group
       7             :  *
       8             :  * IDENTIFICATION
       9             :  *    src/backend/replication/basebackup.c
      10             :  *
      11             :  *-------------------------------------------------------------------------
      12             :  */
      13             : #include "postgres.h"
      14             : 
      15             : #include <sys/stat.h>
      16             : #include <unistd.h>
      17             : #include <time.h>
      18             : 
      19             : #include "access/xlog_internal.h" /* for pg_start/stop_backup */
      20             : #include "catalog/pg_type.h"
      21             : #include "common/file_perm.h"
      22             : #include "commands/progress.h"
      23             : #include "lib/stringinfo.h"
      24             : #include "libpq/libpq.h"
      25             : #include "libpq/pqformat.h"
      26             : #include "miscadmin.h"
      27             : #include "nodes/pg_list.h"
      28             : #include "pgstat.h"
      29             : #include "pgtar.h"
      30             : #include "port.h"
      31             : #include "postmaster/syslogger.h"
      32             : #include "replication/basebackup.h"
      33             : #include "replication/backup_manifest.h"
      34             : #include "replication/walsender.h"
      35             : #include "replication/walsender_private.h"
      36             : #include "storage/bufpage.h"
      37             : #include "storage/checksum.h"
      38             : #include "storage/dsm_impl.h"
      39             : #include "storage/fd.h"
      40             : #include "storage/ipc.h"
      41             : #include "storage/reinit.h"
      42             : #include "utils/builtins.h"
      43             : #include "utils/ps_status.h"
      44             : #include "utils/relcache.h"
      45             : #include "utils/resowner.h"
      46             : #include "utils/timestamp.h"
      47             : 
      48             : typedef struct             /* options for one base backup; see parse_basebackup_options() */
      49             : {
      50             :     const char *label;          /* backup label handed to do_pg_start_backup() */
      51             :     bool        progress;       /* estimate the total backup size before streaming? */
      52             :     bool        fastcheckpoint; /* passed through to do_pg_start_backup() */
      53             :     bool        nowait;         /* negated, then passed to do_pg_stop_backup() */
      54             :     bool        includewal;     /* append the required WAL files to the main tar? */
      55             :     uint32      maxrate;        /* throttling limit, 0 = no throttling; scaled by 1024 when applied (presumably kB/s -- confirm) */
      56             :     bool        sendtblspcmapfile;  /* send the tablespace_map file in the main tar? */
      57             :     backup_manifest_option manifest;    /* passed to InitializeBackupManifest() */
      58             :     pg_checksum_type manifest_checksum_type;    /* passed to InitializeBackupManifest() */
      59             : } basebackup_options;
      60             : 
      61             : static int64 sendTablespace(char *path, char *oid, bool sizeonly,
      62             :                             struct backup_manifest_info *manifest);
      63             : static int64 sendDir(const char *path, int basepathlen, bool sizeonly,
      64             :                      List *tablespaces, bool sendtblspclinks,
      65             :                      backup_manifest_info *manifest, const char *spcoid);
      66             : static bool sendFile(const char *readfilename, const char *tarfilename,
      67             :                      struct stat *statbuf, bool missing_ok, Oid dboid,
      68             :                      backup_manifest_info *manifest, const char *spcoid);
      69             : static void sendFileWithContent(const char *filename, const char *content,
      70             :                                 backup_manifest_info *manifest);
      71             : static int64 _tarWriteHeader(const char *filename, const char *linktarget,
      72             :                              struct stat *statbuf, bool sizeonly);
      73             : static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
      74             :                           bool sizeonly);
      75             : static void send_int8_string(StringInfoData *buf, int64 intval);
      76             : static void SendBackupHeader(List *tablespaces);
      77             : static void perform_base_backup(basebackup_options *opt);
      78             : static void parse_basebackup_options(List *options, basebackup_options *opt);
      79             : static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
      80             : static int  compareWalFileNames(const ListCell *a, const ListCell *b);
      81             : static void throttle(size_t increment);
      82             : static void update_basebackup_progress(int64 delta);
      83             : static bool is_checksummed_file(const char *fullpath, const char *filename);
      84             : static int  basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset,
      85             :                                  const char *filename, bool partial_read_ok);
      86             : 
      87             : /* Was the backup currently in-progress initiated in recovery mode? */
      88             : static bool backup_started_in_recovery = false; /* set from RecoveryInProgress() in perform_base_backup() */
      89             : 
      90             : /* Relative ("./"-prefixed) path of the temporary statistics directory */
      91             : static char *statrelpath = NULL;
      92             : 
      93             : /*
      94             :  * Size of each block sent into the tar stream for larger files.
      95             :  */
      96             : #define TAR_SEND_SIZE 32768
      97             : 
      98             : /*
      99             :  * How frequently to throttle, as a fraction of the specified rate-second.
     100             :  */
     101             : #define THROTTLING_FREQUENCY    8
     102             : 
     103             : /* Number of bytes whose transfer may trigger a sleep: maxrate * 1024 / THROTTLING_FREQUENCY. */
     104             : static uint64 throttling_sample;
     105             : 
     106             : /* Amount of data already transferred but not yet throttled; -1 means throttling is disabled. */
     107             : static int64 throttling_counter;
     108             : 
     109             : /* The minimum time required to transfer throttling_sample bytes (USECS_PER_SEC / THROTTLING_FREQUENCY). */
     110             : static TimeOffset elapsed_min_unit;
     111             : 
     112             : /* The last check of the transfer rate. */
     113             : static TimestampTz throttled_last;
     114             : 
     115             : /* The starting XLOG position of the base backup, as returned by do_pg_start_backup(). */
     116             : static XLogRecPtr startptr;
     117             : 
     118             : /* Total number of checksum failures during base backup; reset to 0 when a backup starts. */
     119             : static long long int total_checksum_failures;
     120             : 
     121             : /* Do not verify checksums. */
     122             : static bool noverify_checksums = false;
     123             : 
     124             : /*
     125             :  * Total amount of backup data that will be streamed.
     126             :  * -1 means that the size is not estimated (set when progress reporting is off).
     127             :  */
     128             : static int64 backup_total = 0;
     129             : 
     130             : /* Amount of backup data already streamed; reset to 0 when a backup starts. */
     131             : static int64 backup_streamed = 0;
     132             : 
     133             : /*
     134             :  * One element of an exclusion list.  Such lists name the paths to skip
     135             :  * during checksum validation or to leave out of base backups.  "name" is
     136             :  * the file or path name to check for exclusion.  If "match_prefix" is true,
     137             :  * any item whose name starts with "name" is excluded.
     138             :  */
     139             : struct exclude_list_item
     140             : {
     141             :     const char *name;           /* file or path name to check for exclusion */
     142             :     bool        match_prefix;   /* true: exclude every item that has "name" as a prefix */
     143             : };
     144             : 
     145             : /*
     146             :  * The contents of these directories are removed or recreated during server
     147             :  * start so they are not included in backups.  The directories themselves are
     148             :  * kept and included as empty to preserve access permissions.
     149             :  *
     150             :  * Note: this list should be kept in sync with the filter lists in pg_rewind's
     151             :  * filemap.c.
     152             :  */
     153             : static const char *const excludeDirContents[] =
     154             : {
     155             :     /*
     156             :      * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped even
     157             :      * when stats_temp_directory is set because PGSS_TEXT_FILE is always
     158             :      * created there.
     159             :      */
     160             :     PG_STAT_TMP_DIR,
     161             : 
     162             :     /*
     163             :      * It is generally not useful to back up the contents of this directory
     164             :      * even if the intention is to restore to another primary. See backup.sgml
     165             :      * for a more detailed description.
     166             :      */
     167             :     "pg_replslot",
     168             : 
     169             :     /* Contents removed on startup, see dsm_cleanup_for_mmap(). */
     170             :     PG_DYNSHMEM_DIR,
     171             : 
     172             :     /* Contents removed on startup, see AsyncShmemInit(). */
     173             :     "pg_notify",
     174             : 
     175             :     /*
     176             :      * Old contents are loaded for possible debugging but are not required for
     177             :      * normal operation, see SerialInit().
     178             :      */
     179             :     "pg_serial",
     180             : 
     181             :     /* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
     182             :     "pg_snapshots",
     183             : 
     184             :     /* Contents zeroed on startup, see StartupSUBTRANS(). */
     185             :     "pg_subtrans",
     186             : 
     187             :     /* end of list; the NULL entry terminates the array */
     188             :     NULL
     189             : };
     190             : 
     191             : /*
     192             :  * List of files excluded from backups.
     193             :  */
     194             : static const struct exclude_list_item excludeFiles[] =
     195             : {
     196             :     /* Skip auto conf temporary file. */
     197             :     {PG_AUTOCONF_FILENAME ".tmp", false},
     198             : 
     199             :     /* Skip current log file temporary file */
     200             :     {LOG_METAINFO_DATAFILE_TMP, false},
     201             : 
     202             :     /*
     203             :      * Skip relation cache because it is rebuilt on startup.  This includes
     204             :      * temporary files.
     205             :      */
     206             :     {RELCACHE_INIT_FILENAME, true}, /* prefix match, so temporary variants are covered too */
     207             : 
     208             :     /*
     209             :      * If there's a backup_label or tablespace_map file, it belongs to a
     210             :      * backup started by the user with pg_start_backup().  It is *not* correct
     211             :      * for this backup.  Our backup_label/tablespace_map is injected into the
     212             :      * tar separately.
     213             :      */
     214             :     {BACKUP_LABEL_FILE, false},
     215             :     {TABLESPACE_MAP, false},
     216             : 
     217             :     /*
     218             :      * If there's a backup_manifest, it belongs to a backup that was used to
     219             :      * start this server. It is *not* correct for this backup. Our
     220             :      * backup_manifest is injected into the backup separately if users want
     221             :      * it.
     222             :      */
     223             :     {"backup_manifest", false},
     224             : 
     225             :     {"postmaster.pid", false},  /* NOTE(review): presumably describes the running server, not the backup -- confirm */
     226             :     {"postmaster.opts", false}, /* NOTE(review): presumably excluded for the same reason -- confirm */
     227             : 
     228             :     /* end of list; the entry with a NULL name terminates the array */
     229             :     {NULL, false}
     230             : };
     231             : 
     232             : /*
     233             :  * List of files excluded from checksum validation.
     234             :  *
     235             :  * Note: this list should be kept in sync with what pg_checksums.c
     236             :  * includes.
     237             :  */
     238             : static const struct exclude_list_item noChecksumFiles[] = {
     239             :     {"pg_control", false},
     240             :     {"pg_filenode.map", false},
     241             :     {"pg_internal.init", true},     /* prefix match */
     242             :     {"PG_VERSION", false},
     243             : #ifdef EXEC_BACKEND
     244             :     {"config_exec_params", true},   /* prefix match; entry exists only in EXEC_BACKEND builds */
     245             : #endif
     246             :     {NULL, false}                   /* end of list */
     247             : };
     248             : 
     249             : /*
     250             :  * Actually do a base backup for the specified tablespaces.
     251             :  *
     252             :  * This is split out mainly to avoid complaints about "variable might be
     253             :  * clobbered by longjmp" from stupider versions of gcc.
     254             :  */
     255             : static void
     256         150 : perform_base_backup(basebackup_options *opt)
     257             : {
     258             :     TimeLineID  starttli;
     259             :     XLogRecPtr  endptr;
     260             :     TimeLineID  endtli;
     261             :     StringInfo  labelfile;
     262             :     StringInfo  tblspc_map_file;
     263             :     backup_manifest_info manifest;
     264             :     int         datadirpathlen;
     265         150 :     List       *tablespaces = NIL;
     266             : 
     267         150 :     backup_total = 0;
     268         150 :     backup_streamed = 0;
     269         150 :     pgstat_progress_start_command(PROGRESS_COMMAND_BASEBACKUP, InvalidOid);
     270             : 
     271             :     /*
     272             :      * If the estimation of the total backup size is disabled, make the
     273             :      * backup_total column in the view return NULL by setting the parameter to
     274             :      * -1.
     275             :      */
     276         150 :     if (!opt->progress)
     277             :     {
     278           0 :         backup_total = -1;
     279           0 :         pgstat_progress_update_param(PROGRESS_BASEBACKUP_BACKUP_TOTAL,
     280             :                                      backup_total);
     281             :     }
     282             : 
     283             :     /* we're going to use a BufFile, so we need a ResourceOwner */
     284             :     Assert(CurrentResourceOwner == NULL);
     285         150 :     CurrentResourceOwner = ResourceOwnerCreate(NULL, "base backup");
     286             : 
     287         150 :     datadirpathlen = strlen(DataDir);
     288             : 
     289         150 :     backup_started_in_recovery = RecoveryInProgress();
     290             : 
     291         150 :     labelfile = makeStringInfo();
     292         150 :     tblspc_map_file = makeStringInfo();
     293         150 :     InitializeBackupManifest(&manifest, opt->manifest,
     294             :                              opt->manifest_checksum_type);
     295             : 
     296         150 :     total_checksum_failures = 0;
     297             : 
     298         150 :     pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE,
     299             :                                  PROGRESS_BASEBACKUP_PHASE_WAIT_CHECKPOINT);
     300         300 :     startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
     301             :                                   labelfile, &tablespaces,
     302         150 :                                   tblspc_map_file, opt->sendtblspcmapfile);
     303             : 
     304             :     /*
     305             :      * Once do_pg_start_backup has been called, ensure that any failure causes
     306             :      * us to abort the backup so we don't "leak" a backup counter. For this
     307             :      * reason, *all* functionality between do_pg_start_backup() and the end of
     308             :      * do_pg_stop_backup() should be inside the error cleanup block!
     309             :      */
     310             : 
     311         150 :     PG_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(false));
     312             :     {
     313             :         ListCell   *lc;
     314             :         tablespaceinfo *ti;
     315         150 :         int         tblspc_streamed = 0;
     316             : 
     317             :         /*
     318             :          * Calculate the relative path of temporary statistics directory in
     319             :          * order to skip the files which are located in that directory later.
     320             :          */
     321         150 :         if (is_absolute_path(pgstat_stat_directory) &&
     322           0 :             strncmp(pgstat_stat_directory, DataDir, datadirpathlen) == 0)
     323           0 :             statrelpath = psprintf("./%s", pgstat_stat_directory + datadirpathlen + 1);
     324         150 :         else if (strncmp(pgstat_stat_directory, "./", 2) != 0)
     325         150 :             statrelpath = psprintf("./%s", pgstat_stat_directory);
     326             :         else
     327           0 :             statrelpath = pgstat_stat_directory;
     328             : 
     329             :         /* Add a node for the base directory at the end */
     330         150 :         ti = palloc0(sizeof(tablespaceinfo));
     331         150 :         ti->size = -1;
     332         150 :         tablespaces = lappend(tablespaces, ti);
     333             : 
     334             :         /*
     335             :          * Calculate the total backup size by summing up the size of each
     336             :          * tablespace
     337             :          */
     338         150 :         if (opt->progress)
     339             :         {
     340         150 :             pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE,
     341             :                                          PROGRESS_BASEBACKUP_PHASE_ESTIMATE_BACKUP_SIZE);
     342             : 
     343         332 :             foreach(lc, tablespaces)
     344             :             {
     345         182 :                 tablespaceinfo *tmp = (tablespaceinfo *) lfirst(lc);
     346             : 
     347         182 :                 if (tmp->path == NULL)
     348         150 :                     tmp->size = sendDir(".", 1, true, tablespaces, true, NULL,
     349             :                                         NULL);
     350             :                 else
     351          32 :                     tmp->size = sendTablespace(tmp->path, tmp->oid, true,
     352             :                                                NULL);
     353         182 :                 backup_total += tmp->size;
     354             :             }
     355             :         }
     356             : 
     357             :         /* Report that we are now streaming database files as a base backup */
     358             :         {
     359         150 :             const int   index[] = {
     360             :                 PROGRESS_BASEBACKUP_PHASE,
     361             :                 PROGRESS_BASEBACKUP_BACKUP_TOTAL,
     362             :                 PROGRESS_BASEBACKUP_TBLSPC_TOTAL
     363             :             };
     364         300 :             const int64 val[] = {
     365             :                 PROGRESS_BASEBACKUP_PHASE_STREAM_BACKUP,
     366         150 :                 backup_total, list_length(tablespaces)
     367             :             };
     368             : 
     369         150 :             pgstat_progress_update_multi_param(3, index, val);
     370             :         }
     371             : 
     372             :         /* Send the starting position of the backup */
     373         150 :         SendXlogRecPtrResult(startptr, starttli);
     374             : 
     375             :         /* Send tablespace header */
     376         150 :         SendBackupHeader(tablespaces);
     377             : 
     378             :         /* Setup and activate network throttling, if client requested it */
     379         150 :         if (opt->maxrate > 0)
     380             :         {
     381           0 :             throttling_sample =
     382           0 :                 (int64) opt->maxrate * (int64) 1024 / THROTTLING_FREQUENCY;
     383             : 
     384             :             /*
     385             :              * The minimum amount of time for throttling_sample bytes to be
     386             :              * transferred.
     387             :              */
     388           0 :             elapsed_min_unit = USECS_PER_SEC / THROTTLING_FREQUENCY;
     389             : 
     390             :             /* Enable throttling. */
     391           0 :             throttling_counter = 0;
     392             : 
     393             :             /* The 'real data' starts now (header was ignored). */
     394           0 :             throttled_last = GetCurrentTimestamp();
     395             :         }
     396             :         else
     397             :         {
     398             :             /* Disable throttling. */
     399         150 :             throttling_counter = -1;
     400             :         }
     401             : 
     402             :         /* Send off our tablespaces one by one */
     403         326 :         foreach(lc, tablespaces)
     404             :         {
     405         182 :             tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
     406             :             StringInfoData buf;
     407             : 
     408             :             /* Send CopyOutResponse message */
     409         182 :             pq_beginmessage(&buf, 'H');
     410         182 :             pq_sendbyte(&buf, 0);   /* overall format */
     411         182 :             pq_sendint16(&buf, 0);  /* natts */
     412         182 :             pq_endmessage(&buf);
     413             : 
     414         182 :             if (ti->path == NULL)
     415             :             {
     416             :                 struct stat statbuf;
     417         150 :                 bool    sendtblspclinks = true;
     418             : 
     419             :                 /* In the main tar, include the backup_label first... */
     420         150 :                 sendFileWithContent(BACKUP_LABEL_FILE, labelfile->data,
     421             :                                     &manifest);
     422             : 
     423             :                 /* Then the tablespace_map file, if required... */
     424         150 :                 if (opt->sendtblspcmapfile)
     425             :                 {
     426          10 :                     sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data,
     427             :                                         &manifest);
     428          10 :                     sendtblspclinks = false;
     429             :                 }
     430             : 
     431             :                 /* Then the bulk of the files... */
     432         150 :                 sendDir(".", 1, false, tablespaces, sendtblspclinks,
     433             :                         &manifest, NULL);
     434             : 
     435             :                 /* ... and pg_control after everything else. */
     436         144 :                 if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
     437           0 :                     ereport(ERROR,
     438             :                             (errcode_for_file_access(),
     439             :                              errmsg("could not stat file \"%s\": %m",
     440             :                                     XLOG_CONTROL_FILE)));
     441         144 :                 sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf,
     442             :                          false, InvalidOid, &manifest, NULL);
     443             :             }
     444             :             else
     445          32 :                 sendTablespace(ti->path, ti->oid, false, &manifest);
     446             : 
     447             :             /*
     448             :              * If we're including WAL, and this is the main data directory we
     449             :              * don't terminate the tar stream here. Instead, we will append
     450             :              * the xlog files below and terminate it then. This is safe since
     451             :              * the main data directory is always sent *last*.
     452             :              */
     453         176 :             if (opt->includewal && ti->path == NULL)
     454             :             {
     455             :                 Assert(lnext(tablespaces, lc) == NULL);
     456             :             }
     457             :             else
     458         174 :                 pq_putemptymessage('c');    /* CopyDone */
     459             : 
     460         176 :             tblspc_streamed++;
     461         176 :             pgstat_progress_update_param(PROGRESS_BASEBACKUP_TBLSPC_STREAMED,
     462             :                                          tblspc_streamed);
     463             :         }
     464             : 
     465         144 :         pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE,
     466             :                                      PROGRESS_BASEBACKUP_PHASE_WAIT_WAL_ARCHIVE);
     467         144 :         endptr = do_pg_stop_backup(labelfile->data, !opt->nowait, &endtli);
     468             :     }
     469         294 :     PG_END_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(false));
     470             : 
     471             : 
     472         144 :     if (opt->includewal)
     473             :     {
     474             :         /*
     475             :          * We've left the last tar file "open", so we can now append the
     476             :          * required WAL files to it.
     477             :          */
     478             :         char        pathbuf[MAXPGPATH];
     479             :         XLogSegNo   segno;
     480             :         XLogSegNo   startsegno;
     481             :         XLogSegNo   endsegno;
     482             :         struct stat statbuf;
     483           2 :         List       *historyFileList = NIL;
     484           2 :         List       *walFileList = NIL;
     485             :         char        firstoff[MAXFNAMELEN];
     486             :         char        lastoff[MAXFNAMELEN];
     487             :         DIR        *dir;
     488             :         struct dirent *de;
     489             :         ListCell   *lc;
     490             :         TimeLineID  tli;
     491             : 
     492           2 :         pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE,
     493             :                                      PROGRESS_BASEBACKUP_PHASE_TRANSFER_WAL);
     494             : 
     495             :         /*
     496             :          * I'd rather not worry about timelines here, so scan pg_wal and
     497             :          * include all WAL files in the range between 'startptr' and 'endptr',
     498             :          * regardless of the timeline the file is stamped with. If there are
     499             :          * some spurious WAL files belonging to timelines that don't belong in
     500             :          * this server's history, they will be included too. Normally there
     501             :          * shouldn't be such files, but if there are, there's little harm in
     502             :          * including them.
     503             :          */
     504           2 :         XLByteToSeg(startptr, startsegno, wal_segment_size);
     505           2 :         XLogFileName(firstoff, ThisTimeLineID, startsegno, wal_segment_size);
     506           2 :         XLByteToPrevSeg(endptr, endsegno, wal_segment_size);
     507           2 :         XLogFileName(lastoff, ThisTimeLineID, endsegno, wal_segment_size);
     508             : 
     509           2 :         dir = AllocateDir("pg_wal");
     510          14 :         while ((de = ReadDir(dir, "pg_wal")) != NULL)
     511             :         {
     512             :             /* Does it look like a WAL segment, and is it in the range? */
     513          12 :             if (IsXLogFileName(de->d_name) &&
     514           6 :                 strcmp(de->d_name + 8, firstoff + 8) >= 0 &&
     515           6 :                 strcmp(de->d_name + 8, lastoff + 8) <= 0)
     516             :             {
     517           2 :                 walFileList = lappend(walFileList, pstrdup(de->d_name));
     518             :             }
     519             :             /* Does it look like a timeline history file? */
     520          10 :             else if (IsTLHistoryFileName(de->d_name))
     521             :             {
     522           0 :                 historyFileList = lappend(historyFileList, pstrdup(de->d_name));
     523             :             }
     524             :         }
     525           2 :         FreeDir(dir);
     526             : 
     527             :         /*
     528             :          * Before we go any further, check that none of the WAL segments we
     529             :          * need were removed.
     530             :          */
     531           2 :         CheckXLogRemoved(startsegno, ThisTimeLineID);
     532             : 
     533             :         /*
     534             :          * Sort the WAL filenames.  We want to send the files in order from
     535             :          * oldest to newest, to reduce the chance that a file is recycled
     536             :          * before we get a chance to send it over.
     537             :          */
     538           2 :         list_sort(walFileList, compareWalFileNames);
     539             : 
     540             :         /*
     541             :          * There must be at least one xlog file in the pg_wal directory, since
     542             :          * we are doing backup-including-xlog.
     543             :          */
     544           2 :         if (walFileList == NIL)
     545           0 :             ereport(ERROR,
     546             :                     (errmsg("could not find any WAL files")));
     547             : 
     548             :         /*
     549             :          * Sanity check: the first and last segment should cover startptr and
     550             :          * endptr, with no gaps in between.
     551             :          */
     552           2 :         XLogFromFileName((char *) linitial(walFileList),
     553             :                          &tli, &segno, wal_segment_size);
     554           2 :         if (segno != startsegno)
     555             :         {
     556             :             char        startfname[MAXFNAMELEN];
     557             : 
     558           0 :             XLogFileName(startfname, ThisTimeLineID, startsegno,
     559             :                          wal_segment_size);
     560           0 :             ereport(ERROR,
     561             :                     (errmsg("could not find WAL file \"%s\"", startfname)));
     562             :         }
     563           4 :         foreach(lc, walFileList)
     564             :         {
     565           2 :             char       *walFileName = (char *) lfirst(lc);
     566           2 :             XLogSegNo   currsegno = segno;
     567           2 :             XLogSegNo   nextsegno = segno + 1;
     568             : 
     569           2 :             XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
     570           2 :             if (!(nextsegno == segno || currsegno == segno))
     571             :             {
     572             :                 char        nextfname[MAXFNAMELEN];
     573             : 
     574           0 :                 XLogFileName(nextfname, ThisTimeLineID, nextsegno,
     575             :                              wal_segment_size);
     576           0 :                 ereport(ERROR,
     577             :                         (errmsg("could not find WAL file \"%s\"", nextfname)));
     578             :             }
     579             :         }
     580           2 :         if (segno != endsegno)
     581             :         {
     582             :             char        endfname[MAXFNAMELEN];
     583             : 
     584           0 :             XLogFileName(endfname, ThisTimeLineID, endsegno, wal_segment_size);
     585           0 :             ereport(ERROR,
     586             :                     (errmsg("could not find WAL file \"%s\"", endfname)));
     587             :         }
     588             : 
     589             :         /* Ok, we have everything we need. Send the WAL files. */
     590           4 :         foreach(lc, walFileList)
     591             :         {
     592           2 :             char       *walFileName = (char *) lfirst(lc);
     593             :             int         fd;
     594             :             char        buf[TAR_SEND_SIZE];
     595             :             size_t      cnt;
     596           2 :             pgoff_t     len = 0;
     597             : 
     598           2 :             snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFileName);
     599           2 :             XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
     600             : 
     601           2 :             fd = OpenTransientFile(pathbuf, O_RDONLY | PG_BINARY);
     602           2 :             if (fd < 0)
     603             :             {
     604           0 :                 int         save_errno = errno;
     605             : 
     606             :                 /*
     607             :                  * Most likely reason for this is that the file was already
     608             :                  * removed by a checkpoint, so check for that to get a better
     609             :                  * error message.
     610             :                  */
     611           0 :                 CheckXLogRemoved(segno, tli);
     612             : 
     613           0 :                 errno = save_errno;
     614           0 :                 ereport(ERROR,
     615             :                         (errcode_for_file_access(),
     616             :                          errmsg("could not open file \"%s\": %m", pathbuf)));
     617             :             }
     618             : 
     619           2 :             if (fstat(fd, &statbuf) != 0)
     620           0 :                 ereport(ERROR,
     621             :                         (errcode_for_file_access(),
     622             :                          errmsg("could not stat file \"%s\": %m",
     623             :                                 pathbuf)));
     624           2 :             if (statbuf.st_size != wal_segment_size)
     625             :             {
     626           0 :                 CheckXLogRemoved(segno, tli);
     627           0 :                 ereport(ERROR,
     628             :                         (errcode_for_file_access(),
     629             :                          errmsg("unexpected WAL file size \"%s\"", walFileName)));
     630             :             }
     631             : 
     632             :             /* send the WAL file itself */
     633           2 :             _tarWriteHeader(pathbuf, NULL, &statbuf, false);
     634             : 
     635        1026 :             while ((cnt = basebackup_read_file(fd, buf,
     636        1024 :                                                Min(sizeof(buf),
     637             :                                                    wal_segment_size - len),
     638             :                                                len, pathbuf, true)) > 0)
     639             :             {
     640        1024 :                 CheckXLogRemoved(segno, tli);
     641             :                 /* Send the chunk as a CopyData message */
     642        1024 :                 if (pq_putmessage('d', buf, cnt))
     643           0 :                     ereport(ERROR,
     644             :                             (errmsg("base backup could not send data, aborting backup")));
     645        1024 :                 update_basebackup_progress(cnt);
     646             : 
     647        1024 :                 len += cnt;
     648        1024 :                 throttle(cnt);
     649             : 
     650        1024 :                 if (len == wal_segment_size)
     651           2 :                     break;
     652             :             }
     653             : 
     654           2 :             if (len != wal_segment_size)
     655             :             {
     656           0 :                 CheckXLogRemoved(segno, tli);
     657           0 :                 ereport(ERROR,
     658             :                         (errcode_for_file_access(),
     659             :                          errmsg("unexpected WAL file size \"%s\"", walFileName)));
     660             :             }
     661             : 
     662             :             /*
     663             :              * wal_segment_size is a multiple of TAR_BLOCK_SIZE, so no need
     664             :              * for padding.
     665             :              */
     666             :             Assert(wal_segment_size % TAR_BLOCK_SIZE == 0);
     667             : 
     668           2 :             CloseTransientFile(fd);
     669             : 
     670             :             /*
     671             :              * Mark file as archived, otherwise files can get archived again
     672             :              * after promotion of a new node. This is in line with
     673             :              * walreceiver.c always doing an XLogArchiveForceDone() after a
     674             :              * complete segment.
     675             :              */
     676           2 :             StatusFilePath(pathbuf, walFileName, ".done");
     677           2 :             sendFileWithContent(pathbuf, "", &manifest);
     678             :         }
     679             : 
     680             :         /*
     681             :          * Send timeline history files too. Only the latest timeline history
     682             :          * file is required for recovery, and even that only if there happens
     683             :          * to be a timeline switch in the first WAL segment that contains the
     684             :          * checkpoint record, or if we're taking a base backup from a standby
     685             :          * server and the target timeline changes while the backup is taken.
     686             :          * But they are small and highly useful for debugging purposes, so
     687             :          * better include them all, always.
     688             :          */
     689           2 :         foreach(lc, historyFileList)
     690             :         {
     691           0 :             char       *fname = lfirst(lc);
     692             : 
     693           0 :             snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", fname);
     694             : 
     695           0 :             if (lstat(pathbuf, &statbuf) != 0)
     696           0 :                 ereport(ERROR,
     697             :                         (errcode_for_file_access(),
     698             :                          errmsg("could not stat file \"%s\": %m", pathbuf)));
     699             : 
     700           0 :             sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid,
     701             :                      &manifest, NULL);
     702             : 
     703             :             /* unconditionally mark file as archived */
     704           0 :             StatusFilePath(pathbuf, fname, ".done");
     705           0 :             sendFileWithContent(pathbuf, "", &manifest);
     706             :         }
     707             : 
     708             :         /* Send CopyDone message for the last tar file */
     709           2 :         pq_putemptymessage('c');
     710             :     }
     711             : 
     712         144 :     AddWALInfoToBackupManifest(&manifest, startptr, starttli, endptr, endtli);
     713             : 
     714         144 :     SendBackupManifest(&manifest);
     715             : 
     716         144 :     SendXlogRecPtrResult(endptr, endtli);
     717             : 
     718         144 :     if (total_checksum_failures)
     719             :     {
     720           6 :         if (total_checksum_failures > 1)
     721           4 :             ereport(WARNING,
     722             :                     (errmsg_plural("%lld total checksum verification failure",
     723             :                                    "%lld total checksum verification failures",
     724             :                                    total_checksum_failures,
     725             :                                    total_checksum_failures)));
     726             : 
     727           6 :         ereport(ERROR,
     728             :                 (errcode(ERRCODE_DATA_CORRUPTED),
     729             :                  errmsg("checksum verification failure during base backup")));
     730             :     }
     731             : 
     732             :     /* clean up the resource owner we created */
     733         138 :     WalSndResourceCleanup(true);
     734             : 
     735         138 :     pgstat_progress_end_command();
     736         138 : }
     737             : 
     738             : /*
     739             :  * list_sort comparison function for WAL segment filenames: orders two list
     740             :  * cells by the log/seg portion of their names, ignoring the timeline portion.
     741             :  */
     742             : static int
     743           0 : compareWalFileNames(const ListCell *a, const ListCell *b)
     744             : {
     745           0 :     char       *fna = (char *) lfirst(a);
     746           0 :     char       *fnb = (char *) lfirst(b);
     747             :     /* the first 8 characters are the timeline portion; compare what follows */
     748           0 :     return strcmp(fna + 8, fnb + 8);
     749             : }
     750             : 
     751             : /*
     752             :  * Parse the base backup options passed down by the parser into *opt.
     753             :  */
     754             : static void
     755         152 : parse_basebackup_options(List *options, basebackup_options *opt)
     756             : {
     757             :     ListCell   *lopt;
     758         152 :     bool        o_label = false;    /* o_xxx: option xxx already seen */
     759         152 :     bool        o_progress = false;
     760         152 :     bool        o_fast = false;
     761         152 :     bool        o_nowait = false;
     762         152 :     bool        o_wal = false;
     763         152 :     bool        o_maxrate = false;
     764         152 :     bool        o_tablespace_map = false;
     765         152 :     bool        o_noverify_checksums = false;
     766         152 :     bool        o_manifest = false;
     767         152 :     bool        o_manifest_checksums = false;
     768             :     /* zero every field, then set the manifest defaults explicitly */
     769         760 :     MemSet(opt, 0, sizeof(*opt));
     770         152 :     opt->manifest = MANIFEST_OPTION_NO;
     771         152 :     opt->manifest_checksum_type = CHECKSUM_TYPE_CRC32C;
     772             :     /* every option may be given at most once; a repeat is a syntax error */
     773         844 :     foreach(lopt, options)
     774             :     {
     775         694 :         DefElem    *defel = (DefElem *) lfirst(lopt);
     776             : 
     777         694 :         if (strcmp(defel->defname, "label") == 0)
     778             :         {
     779         152 :             if (o_label)
     780           0 :                 ereport(ERROR,
     781             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     782             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     783         152 :             opt->label = strVal(defel->arg);
     784         152 :             o_label = true;
     785             :         }
     786         542 :         else if (strcmp(defel->defname, "progress") == 0)
     787             :         {
     788         152 :             if (o_progress)
     789           0 :                 ereport(ERROR,
     790             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     791             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     792         152 :             opt->progress = true;
     793         152 :             o_progress = true;
     794             :         }
     795         390 :         else if (strcmp(defel->defname, "fast") == 0)
     796             :         {
     797          62 :             if (o_fast)
     798           0 :                 ereport(ERROR,
     799             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     800             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     801          62 :             opt->fastcheckpoint = true;
     802          62 :             o_fast = true;
     803             :         }
     804         328 :         else if (strcmp(defel->defname, "nowait") == 0)
     805             :         {
     806         150 :             if (o_nowait)
     807           0 :                 ereport(ERROR,
     808             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     809             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     810         150 :             opt->nowait = true;
     811         150 :             o_nowait = true;
     812             :         }
     813         178 :         else if (strcmp(defel->defname, "wal") == 0)
     814             :         {
     815           2 :             if (o_wal)
     816           0 :                 ereport(ERROR,
     817             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     818             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     819           2 :             opt->includewal = true;
     820           2 :             o_wal = true;
     821             :         }
     822         176 :         else if (strcmp(defel->defname, "max_rate") == 0)
     823             :         {
     824             :             long        maxrate;
     825             : 
     826           0 :             if (o_maxrate)
     827           0 :                 ereport(ERROR,
     828             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     829             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     830             :             /* range-check the value as long before narrowing it to uint32 */
     831           0 :             maxrate = intVal(defel->arg);
     832           0 :             if (maxrate < MAX_RATE_LOWER || maxrate > MAX_RATE_UPPER)
     833           0 :                 ereport(ERROR,
     834             :                         (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
     835             :                          errmsg("%d is outside the valid range for parameter \"%s\" (%d .. %d)",
     836             :                                 (int) maxrate, "MAX_RATE", MAX_RATE_LOWER, MAX_RATE_UPPER)));
     837             : 
     838           0 :             opt->maxrate = (uint32) maxrate;
     839           0 :             o_maxrate = true;
     840             :         }
     841         176 :         else if (strcmp(defel->defname, "tablespace_map") == 0)
     842             :         {
     843          10 :             if (o_tablespace_map)
     844           0 :                 ereport(ERROR,
     845             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     846             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     847          10 :             opt->sendtblspcmapfile = true;
     848          10 :             o_tablespace_map = true;
     849             :         }
     850         166 :         else if (strcmp(defel->defname, "noverify_checksums") == 0)
     851             :         {
     852           2 :             if (o_noverify_checksums)
     853           0 :                 ereport(ERROR,
     854             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     855             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     856           2 :             noverify_checksums = true;  /* not a field of *opt: declared outside this function */
     857           2 :             o_noverify_checksums = true;
     858             :         }
     859         164 :         else if (strcmp(defel->defname, "manifest") == 0)
     860             :         {
     861         150 :             char       *optval = strVal(defel->arg);
     862             :             bool        manifest_bool;
     863             :             /* value: a boolean accepted by parse_bool(), or "force-encode" */
     864         150 :             if (o_manifest)
     865           0 :                 ereport(ERROR,
     866             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     867             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     868         150 :             if (parse_bool(optval, &manifest_bool))
     869             :             {
     870         148 :                 if (manifest_bool)
     871         148 :                     opt->manifest = MANIFEST_OPTION_YES;
     872             :                 else
     873           0 :                     opt->manifest = MANIFEST_OPTION_NO;
     874             :             }
     875           2 :             else if (pg_strcasecmp(optval, "force-encode") == 0)
     876           2 :                 opt->manifest = MANIFEST_OPTION_FORCE_ENCODE;
     877             :             else
     878           0 :                 ereport(ERROR,
     879             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     880             :                          errmsg("unrecognized manifest option: \"%s\"",
     881             :                                 optval)));
     882         150 :             o_manifest = true;
     883             :         }
     884          14 :         else if (strcmp(defel->defname, "manifest_checksums") == 0)
     885             :         {
     886          14 :             char       *optval = strVal(defel->arg);
     887             : 
     888          14 :             if (o_manifest_checksums)
     889           0 :                 ereport(ERROR,
     890             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     891             :                          errmsg("duplicate option \"%s\"", defel->defname)));
     892          14 :             if (!pg_checksum_parse_type(optval,
     893             :                                         &opt->manifest_checksum_type))
     894           2 :                 ereport(ERROR,
     895             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     896             :                          errmsg("unrecognized checksum algorithm: \"%s\"",
     897             :                                 optval)));
     898          12 :             o_manifest_checksums = true;
     899             :         }
     900             :         else
     901           0 :             elog(ERROR, "option \"%s\" not recognized",
     902             :                  defel->defname);
     903             :     }
     904         150 :     if (opt->label == NULL)
     905           0 :         opt->label = "base backup";
     906         150 :     if (opt->manifest == MANIFEST_OPTION_NO)
     907             :     {
     908           2 :         if (o_manifest_checksums)
     909           0 :             ereport(ERROR,
     910             :                     (errcode(ERRCODE_SYNTAX_ERROR),
     911             :                      errmsg("manifest checksums require a backup manifest")));
     912           2 :         opt->manifest_checksum_type = CHECKSUM_TYPE_NONE;   /* replaces the CRC32C default set above */
     913             :     }
     914         150 : }
     915             : 
     916             : 
     917             : /*
     918             :  * SendBaseBackup() - send a complete base backup.
     919             :  *
     920             :  * The function will put the system into backup mode like pg_start_backup()
     921             :  * does, so that the backup is consistent even though we read directly from
     922             :  * the filesystem, bypassing the buffer cache.
     923             :  */
     924             : void
     925         152 : SendBaseBackup(BaseBackupCmd *cmd)
     926             : {
     927             :     basebackup_options opt;
     928             : 
     929         152 :     parse_basebackup_options(cmd->options, &opt);
     930             :     /* switch the walsender state to WALSNDSTATE_BACKUP before starting */
     931         150 :     WalSndSetState(WALSNDSTATE_BACKUP);
     932             : 
     933         150 :     if (update_process_title)
     934             :     {
     935             :         char        activitymsg[50];    /* snprintf truncates long labels */
     936             : 
     937         150 :         snprintf(activitymsg, sizeof(activitymsg), "sending backup \"%s\"",
     938             :                  opt.label);
     939         150 :         set_ps_display(activitymsg);
     940             :     }
     941             :     /* everything else is delegated to perform_base_backup() */
     942         150 :     perform_base_backup(&opt);
     943         138 : }
     944             : /* Append intval to buf as text: an int32 byte count, then its INT64_FORMAT output. */
     945             : static void
     946         182 : send_int8_string(StringInfoData *buf, int64 intval)
     947             : {
     948             :     char        is[32];
     949             :     /* 32 bytes suffices if INT64_FORMAT prints plain decimal (<= 20 chars); confirm */
     950         182 :     sprintf(is, INT64_FORMAT, intval);
     951         182 :     pq_sendint32(buf, strlen(is));
     952         182 :     pq_sendbytes(buf, is, strlen(is));  /* terminating NUL is not sent */
     953         182 : }
     954             : /* Send a 3-column result set (spcoid, spclocation, size): one row per list entry. */
     955             : static void
     956         150 : SendBackupHeader(List *tablespaces)
     957             : {
     958             :     StringInfoData buf;
     959             :     ListCell   *lc;
     960             : 
     961             :     /* Construct and send the directory information */
     962         150 :     pq_beginmessage(&buf, 'T'); /* RowDescription */
     963         150 :     pq_sendint16(&buf, 3);      /* 3 fields */
     964             : 
     965             :     /* First field - spcoid */
     966         150 :     pq_sendstring(&buf, "spcoid");
     967         150 :     pq_sendint32(&buf, 0);      /* table oid */
     968         150 :     pq_sendint16(&buf, 0);      /* attnum */
     969         150 :     pq_sendint32(&buf, OIDOID); /* type oid */
     970         150 :     pq_sendint16(&buf, 4);      /* typlen */
     971         150 :     pq_sendint32(&buf, 0);      /* typmod */
     972         150 :     pq_sendint16(&buf, 0);      /* format code */
     973             : 
     974             :     /* Second field - spclocation */
     975         150 :     pq_sendstring(&buf, "spclocation");
     976         150 :     pq_sendint32(&buf, 0);      /* table oid */
     977         150 :     pq_sendint16(&buf, 0);      /* attnum */
     978         150 :     pq_sendint32(&buf, TEXTOID);    /* type oid */
     979         150 :     pq_sendint16(&buf, -1);     /* typlen */
     980         150 :     pq_sendint32(&buf, 0);      /* typmod */
     981         150 :     pq_sendint16(&buf, 0);      /* format code */
     982             : 
     983             :     /* Third field - size */
     984         150 :     pq_sendstring(&buf, "size");
     985         150 :     pq_sendint32(&buf, 0);      /* table oid */
     986         150 :     pq_sendint16(&buf, 0);      /* attnum */
     987         150 :     pq_sendint32(&buf, INT8OID);    /* type oid */
     988         150 :     pq_sendint16(&buf, 8);      /* typlen */
     989         150 :     pq_sendint32(&buf, 0);      /* typmod */
     990         150 :     pq_sendint16(&buf, 0);      /* format code */
     991         150 :     pq_endmessage(&buf);
     992             : 
     993         332 :     foreach(lc, tablespaces)
     994             :     {
     995         182 :         tablespaceinfo *ti = lfirst(lc);
     996             : 
     997             :         /* Send one datarow message */
     998         182 :         pq_beginmessage(&buf, 'D');
     999         182 :         pq_sendint16(&buf, 3);  /* number of columns */
    1000         182 :         if (ti->path == NULL)   /* no path: spcoid and spclocation both go out as NULL */
    1001             :         {
    1002         150 :             pq_sendint32(&buf, -1); /* Length = -1 ==> NULL */
    1003         150 :             pq_sendint32(&buf, -1); /* spclocation is NULL as well */
    1004             :         }
    1005             :         else
    1006             :         {
    1007             :             Size        len;
    1008             : 
    1009          32 :             len = strlen(ti->oid);  /* oid is a C string here, sent as text */
    1010          32 :             pq_sendint32(&buf, len);
    1011          32 :             pq_sendbytes(&buf, ti->oid, len);
    1012             : 
    1013          32 :             len = strlen(ti->path);
    1014          32 :             pq_sendint32(&buf, len);
    1015          32 :             pq_sendbytes(&buf, ti->path, len);
    1016             :         }
    1017         182 :         if (ti->size >= 0)
    1018         182 :             send_int8_string(&buf, ti->size / 1024);    /* size / 1024; presumably bytes -> kB, confirm */
    1019             :         else
    1020           0 :             pq_sendint32(&buf, -1); /* NULL */
    1021             : 
    1022         182 :         pq_endmessage(&buf);
    1023             :     }
    1024             : 
    1025             :     /* Send a CommandComplete message */
    1026         150 :     pq_puttextmessage('C', "SELECT");
    1027         150 : }
    1028             : 
    1029             : /*
    1030             :  * Send a single-row resultset with two text-format columns: "recptr" (ptr
    1031             :  * printed as %X/%X, high then low 32 bits) and "tli" (tli printed as %u).
    1032             :  */
    1033             : static void
    1034         294 : SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli)
    1035             : {
    1036             :     StringInfoData buf;
    1037             :     char        str[MAXFNAMELEN];
    1038             :     Size        len;
    1039             : 
    1040         294 :     pq_beginmessage(&buf, 'T'); /* RowDescription */
    1041         294 :     pq_sendint16(&buf, 2);      /* 2 fields */
    1042             : 
    1043             :     /* Field headers */
    1044         294 :     pq_sendstring(&buf, "recptr");
    1045         294 :     pq_sendint32(&buf, 0);      /* table oid */
    1046         294 :     pq_sendint16(&buf, 0);      /* attnum */
    1047         294 :     pq_sendint32(&buf, TEXTOID);    /* type oid */
    1048         294 :     pq_sendint16(&buf, -1);     /* typlen */
    1049         294 :     pq_sendint32(&buf, 0);      /* typmod */
    1050         294 :     pq_sendint16(&buf, 0);      /* format code */
    1051             : 
    1052         294 :     pq_sendstring(&buf, "tli");
    1053         294 :     pq_sendint32(&buf, 0);      /* table oid */
    1054         294 :     pq_sendint16(&buf, 0);      /* attnum */
    1055             : 
    1056             :     /*
    1057             :      * int8 may seem like a surprising data type for this, but in theory int4
    1058             :      * would not be wide enough for this, as TimeLineID is unsigned.
    1059             :      */
    1060         294 :     pq_sendint32(&buf, INT8OID);    /* type oid */
    1061         294 :     pq_sendint16(&buf, -1);     /* typlen; NOTE(review): SendBackupHeader sends 8 for INT8OID */
    1062         294 :     pq_sendint32(&buf, 0);      /* typmod */
    1063         294 :     pq_sendint16(&buf, 0);      /* format code */
    1064         294 :     pq_endmessage(&buf);
    1065             : 
    1066             :     /* Data row */
    1067         294 :     pq_beginmessage(&buf, 'D');
    1068         294 :     pq_sendint16(&buf, 2);      /* number of columns */
    1069             :     /* snprintf's return value doubles as the byte count; no NUL is sent */
    1070         882 :     len = snprintf(str, sizeof(str),
    1071         294 :                    "%X/%X", (uint32) (ptr >> 32), (uint32) ptr);
    1072         294 :     pq_sendint32(&buf, len);
    1073         294 :     pq_sendbytes(&buf, str, len);
    1074             : 
    1075         294 :     len = snprintf(str, sizeof(str), "%u", tli);
    1076         294 :     pq_sendint32(&buf, len);
    1077         294 :     pq_sendbytes(&buf, str, len);
    1078             : 
    1079         294 :     pq_endmessage(&buf);
    1080             : 
    1081             :     /* Send a CommandComplete message */
    1082         294 :     pq_puttextmessage('C', "SELECT");
    1083         294 : }
    1084             : 
    1085             : /*
    1086             :  * Inject a file with given name and content in the output tar stream and manifest.
    1087             :  */
    1088             : static void
    1089         162 : sendFileWithContent(const char *filename, const char *content,
    1090             :                     backup_manifest_info *manifest)
    1091             : {
    1092             :     struct stat statbuf;
    1093             :     int         pad,
    1094             :                 len;
    1095             :     pg_checksum_context checksum_ctx;
    1096             : 
    1097         162 :     pg_checksum_init(&checksum_ctx, manifest->checksum_type);
    1098             :     /* content must be a NUL-terminated string: its size is taken via strlen() */
    1099         162 :     len = strlen(content);
    1100             : 
    1101             :     /*
    1102             :      * Construct a stat struct for the file we're injecting in the tar.  Only
    1103             :      * the fields assigned below are set; the rest are left uninitialized.
    1104             :      */
    1105             :     /* Windows doesn't have the concept of uid and gid */
    1106             : #ifdef WIN32
    1107             :     statbuf.st_uid = 0;
    1108             :     statbuf.st_gid = 0;
    1109             : #else
    1110         162 :     statbuf.st_uid = geteuid();
    1111         162 :     statbuf.st_gid = getegid();
    1112             : #endif
    1113         162 :     statbuf.st_mtime = time(NULL);
    1114         162 :     statbuf.st_mode = pg_file_create_mode;
    1115         162 :     statbuf.st_size = len;
    1116             : 
    1117         162 :     _tarWriteHeader(filename, NULL, &statbuf, false);
    1118             :     /* Send the contents as a CopyData message */
    1119         162 :     pq_putmessage('d', content, len);   /* NOTE(review): return value is not checked */
    1120         162 :     update_basebackup_progress(len);
    1121             : 
    1122             :     /* Pad to a multiple of the tar block size. */
    1123         162 :     pad = tarPaddingBytesRequired(len);
    1124         162 :     if (pad > 0)
    1125             :     {
    1126             :         char        buf[TAR_BLOCK_SIZE];    /* presumably pad < TAR_BLOCK_SIZE; confirm */
    1127             : 
    1128         154 :         MemSet(buf, 0, pad);
    1129         154 :         pq_putmessage('d', buf, pad);
    1130         154 :         update_basebackup_progress(pad);
    1131             :     }
    1132             :     /* the checksum covers only the content, not the tar header or padding */
    1133         162 :     pg_checksum_update(&checksum_ctx, (uint8 *) content, len);
    1134         162 :     AddFileToBackupManifest(manifest, NULL, filename, len,
    1135         162 :                             (pg_time_t) statbuf.st_mtime, &checksum_ctx);
    1136         162 : }
    1137             : 
    1138             : /*
    1139             :  * Include the tablespace directory pointed to by 'path' in the output tar
    1140             :  * stream.  If 'sizeonly' is true, we just calculate a total length and return
    1141             :  * it, without actually sending anything.  Returns 0 if the version directory
    1142             :  * under 'path' does not exist (ENOENT); other lstat failures raise an error.
    1143             :  * Only used to send auxiliary tablespaces, not PGDATA.
    1144             :  */
    1145             : static int64
    1146          64 : sendTablespace(char *path, char *spcoid, bool sizeonly,
    1147             :                backup_manifest_info *manifest)
    1148             : {
    1149             :     int64       size;
    1150             :     char        pathbuf[MAXPGPATH];
    1151             :     struct stat statbuf;
    1152             : 
    1153             :     /*
    1154             :      * 'path' points to the tablespace location, but we only want to include
    1155             :      * the version directory in it that belongs to us.
    1156             :      */
    1157          64 :     snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path,
    1158             :              TABLESPACE_VERSION_DIRECTORY);
    1159             : 
    1160             :     /*
    1161             :      * Store a directory entry in the tar file so we get the permissions
    1162             :      * right.
    1163             :      */
    1164          64 :     if (lstat(pathbuf, &statbuf) != 0)
    1165             :     {
    1166           0 :         if (errno != ENOENT)
    1167           0 :             ereport(ERROR,
    1168             :                     (errcode_for_file_access(),
    1169             :                      errmsg("could not stat file or directory \"%s\": %m",
    1170             :                             pathbuf)));
    1171             : 
    1172             :         /* If the tablespace went away while scanning, it's no error. */
    1173           0 :         return 0;
    1174             :     }
    1175             : 
    1176          64 :     size = _tarWriteHeader(TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf,
    1177             :                            sizeonly);
    1178             : 
    1179             :     /* Send the version directory's files, with strlen(path) as the base path */
    1180          64 :     size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true, manifest,
    1181             :                     spcoid);
    1182             : 
    1183          64 :     return size;
    1184             : }
    1185             : 
    1186             : /*
    1187             :  * Include all files from the given directory in the output tar stream. If
    1188             :  * 'sizeonly' is true, we just calculate a total length and return it, without
    1189             :  * actually sending anything.
    1190             :  *
    1191             :  * Omit any directory in the tablespaces list, to avoid backing up
    1192             :  * tablespaces twice when they were created inside PGDATA.
    1193             :  *
    1194             :  * If sendtblspclinks is true, we need to include symlink information in the
    1195             :  * tar file. If not, we can skip that as it will be sent separately in the
    1196             :  * tablespace_map file.  Returns the accumulated size, summed over recursion.
    1197             :  */
    1198             : static int64
    1199        5188 : sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
    1200             :         bool sendtblspclinks, backup_manifest_info *manifest,
    1201             :         const char *spcoid)
    1202             : {
    1203             :     DIR        *dir;
    1204             :     struct dirent *de;
    1205             :     char        pathbuf[MAXPGPATH * 2];
    1206             :     struct stat statbuf;
    1207        5188 :     int64       size = 0;
    1208             :     const char *lastDir;        /* Split last dir from parent path. */
    1209        5188 :     bool        isDbDir = false;    /* Does this directory contain relations? */
    1210             : 
    1211             :     /*
    1212             :      * Determine if the current path is a database directory that can contain
    1213             :      * relations.
    1214             :      *
    1215             :      * Start by finding the location of the delimiter between the parent path
    1216             :      * and the current path.
    1217             :      */
    1218        5188 :     lastDir = last_dir_separator(path);
    1219             : 
    1220             :     /* Does the last path component look like a database OID (all digits)? */
    1221        5188 :     if (lastDir != NULL &&
    1222        4888 :         strspn(lastDir + 1, "0123456789") == strlen(lastDir + 1))
    1223             :     {
    1224             :         /* Part of path that contains the parent directory. */
    1225         946 :         int         parentPathLen = lastDir - path;
    1226             : 
    1227             :         /*
    1228             :          * Mark path as a database directory if the parent path is either
    1229             :          * $PGDATA/base or a tablespace version path.
    1230             :          */
    1231         946 :         if (strncmp(path, "./base", parentPathLen) == 0 ||
    1232          56 :             (parentPathLen >= (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) &&
    1233          56 :              strncmp(lastDir - (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1),
    1234             :                      TABLESPACE_VERSION_DIRECTORY,
    1235             :                      sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) == 0))
    1236         946 :             isDbDir = true;
    1237             :     }
    1238             : 
    1239        5188 :     dir = AllocateDir(path);
    1240      301866 :     while ((de = ReadDir(dir, path)) != NULL)
    1241             :     {
    1242             :         int         excludeIdx;
    1243             :         bool        excludeFound;
    1244             :         ForkNumber  relForkNum; /* Type of fork if file is a relation */
    1245             :         int         relOidChars;    /* Chars in filename that are the rel oid */
    1246             : 
    1247             :         /* Skip the "." and ".." entries */
    1248      296690 :         if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
    1249       14752 :             continue;
    1250             : 
    1251             :         /* Skip temporary files (names starting with PG_TEMP_FILE_PREFIX) */
    1252      286330 :         if (strncmp(de->d_name,
    1253             :                     PG_TEMP_FILE_PREFIX,
    1254             :                     strlen(PG_TEMP_FILE_PREFIX)) == 0)
    1255         294 :             continue;
    1256             : 
    1257             :         /*
    1258             :          * Check if the postmaster has signaled us to exit, and abort with an
    1259             :          * error in that case. The error handler further up will call
    1260             :          * do_pg_abort_backup() for us. Also check that if the backup was
    1261             :          * started while still in recovery, the server wasn't promoted.
    1262             :          * do_pg_stop_backup() will check that too, but it's better to stop
    1263             :          * the backup early than continue to the end and fail there.
    1264             :          */
    1265      286036 :         CHECK_FOR_INTERRUPTS();
    1266      286036 :         if (RecoveryInProgress() != backup_started_in_recovery)
    1267           0 :             ereport(ERROR,
    1268             :                     (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    1269             :                      errmsg("the standby was promoted during online backup"),
    1270             :                      errhint("This means that the backup being taken is corrupt "
    1271             :                              "and should not be used. "
    1272             :                              "Try taking another online backup.")));
    1273             : 
    1274             :         /* Scan for files that should be excluded */
    1275      286036 :         excludeFound = false;
    1276     2569148 :         for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
    1277             :         {
    1278     2284384 :             int         cmplen = strlen(excludeFiles[excludeIdx].name);
    1279             : 
    1280     2284384 :             if (!excludeFiles[excludeIdx].match_prefix)
    1281     1998528 :                 cmplen++;   /* also compare the terminating NUL: exact match */
    1282     2284384 :             if (strncmp(de->d_name, excludeFiles[excludeIdx].name, cmplen) == 0)
    1283             :             {
    1284        1272 :                 elog(DEBUG1, "file \"%s\" excluded from backup", de->d_name);
    1285        1272 :                 excludeFound = true;
    1286        1272 :                 break;
    1287             :             }
    1288             :         }
    1289             : 
    1290      286036 :         if (excludeFound)
    1291        1272 :             continue;
    1292             : 
    1293             :         /* Exclude all forks for unlogged tables except the init fork */
    1294      542594 :         if (isDbDir &&
    1295      257830 :             parse_filename_for_nontemp_relation(de->d_name, &relOidChars,
    1296             :                                                 &relForkNum))
    1297             :         {
    1298             :             /* Never exclude init forks */
    1299      255978 :             if (relForkNum != INIT_FORKNUM)
    1300             :             {
    1301             :                 char        initForkFile[MAXPGPATH];
    1302             :                 char        relOid[OIDCHARS + 1];
    1303             : 
    1304             :                 /*
    1305             :                  * If any other type of fork, check if there is an init fork
    1306             :                  * with the same OID. If so, the file can be excluded.
    1307             :                  */
    1308      255880 :                 memcpy(relOid, de->d_name, relOidChars);
    1309      255880 :                 relOid[relOidChars] = '\0';
    1310      255880 :                 snprintf(initForkFile, sizeof(initForkFile), "%s/%s_init",
    1311             :                          path, relOid);
    1312             : 
    1313      255880 :                 if (lstat(initForkFile, &statbuf) == 0)
    1314             :                 {
    1315          98 :                     elog(DEBUG2,
    1316             :                          "unlogged relation file \"%s\" excluded from backup",
    1317             :                          de->d_name);
    1318             : 
    1319          98 :                     continue;
    1320             :                 }
    1321             :             }
    1322             :         }
    1323             : 
    1324             :         /* Exclude temporary relations */
    1325      284666 :         if (isDbDir && looks_like_temp_rel_name(de->d_name))
    1326             :         {
    1327          72 :             elog(DEBUG2,
    1328             :                  "temporary relation file \"%s\" excluded from backup",
    1329             :                  de->d_name);
    1330             : 
    1331          72 :             continue;
    1332             :         }
    1333             : 
    1334      284594 :         snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path, de->d_name);
    1335             : 
    1336             :         /* Skip pg_control here so that it can be backed up last */
    1337      284594 :         if (strcmp(pathbuf, "./global/pg_control") == 0)
    1338         294 :             continue;
    1339             : 
    1340      284300 :         if (lstat(pathbuf, &statbuf) != 0)
    1341             :         {
    1342           0 :             if (errno != ENOENT)
    1343           0 :                 ereport(ERROR,
    1344             :                         (errcode_for_file_access(),
    1345             :                          errmsg("could not stat file or directory \"%s\": %m",
    1346             :                                 pathbuf)));
    1347             : 
    1348             :             /* If the file went away while scanning, it's not an error. */
    1349           0 :             continue;
    1350             :         }
    1351             : 
    1352             :         /* Scan for directories whose contents should be excluded */
    1353      284300 :         excludeFound = false;
    1354     2266112 :         for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
    1355             :         {
    1356     1983880 :             if (strcmp(de->d_name, excludeDirContents[excludeIdx]) == 0)
    1357             :             {
    1358        2068 :                 elog(DEBUG1, "contents of directory \"%s\" excluded from backup", de->d_name);
    1359        2068 :                 size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
    1360        2068 :                 excludeFound = true;
    1361        2068 :                 break;
    1362             :             }
    1363             :         }
    1364             : 
    1365      284300 :         if (excludeFound)
    1366        2068 :             continue;
    1367             : 
    1368             :         /*
    1369             :          * Exclude contents of directory specified by statrelpath if not set
    1370             :          * to the default (pg_stat_tmp) which is caught in the loop above.
    1371             :          */
    1372      282232 :         if (statrelpath != NULL && strcmp(pathbuf, statrelpath) == 0)
    1373             :         {
    1374           0 :             elog(DEBUG1, "contents of directory \"%s\" excluded from backup", statrelpath);
    1375           0 :             size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
    1376           0 :             continue;
    1377             :         }
    1378             : 
    1379             :         /*
    1380             :          * We can skip pg_wal; the WAL segments need to be fetched from the
    1381             :          * WAL archive anyway. But still include it as an empty directory, so
    1382             :          * we get permissions right.
    1383             :          */
    1384      282232 :         if (strcmp(pathbuf, "./pg_wal") == 0)
    1385             :         {
    1386             :             /* If pg_wal is a symlink, write it as a directory anyway */
    1387         294 :             size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
    1388             : 
    1389             :             /*
    1390             :              * Also send archive_status directory (by hackishly reusing
    1391             :              * statbuf from above ...).
    1392             :              */
    1393         294 :             size += _tarWriteHeader("./pg_wal/archive_status", NULL, &statbuf,
    1394             :                                     sizeonly);
    1395             : 
    1396         294 :             continue;           /* don't recurse into pg_wal */
    1397             :         }
    1398             : 
    1399             :         /* Allow symbolic links in pg_tblspc only */
    1400      281938 :         if (strcmp(path, "./pg_tblspc") == 0 &&
    1401             : #ifndef WIN32
    1402          58 :             S_ISLNK(statbuf.st_mode)
    1403             : #else
    1404             :             pgwin32_is_junction(pathbuf)
    1405             : #endif
    1406             :             )
    1407          58 :         {
    1408             : #if defined(HAVE_READLINK) || defined(WIN32)
    1409             :             char        linkpath[MAXPGPATH];
    1410             :             int         rllen;
    1411             : 
    1412          58 :             rllen = readlink(pathbuf, linkpath, sizeof(linkpath));
    1413          58 :             if (rllen < 0)
    1414           0 :                 ereport(ERROR,
    1415             :                         (errcode_for_file_access(),
    1416             :                          errmsg("could not read symbolic link \"%s\": %m",
    1417             :                                 pathbuf)));
    1418          58 :             if (rllen >= sizeof(linkpath))
    1419           0 :                 ereport(ERROR,
    1420             :                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
    1421             :                          errmsg("symbolic link \"%s\" target is too long",
    1422             :                                 pathbuf)));
    1423          58 :             linkpath[rllen] = '\0';
    1424             : 
    1425          58 :             size += _tarWriteHeader(pathbuf + basepathlen + 1, linkpath,
    1426             :                                     &statbuf, sizeonly);
    1427             : #else
    1428             : 
    1429             :             /*
    1430             :              * If the platform does not have symbolic links, it should not be
    1431             :              * possible to have tablespaces - clearly somebody else created
    1432             :              * them. Warn about it and ignore.
    1433             :              */
    1434             :             ereport(WARNING,
    1435             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1436             :                      errmsg("tablespaces are not supported on this platform")));
    1437             :             continue;
    1438             : #endif                          /* HAVE_READLINK */
    1439             :         }
    1440      281880 :         else if (S_ISDIR(statbuf.st_mode))
    1441             :         {
    1442        4832 :             bool        skip_this_dir = false;
    1443             :             ListCell   *lc;
    1444             : 
    1445             :             /*
    1446             :              * Store a directory entry in the tar file so we can get the
    1447             :              * permissions right.
    1448             :              */
    1449        4832 :             size += _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf,
    1450             :                                     sizeonly);
    1451             : 
    1452             :             /*
    1453             :              * Call ourselves recursively for a directory, unless it happens
    1454             :              * to be a separate tablespace located within PGDATA.
    1455             :              */
    1456       10610 :             foreach(lc, tablespaces)
    1457             :             {
    1458        5778 :                 tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
    1459             : 
    1460             :                 /*
    1461             :                  * ti->rpath is the tablespace relative path within PGDATA, or
    1462             :                  * NULL if the tablespace has been properly located somewhere
    1463             :                  * else.
    1464             :                  *
    1465             :                  * Skip past the leading "./" in pathbuf when comparing.
    1466             :                  */
    1467        5778 :                 if (ti->rpath && strcmp(ti->rpath, pathbuf + 2) == 0)
    1468             :                 {
    1469           0 :                     skip_this_dir = true;
    1470           0 :                     break;
    1471             :                 }
    1472             :             }
    1473             : 
    1474             :             /*
    1475             :              * Skip sending directories inside pg_tblspc, if not required.
    1476             :              */
    1477        4832 :             if (strcmp(pathbuf, "./pg_tblspc") == 0 && !sendtblspclinks)
    1478           8 :                 skip_this_dir = true;
    1479             : 
    1480        4832 :             if (!skip_this_dir)
    1481        4824 :                 size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces,
    1482             :                                 sendtblspclinks, manifest, spcoid);
    1483             :         }
    1484      277048 :         else if (S_ISREG(statbuf.st_mode))
    1485             :         {
    1486      277048 :             bool        sent = false;
    1487             : 
    1488      277048 :             if (!sizeonly)
    1489      261980 :                 sent = sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf,
    1490      126226 :                                 true, isDbDir ? atooid(lastDir + 1) : InvalidOid,
    1491             :                                 manifest, spcoid);
    1492             : 
    1493      277042 :             if (sent || sizeonly)
    1494             :             {
    1495             :                 /* Add size. */
    1496      277042 :                 size += statbuf.st_size;
    1497             : 
    1498             :                 /* Pad to a multiple of the tar block size. */
    1499      277042 :                 size += tarPaddingBytesRequired(statbuf.st_size);
    1500             : 
    1501             :                 /* Size of the header for the file. */
    1502      277042 :                 size += TAR_BLOCK_SIZE;
    1503             :             }
    1504             :         }
    1505             :         else
    1506           0 :             ereport(WARNING,
    1507             :                     (errmsg("skipping special file \"%s\"", pathbuf)));
    1508             :     }
    1509        5176 :     FreeDir(dir);
    1510        5176 :     return size;
    1511             : }
    1512             : 
    1513             : /*
    1514             :  * Check if a file should have its checksum validated.
    1515             :  * We validate checksums on files in regular tablespaces
    1516             :  * (including global and default) only, and in those there
    1517             :  * are some files that are explicitly excluded.
    1518             :  */
    1519             : static bool
    1520       37516 : is_checksummed_file(const char *fullpath, const char *filename)
    1521             : {
    1522             :     /* Tablespace check: path starts with "./global/", "./base/" or "/" */
    1523       37516 :     if (strncmp(fullpath, "./global/", 9) == 0 ||
    1524       35244 :         strncmp(fullpath, "./base/", 7) == 0 ||
    1525         512 :         strncmp(fullpath, "/", 1) == 0)
    1526             :     {
    1527             :         int         excludeIdx;
    1528             : 
    1529             :         /* Compare file against noChecksumFiles skip list */
    1530      184470 :         for (excludeIdx = 0; noChecksumFiles[excludeIdx].name != NULL; excludeIdx++)
    1531             :         {
    1532      147736 :             int         cmplen = strlen(noChecksumFiles[excludeIdx].name);
    1533             : 
    1534      147736 :             if (!noChecksumFiles[excludeIdx].match_prefix)
    1535      110882 :                 cmplen++;   /* also compare the terminating NUL: exact match */
    1536      147736 :             if (strncmp(filename, noChecksumFiles[excludeIdx].name,
    1537             :                         cmplen) == 0)
    1538         280 :                 return false;
    1539             :         }
    1540             : 
    1541       36734 :         return true;
    1542             :     }
    1543             :     else
    1544         502 :         return false;
    1545             : }
    1546             : 
    1547             : /*****
    1548             :  * Functions for handling tar file format
    1549             :  *
    1550             :  * Copied from pg_dump, but modified to work with libpq for sending
    1551             :  */
    1552             : 
    1553             : 
    1554             : /*
    1555             :  * Given the member, write the TAR header & send the file.
    1556             :  *
    1557             :  * If 'missing_ok' is true, will not throw an error if the file is not found.
    1558             :  *
    1559             :  * If dboid is anything other than InvalidOid then any checksum failures detected
    1560             :  * will get reported to the stats collector.
    1561             :  *
    1562             :  * Returns true if the file was successfully sent, false if 'missing_ok',
    1563             :  * and the file did not exist.
    1564             :  */
    1565             : static bool
    1566      135898 : sendFile(const char *readfilename, const char *tarfilename,
    1567             :          struct stat *statbuf, bool missing_ok, Oid dboid,
    1568             :          backup_manifest_info *manifest, const char *spcoid)
    1569             : {
    1570             :     int         fd;
    1571      135898 :     BlockNumber blkno = 0;
    1572      135898 :     bool        block_retry = false;
    1573             :     char        buf[TAR_SEND_SIZE];
    1574             :     uint16      checksum;
    1575      135898 :     int         checksum_failures = 0;
    1576             :     off_t       cnt;
    1577             :     int         i;
    1578      135898 :     pgoff_t     len = 0;
    1579             :     char       *page;
    1580             :     size_t      pad;
    1581             :     PageHeader  phdr;
    1582      135898 :     int         segmentno = 0;
    1583             :     char       *segmentpath;
    1584      135898 :     bool        verify_checksum = false;
    1585             :     pg_checksum_context checksum_ctx;
    1586             : 
    1587      135898 :     pg_checksum_init(&checksum_ctx, manifest->checksum_type);
    1588             : 
    1589      135898 :     fd = OpenTransientFile(readfilename, O_RDONLY | PG_BINARY);
    1590      135898 :     if (fd < 0)
    1591             :     {
    1592           0 :         if (errno == ENOENT && missing_ok)
    1593           0 :             return false;
    1594           0 :         ereport(ERROR,
    1595             :                 (errcode_for_file_access(),
    1596             :                  errmsg("could not open file \"%s\": %m", readfilename)));
    1597             :     }
    1598             : 
    1599      135898 :     _tarWriteHeader(tarfilename, NULL, statbuf, false);
    1600             : 
    1601      135896 :     if (!noverify_checksums && DataChecksumsEnabled())
    1602             :     {
    1603             :         char       *filename;
    1604             : 
    1605             :         /*
    1606             :          * Get the filename (excluding path).  As last_dir_separator()
    1607             :          * includes the last directory separator, we chop that off by
    1608             :          * incrementing the pointer.
    1609             :          */
    1610       37516 :         filename = last_dir_separator(readfilename) + 1;
    1611             : 
    1612       37516 :         if (is_checksummed_file(readfilename, filename))
    1613             :         {
    1614       36734 :             verify_checksum = true;
    1615             : 
    1616             :             /*
    1617             :              * Cut off at the segment boundary (".") to get the segment number
    1618             :              * in order to mix it into the checksum.
    1619             :              */
    1620       36734 :             segmentpath = strstr(filename, ".");
    1621       36734 :             if (segmentpath != NULL)
    1622             :             {
    1623           0 :                 segmentno = atoi(segmentpath + 1);
    1624           0 :                 if (segmentno == 0)
    1625           0 :                     ereport(ERROR,
    1626             :                             (errmsg("invalid segment number %d in file \"%s\"",
    1627             :                                     segmentno, filename)));
    1628             :             }
    1629             :         }
    1630             :     }
    1631             : 
    1632             :     /*
    1633             :      * Loop until we read the amount of data the caller told us to expect. The
    1634             :      * file could be longer, if it was extended while we were sending it, but
    1635             :      * for a base backup we can ignore such extended data. It will be restored
    1636             :      * from WAL.
    1637             :      */
    1638      305780 :     while (len < statbuf->st_size)
    1639             :     {
    1640             :         /* Try to read some more data. */
    1641      339776 :         cnt = basebackup_read_file(fd, buf,
    1642      169888 :                                    Min(sizeof(buf), statbuf->st_size - len),
    1643             :                                    len, readfilename, true);
    1644             : 
    1645             :         /*
    1646             :          * If we hit end-of-file, a concurrent truncation must have occurred.
    1647             :          * That's not an error condition, because WAL replay will fix things
    1648             :          * up.
    1649             :          */
    1650      169888 :         if (cnt == 0)
    1651           0 :             break;
    1652             : 
    1653             :         /*
    1654             :          * The checksums are verified at block level, so we iterate over the
    1655             :          * buffer in chunks of BLCKSZ, after making sure that
    1656             :          * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of
    1657             :          * BLCKSZ bytes.
    1658             :          */
    1659             :         Assert(TAR_SEND_SIZE % BLCKSZ == 0);
    1660             : 
    1661      169888 :         if (verify_checksum && (cnt % BLCKSZ != 0))
    1662             :         {
    1663           0 :             ereport(WARNING,
    1664             :                     (errmsg("could not verify checksum in file \"%s\", block "
    1665             :                             "%d: read buffer size %d and page size %d "
    1666             :                             "differ",
    1667             :                             readfilename, blkno, (int) cnt, BLCKSZ)));
    1668           0 :             verify_checksum = false;
    1669             :         }
    1670             : 
    1671      169888 :         if (verify_checksum)
    1672             :         {
    1673      162368 :             for (i = 0; i < cnt / BLCKSZ; i++)
    1674             :             {
    1675      116266 :                 page = buf + BLCKSZ * i;
    1676             : 
    1677             :                 /*
    1678             :                  * Only check pages which have not been modified since the
    1679             :                  * start of the base backup. Otherwise, they might have been
    1680             :                  * written only halfway and the checksum would not be valid.
    1681             :                  * However, replaying WAL would reinstate the correct page in
    1682             :                  * this case. We also skip completely new pages, since they
    1683             :                  * don't have a checksum yet.
    1684             :                  */
    1685      116266 :                 if (!PageIsNew(page) && PageGetLSN(page) < startptr)
    1686             :                 {
    1687      116266 :                     checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE);
    1688      116266 :                     phdr = (PageHeader) page;
    1689      116266 :                     if (phdr->pd_checksum != checksum)
    1690             :                     {
    1691             :                         /*
    1692             :                          * Retry the block on the first failure.  It's
    1693             :                          * possible that we read the first 4K page of the
    1694             :                          * block just before postgres updated the entire block
    1695             :                          * so it ends up looking torn to us.  We only need to
    1696             :                          * retry once because the LSN should be updated to
    1697             :                          * something we can ignore on the next pass.  If the
    1698             :                          * error happens again then it is a true validation
    1699             :                          * failure.
    1700             :                          */
    1701          56 :                         if (block_retry == false)
    1702             :                         {
    1703             :                             int         reread_cnt;
    1704             : 
    1705             :                             /* Reread the failed block */
    1706             :                             reread_cnt =
    1707          28 :                                 basebackup_read_file(fd, buf + BLCKSZ * i,
    1708          28 :                                                      BLCKSZ, len + BLCKSZ * i,
    1709             :                                                      readfilename,
    1710             :                                                      false);
    1711          28 :                             if (reread_cnt == 0)
    1712             :                             {
    1713             :                                 /*
    1714             :                                  * If we hit end-of-file, a concurrent
    1715             :                                  * truncation must have occurred, so break out
    1716             :                                  * of this loop just as if the initial fread()
    1717             :                                  * returned 0. We'll drop through to the same
    1718             :                                  * code that handles that case. (We must fix
    1719             :                                  * up cnt first, though.)
    1720             :                                  */
    1721           0 :                                 cnt = BLCKSZ * i;
    1722           0 :                                 break;
    1723             :                             }
    1724             : 
    1725             :                             /* Set flag so we know a retry was attempted */
    1726          28 :                             block_retry = true;
    1727             : 
    1728             :                             /* Reset loop to validate the block again */
    1729          28 :                             i--;
    1730          28 :                             continue;
    1731             :                         }
    1732             : 
    1733          28 :                         checksum_failures++;
    1734             : 
    1735          28 :                         if (checksum_failures <= 5)
    1736          24 :                             ereport(WARNING,
    1737             :                                     (errmsg("checksum verification failed in "
    1738             :                                             "file \"%s\", block %d: calculated "
    1739             :                                             "%X but expected %X",
    1740             :                                             readfilename, blkno, checksum,
    1741             :                                             phdr->pd_checksum)));
    1742          28 :                         if (checksum_failures == 5)
    1743           4 :                             ereport(WARNING,
    1744             :                                     (errmsg("further checksum verification "
    1745             :                                             "failures in file \"%s\" will not "
    1746             :                                             "be reported", readfilename)));
    1747             :                     }
    1748             :                 }
    1749      116238 :                 block_retry = false;
    1750      116238 :                 blkno++;
    1751             :             }
    1752             :         }
    1753             : 
    1754             :         /* Send the chunk as a CopyData message */
    1755      169888 :         if (pq_putmessage('d', buf, cnt))
    1756           4 :             ereport(ERROR,
    1757             :                     (errmsg("base backup could not send data, aborting backup")));
    1758      169884 :         update_basebackup_progress(cnt);
    1759             : 
    1760             :         /* Also feed it to the checksum machinery. */
    1761      169884 :         pg_checksum_update(&checksum_ctx, (uint8 *) buf, cnt);
    1762             : 
    1763      169884 :         len += cnt;
    1764      169884 :         throttle(cnt);
    1765             :     }
    1766             : 
    1767             :     /* If the file was truncated while we were sending it, pad it with zeros */
    1768      135892 :     if (len < statbuf->st_size)
    1769             :     {
    1770           0 :         MemSet(buf, 0, sizeof(buf));
    1771           0 :         while (len < statbuf->st_size)
    1772             :         {
    1773           0 :             cnt = Min(sizeof(buf), statbuf->st_size - len);
    1774           0 :             pq_putmessage('d', buf, cnt);
    1775           0 :             pg_checksum_update(&checksum_ctx, (uint8 *) buf, cnt);
    1776           0 :             update_basebackup_progress(cnt);
    1777           0 :             len += cnt;
    1778           0 :             throttle(cnt);
    1779             :         }
    1780             :     }
    1781             : 
    1782             :     /*
    1783             :      * Pad to a block boundary, per tar format requirements. (This small
    1784             :      * piece of data is probably not worth throttling, and is not checksummed
    1785             :      * because it's not actually part of the file.)
    1786             :      */
    1787      135892 :     pad = tarPaddingBytesRequired(len);
    1788      135892 :     if (pad > 0)
    1789             :     {
    1790       20060 :         MemSet(buf, 0, pad);
    1791        1416 :         pq_putmessage('d', buf, pad);
    1792        1416 :         update_basebackup_progress(pad);
    1793             :     }
    1794             : 
    1795      135892 :     CloseTransientFile(fd);
    1796             : 
    1797      135892 :     if (checksum_failures > 1)
    1798             :     {
    1799           4 :         ereport(WARNING,
    1800             :                 (errmsg_plural("file \"%s\" has a total of %d checksum verification failure",
    1801             :                                "file \"%s\" has a total of %d checksum verification failures",
    1802             :                                checksum_failures,
    1803             :                                readfilename, checksum_failures)));
    1804             : 
    1805           4 :         pgstat_report_checksum_failures_in_db(dboid, checksum_failures);
    1806             :     }
    1807             : 
    1808      135892 :     total_checksum_failures += checksum_failures;
    1809             : 
    1810      135892 :     AddFileToBackupManifest(manifest, spcoid, tarfilename, statbuf->st_size,
    1811      135892 :                             (pg_time_t) statbuf->st_mtime, &checksum_ctx);
    1812             : 
    1813      135892 :     return true;
    1814             : }
    1815             : 
    1816             : 
    1817             : static int64
    1818      143672 : _tarWriteHeader(const char *filename, const char *linktarget,
    1819             :                 struct stat *statbuf, bool sizeonly)
    1820             : {
    1821             :     char        h[TAR_BLOCK_SIZE];
    1822             :     enum tarError rc;
    1823             : 
    1824      143672 :     if (!sizeonly)
    1825             :     {
    1826      139818 :         rc = tarCreateHeader(h, filename, linktarget, statbuf->st_size,
    1827             :                              statbuf->st_mode, statbuf->st_uid, statbuf->st_gid,
    1828             :                              statbuf->st_mtime);
    1829             : 
    1830      139818 :         switch (rc)
    1831             :         {
    1832      139816 :             case TAR_OK:
    1833      139816 :                 break;
    1834           2 :             case TAR_NAME_TOO_LONG:
    1835           2 :                 ereport(ERROR,
    1836             :                         (errmsg("file name too long for tar format: \"%s\"",
    1837             :                                 filename)));
    1838             :                 break;
    1839           0 :             case TAR_SYMLINK_TOO_LONG:
    1840           0 :                 ereport(ERROR,
    1841             :                         (errmsg("symbolic link target too long for tar format: "
    1842             :                                 "file name \"%s\", target \"%s\"",
    1843             :                                 filename, linktarget)));
    1844             :                 break;
    1845           0 :             default:
    1846           0 :                 elog(ERROR, "unrecognized tar error: %d", rc);
    1847             :         }
    1848             : 
    1849      139816 :         pq_putmessage('d', h, sizeof(h));
    1850      139816 :         update_basebackup_progress(sizeof(h));
    1851             :     }
    1852             : 
    1853      143670 :     return sizeof(h);
    1854             : }
    1855             : 
    1856             : /*
    1857             :  * Write tar header for a directory.  If the entry in statbuf is a link then
    1858             :  * write it as a directory anyway.
    1859             :  */
    1860             : static int64
    1861        2362 : _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
    1862             :              bool sizeonly)
    1863             : {
    1864             :     /* If symlink, write it as a directory anyway */
    1865             : #ifndef WIN32
    1866        2362 :     if (S_ISLNK(statbuf->st_mode))
    1867             : #else
    1868             :     if (pgwin32_is_junction(pathbuf))
    1869             : #endif
    1870          88 :         statbuf->st_mode = S_IFDIR | pg_dir_create_mode;
    1871             : 
    1872        2362 :     return _tarWriteHeader(pathbuf + basepathlen + 1, NULL, statbuf, sizeonly);
    1873             : }
    1874             : 
    1875             : /*
    1876             :  * Increment the network transfer counter by the given number of bytes,
    1877             :  * and sleep if necessary to comply with the requested network transfer
    1878             :  * rate.
    1879             :  */
    1880             : static void
    1881      170908 : throttle(size_t increment)
    1882             : {
    1883             :     TimeOffset  elapsed_min;
    1884             : 
    1885      170908 :     if (throttling_counter < 0)
    1886      170908 :         return;
    1887             : 
    1888           0 :     throttling_counter += increment;
    1889           0 :     if (throttling_counter < throttling_sample)
    1890           0 :         return;
    1891             : 
    1892             :     /* How much time should have elapsed at minimum? */
    1893           0 :     elapsed_min = elapsed_min_unit *
    1894           0 :         (throttling_counter / throttling_sample);
    1895             : 
    1896             :     /*
    1897             :      * Since the latch could be set repeatedly because of concurrently WAL
    1898             :      * activity, sleep in a loop to ensure enough time has passed.
    1899             :      */
    1900             :     for (;;)
    1901           0 :     {
    1902             :         TimeOffset  elapsed,
    1903             :                     sleep;
    1904             :         int         wait_result;
    1905             : 
    1906             :         /* Time elapsed since the last measurement (and possible wake up). */
    1907           0 :         elapsed = GetCurrentTimestamp() - throttled_last;
    1908             : 
    1909             :         /* sleep if the transfer is faster than it should be */
    1910           0 :         sleep = elapsed_min - elapsed;
    1911           0 :         if (sleep <= 0)
    1912           0 :             break;
    1913             : 
    1914           0 :         ResetLatch(MyLatch);
    1915             : 
    1916             :         /* We're eating a potentially set latch, so check for interrupts */
    1917           0 :         CHECK_FOR_INTERRUPTS();
    1918             : 
    1919             :         /*
    1920             :          * (TAR_SEND_SIZE / throttling_sample * elapsed_min_unit) should be
    1921             :          * the maximum time to sleep. Thus the cast to long is safe.
    1922             :          */
    1923           0 :         wait_result = WaitLatch(MyLatch,
    1924             :                                 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
    1925           0 :                                 (long) (sleep / 1000),
    1926             :                                 WAIT_EVENT_BASE_BACKUP_THROTTLE);
    1927             : 
    1928           0 :         if (wait_result & WL_LATCH_SET)
    1929           0 :             CHECK_FOR_INTERRUPTS();
    1930             : 
    1931             :         /* Done waiting? */
    1932           0 :         if (wait_result & WL_TIMEOUT)
    1933           0 :             break;
    1934             :     }
    1935             : 
    1936             :     /*
    1937             :      * As we work with integers, only whole multiple of throttling_sample was
    1938             :      * processed. The rest will be done during the next call of this function.
    1939             :      */
    1940           0 :     throttling_counter %= throttling_sample;
    1941             : 
    1942             :     /*
    1943             :      * Time interval for the remaining amount and possible next increments
    1944             :      * starts now.
    1945             :      */
    1946           0 :     throttled_last = GetCurrentTimestamp();
    1947             : }
    1948             : 
    1949             : /*
    1950             :  * Increment the counter for the amount of data already streamed
    1951             :  * by the given number of bytes, and update the progress report for
    1952             :  * pg_stat_progress_basebackup.
    1953             :  */
    1954             : static void
    1955      312456 : update_basebackup_progress(int64 delta)
    1956             : {
    1957      312456 :     const int   index[] = {
    1958             :         PROGRESS_BASEBACKUP_BACKUP_STREAMED,
    1959             :         PROGRESS_BASEBACKUP_BACKUP_TOTAL
    1960             :     };
    1961             :     int64       val[2];
    1962      312456 :     int         nparam = 0;
    1963             : 
    1964      312456 :     backup_streamed += delta;
    1965      312456 :     val[nparam++] = backup_streamed;
    1966             : 
    1967             :     /*
    1968             :      * Avoid overflowing past 100% or the full size. This may make the total
    1969             :      * size number change as we approach the end of the backup (the estimate
    1970             :      * will always be wrong if WAL is included), but that's better than having
    1971             :      * the done column be bigger than the total.
    1972             :      */
    1973      312456 :     if (backup_total > -1 && backup_streamed > backup_total)
    1974             :     {
    1975        1740 :         backup_total = backup_streamed;
    1976        1740 :         val[nparam++] = backup_total;
    1977             :     }
    1978             : 
    1979      312456 :     pgstat_progress_update_multi_param(nparam, index, val);
    1980      312456 : }
    1981             : 
    1982             : /*
    1983             :  * Read some data from a file, setting a wait event and reporting any error
    1984             :  * encountered.
    1985             :  *
    1986             :  * If partial_read_ok is false, also report an error if the number of bytes
    1987             :  * read is not equal to the number of bytes requested.
    1988             :  *
    1989             :  * Returns the number of bytes read.
    1990             :  */
    1991             : static int
    1992      170940 : basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset,
    1993             :                      const char *filename, bool partial_read_ok)
    1994             : {
    1995             :     int         rc;
    1996             : 
    1997      170940 :     pgstat_report_wait_start(WAIT_EVENT_BASEBACKUP_READ);
    1998      170940 :     rc = pg_pread(fd, buf, nbytes, offset);
    1999      170940 :     pgstat_report_wait_end();
    2000             : 
    2001      170940 :     if (rc < 0)
    2002           0 :         ereport(ERROR,
    2003             :                 (errcode_for_file_access(),
    2004             :                  errmsg("could not read file \"%s\": %m", filename)));
    2005      170940 :     if (!partial_read_ok && rc > 0 && rc != nbytes)
    2006           0 :         ereport(ERROR,
    2007             :                 (errcode_for_file_access(),
    2008             :                  errmsg("could not read file \"%s\": read %d of %zu",
    2009             :                         filename, rc, nbytes)));
    2010             : 
    2011      170940 :     return rc;
    2012             : }

Generated by: LCOV version 1.13