LCOV - code coverage report
Current view: top level - src/bin/pg_rewind - pg_rewind.c (source / functions) Hit Total Coverage
Test: PostgreSQL 14devel Lines: 321 388 82.7 %
Date: 2020-12-05 16:06:12 Functions: 14 14 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * pg_rewind.c
       4             :  *    Synchronizes a PostgreSQL data directory to a new timeline
       5             :  *
       6             :  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
       7             :  *
       8             :  *-------------------------------------------------------------------------
       9             :  */
      10             : #include "postgres_fe.h"
      11             : 
      12             : #include <sys/stat.h>
      13             : #include <fcntl.h>
      14             : #include <time.h>
      15             : #include <unistd.h>
      16             : 
      17             : #include "access/timeline.h"
      18             : #include "access/xlog_internal.h"
      19             : #include "catalog/catversion.h"
      20             : #include "catalog/pg_control.h"
      21             : #include "common/controldata_utils.h"
      22             : #include "common/file_perm.h"
      23             : #include "common/restricted_token.h"
      24             : #include "common/string.h"
      25             : #include "fe_utils/recovery_gen.h"
      26             : #include "file_ops.h"
      27             : #include "filemap.h"
      28             : #include "getopt_long.h"
      29             : #include "pg_rewind.h"
      30             : #include "rewind_source.h"
      31             : #include "storage/bufpage.h"
      32             : /* Prototypes for the file-local (static) functions defined later in this file */
      33             : static void usage(const char *progname);
      34             : 
      35             : static void perform_rewind(filemap_t *filemap, rewind_source *source,
      36             :                            XLogRecPtr chkptrec,
      37             :                            TimeLineID chkpttli,
      38             :                            XLogRecPtr chkptredo);
      39             : 
      40             : static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli,
      41             :                               XLogRecPtr checkpointloc);
      42             : 
      43             : static void digestControlFile(ControlFileData *ControlFile,
      44             :                               const char *content, size_t size);
      45             : static void getRestoreCommand(const char *argv0);
      46             : static void sanityChecks(void);
      47             : static void findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex);
      48             : static void ensureCleanShutdown(const char *argv0);
      49             : static void disconnect_atexit(void);
      50             : /*
                     :  * Digested control files: the target's, the source's as read by main()
                     :  * before the rewind, and the source's as re-read by perform_rewind()
                     :  * after the file actions have been executed.
                     :  */
      51             : static ControlFileData ControlFile_target;
      52             : static ControlFileData ControlFile_source;
      53             : static ControlFileData ControlFile_source_after;
      54             : /* progname is assigned from get_progname(argv[0]) at the top of main() */
      55             : const char *progname;
      56             : int         WalSegSz;
      57             : 
      58             : /*
                     :  * Configuration options.  The two data directories and the connection
                     :  * string are copied from the command line in main(); restore_command is
                     :  * not assigned in main() itself (main() calls getRestoreCommand()).
                     :  */
      59             : char       *datadir_target = NULL;
      60             : char       *datadir_source = NULL;
      61             : char       *connstr_source = NULL;
      62             : char       *restore_command = NULL;
      63             : 
      64             : static bool debug = false;
      65             : bool        showprogress = false;
      66             : bool        dry_run = false;
      67             : bool        do_sync = true;
      68             : bool        restore_wal = false;
      69             : 
      70             : /* Target timeline history; main() indexes it with the common-ancestor index */
      71             : TimeLineHistoryEntry *targetHistory;
      72             : int         targetNentries;
      73             : 
      74             : /* Progress counters; main() initializes them only when --progress is given */
      75             : uint64      fetch_size;
      76             : uint64      fetch_done;
      77             : /*
                     :  * Access to the source cluster.  conn is set only when --source-server
                     :  * is used; source is created either from conn or from the local source
                     :  * data directory.
                     :  */
      78             : static PGconn *conn;
      79             : static rewind_source *source;
      80             : /*
                     :  * usage -- print the --help text to stdout.
                     :  *
                     :  * Emits, in order: a one-line summary, the usage line, the option list,
                     :  * the bug-report address (PACKAGE_BUGREPORT) and the home page
                     :  * (PACKAGE_NAME / PACKAGE_URL).  Every message goes through _(), which is
                     :  * presumably the translation macro -- confirm in the included headers.
                     :  *
                     :  * progname is substituted into the summary and usage lines.  Note that the
                     :  * parameter shadows the file-level variable of the same name.
                     :  *
                     :  * The function only prints; it does not exit.  The caller in main() calls
                     :  * exit(0) afterwards.
                     :  */
      81             : static void
      82           2 : usage(const char *progname)
      83             : {
      84           2 :     printf(_("%s resynchronizes a PostgreSQL cluster with another copy of the cluster.\n\n"), progname);
      85           2 :     printf(_("Usage:\n  %s [OPTION]...\n\n"), progname);
      86           2 :     printf(_("Options:\n"));
      87           2 :     printf(_("  -c, --restore-target-wal       use restore_command in target configuration to\n"
      88             :              "                                 retrieve WAL files from archives\n"));
      89           2 :     printf(_("  -D, --target-pgdata=DIRECTORY  existing data directory to modify\n"));
      90           2 :     printf(_("      --source-pgdata=DIRECTORY  source data directory to synchronize with\n"));
      91           2 :     printf(_("      --source-server=CONNSTR    source server to synchronize with\n"));
      92           2 :     printf(_("  -n, --dry-run                  stop before modifying anything\n"));
      93           2 :     printf(_("  -N, --no-sync                  do not wait for changes to be written\n"
      94             :              "                                 safely to disk\n"));
      95           2 :     printf(_("  -P, --progress                 write progress messages\n"));
      96           2 :     printf(_("  -R, --write-recovery-conf      write configuration for replication\n"
      97             :              "                                 (requires --source-server)\n"));
      98           2 :     printf(_("      --debug                    write a lot of debug messages\n"));
      99           2 :     printf(_("      --no-ensure-shutdown       do not automatically fix unclean shutdown\n"));
     100           2 :     printf(_("  -V, --version                  output version information, then exit\n"));
     101           2 :     printf(_("  -?, --help                     show this help, then exit\n"));
     102           2 :     printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
     103           2 :     printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
     104           2 : }
     105             : 
     106             : /*
                     :  * main -- entry point of pg_rewind.
                     :  *
                     :  * Steps, in order:
                     :  *  1. Parse and validate the command line.  Usage errors print a hint and
                     :  *     exit(1).
                     :  *  2. On non-Windows builds refuse to run as root; call
                     :  *     get_restricted_token(); set the umask from the target directory's
                     :  *     permissions.
                     :  *  3. Open the source: a libpq connection when --source-server was given,
                     :  *     otherwise the local directory from --source-pgdata.
                     :  *  4. Read the target's control file.  Unless --no-ensure-shutdown was
                     :  *     given, call ensureCleanShutdown() when the target state is neither
                     :  *     DB_SHUTDOWNED nor DB_SHUTDOWNED_IN_RECOVERY, then re-read it.
                     :  *  5. Read the source's control file and run sanityChecks().
                     :  *  6. Decide whether a rewind is needed: not if both clusters are on the
                     :  *     same timeline, and not if the target's WAL ends at the divergence
                     :  *     point.  In that case optionally write the recovery configuration and
                     :  *     exit(0).
                     :  *  7. Otherwise locate the last common checkpoint, collect the source and
                     :  *     target file lists and the pages touched by target WAL, decide the
                     :  *     per-file actions, call perform_rewind(), sync the target directory,
                     :  *     optionally write the recovery configuration, and release the source.
                     :  *
                     :  * Returns 0 after a completed rewind.
                     :  */
     107             : int
     108          46 : main(int argc, char **argv)
     109             : {
     110             :     static struct option long_options[] = {
     111             :         {"help", no_argument, NULL, '?'},
     112             :         {"target-pgdata", required_argument, NULL, 'D'},
     113             :         {"write-recovery-conf", no_argument, NULL, 'R'},
     114             :         {"source-pgdata", required_argument, NULL, 1},
     115             :         {"source-server", required_argument, NULL, 2},
     116             :         {"no-ensure-shutdown", no_argument, NULL, 4},
     117             :         {"version", no_argument, NULL, 'V'},
     118             :         {"restore-target-wal", no_argument, NULL, 'c'},
     119             :         {"dry-run", no_argument, NULL, 'n'},
     120             :         {"no-sync", no_argument, NULL, 'N'},
     121             :         {"progress", no_argument, NULL, 'P'},
     122             :         {"debug", no_argument, NULL, 3},
     123             :         {NULL, 0, NULL, 0}
     124             :     };
     125             :     int         option_index;
     126             :     int         c;
     127             :     XLogRecPtr  divergerec;
     128             :     int         lastcommontliIndex;
     129             :     XLogRecPtr  chkptrec;
     130             :     TimeLineID  chkpttli;
     131             :     XLogRecPtr  chkptredo;
     132             :     XLogRecPtr  target_wal_endrec;
     133             :     size_t      size;
     134             :     char       *buffer;
     135          46 :     bool        no_ensure_shutdown = false;
     136             :     bool        rewind_needed;
     137          46 :     bool        writerecoveryconf = false;
     138             :     filemap_t  *filemap;
     139             : 
     140          46 :     pg_logging_init(argv[0]);
     141          46 :     set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_rewind"));
     142          46 :     progname = get_progname(argv[0]);
     143             : 
     144             :     /*
                     :      * Process command-line arguments.  --help/-? and --version/-V are
                     :      * honored here only when they are the first argument; they print and
                     :      * exit(0) before any other option is looked at.
                     :      */
     145          46 :     if (argc > 1)
     146             :     {
     147          46 :         if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
     148             :         {
     149           2 :             usage(progname);
     150           2 :             exit(0);
     151             :         }
     152          44 :         if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
     153             :         {
     154           2 :             puts("pg_rewind (PostgreSQL) " PG_VERSION);
     155           2 :             exit(0);
     156             :         }
     157             :     }
     158             : /* Parse the options; long-only options use the small integer codes 1-4 */
     159         252 :     while ((c = getopt_long(argc, argv, "cD:nNPR", long_options, &option_index)) != -1)
     160             :     {
     161         170 :         switch (c)
     162             :         {
     163           2 :             case '?':           /* unrecognized option, or --help not given first */
     164           2 :                 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
     165           2 :                 exit(1);
     166             : 
     167           2 :             case 'c':
     168           2 :                 restore_wal = true;
     169           2 :                 break;
     170             : 
     171           0 :             case 'P':
     172           0 :                 showprogress = true;
     173           0 :                 break;
     174             : 
     175           2 :             case 'n':
     176           2 :                 dry_run = true;
     177           2 :                 break;
     178             : 
     179          30 :             case 'N':
     180          30 :                 do_sync = false;
     181          30 :                 break;
     182             : 
     183          12 :             case 'R':
     184          12 :                 writerecoveryconf = true;
     185          12 :                 break;
     186             : 
     187          36 :             case 3:             /* --debug */
     188          36 :                 debug = true;
     189          36 :                 pg_logging_increase_verbosity();
     190          36 :                 break;
     191             : 
     192          40 :             case 'D':           /* -D or --target-pgdata */
     193          40 :                 datadir_target = pg_strdup(optarg);
     194          40 :                 break;
     195             : 
     196          26 :             case 1:             /* --source-pgdata */
     197          26 :                 datadir_source = pg_strdup(optarg);
     198          26 :                 break;
     199             : 
     200          14 :             case 2:             /* --source-server */
     201          14 :                 connstr_source = pg_strdup(optarg);
     202          14 :                 break;
     203             : 
     204           6 :             case 4:             /* --no-ensure-shutdown */
     205           6 :                 no_ensure_shutdown = true;
     206           6 :                 break;
     207             :         }
     208         210 :     }
     209             : /* Validate the option combination: exactly one source, and a target */
     210          40 :     if (datadir_source == NULL && connstr_source == NULL)
     211             :     {
     212           2 :         pg_log_error("no source specified (--source-pgdata or --source-server)");
     213           2 :         fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
     214           2 :         exit(1);
     215             :     }
     216             : 
     217          38 :     if (datadir_source != NULL && connstr_source != NULL)
     218             :     {
     219           2 :         pg_log_error("only one of --source-pgdata or --source-server can be specified");
     220           2 :         fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
     221           2 :         exit(1);
     222             :     }
     223             : 
     224          36 :     if (datadir_target == NULL)
     225             :     {
     226           0 :         pg_log_error("no target data directory specified (--target-pgdata)");
     227           0 :         fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
     228           0 :         exit(1);
     229             :     }
     230             : 
     231          36 :     if (writerecoveryconf && connstr_source == NULL)
     232             :     {
     233           2 :         pg_log_error("no source server information (--source-server) specified for --write-recovery-conf");
     234           2 :         fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
     235           2 :         exit(1);
     236             :     }
     237             : 
     238          34 :     if (optind < argc)
     239             :     {
     240           2 :         pg_log_error("too many command-line arguments (first is \"%s\")",
     241             :                      argv[optind]);
     242           2 :         fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
     243           2 :         exit(1);
     244             :     }
     245             : 
     246             :     /*
     247             :      * Don't allow pg_rewind to be run as root, to avoid overwriting the
     248             :      * ownership of files in the data directory. We need only check for root
     249             :      * -- any other user won't have sufficient permissions to modify files in
     250             :      * the data directory.
     251             :      */
     252             : #ifndef WIN32
     253          32 :     if (geteuid() == 0)
     254             :     {
     255           0 :         pg_log_error("cannot be executed by \"root\"");
     256           0 :         fprintf(stderr, _("You must run %s as the PostgreSQL superuser.\n"),
     257             :                 progname);
     258           0 :         exit(1);
     259             :     }
     260             : #endif
     261             : 
     262          32 :     get_restricted_token();
     263             : 
     264             :     /* Set mask based on PGDATA permissions */
     265          32 :     if (!GetDataDirectoryCreatePerm(datadir_target))
     266             :     {
     267           0 :         pg_log_error("could not read permissions of directory \"%s\": %m",
     268             :                      datadir_target);
     269           0 :         exit(1);
     270             :     }
     271             : 
     272          32 :     umask(pg_mode_mask);
     273             : 
     274          32 :     getRestoreCommand(argv[0]);
     275             : 
     276          32 :     atexit(disconnect_atexit);
     277             : 
     278             :     /*
     279             :      * Ok, we have all the options and we're ready to start. First, connect to
     280             :      * remote server.
     281             :      */
     282          32 :     if (connstr_source)
     283             :     {
     284          12 :         conn = PQconnectdb(connstr_source);
     285             : 
     286          12 :         if (PQstatus(conn) == CONNECTION_BAD)
     287           0 :             pg_fatal("%s", PQerrorMessage(conn));
     288             : 
     289          12 :         if (showprogress)
     290           0 :             pg_log_info("connected to server");
     291             : 
     292          12 :         source = init_libpq_source(conn);
     293             :     }
     294             :     else
     295          20 :         source = init_local_source(datadir_source);
     296             : 
     297             :     /*
     298             :      * Check the status of the target instance.
     299             :      *
     300             :      * If the target instance was not cleanly shut down, start and stop the
     301             :      * target cluster once in single-user mode to enforce recovery to finish,
     302             :      * ensuring that the cluster can be used by pg_rewind.  Note that if
     303             :      * no_ensure_shutdown is specified, pg_rewind ignores this step, and users
     304             :      * need to make sure by themselves that the target cluster is in a clean
     305             :      * state.
     306             :      */
     307          32 :     buffer = slurpFile(datadir_target, "global/pg_control", &size);
     308          32 :     digestControlFile(&ControlFile_target, buffer, size);
     309          32 :     pg_free(buffer);
     310             : 
     311          32 :     if (!no_ensure_shutdown &&
     312          26 :         ControlFile_target.state != DB_SHUTDOWNED &&
     313          22 :         ControlFile_target.state != DB_SHUTDOWNED_IN_RECOVERY)
     314             :     {
     315          20 :         ensureCleanShutdown(argv[0]);
     316             : 
     317          18 :         buffer = slurpFile(datadir_target, "global/pg_control", &size);
     318          18 :         digestControlFile(&ControlFile_target, buffer, size);
     319          18 :         pg_free(buffer);
     320             :     }
     321             : 
     322          30 :     buffer = source->fetch_file(source, "global/pg_control", &size);
     323          30 :     digestControlFile(&ControlFile_source, buffer, size);
     324          30 :     pg_free(buffer);
     325             : 
     326          30 :     sanityChecks();
     327             : 
     328             :     /*
     329             :      * Find the common ancestor timeline between the clusters.
     330             :      *
     331             :      * If both clusters are already on the same timeline, there's nothing to
     332             :      * do.
                     :      *
                     :      * Note that divergerec and lastcommontliIndex are only assigned in the
                     :      * else branch.  On the same-timeline path they are never read, because
                     :      * the !rewind_needed check below exits first.
     333             :      */
     334          52 :     if (ControlFile_target.checkPointCopy.ThisTimeLineID ==
     335          26 :         ControlFile_source.checkPointCopy.ThisTimeLineID)
     336             :     {
     337           2 :         pg_log_info("source and target cluster are on the same timeline");
     338           2 :         rewind_needed = false;
     339           2 :         target_wal_endrec = 0;
     340             :     }
     341             :     else
     342             :     {
     343             :         XLogRecPtr  chkptendrec;
     344             : 
     345          24 :         findCommonAncestorTimeline(&divergerec, &lastcommontliIndex);
     346          24 :         pg_log_info("servers diverged at WAL location %X/%X on timeline %u",
     347             :                     (uint32) (divergerec >> 32), (uint32) divergerec,
     348             :                     targetHistory[lastcommontliIndex].tli);
     349             : 
     350             :         /*
     351             :          * Determine the end-of-WAL on the target.
     352             :          *
     353             :          * The WAL ends at the last shutdown checkpoint, or at
     354             :          * minRecoveryPoint if it was a standby. (If we supported rewinding a
     355             :          * server that was not shut down cleanly, we would need to replay
     356             :          * until we reach the first invalid record, like crash recovery does.)
     357             :          */
     358             : 
     359             :         /* read the checkpoint record on the target to see where it ends. */
     360          24 :         chkptendrec = readOneRecord(datadir_target,
     361             :                                     ControlFile_target.checkPoint,
     362             :                                     targetNentries - 1,
     363             :                                     restore_command);
     364             : 
     365          24 :         if (ControlFile_target.minRecoveryPoint > chkptendrec)
     366             :         {
     367           2 :             target_wal_endrec = ControlFile_target.minRecoveryPoint;
     368             :         }
     369             :         else
     370             :         {
     371          22 :             target_wal_endrec = chkptendrec;
     372             :         }
     373             : 
     374             :         /*
     375             :          * Check for the possibility that the target is in fact a direct
     376             :          * ancestor of the source. In that case, there is no divergent history
     377             :          * in the target that needs rewinding.
     378             :          */
     379          24 :         if (target_wal_endrec > divergerec)
     380             :         {
     381          24 :             rewind_needed = true;
     382             :         }
     383             :         else
     384             :         {
     385             :             /* the last common checkpoint record must be part of target WAL */
     386             :             Assert(target_wal_endrec == divergerec);
     387             : 
     388           0 :             rewind_needed = false;
     389             :         }
     390             :     }
     391             : 
     392          26 :     if (!rewind_needed)
     393             :     {
     394           2 :         pg_log_info("no rewind required");
     395           2 :         if (writerecoveryconf && !dry_run)
     396           0 :             WriteRecoveryConfig(conn, datadir_target,
     397             :                                 GenerateRecoveryConfig(conn, NULL));
     398           2 :         exit(0);
     399             :     }
     400             : 
     401          24 :     findLastCheckpoint(datadir_target, divergerec, lastcommontliIndex,
     402             :                        &chkptrec, &chkpttli, &chkptredo, restore_command);
     403          24 :     pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u",
     404             :                 (uint32) (chkptrec >> 32), (uint32) chkptrec,
     405             :                 chkpttli);
     406             : 
     407             :     /* Initialize the hash table to track the status of each file */
     408          24 :     filehash_init();
     409             : 
     410             :     /*
     411             :      * Collect information about all files in both data directories.
     412             :      */
     413          24 :     if (showprogress)
     414           0 :         pg_log_info("reading source file list");
     415          24 :     source->traverse_files(source, &process_source_file);
     416             : 
     417          24 :     if (showprogress)
     418           0 :         pg_log_info("reading target file list");
     419          24 :     traverse_datadir(datadir_target, &process_target_file);
     420             : 
     421             :     /*
     422             :      * Read the target WAL from last checkpoint before the point of fork, to
     423             :      * extract all the pages that were modified on the target cluster after
     424             :      * the fork.
     425             :      */
     426          24 :     if (showprogress)
     427           0 :         pg_log_info("reading WAL in target");
     428          24 :     extractPageMap(datadir_target, chkptrec, lastcommontliIndex,
     429             :                    target_wal_endrec, restore_command);
     430             : 
     431             :     /*
     432             :      * We have collected all information we need from both systems. Decide
     433             :      * what to do with each file.
     434             :      */
     435          24 :     filemap = decide_file_actions();
     436          24 :     if (showprogress)
     437           0 :         calculate_totals(filemap);
     438             : 
     439             :     /* this is too verbose even for verbose mode */
     440          24 :     if (debug)
     441          22 :         print_filemap(filemap);
     442             : 
     443             :     /*
     444             :      * Ok, we're ready to start copying things over.
     445             :      */
     446          24 :     if (showprogress)
     447             :     {
     448           0 :         pg_log_info("need to copy %lu MB (total source directory size is %lu MB)",
     449             :                     (unsigned long) (filemap->fetch_size / (1024 * 1024)),
     450             :                     (unsigned long) (filemap->total_size / (1024 * 1024)));
     451             : 
     452           0 :         fetch_size = filemap->fetch_size;
     453           0 :         fetch_done = 0;
     454             :     }
     455             : 
     456             :     /*
     457             :      * We have now collected all the information we need from both systems,
     458             :      * and we are ready to start modifying the target directory.
     459             :      *
     460             :      * This is the point of no return. Once we start copying things, there is
     461             :      * no turning back!
     462             :      */
     463          24 :     perform_rewind(filemap, source, chkptrec, chkpttli, chkptredo);
     464             : 
     465          24 :     if (showprogress)
     466           0 :         pg_log_info("syncing target data directory");
     467          24 :     sync_target_dir();
     468             : 
     469             :     /* Also update the standby configuration, if requested. */
     470          24 :     if (writerecoveryconf && !dry_run)
     471          10 :         WriteRecoveryConfig(conn, datadir_target,
     472             :                             GenerateRecoveryConfig(conn, NULL));
     473             : 
     474             :     /* don't need the source connection anymore */
     475          24 :     source->destroy(source);
     476          24 :     if (conn)
     477             :     {
     478          12 :         PQfinish(conn);
     479          12 :         conn = NULL;
     480             :     }
     481             : 
     482          24 :     pg_log_info("Done!");
     483             : 
     484          24 :     return 0;
     485             : }
     486             : 
     487             : /*
     488             :  * Perform the rewind.
     489             :  *
     490             :  * We have already collected all the information we need from the
     491             :  * target and the source.
     492             :  */
     493             : static void
     494          24 : perform_rewind(filemap_t *filemap, rewind_source *source,
     495             :                XLogRecPtr chkptrec,
     496             :                TimeLineID chkpttli,
     497             :                XLogRecPtr chkptredo)
     498             : {
     499             :     XLogRecPtr  endrec;
     500             :     TimeLineID  endtli;
     501             :     ControlFileData ControlFile_new;
     502             :     size_t      size;
     503             :     char       *buffer;
     504             : 
     505             :     /*
     506             :      * Execute the actions in the file map, fetching data from the source
     507             :      * system as needed.
     508             :      */
     509       28276 :     for (int i = 0; i < filemap->nentries; i++)
     510             :     {
     511       28252 :         file_entry_t *entry = filemap->entries[i];
     512             : 
     513             :         /*
     514             :          * If this is a relation file, copy the modified blocks.
     515             :          *
     516             :          * This is in addition to any other changes.
     517             :          */
     518       28252 :         if (entry->target_pages_to_overwrite.bitmapsize > 0)
     519             :         {
     520             :             datapagemap_iterator_t *iter;
     521             :             BlockNumber blkno;
     522             :             off_t       offset;
     523             : 
     524         152 :             iter = datapagemap_iterate(&entry->target_pages_to_overwrite);
     525         336 :             while (datapagemap_next(iter, &blkno))
     526             :             {
     527         184 :                 offset = blkno * BLCKSZ;
     528         184 :                 source->queue_fetch_range(source, entry->path, offset, BLCKSZ);
     529             :             }
     530         152 :             pg_free(iter);
     531             :         }
     532             : 
     533       28252 :         switch (entry->action)
     534             :         {
     535       19130 :             case FILE_ACTION_NONE:
     536             :                 /* nothing else to do */
     537       19130 :                 break;
     538             : 
     539        7724 :             case FILE_ACTION_COPY:
     540             :                 /* Truncate the old file out of the way, if any */
     541        7724 :                 open_target_file(entry->path, true);
     542        7724 :                 source->queue_fetch_range(source, entry->path,
     543             :                                           0, entry->source_size);
     544        7724 :                 break;
     545             : 
     546           8 :             case FILE_ACTION_TRUNCATE:
     547           8 :                 truncate_target_file(entry->path, entry->source_size);
     548           8 :                 break;
     549             : 
     550          10 :             case FILE_ACTION_COPY_TAIL:
     551          30 :                 source->queue_fetch_range(source, entry->path,
     552          10 :                                           entry->target_size,
     553          10 :                                           entry->source_size - entry->target_size);
     554          10 :                 break;
     555             : 
     556        1366 :             case FILE_ACTION_REMOVE:
     557        1366 :                 remove_target(entry);
     558        1366 :                 break;
     559             : 
     560          14 :             case FILE_ACTION_CREATE:
     561          14 :                 create_target(entry);
     562          14 :                 break;
     563             : 
     564           0 :             case FILE_ACTION_UNDECIDED:
     565           0 :                 pg_fatal("no action decided for \"%s\"", entry->path);
     566             :                 break;
     567             :         }
     568       28252 :     }
     569             : 
     570             :     /* Complete any remaining range-fetches that we queued up above. */
     571          24 :     source->finish_fetch(source);
     572             : 
     573          24 :     close_target_file();
     574             : 
     575          24 :     progress_report(true);
     576             : 
     577             :     /*
     578             :      * Fetch the control file from the source last. This ensures that the
     579             :      * minRecoveryPoint is up-to-date.
     580             :      */
     581          24 :     buffer = source->fetch_file(source, "global/pg_control", &size);
     582          24 :     digestControlFile(&ControlFile_source_after, buffer, size);
     583          24 :     pg_free(buffer);
     584             : 
     585             :     /*
     586             :      * Sanity check: If the source is a local system, the control file should
     587             :      * not have changed since we started.
     588             :      *
     589             :      * XXX: We assume it hasn't been modified, but actually, what could go
     590             :      * wrong? The logic handles a libpq source that's modified concurrently,
     591             :      * why not a local datadir?
     592             :      */
     593          24 :     if (datadir_source &&
     594          12 :         memcmp(&ControlFile_source, &ControlFile_source_after,
     595             :                sizeof(ControlFileData)) != 0)
     596             :     {
     597           0 :         pg_fatal("source system was modified while pg_rewind was running");
     598             :     }
     599             : 
     600          24 :     if (showprogress)
     601           0 :         pg_log_info("creating backup label and updating control file");
     602             : 
     603             :     /*
     604             :      * Create a backup label file, to tell the target where to begin the WAL
     605             :      * replay. Normally, from the last common checkpoint between the source
     606             :      * and the target. But if the source is a standby server, it's possible
     607             :      * that the last common checkpoint is *after* the standby's restartpoint.
     608             :      * That implies that the source server has applied the checkpoint record,
     609             :      * but hasn't performed a corresponding restartpoint yet. Make sure we
     610             :      * start at the restartpoint's redo point in that case.
     611             :      *
     612             :      * Use the old version of the source's control file for this. The server
     613             :      * might have finished the restartpoint after we started copying files,
     614             :      * but we must begin from the redo point at the time that we started copying.
     615             :      */
     616          24 :     if (ControlFile_source.checkPointCopy.redo < chkptredo)
     617             :     {
     618           2 :         chkptredo = ControlFile_source.checkPointCopy.redo;
     619           2 :         chkpttli = ControlFile_source.checkPointCopy.ThisTimeLineID;
     620           2 :         chkptrec = ControlFile_source.checkPoint;
     621             :     }
     622          24 :     createBackupLabel(chkptredo, chkpttli, chkptrec);
     623             : 
     624             :     /*
     625             :      * Update control file of target, to tell the target how far it must
     626             :      * replay the WAL (minRecoveryPoint).
     627             :      */
     628          24 :     if (connstr_source)
     629             :     {
     630             :         /*
     631             :          * The source is a live server. Like in an online backup, it's
     632             :          * important that we recover all the WAL that was generated while we
     633             :          * were copying files.
     634             :          */
     635          12 :         if (ControlFile_source_after.state == DB_IN_ARCHIVE_RECOVERY)
     636             :         {
     637             :             /*
     638             :              * Source is a standby server. We must replay to its
     639             :              * minRecoveryPoint.
     640             :              */
     641           2 :             endrec = ControlFile_source_after.minRecoveryPoint;
     642           2 :             endtli = ControlFile_source_after.minRecoveryPointTLI;
     643             :         }
     644             :         else
     645             :         {
     646             :             /*
     647             :              * Source is a production, non-standby, server. We must replay to
     648             :              * the last WAL insert location.
     649             :              */
     650          10 :             if (ControlFile_source_after.state != DB_IN_PRODUCTION)
     651           0 :                 pg_fatal("source system was in unexpected state at end of rewind");
     652             : 
     653          10 :             endrec = source->get_current_wal_insert_lsn(source);
     654          10 :             endtli = ControlFile_source_after.checkPointCopy.ThisTimeLineID;
     655             :         }
     656             :     }
     657             :     else
     658             :     {
     659             :         /*
     660             :          * Source is a local data directory. It should've shut down cleanly,
     661             :          * and we must replay to the latest shutdown checkpoint.
     662             :          */
     663          12 :         endrec = ControlFile_source_after.checkPoint;
     664          12 :         endtli = ControlFile_source_after.checkPointCopy.ThisTimeLineID;
     665             :     }
     666             : 
     667          24 :     memcpy(&ControlFile_new, &ControlFile_source_after, sizeof(ControlFileData));
     668          24 :     ControlFile_new.minRecoveryPoint = endrec;
     669          24 :     ControlFile_new.minRecoveryPointTLI = endtli;
     670          24 :     ControlFile_new.state = DB_IN_ARCHIVE_RECOVERY;
     671          24 :     if (!dry_run)
     672          22 :         update_controlfile(datadir_target, &ControlFile_new, do_sync);
     673          24 : }
     674             : /* Cross-check the source and target control files; any failed check ends in pg_fatal(). */
     675             : static void
     676          30 : sanityChecks(void)
     677             : {
     678             :     /* TODO Check that there's no backup_label in either cluster */
     679             : 
     680             :     /* Both clusters must carry the same system identifier */
     681          30 :     if (ControlFile_target.system_identifier != ControlFile_source.system_identifier)
     682           0 :         pg_fatal("source and target clusters are from different systems");
     683             : 
     684             :     /* Control file and catalog versions must match this build's constants */
     685          30 :     if (ControlFile_target.pg_control_version != PG_CONTROL_VERSION ||
     686          30 :         ControlFile_source.pg_control_version != PG_CONTROL_VERSION ||
     687          30 :         ControlFile_target.catalog_version_no != CATALOG_VERSION_NO ||
     688          30 :         ControlFile_source.catalog_version_no != CATALOG_VERSION_NO)
     689             :     {
     690           0 :         pg_fatal("clusters are not compatible with this version of pg_rewind");
     691             :     }
     692             : 
     693             :     /*
     694             :      * The target cluster needs to use either data checksums or hint-bit
     695             :      * WAL-logging, to prevent data corruption that could occur because of hint bits.
     696             :      */
     697          30 :     if (ControlFile_target.data_checksum_version != PG_DATA_CHECKSUM_VERSION &&
     698          30 :         !ControlFile_target.wal_log_hints)
     699             :     {
     700           0 :         pg_fatal("target server needs to use either data checksums or \"wal_log_hints = on\"");
     701             :     }
     702             : 
     703             :     /*
     704             :      * Target cluster better not be running. This doesn't guard against
     705             :      * someone starting the cluster concurrently. Also, this is probably more
     706             :      * strict than necessary; it's OK if the target node was not shut down
     707             :      * cleanly, as long as it isn't running at the moment.
     708             :      */
     709          30 :     if (ControlFile_target.state != DB_SHUTDOWNED &&
     710           4 :         ControlFile_target.state != DB_SHUTDOWNED_IN_RECOVERY)
     711           2 :         pg_fatal("target server must be shut down cleanly");
     712             : 
     713             :     /*
     714             :      * When the source is a data directory, also require that the source
     715             :      * server is shut down. There isn't any very strong reason for this
     716             :      * limitation, but better safe than sorry.
     717             :      */
     718          28 :     if (datadir_source &&
     719          16 :         ControlFile_source.state != DB_SHUTDOWNED &&
     720           4 :         ControlFile_source.state != DB_SHUTDOWNED_IN_RECOVERY)
     721           2 :         pg_fatal("source data directory must be shut down cleanly");
     722          26 : }
     723             : 
     724             : /*
     725             :  * Print a progress report based on the fetch_size and fetch_done variables.
     726             :  * Does nothing unless showprogress is set; the report is written to stderr.
     727             :  * Progress report is written at maximum once per second, except that the
     728             :  * last progress report is always printed.
     729             :  *
     730             :  * If finished is set to true, this is the last progress report. The cursor
     731             :  * is moved to the next line.
     732             :  */
     733             : void
     734       82862 : progress_report(bool finished)
     735             : {
     736             :     static pg_time_t last_progress_report = 0;  /* time of the previous report */
     737             :     int         percent;
     738             :     char        fetch_done_str[32];
     739             :     char        fetch_size_str[32];
     740             :     pg_time_t   now;
     741             : 
     742       82862 :     if (!showprogress)
     743       82862 :         return;
     744             : 
     745           0 :     now = time(NULL);
     746           0 :     if (now == last_progress_report && !finished)
     747           0 :         return;                 /* Max once per second */
     748             : 
     749           0 :     last_progress_report = now;
     750           0 :     percent = fetch_size ? (int) ((fetch_done) * 100 / fetch_size) : 0;  /* no division by zero */
     751             : 
     752             :     /*
     753             :      * Avoid overflowing past 100% or the full size. This may make the total
     754             :      * size number change as we approach the end of the backup (the estimate
     755             :      * will always be wrong if WAL is included), but that's better than having
     756             :      * the done column be bigger than the total.
     757             :      */
     758           0 :     if (percent > 100)
     759           0 :         percent = 100;
     760           0 :     if (fetch_done > fetch_size)
     761           0 :         fetch_size = fetch_done;
     762             : 
     763             :     /*
     764             :      * Separate step to keep platform-dependent format code out of
     765             :      * translatable strings.  And we only test for INT64_FORMAT availability
     766             :      * in snprintf, not fprintf.
     767             :      */
     768           0 :     snprintf(fetch_done_str, sizeof(fetch_done_str), INT64_FORMAT,
     769             :              fetch_done / 1024);
     770           0 :     snprintf(fetch_size_str, sizeof(fetch_size_str), INT64_FORMAT,
     771             :              fetch_size / 1024);
     772             : 
     773           0 :     fprintf(stderr, _("%*s/%s kB (%d%%) copied"),
     774           0 :             (int) strlen(fetch_size_str), fetch_done_str, fetch_size_str,  /* pad "done" to the width of "size" */
     775             :             percent);
     776             : 
     777             :     /*
     778             :      * Stay on the same line if reporting to a terminal and we're not done
     779             :      * yet.
     780             :      */
     781           0 :     fputc((!finished && isatty(fileno(stderr))) ? '\r' : '\n', stderr);
     782             : }
     783             : 
     784             : /*
     785             :  * Return the smaller of two WAL locations, treating InvalidXLogRecPtr as
     786             :  * infinity as src/include/access/timeline.h states; an invalid argument thus
     787             :  * yields the other one. Use only for WAL locations related to history files.
     788             :  */
     789             : static XLogRecPtr
     790          24 : MinXLogRecPtr(XLogRecPtr a, XLogRecPtr b)
     791             : {
     792          24 :     if (XLogRecPtrIsInvalid(a))
     793           2 :         return b;               /* a is "infinity", so b cannot be larger */
     794          22 :     else if (XLogRecPtrIsInvalid(b))
     795          22 :         return a;               /* b is "infinity" */
     796             :     else
     797           0 :         return Min(a, b);       /* both valid: plain minimum */
     798             : }
     799             : 
     800             : /*
     801             :  * Retrieve the timeline history for the given control file, which must be
     802             :  * either ControlFile_source or ControlFile_target; *nentries gets its length.
     803             :  */
     804             : static TimeLineHistoryEntry *
     805          48 : getTimelineHistory(ControlFileData *controlFile, int *nentries)
     806             : {
     807             :     TimeLineHistoryEntry *history;
     808             :     TimeLineID  tli;
     809             : 
     810          48 :     tli = controlFile->checkPointCopy.ThisTimeLineID;
     811             : 
     812             :     /*
     813             :      * Timeline 1 does not have a history file, so there is no need to look
     814             :      * for one; fake an entry with infinite start and end positions instead.
     815             :      */
     816          48 :     if (tli == 1)
     817             :     {
     818          22 :         history = (TimeLineHistoryEntry *) pg_malloc(sizeof(TimeLineHistoryEntry));
     819          22 :         history->tli = tli;
     820          22 :         history->begin = history->end = InvalidXLogRecPtr;
     821          22 :         *nentries = 1;
     822             :     }
     823             :     else
     824             :     {
     825             :         char        path[MAXPGPATH];
     826             :         char       *histfile;
     827             : 
     828          26 :         TLHistoryFilePath(path, tli);
     829             : 
     830             :         /* Get history file from appropriate source */
     831          26 :         if (controlFile == &ControlFile_source)
     832          22 :             histfile = source->fetch_file(source, path, NULL);
     833           4 :         else if (controlFile == &ControlFile_target)
     834           4 :             histfile = slurpFile(datadir_target, path, NULL);
     835             :         else
     836           0 :             pg_fatal("invalid control file");
     837             : 
     838          26 :         history = rewind_parseTimeLineHistory(histfile, tli, nentries);
     839          26 :         pg_free(histfile);
     840             :     }
     841             : 
     842          48 :     if (debug)
     843             :     {
     844             :         int         i;
     845             : 
     846          44 :         if (controlFile == &ControlFile_source)
     847          22 :             pg_log_debug("Source timeline history:");
     848          22 :         else if (controlFile == &ControlFile_target)
     849          22 :             pg_log_debug("Target timeline history:");
     850             :         else
     851             :             Assert(false);
     852             : 
     853             :         /*
     854             :          * Print the history just retrieved; its length is *nentries.
     855             :          */
     856          68 :         for (i = 0; i < *nentries; i++)
     857             :         {
     858             :             TimeLineHistoryEntry *entry;
     859             : 
     860          24 :             entry = &history[i];
     861          24 :             pg_log_debug("%d: %X/%X - %X/%X", entry->tli,
     862             :                          (uint32) (entry->begin >> 32), (uint32) (entry->begin),
     863             :                          (uint32) (entry->end >> 32), (uint32) (entry->end));
     864             :         }
     865             :     }
     866             : 
     867          48 :     return history;             /* caller owns the returned array */
     868             : }
     869             : 
     870             : /*
     871             :  * Determine the TLI of the last common timeline in the timeline history of the
     872             :  * two clusters. targetHistory is filled with target timeline history and
     873             :  * targetNentries is the number of items in targetHistory. *tliIndex is set to
     874             :  * the index of last common timeline in targetHistory array, and *recptr is set
     875             :  * to the position where the timeline history diverged (ie. the first WAL record
     876             :  * that's not the same in both clusters). Calls pg_fatal() if none is common.
     877             :  *
     878             :  * Control files of both clusters must be read into ControlFile_target/source
     879             :  * before calling this routine.
     880             :  */
     881             : static void
     882          24 : findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex)
     883             : {
     884             :     TimeLineHistoryEntry *sourceHistory;
     885             :     int         sourceNentries;
     886             :     int         i,
     887             :                 n;
     888             : 
     889             :     /* Retrieve timelines for both source and target */
     890          24 :     sourceHistory = getTimelineHistory(&ControlFile_source, &sourceNentries);
     891          24 :     targetHistory = getTimelineHistory(&ControlFile_target, &targetNentries);
     892             : 
     893             :     /*
     894             :      * Trace the history forward, until we hit the timeline diverge. It may
     895             :      * still be possible that the source and target nodes used the same
     896             :      * timeline number in their history but with different start position
     897             :      * depending on the history files that each node has fetched in previous
     898             :      * recovery processes. Hence check the start position of the new timeline
     899             :      * as well and move down by one extra timeline entry if they do not match.
     900             :      */
     901          24 :     n = Min(sourceNentries, targetNentries);
     902          50 :     for (i = 0; i < n; i++)
     903             :     {
     904          26 :         if (sourceHistory[i].tli != targetHistory[i].tli ||
     905          26 :             sourceHistory[i].begin != targetHistory[i].begin)
     906             :             break;
     907             :     }
     908             : 
     909          24 :     if (i > 0)
     910             :     {
     911          24 :         i--;                    /* back up to the last entry that matched */
     912          24 :         *recptr = MinXLogRecPtr(sourceHistory[i].end, targetHistory[i].end);
     913          24 :         *tliIndex = i;
     914             : 
     915          24 :         pg_free(sourceHistory); /* targetHistory is kept, not freed here */
     916          24 :         return;
     917             :     }
     918             :     else
     919             :     {
     920           0 :         pg_fatal("could not find common ancestor of the source and target cluster's timelines");
     921             :     }
     922             : }
     923             : 
     924             : 
     925             : /*
     926             :  * Create a backup_label file that makes recovery begin at startpoint (on
     927             :  * timeline starttli), with checkpointloc recorded as the checkpoint location.
     928             :  */
     929             : static void
     930          24 : createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpointloc)
     931             : {
     932             :     XLogSegNo   startsegno;
     933             :     time_t      stamp_time;
     934             :     char        strfbuf[128];
     935             :     char        xlogfilename[MAXFNAMELEN];
     936             :     struct tm  *tmp;
     937             :     char        buf[1000];
     938             :     int         len;
     939             : 
     940          24 :     XLByteToSeg(startpoint, startsegno, WalSegSz);
     941          24 :     XLogFileName(xlogfilename, starttli, startsegno, WalSegSz);
     942             : 
     943             :     /*
     944             :      * Construct backup label file
     945             :      */
     946          24 :     stamp_time = time(NULL);
     947          24 :     tmp = localtime(&stamp_time);   /* NOTE(review): a NULL result is not checked */
     948          24 :     strftime(strfbuf, sizeof(strfbuf), "%Y-%m-%d %H:%M:%S %Z", tmp);
     949             : 
     950          72 :     len = snprintf(buf, sizeof(buf),
     951             :                    "START WAL LOCATION: %X/%X (file %s)\n"
     952             :                    "CHECKPOINT LOCATION: %X/%X\n"
     953             :                    "BACKUP METHOD: pg_rewind\n"
     954             :                    "BACKUP FROM: standby\n"
     955             :                    "START TIME: %s\n",
     956             :     /* omit LABEL: line */
     957          24 :                    (uint32) (startpoint >> 32), (uint32) startpoint, xlogfilename,
     958          24 :                    (uint32) (checkpointloc >> 32), (uint32) checkpointloc,
     959             :                    strfbuf);
     960          24 :     if (len >= sizeof(buf))
     961           0 :         pg_fatal("backup label buffer too small");    /* shouldn't happen */
     962             : 
     963             :     /* TODO: move old file out of the way, if any. */
     964          24 :     open_target_file("backup_label", true); /* BACKUP_LABEL_FILE */
     965          24 :     write_target_range(buf, 0, len);
     966          24 :     close_target_file();
     967          24 : }
     968             : 
     969             : /*
     970             :  * Verify the CRC stored in a control file image; pg_fatal() on mismatch.
     971             :  */
     972             : static void
     973         104 : checkControlFile(ControlFileData *ControlFile)
     974             : {
     975             :     pg_crc32c   crc;
     976             : 
     977             :     /* Compute the CRC over all bytes that precede the crc field */
     978         104 :     INIT_CRC32C(crc);
     979         104 :     COMP_CRC32C(crc, (char *) ControlFile, offsetof(ControlFileData, crc));
     980         104 :     FIN_CRC32C(crc);
     981             : 
     982             :     /* Compare it against the value stored in the control file */
     983         104 :     if (!EQ_CRC32C(crc, ControlFile->crc))
     984           0 :         pg_fatal("unexpected control file CRC");
     985         104 : }
     986             : 
     987             : /*
     988             :  * Verify control file contents in the buffer 'content' (whose 'size' must be
     989             :  * PG_CONTROL_FILE_SIZE) and copy it to *ControlFile; also sets WalSegSz.
     990             :  */
     991             : static void
     992         104 : digestControlFile(ControlFileData *ControlFile, const char *content,
     993             :                   size_t size)
     994             : {
     995         104 :     if (size != PG_CONTROL_FILE_SIZE)
     996           0 :         pg_fatal("unexpected control file size %d, expected %d",
     997             :                  (int) size, PG_CONTROL_FILE_SIZE);
     998             : 
     999         104 :     memcpy(ControlFile, content, sizeof(ControlFileData));  /* only the struct-sized prefix is kept */
    1000             : 
    1001             :     /* set and validate WalSegSz */
    1002         104 :     WalSegSz = ControlFile->xlog_seg_size;
    1003             : 
    1004         104 :     if (!IsValidWalSegSize(WalSegSz))
    1005           0 :         pg_fatal(ngettext("WAL segment size must be a power of two between 1 MB and 1 GB, but the control file specifies %d byte",
    1006             :                           "WAL segment size must be a power of two between 1 MB and 1 GB, but the control file specifies %d bytes",
    1007             :                           WalSegSz),
    1008             :                  WalSegSz);
    1009             : 
    1010             :     /* Finally, verify the CRC of the copied control file */
    1011         104 :     checkControlFile(ControlFile);
    1012         104 : }
    1013             : 
    1014             : /*
    1015             :  * Get value of GUC parameter restore_command from the target cluster and
    1016             :  * store a copy in restore_command. Does nothing unless restore_wal is set.
    1017             :  * This uses a logic based on "postgres -C" to get the value from the
    1018             :  * cluster.
    1019             :  */
    1020             : static void
    1021          32 : getRestoreCommand(const char *argv0)
    1022             : {
    1023             :     int         rc;
    1024             :     char        postgres_exec_path[MAXPGPATH],
    1025             :                 postgres_cmd[MAXPGPATH],
    1026             :                 cmd_output[MAXPGPATH];
    1027             : 
    1028          32 :     if (!restore_wal)
    1029          30 :         return;
    1030             : 
    1031             :     /* find postgres executable */
    1032           2 :     rc = find_other_exec(argv0, "postgres",
    1033             :                          PG_BACKEND_VERSIONSTR,
    1034             :                          postgres_exec_path);
    1035             : 
    1036           2 :     if (rc < 0)
    1037             :     {
    1038             :         char        full_path[MAXPGPATH];
    1039             : 
    1040           0 :         if (find_my_exec(argv0, full_path) < 0)
    1041           0 :             strlcpy(full_path, progname, sizeof(full_path));    /* fall back to the bare program name */
    1042             : 
    1043           0 :         if (rc == -1)
    1044           0 :             pg_log_error("The program \"%s\" is needed by %s but was not found in the\n"
    1045             :                          "same directory as \"%s\".\n"
    1046             :                          "Check your installation.",
    1047             :                          "postgres", progname, full_path);
    1048             :         else
    1049           0 :             pg_log_error("The program \"%s\" was found by \"%s\"\n"
    1050             :                          "but was not the same version as %s.\n"
    1051             :                          "Check your installation.",
    1052             :                          "postgres", full_path, progname);
    1053           0 :         exit(1);
    1054             :     }
    1055             : 
    1056             :     /*
    1057             :      * Build a command able to retrieve the value of GUC parameter
    1058             :      * restore_command, if set.
    1059             :      */
    1060           2 :     snprintf(postgres_cmd, sizeof(postgres_cmd),
    1061             :              "\"%s\" -D \"%s\" -C restore_command",
    1062             :              postgres_exec_path, datadir_target);   /* NOTE(review): truncation is not checked */
    1063             : 
    1064           2 :     if (!pipe_read_line(postgres_cmd, cmd_output, sizeof(cmd_output)))
    1065           0 :         exit(1);
    1066             : 
    1067           2 :     (void) pg_strip_crlf(cmd_output);
    1068             : 
    1069           2 :     if (strcmp(cmd_output, "") == 0)
    1070           0 :         pg_fatal("restore_command is not set in the target cluster");
    1071             : 
    1072           2 :     restore_command = pg_strdup(cmd_output);
    1073             : 
    1074           2 :     pg_log_debug("using for rewind restore_command = \'%s\'",
    1075             :                  restore_command);
    1076             : }
    1077             : 
    1078             : 
    1079             : /*
    1080             :  * Ensure clean shutdown of target instance by launching single-user mode
    1081             :  * postgres to do crash recovery; with dry_run, postgres is located but not run.
    1082             :  */
    1083             : static void
    1084          20 : ensureCleanShutdown(const char *argv0)
    1085             : {
    1086             :     int         ret;
    1087             : #define MAXCMDLEN (2 * MAXPGPATH)
    1088             :     char        exec_path[MAXPGPATH];
    1089             :     char        cmd[MAXCMDLEN];
    1090             : 
    1091             :     /* locate postgres binary */
    1092          20 :     if ((ret = find_other_exec(argv0, "postgres",
    1093             :                                PG_BACKEND_VERSIONSTR,
    1094             :                                exec_path)) < 0)
    1095             :     {
    1096             :         char        full_path[MAXPGPATH];
    1097             : 
    1098           0 :         if (find_my_exec(argv0, full_path) < 0)
    1099           0 :             strlcpy(full_path, progname, sizeof(full_path));    /* fall back to the bare program name */
    1100             : 
    1101           0 :         if (ret == -1)
    1102           0 :             pg_fatal("The program \"%s\" is needed by %s but was not found in the\n"
    1103             :                      "same directory as \"%s\".\n"
    1104             :                      "Check your installation.",
    1105             :                      "postgres", progname, full_path);
    1106             :         else
    1107           0 :             pg_fatal("The program \"%s\" was found by \"%s\"\n"
    1108             :                      "but was not the same version as %s.\n"
    1109             :                      "Check your installation.",
    1110             :                      "postgres", full_path, progname);
    1111             :     }
    1112             : 
    1113          20 :     pg_log_info("executing \"%s\" for target server to complete crash recovery",
    1114             :                 exec_path);
    1115             : 
    1116             :     /*
    1117             :      * Skip processing if requested, but only after ensuring presence of
    1118             :      * postgres.
    1119             :      */
    1120          20 :     if (dry_run)
    1121           0 :         return;
    1122             : 
    1123             :     /*
    1124             :      * Finally run postgres in single-user mode.  There is no need to use
    1125             :      * fsync here.  This makes the recovery faster, and the target data folder
    1126             :      * is synced at the end anyway.
    1127             :      */
    1128          20 :     snprintf(cmd, MAXCMDLEN, "\"%s\" --single -F -D \"%s\" template1 < \"%s\"",
    1129             :              exec_path, datadir_target, DEVNULL);
    1130             : 
    1131          20 :     if (system(cmd) != 0)       /* any nonzero status counts as failure */
    1132             :     {
    1133           2 :         pg_log_error("postgres single-user mode in target cluster failed");
    1134           2 :         pg_fatal("Command was: %s", cmd);
    1135             :     }
    1136             : }
    1137             : /* Close 'conn' with PQfinish() if it is set; presumably registered as an atexit() callback. */
    1138             : static void
    1139          32 : disconnect_atexit(void)
    1140             : {
    1141          32 :     if (conn != NULL)
    1142           0 :         PQfinish(conn);
    1143          32 : }

Generated by: LCOV version 1.13