Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_rewind.c
4 : * Synchronizes a PostgreSQL data directory to a new timeline
5 : *
6 : * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7 : *
8 : *-------------------------------------------------------------------------
9 : */
10 : #include "postgres_fe.h"
11 :
12 : #include <sys/stat.h>
13 : #include <fcntl.h>
14 : #include <time.h>
15 : #include <unistd.h>
16 :
17 : #include "access/timeline.h"
18 : #include "access/xlog_internal.h"
19 : #include "catalog/catversion.h"
20 : #include "catalog/pg_control.h"
21 : #include "common/controldata_utils.h"
22 : #include "common/file_perm.h"
23 : #include "common/restricted_token.h"
24 : #include "common/string.h"
25 : #include "fe_utils/recovery_gen.h"
26 : #include "fe_utils/string_utils.h"
27 : #include "file_ops.h"
28 : #include "filemap.h"
29 : #include "getopt_long.h"
30 : #include "pg_rewind.h"
31 : #include "rewind_source.h"
32 : #include "storage/bufpage.h"
33 :
/*
 * Forward declarations for the file-local helpers defined further down in
 * this file.
 */
34 : static void usage(const char *progname);
35 :
36 : static void perform_rewind(filemap_t *filemap, rewind_source *source,
37 : XLogRecPtr chkptrec,
38 : TimeLineID chkpttli,
39 : XLogRecPtr chkptredo);
40 :
41 : static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli,
42 : XLogRecPtr checkpointloc);
43 :
44 : static void digestControlFile(ControlFileData *ControlFile,
45 : const char *content, size_t size);
46 : static void getRestoreCommand(const char *argv0);
47 : static void sanityChecks(void);
48 : static void findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex);
49 : static void ensureCleanShutdown(const char *argv0);
50 : static void disconnect_atexit(void);
51 :
/*
 * Parsed copies of global/pg_control: the target's, the source's as read
 * before any files are copied, and the source's as re-read at the end of
 * perform_rewind().
 */
52 : static ControlFileData ControlFile_target;
53 : static ControlFileData ControlFile_source;
54 : static ControlFileData ControlFile_source_after;
55 :
/*
 * progname is set from argv[0] in main(); WalSegSz is (re)assigned by
 * digestControlFile() from each control file's xlog_seg_size.
 */
56 : const char *progname;
57 : int WalSegSz;
58 :
59 : /* Configuration options */
/*
 * datadir_target, datadir_source, connstr_source and config_file are filled
 * in from --target-pgdata, --source-pgdata, --source-server and
 * --config-file respectively.
 */
60 : char *datadir_target = NULL;
61 : char *datadir_source = NULL;
62 : char *connstr_source = NULL;
63 : char *restore_command = NULL;
64 : char *config_file = NULL;
65 :
/*
 * Flags toggled by --debug, -P, -n, -N (which clears do_sync) and -c.
 */
66 : static bool debug = false;
67 : bool showprogress = false;
68 : bool dry_run = false;
69 : bool do_sync = true;
70 : bool restore_wal = false;
71 :
72 : /* Target history */
/* Both are filled in by findCommonAncestorTimeline(). */
73 : TimeLineHistoryEntry *targetHistory;
74 : int targetNentries;
75 :
76 : /* Progress counters */
/* Read by progress_report(); fetch_size is seeded in main() under -P. */
77 : uint64 fetch_size;
78 : uint64 fetch_done;
79 :
/*
 * conn is the libpq connection opened in main() when --source-server is
 * given; source is the rewind_source used to fetch from either kind of
 * source.
 */
80 : static PGconn *conn;
81 : static rewind_source *source;
82 :
83 : static void
84 2 : usage(const char *progname)
85 : {
86 2 : printf(_("%s resynchronizes a PostgreSQL cluster with another copy of the cluster.\n\n"), progname);
87 2 : printf(_("Usage:\n %s [OPTION]...\n\n"), progname);
88 2 : printf(_("Options:\n"));
89 2 : printf(_(" -c, --restore-target-wal use restore_command in target configuration to\n"
90 : " retrieve WAL files from archives\n"));
91 2 : printf(_(" -D, --target-pgdata=DIRECTORY existing data directory to modify\n"));
92 2 : printf(_(" --source-pgdata=DIRECTORY source data directory to synchronize with\n"));
93 2 : printf(_(" --source-server=CONNSTR source server to synchronize with\n"));
94 2 : printf(_(" -n, --dry-run stop before modifying anything\n"));
95 2 : printf(_(" -N, --no-sync do not wait for changes to be written\n"
96 : " safely to disk\n"));
97 2 : printf(_(" -P, --progress write progress messages\n"));
98 2 : printf(_(" -R, --write-recovery-conf write configuration for replication\n"
99 : " (requires --source-server)\n"));
100 2 : printf(_(" --config-file=FILENAME use specified main server configuration\n"
101 : " file when running target cluster\n"));
102 2 : printf(_(" --debug write a lot of debug messages\n"));
103 2 : printf(_(" --no-ensure-shutdown do not automatically fix unclean shutdown\n"));
104 2 : printf(_(" -V, --version output version information, then exit\n"));
105 2 : printf(_(" -?, --help show this help, then exit\n"));
106 2 : printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
107 2 : printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
108 2 : }
109 :
110 :
111 : int
112 48 : main(int argc, char **argv)
113 : {
114 : static struct option long_options[] = {
115 : {"help", no_argument, NULL, '?'},
116 : {"target-pgdata", required_argument, NULL, 'D'},
117 : {"write-recovery-conf", no_argument, NULL, 'R'},
118 : {"source-pgdata", required_argument, NULL, 1},
119 : {"source-server", required_argument, NULL, 2},
120 : {"no-ensure-shutdown", no_argument, NULL, 4},
121 : {"config-file", required_argument, NULL, 5},
122 : {"version", no_argument, NULL, 'V'},
123 : {"restore-target-wal", no_argument, NULL, 'c'},
124 : {"dry-run", no_argument, NULL, 'n'},
125 : {"no-sync", no_argument, NULL, 'N'},
126 : {"progress", no_argument, NULL, 'P'},
127 : {"debug", no_argument, NULL, 3},
128 : {NULL, 0, NULL, 0}
129 : };
130 : int option_index;
131 : int c;
132 : XLogRecPtr divergerec;
133 : int lastcommontliIndex;
134 : XLogRecPtr chkptrec;
135 : TimeLineID chkpttli;
136 : XLogRecPtr chkptredo;
137 : XLogRecPtr target_wal_endrec;
138 : size_t size;
139 : char *buffer;
140 48 : bool no_ensure_shutdown = false;
141 : bool rewind_needed;
142 48 : bool writerecoveryconf = false;
143 : filemap_t *filemap;
144 :
145 48 : pg_logging_init(argv[0]);
146 48 : set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_rewind"));
147 48 : progname = get_progname(argv[0]);
148 :
149 : /* Process command-line arguments */
150 48 : if (argc > 1)
151 : {
152 48 : if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
153 : {
154 2 : usage(progname);
155 2 : exit(0);
156 : }
157 46 : if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
158 : {
159 2 : puts("pg_rewind (PostgreSQL) " PG_VERSION);
160 2 : exit(0);
161 : }
162 : }
163 :
164 242 : while ((c = getopt_long(argc, argv, "cD:nNPR", long_options, &option_index)) != -1)
165 : {
166 200 : switch (c)
167 : {
168 2 : case 'c':
169 2 : restore_wal = true;
170 2 : break;
171 :
172 0 : case 'P':
173 0 : showprogress = true;
174 0 : break;
175 :
176 2 : case 'n':
177 2 : dry_run = true;
178 2 : break;
179 :
180 32 : case 'N':
181 32 : do_sync = false;
182 32 : break;
183 :
184 12 : case 'R':
185 12 : writerecoveryconf = true;
186 12 : break;
187 :
188 40 : case 3:
189 40 : debug = true;
190 40 : pg_logging_increase_verbosity();
191 40 : break;
192 :
193 42 : case 'D': /* -D or --target-pgdata */
194 42 : datadir_target = pg_strdup(optarg);
195 42 : break;
196 :
197 28 : case 1: /* --source-pgdata */
198 28 : datadir_source = pg_strdup(optarg);
199 28 : break;
200 :
201 14 : case 2: /* --source-server */
202 14 : connstr_source = pg_strdup(optarg);
203 14 : break;
204 :
205 6 : case 4:
206 6 : no_ensure_shutdown = true;
207 6 : break;
208 :
209 20 : case 5:
210 20 : config_file = pg_strdup(optarg);
211 20 : break;
212 :
213 2 : default:
214 : /* getopt_long already emitted a complaint */
215 2 : pg_log_error_hint("Try \"%s --help\" for more information.", progname);
216 2 : exit(1);
217 : }
218 : }
219 :
220 42 : if (datadir_source == NULL && connstr_source == NULL)
221 : {
222 2 : pg_log_error("no source specified (--source-pgdata or --source-server)");
223 2 : pg_log_error_hint("Try \"%s --help\" for more information.", progname);
224 2 : exit(1);
225 : }
226 :
227 40 : if (datadir_source != NULL && connstr_source != NULL)
228 : {
229 2 : pg_log_error("only one of --source-pgdata or --source-server can be specified");
230 2 : pg_log_error_hint("Try \"%s --help\" for more information.", progname);
231 2 : exit(1);
232 : }
233 :
234 38 : if (datadir_target == NULL)
235 : {
236 0 : pg_log_error("no target data directory specified (--target-pgdata)");
237 0 : pg_log_error_hint("Try \"%s --help\" for more information.", progname);
238 0 : exit(1);
239 : }
240 :
241 38 : if (writerecoveryconf && connstr_source == NULL)
242 : {
243 2 : pg_log_error("no source server information (--source-server) specified for --write-recovery-conf");
244 2 : pg_log_error_hint("Try \"%s --help\" for more information.", progname);
245 2 : exit(1);
246 : }
247 :
248 36 : if (optind < argc)
249 : {
250 2 : pg_log_error("too many command-line arguments (first is \"%s\")",
251 : argv[optind]);
252 2 : pg_log_error_hint("Try \"%s --help\" for more information.", progname);
253 2 : exit(1);
254 : }
255 :
256 : /*
257 : * Don't allow pg_rewind to be run as root, to avoid overwriting the
258 : * ownership of files in the data directory. We need only check for root
259 : * -- any other user won't have sufficient permissions to modify files in
260 : * the data directory.
261 : */
262 : #ifndef WIN32
263 34 : if (geteuid() == 0)
264 : {
265 0 : pg_log_error("cannot be executed by \"root\"");
266 0 : pg_log_error_hint("You must run %s as the PostgreSQL superuser.",
267 : progname);
268 0 : exit(1);
269 : }
270 : #endif
271 :
272 34 : get_restricted_token();
273 :
274 : /* Set mask based on PGDATA permissions */
275 34 : if (!GetDataDirectoryCreatePerm(datadir_target))
276 0 : pg_fatal("could not read permissions of directory \"%s\": %m",
277 : datadir_target);
278 :
279 34 : umask(pg_mode_mask);
280 :
281 34 : getRestoreCommand(argv[0]);
282 :
283 34 : atexit(disconnect_atexit);
284 :
285 : /*
286 : * Ok, we have all the options and we're ready to start. First, connect to
287 : * remote server.
288 : */
289 34 : if (connstr_source)
290 : {
291 12 : conn = PQconnectdb(connstr_source);
292 :
293 12 : if (PQstatus(conn) == CONNECTION_BAD)
294 0 : pg_fatal("%s", PQerrorMessage(conn));
295 :
296 12 : if (showprogress)
297 0 : pg_log_info("connected to server");
298 :
299 12 : source = init_libpq_source(conn);
300 : }
301 : else
302 22 : source = init_local_source(datadir_source);
303 :
304 : /*
305 : * Check the status of the target instance.
306 : *
307 : * If the target instance was not cleanly shut down, start and stop the
308 : * target cluster once in single-user mode to enforce recovery to finish,
309 : * ensuring that the cluster can be used by pg_rewind. Note that if
310 : * no_ensure_shutdown is specified, pg_rewind ignores this step, and users
311 : * need to make sure by themselves that the target cluster is in a clean
312 : * state.
313 : */
314 34 : buffer = slurpFile(datadir_target, "global/pg_control", &size);
315 34 : digestControlFile(&ControlFile_target, buffer, size);
316 34 : pg_free(buffer);
317 :
318 34 : if (!no_ensure_shutdown &&
319 28 : ControlFile_target.state != DB_SHUTDOWNED &&
320 22 : ControlFile_target.state != DB_SHUTDOWNED_IN_RECOVERY)
321 : {
322 20 : ensureCleanShutdown(argv[0]);
323 :
324 18 : buffer = slurpFile(datadir_target, "global/pg_control", &size);
325 18 : digestControlFile(&ControlFile_target, buffer, size);
326 18 : pg_free(buffer);
327 : }
328 :
329 32 : buffer = source->fetch_file(source, "global/pg_control", &size);
330 32 : digestControlFile(&ControlFile_source, buffer, size);
331 32 : pg_free(buffer);
332 :
333 32 : sanityChecks();
334 :
335 : /*
336 : * Find the common ancestor timeline between the clusters.
337 : *
338 : * If both clusters are already on the same timeline, there's nothing to
339 : * do.
340 : */
341 28 : if (ControlFile_target.checkPointCopy.ThisTimeLineID ==
342 28 : ControlFile_source.checkPointCopy.ThisTimeLineID)
343 : {
344 2 : pg_log_info("source and target cluster are on the same timeline");
345 2 : rewind_needed = false;
346 2 : target_wal_endrec = 0;
347 : }
348 : else
349 : {
350 : XLogRecPtr chkptendrec;
351 :
352 26 : findCommonAncestorTimeline(&divergerec, &lastcommontliIndex);
353 26 : pg_log_info("servers diverged at WAL location %X/%X on timeline %u",
354 : LSN_FORMAT_ARGS(divergerec),
355 : targetHistory[lastcommontliIndex].tli);
356 :
357 : /*
358 : * Determine the end-of-WAL on the target.
359 : *
360 : * The WAL ends at the last shutdown checkpoint, or at
361 : * minRecoveryPoint if it was a standby. (If we supported rewinding a
362 : * server that was not shut down cleanly, we would need to replay
363 : * until we reach the first invalid record, like crash recovery does.)
364 : */
365 :
366 : /* read the checkpoint record on the target to see where it ends. */
367 26 : chkptendrec = readOneRecord(datadir_target,
368 : ControlFile_target.checkPoint,
369 : targetNentries - 1,
370 : restore_command);
371 :
372 26 : if (ControlFile_target.minRecoveryPoint > chkptendrec)
373 : {
374 2 : target_wal_endrec = ControlFile_target.minRecoveryPoint;
375 : }
376 : else
377 : {
378 24 : target_wal_endrec = chkptendrec;
379 : }
380 :
381 : /*
382 : * Check for the possibility that the target is in fact a direct
383 : * ancestor of the source. In that case, there is no divergent history
384 : * in the target that needs rewinding.
385 : */
386 26 : if (target_wal_endrec > divergerec)
387 : {
388 26 : rewind_needed = true;
389 : }
390 : else
391 : {
392 : /* the last common checkpoint record must be part of target WAL */
393 : Assert(target_wal_endrec == divergerec);
394 :
395 0 : rewind_needed = false;
396 : }
397 : }
398 :
399 28 : if (!rewind_needed)
400 : {
401 2 : pg_log_info("no rewind required");
402 2 : if (writerecoveryconf && !dry_run)
403 0 : WriteRecoveryConfig(conn, datadir_target,
404 : GenerateRecoveryConfig(conn, NULL));
405 2 : exit(0);
406 : }
407 :
408 26 : findLastCheckpoint(datadir_target, divergerec, lastcommontliIndex,
409 : &chkptrec, &chkpttli, &chkptredo, restore_command);
410 26 : pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u",
411 : LSN_FORMAT_ARGS(chkptrec), chkpttli);
412 :
413 : /* Initialize the hash table to track the status of each file */
414 26 : filehash_init();
415 :
416 : /*
417 : * Collect information about all files in the both data directories.
418 : */
419 26 : if (showprogress)
420 0 : pg_log_info("reading source file list");
421 26 : source->traverse_files(source, &process_source_file);
422 :
423 26 : if (showprogress)
424 0 : pg_log_info("reading target file list");
425 26 : traverse_datadir(datadir_target, &process_target_file);
426 :
427 : /*
428 : * Read the target WAL from last checkpoint before the point of fork, to
429 : * extract all the pages that were modified on the target cluster after
430 : * the fork.
431 : */
432 26 : if (showprogress)
433 0 : pg_log_info("reading WAL in target");
434 26 : extractPageMap(datadir_target, chkptrec, lastcommontliIndex,
435 : target_wal_endrec, restore_command);
436 :
437 : /*
438 : * We have collected all information we need from both systems. Decide
439 : * what to do with each file.
440 : */
441 26 : filemap = decide_file_actions();
442 26 : if (showprogress)
443 0 : calculate_totals(filemap);
444 :
445 : /* this is too verbose even for verbose mode */
446 26 : if (debug)
447 26 : print_filemap(filemap);
448 :
449 : /*
450 : * Ok, we're ready to start copying things over.
451 : */
452 26 : if (showprogress)
453 : {
454 0 : pg_log_info("need to copy %lu MB (total source directory size is %lu MB)",
455 : (unsigned long) (filemap->fetch_size / (1024 * 1024)),
456 : (unsigned long) (filemap->total_size / (1024 * 1024)));
457 :
458 0 : fetch_size = filemap->fetch_size;
459 0 : fetch_done = 0;
460 : }
461 :
462 : /*
463 : * We have now collected all the information we need from both systems,
464 : * and we are ready to start modifying the target directory.
465 : *
466 : * This is the point of no return. Once we start copying things, there is
467 : * no turning back!
468 : */
469 26 : perform_rewind(filemap, source, chkptrec, chkpttli, chkptredo);
470 :
471 24 : if (showprogress)
472 0 : pg_log_info("syncing target data directory");
473 24 : sync_target_dir();
474 :
475 : /* Also update the standby configuration, if requested. */
476 24 : if (writerecoveryconf && !dry_run)
477 10 : WriteRecoveryConfig(conn, datadir_target,
478 : GenerateRecoveryConfig(conn, NULL));
479 :
480 : /* don't need the source connection anymore */
481 24 : source->destroy(source);
482 24 : if (conn)
483 : {
484 12 : PQfinish(conn);
485 12 : conn = NULL;
486 : }
487 :
488 24 : pg_log_info("Done!");
489 :
490 24 : return 0;
491 : }
492 :
493 : /*
494 : * Perform the rewind.
495 : *
496 : * We have already collected all the information we need from the
497 : * target and the source.
498 : */
499 : static void
500 26 : perform_rewind(filemap_t *filemap, rewind_source *source,
501 : XLogRecPtr chkptrec,
502 : TimeLineID chkpttli,
503 : XLogRecPtr chkptredo)
504 : {
505 : XLogRecPtr endrec;
506 : TimeLineID endtli;
507 : ControlFileData ControlFile_new;
508 : size_t size;
509 : char *buffer;
510 :
511 : /*
512 : * Execute the actions in the file map, fetching data from the source
513 : * system as needed.
514 : */
515 29608 : for (int i = 0; i < filemap->nentries; i++)
516 : {
517 29584 : file_entry_t *entry = filemap->entries[i];
518 :
519 : /*
520 : * If this is a relation file, copy the modified blocks.
521 : *
522 : * This is in addition to any other changes.
523 : */
524 29584 : if (entry->target_pages_to_overwrite.bitmapsize > 0)
525 : {
526 : datapagemap_iterator_t *iter;
527 : BlockNumber blkno;
528 : off_t offset;
529 :
530 774 : iter = datapagemap_iterate(&entry->target_pages_to_overwrite);
531 3828 : while (datapagemap_next(iter, &blkno))
532 : {
533 3054 : offset = blkno * BLCKSZ;
534 3054 : source->queue_fetch_range(source, entry->path, offset, BLCKSZ);
535 : }
536 774 : pg_free(iter);
537 : }
538 :
539 29584 : switch (entry->action)
540 : {
541 19800 : case FILE_ACTION_NONE:
542 : /* nothing else to do */
543 19800 : break;
544 :
545 8348 : case FILE_ACTION_COPY:
546 8348 : source->queue_fetch_file(source, entry->path, entry->source_size);
547 8346 : break;
548 :
549 8 : case FILE_ACTION_TRUNCATE:
550 8 : truncate_target_file(entry->path, entry->source_size);
551 8 : break;
552 :
553 10 : case FILE_ACTION_COPY_TAIL:
554 10 : source->queue_fetch_range(source, entry->path,
555 10 : entry->target_size,
556 10 : entry->source_size - entry->target_size);
557 10 : break;
558 :
559 1402 : case FILE_ACTION_REMOVE:
560 1402 : remove_target(entry);
561 1402 : break;
562 :
563 16 : case FILE_ACTION_CREATE:
564 16 : create_target(entry);
565 16 : break;
566 :
567 0 : case FILE_ACTION_UNDECIDED:
568 0 : pg_fatal("no action decided for file \"%s\"", entry->path);
569 : break;
570 : }
571 29582 : }
572 :
573 : /* Complete any remaining range-fetches that we queued up above. */
574 24 : source->finish_fetch(source);
575 :
576 24 : close_target_file();
577 :
578 24 : progress_report(true);
579 :
580 : /*
581 : * Fetch the control file from the source last. This ensures that the
582 : * minRecoveryPoint is up-to-date.
583 : */
584 24 : buffer = source->fetch_file(source, "global/pg_control", &size);
585 24 : digestControlFile(&ControlFile_source_after, buffer, size);
586 24 : pg_free(buffer);
587 :
588 : /*
589 : * Sanity check: If the source is a local system, the control file should
590 : * not have changed since we started.
591 : *
592 : * XXX: We assume it hasn't been modified, but actually, what could go
593 : * wrong? The logic handles a libpq source that's modified concurrently,
594 : * why not a local datadir?
595 : */
596 24 : if (datadir_source &&
597 12 : memcmp(&ControlFile_source, &ControlFile_source_after,
598 : sizeof(ControlFileData)) != 0)
599 : {
600 0 : pg_fatal("source system was modified while pg_rewind was running");
601 : }
602 :
603 24 : if (showprogress)
604 0 : pg_log_info("creating backup label and updating control file");
605 :
606 : /*
607 : * Create a backup label file, to tell the target where to begin the WAL
608 : * replay. Normally, from the last common checkpoint between the source
609 : * and the target. But if the source is a standby server, it's possible
610 : * that the last common checkpoint is *after* the standby's restartpoint.
611 : * That implies that the source server has applied the checkpoint record,
612 : * but hasn't performed a corresponding restartpoint yet. Make sure we
613 : * start at the restartpoint's redo point in that case.
614 : *
615 : * Use the old version of the source's control file for this. The server
616 : * might have finished the restartpoint after we started copying files,
617 : * but we must begin from the redo point at the time that started copying.
618 : */
619 24 : if (ControlFile_source.checkPointCopy.redo < chkptredo)
620 : {
621 2 : chkptredo = ControlFile_source.checkPointCopy.redo;
622 2 : chkpttli = ControlFile_source.checkPointCopy.ThisTimeLineID;
623 2 : chkptrec = ControlFile_source.checkPoint;
624 : }
625 24 : createBackupLabel(chkptredo, chkpttli, chkptrec);
626 :
627 : /*
628 : * Update control file of target, to tell the target how far it must
629 : * replay the WAL (minRecoveryPoint).
630 : */
631 24 : if (connstr_source)
632 : {
633 : /*
634 : * The source is a live server. Like in an online backup, it's
635 : * important that we recover all the WAL that was generated while we
636 : * were copying files.
637 : */
638 12 : if (ControlFile_source_after.state == DB_IN_ARCHIVE_RECOVERY)
639 : {
640 : /*
641 : * Source is a standby server. We must replay to its
642 : * minRecoveryPoint.
643 : */
644 2 : endrec = ControlFile_source_after.minRecoveryPoint;
645 2 : endtli = ControlFile_source_after.minRecoveryPointTLI;
646 : }
647 : else
648 : {
649 : /*
650 : * Source is a production, non-standby, server. We must replay to
651 : * the last WAL insert location.
652 : */
653 10 : if (ControlFile_source_after.state != DB_IN_PRODUCTION)
654 0 : pg_fatal("source system was in unexpected state at end of rewind");
655 :
656 10 : endrec = source->get_current_wal_insert_lsn(source);
657 10 : endtli = ControlFile_source_after.checkPointCopy.ThisTimeLineID;
658 : }
659 : }
660 : else
661 : {
662 : /*
663 : * Source is a local data directory. It should've shut down cleanly,
664 : * and we must replay to the latest shutdown checkpoint.
665 : */
666 12 : endrec = ControlFile_source_after.checkPoint;
667 12 : endtli = ControlFile_source_after.checkPointCopy.ThisTimeLineID;
668 : }
669 :
670 24 : memcpy(&ControlFile_new, &ControlFile_source_after, sizeof(ControlFileData));
671 24 : ControlFile_new.minRecoveryPoint = endrec;
672 24 : ControlFile_new.minRecoveryPointTLI = endtli;
673 24 : ControlFile_new.state = DB_IN_ARCHIVE_RECOVERY;
674 24 : if (!dry_run)
675 22 : update_controlfile(datadir_target, &ControlFile_new, do_sync);
676 24 : }
677 :
678 : static void
679 32 : sanityChecks(void)
680 : {
681 : /* TODO Check that there's no backup_label in either cluster */
682 :
683 : /* Check system_identifier match */
684 32 : if (ControlFile_target.system_identifier != ControlFile_source.system_identifier)
685 0 : pg_fatal("source and target clusters are from different systems");
686 :
687 : /* check version */
688 32 : if (ControlFile_target.pg_control_version != PG_CONTROL_VERSION ||
689 32 : ControlFile_source.pg_control_version != PG_CONTROL_VERSION ||
690 32 : ControlFile_target.catalog_version_no != CATALOG_VERSION_NO ||
691 32 : ControlFile_source.catalog_version_no != CATALOG_VERSION_NO)
692 : {
693 0 : pg_fatal("clusters are not compatible with this version of pg_rewind");
694 : }
695 :
696 : /*
697 : * Target cluster need to use checksums or hint bit wal-logging, this to
698 : * prevent from data corruption that could occur because of hint bits.
699 : */
700 32 : if (ControlFile_target.data_checksum_version != PG_DATA_CHECKSUM_VERSION &&
701 32 : !ControlFile_target.wal_log_hints)
702 : {
703 0 : pg_fatal("target server needs to use either data checksums or \"wal_log_hints = on\"");
704 : }
705 :
706 : /*
707 : * Target cluster better not be running. This doesn't guard against
708 : * someone starting the cluster concurrently. Also, this is probably more
709 : * strict than necessary; it's OK if the target node was not shut down
710 : * cleanly, as long as it isn't running at the moment.
711 : */
712 32 : if (ControlFile_target.state != DB_SHUTDOWNED &&
713 4 : ControlFile_target.state != DB_SHUTDOWNED_IN_RECOVERY)
714 2 : pg_fatal("target server must be shut down cleanly");
715 :
716 : /*
717 : * When the source is a data directory, also require that the source
718 : * server is shut down. There isn't any very strong reason for this
719 : * limitation, but better safe than sorry.
720 : */
721 30 : if (datadir_source &&
722 18 : ControlFile_source.state != DB_SHUTDOWNED &&
723 4 : ControlFile_source.state != DB_SHUTDOWNED_IN_RECOVERY)
724 2 : pg_fatal("source data directory must be shut down cleanly");
725 28 : }
726 :
727 : /*
728 : * Print a progress report based on the fetch_size and fetch_done variables.
729 : *
730 : * Progress report is written at maximum once per second, except that the
731 : * last progress report is always printed.
732 : *
733 : * If finished is set to true, this is the last progress report. The cursor
734 : * is moved to the next line.
735 : */
736 : void
737 97148 : progress_report(bool finished)
738 : {
739 : static pg_time_t last_progress_report = 0;
740 : int percent;
741 : char fetch_done_str[32];
742 : char fetch_size_str[32];
743 : pg_time_t now;
744 :
745 97148 : if (!showprogress)
746 97148 : return;
747 :
748 0 : now = time(NULL);
749 0 : if (now == last_progress_report && !finished)
750 0 : return; /* Max once per second */
751 :
752 0 : last_progress_report = now;
753 0 : percent = fetch_size ? (int) ((fetch_done) * 100 / fetch_size) : 0;
754 :
755 : /*
756 : * Avoid overflowing past 100% or the full size. This may make the total
757 : * size number change as we approach the end of the backup (the estimate
758 : * will always be wrong if WAL is included), but that's better than having
759 : * the done column be bigger than the total.
760 : */
761 0 : if (percent > 100)
762 0 : percent = 100;
763 0 : if (fetch_done > fetch_size)
764 0 : fetch_size = fetch_done;
765 :
766 0 : snprintf(fetch_done_str, sizeof(fetch_done_str), UINT64_FORMAT,
767 : fetch_done / 1024);
768 0 : snprintf(fetch_size_str, sizeof(fetch_size_str), UINT64_FORMAT,
769 : fetch_size / 1024);
770 :
771 0 : fprintf(stderr, _("%*s/%s kB (%d%%) copied"),
772 0 : (int) strlen(fetch_size_str), fetch_done_str, fetch_size_str,
773 : percent);
774 :
775 : /*
776 : * Stay on the same line if reporting to a terminal and we're not done
777 : * yet.
778 : */
779 0 : fputc((!finished && isatty(fileno(stderr))) ? '\r' : '\n', stderr);
780 : }
781 :
782 : /*
783 : * Find minimum from two WAL locations assuming InvalidXLogRecPtr means
784 : * infinity as src/include/access/timeline.h states. This routine should
785 : * be used only when comparing WAL locations related to history files.
786 : */
787 : static XLogRecPtr
788 26 : MinXLogRecPtr(XLogRecPtr a, XLogRecPtr b)
789 : {
790 26 : if (XLogRecPtrIsInvalid(a))
791 2 : return b;
792 24 : else if (XLogRecPtrIsInvalid(b))
793 24 : return a;
794 : else
795 0 : return Min(a, b);
796 : }
797 :
798 : /*
799 : * Retrieve timeline history for given control file which should behold
800 : * either source or target.
801 : */
802 : static TimeLineHistoryEntry *
803 52 : getTimelineHistory(ControlFileData *controlFile, int *nentries)
804 : {
805 : TimeLineHistoryEntry *history;
806 : TimeLineID tli;
807 :
808 52 : tli = controlFile->checkPointCopy.ThisTimeLineID;
809 :
810 : /*
811 : * Timeline 1 does not have a history file, so there is no need to check
812 : * and fake an entry with infinite start and end positions.
813 : */
814 52 : if (tli == 1)
815 : {
816 24 : history = (TimeLineHistoryEntry *) pg_malloc(sizeof(TimeLineHistoryEntry));
817 24 : history->tli = tli;
818 24 : history->begin = history->end = InvalidXLogRecPtr;
819 24 : *nentries = 1;
820 : }
821 : else
822 : {
823 : char path[MAXPGPATH];
824 : char *histfile;
825 :
826 28 : TLHistoryFilePath(path, tli);
827 :
828 : /* Get history file from appropriate source */
829 28 : if (controlFile == &ControlFile_source)
830 24 : histfile = source->fetch_file(source, path, NULL);
831 4 : else if (controlFile == &ControlFile_target)
832 4 : histfile = slurpFile(datadir_target, path, NULL);
833 : else
834 0 : pg_fatal("invalid control file");
835 :
836 28 : history = rewind_parseTimeLineHistory(histfile, tli, nentries);
837 28 : pg_free(histfile);
838 : }
839 :
840 52 : if (debug)
841 : {
842 : int i;
843 :
844 52 : if (controlFile == &ControlFile_source)
845 26 : pg_log_debug("Source timeline history:");
846 26 : else if (controlFile == &ControlFile_target)
847 26 : pg_log_debug("Target timeline history:");
848 : else
849 : Assert(false);
850 :
851 : /*
852 : * Print the target timeline history.
853 : */
854 82 : for (i = 0; i < targetNentries; i++)
855 : {
856 : TimeLineHistoryEntry *entry;
857 :
858 30 : entry = &history[i];
859 30 : pg_log_debug("%u: %X/%X - %X/%X", entry->tli,
860 : LSN_FORMAT_ARGS(entry->begin),
861 : LSN_FORMAT_ARGS(entry->end));
862 : }
863 : }
864 :
865 52 : return history;
866 : }
867 :
868 : /*
869 : * Determine the TLI of the last common timeline in the timeline history of the
870 : * two clusters. targetHistory is filled with target timeline history and
871 : * targetNentries is number of items in targetHistory. *tliIndex is set to the
872 : * index of last common timeline in targetHistory array, and *recptr is set to
873 : * the position where the timeline history diverged (ie. the first WAL record
874 : * that's not the same in both clusters).
875 : *
876 : * Control files of both clusters must be read into ControlFile_target/source
877 : * before calling this routine.
878 : */
879 : static void
880 26 : findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex)
881 : {
882 : TimeLineHistoryEntry *sourceHistory;
883 : int sourceNentries;
884 : int i,
885 : n;
886 :
887 : /* Retrieve timelines for both source and target */
888 26 : sourceHistory = getTimelineHistory(&ControlFile_source, &sourceNentries);
889 26 : targetHistory = getTimelineHistory(&ControlFile_target, &targetNentries);
890 :
891 : /*
892 : * Trace the history forward, until we hit the timeline diverge. It may
893 : * still be possible that the source and target nodes used the same
894 : * timeline number in their history but with different start position
895 : * depending on the history files that each node has fetched in previous
896 : * recovery processes. Hence check the start position of the new timeline
897 : * as well and move down by one extra timeline entry if they do not match.
898 : */
899 26 : n = Min(sourceNentries, targetNentries);
900 54 : for (i = 0; i < n; i++)
901 : {
902 28 : if (sourceHistory[i].tli != targetHistory[i].tli ||
903 28 : sourceHistory[i].begin != targetHistory[i].begin)
904 : break;
905 : }
906 :
907 26 : if (i > 0)
908 : {
909 26 : i--;
910 26 : *recptr = MinXLogRecPtr(sourceHistory[i].end, targetHistory[i].end);
911 26 : *tliIndex = i;
912 :
913 26 : pg_free(sourceHistory);
914 26 : return;
915 : }
916 : else
917 : {
918 0 : pg_fatal("could not find common ancestor of the source and target cluster's timelines");
919 : }
920 : }
921 :
922 :
923 : /*
924 : * Create a backup_label file that forces recovery to begin at the last common
925 : * checkpoint.
926 : */
927 : static void
928 24 : createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpointloc)
929 : {
930 : XLogSegNo startsegno;
931 : time_t stamp_time;
932 : char strfbuf[128];
933 : char xlogfilename[MAXFNAMELEN];
934 : struct tm *tmp;
935 : char buf[1000];
936 : int len;
937 :
938 24 : XLByteToSeg(startpoint, startsegno, WalSegSz);
939 24 : XLogFileName(xlogfilename, starttli, startsegno, WalSegSz);
940 :
941 : /*
942 : * Construct backup label file
943 : */
944 24 : stamp_time = time(NULL);
945 24 : tmp = localtime(&stamp_time);
946 24 : strftime(strfbuf, sizeof(strfbuf), "%Y-%m-%d %H:%M:%S %Z", tmp);
947 :
948 24 : len = snprintf(buf, sizeof(buf),
949 : "START WAL LOCATION: %X/%X (file %s)\n"
950 : "CHECKPOINT LOCATION: %X/%X\n"
951 : "BACKUP METHOD: pg_rewind\n"
952 : "BACKUP FROM: standby\n"
953 : "START TIME: %s\n",
954 : /* omit LABEL: line */
955 24 : LSN_FORMAT_ARGS(startpoint), xlogfilename,
956 24 : LSN_FORMAT_ARGS(checkpointloc),
957 : strfbuf);
958 24 : if (len >= sizeof(buf))
959 0 : pg_fatal("backup label buffer too small"); /* shouldn't happen */
960 :
961 : /* TODO: move old file out of the way, if any. */
962 24 : open_target_file("backup_label", true); /* BACKUP_LABEL_FILE */
963 24 : write_target_range(buf, 0, len);
964 24 : close_target_file();
965 24 : }
966 :
967 : /*
968 : * Check CRC of control file
969 : */
970 : static void
971 108 : checkControlFile(ControlFileData *ControlFile)
972 : {
973 : pg_crc32c crc;
974 :
975 : /* Calculate CRC */
976 108 : INIT_CRC32C(crc);
977 108 : COMP_CRC32C(crc, (char *) ControlFile, offsetof(ControlFileData, crc));
978 108 : FIN_CRC32C(crc);
979 :
980 : /* And simply compare it */
981 108 : if (!EQ_CRC32C(crc, ControlFile->crc))
982 0 : pg_fatal("unexpected control file CRC");
983 108 : }
984 :
985 : /*
986 : * Verify control file contents in the buffer 'content', and copy it to
987 : * *ControlFile.
988 : */
989 : static void
990 108 : digestControlFile(ControlFileData *ControlFile, const char *content,
991 : size_t size)
992 : {
993 108 : if (size != PG_CONTROL_FILE_SIZE)
994 0 : pg_fatal("unexpected control file size %d, expected %d",
995 : (int) size, PG_CONTROL_FILE_SIZE);
996 :
997 108 : memcpy(ControlFile, content, sizeof(ControlFileData));
998 :
999 : /* set and validate WalSegSz */
1000 108 : WalSegSz = ControlFile->xlog_seg_size;
1001 :
1002 108 : if (!IsValidWalSegSize(WalSegSz))
1003 0 : pg_fatal(ngettext("WAL segment size must be a power of two between 1 MB and 1 GB, but the control file specifies %d byte",
1004 : "WAL segment size must be a power of two between 1 MB and 1 GB, but the control file specifies %d bytes",
1005 : WalSegSz),
1006 : WalSegSz);
1007 :
1008 : /* Additional checks on control file */
1009 108 : checkControlFile(ControlFile);
1010 108 : }
1011 :
1012 : /*
1013 : * Get value of GUC parameter restore_command from the target cluster.
1014 : *
1015 : * This uses a logic based on "postgres -C" to get the value from the
1016 : * cluster.
1017 : */
1018 : static void
1019 34 : getRestoreCommand(const char *argv0)
1020 : {
1021 : int rc;
1022 : char postgres_exec_path[MAXPGPATH],
1023 : cmd_output[MAXPGPATH];
1024 : PQExpBuffer postgres_cmd;
1025 :
1026 34 : if (!restore_wal)
1027 32 : return;
1028 :
1029 : /* find postgres executable */
1030 2 : rc = find_other_exec(argv0, "postgres",
1031 : PG_BACKEND_VERSIONSTR,
1032 : postgres_exec_path);
1033 :
1034 2 : if (rc < 0)
1035 : {
1036 : char full_path[MAXPGPATH];
1037 :
1038 0 : if (find_my_exec(argv0, full_path) < 0)
1039 0 : strlcpy(full_path, progname, sizeof(full_path));
1040 :
1041 0 : if (rc == -1)
1042 0 : pg_fatal("program \"%s\" is needed by %s but was not found in the same directory as \"%s\"",
1043 : "postgres", progname, full_path);
1044 : else
1045 0 : pg_fatal("program \"%s\" was found by \"%s\" but was not the same version as %s",
1046 : "postgres", full_path, progname);
1047 : }
1048 :
1049 : /*
1050 : * Build a command able to retrieve the value of GUC parameter
1051 : * restore_command, if set.
1052 : */
1053 2 : postgres_cmd = createPQExpBuffer();
1054 :
1055 : /* path to postgres, properly quoted */
1056 2 : appendShellString(postgres_cmd, postgres_exec_path);
1057 :
1058 : /* add -D switch, with properly quoted data directory */
1059 2 : appendPQExpBufferStr(postgres_cmd, " -D ");
1060 2 : appendShellString(postgres_cmd, datadir_target);
1061 :
1062 : /* add custom configuration file only if requested */
1063 2 : if (config_file != NULL)
1064 : {
1065 2 : appendPQExpBufferStr(postgres_cmd, " -c config_file=");
1066 2 : appendShellString(postgres_cmd, config_file);
1067 : }
1068 :
1069 : /* add -C switch, for restore_command */
1070 2 : appendPQExpBufferStr(postgres_cmd, " -C restore_command");
1071 :
1072 2 : if (!pipe_read_line(postgres_cmd->data, cmd_output, sizeof(cmd_output)))
1073 0 : exit(1);
1074 :
1075 2 : (void) pg_strip_crlf(cmd_output);
1076 :
1077 2 : if (strcmp(cmd_output, "") == 0)
1078 0 : pg_fatal("restore_command is not set in the target cluster");
1079 :
1080 2 : restore_command = pg_strdup(cmd_output);
1081 :
1082 2 : pg_log_debug("using for rewind restore_command = \'%s\'",
1083 : restore_command);
1084 :
1085 2 : destroyPQExpBuffer(postgres_cmd);
1086 : }
1087 :
1088 :
1089 : /*
1090 : * Ensure clean shutdown of target instance by launching single-user mode
1091 : * postgres to do crash recovery.
1092 : */
1093 : static void
1094 20 : ensureCleanShutdown(const char *argv0)
1095 : {
1096 : int ret;
1097 : #define MAXCMDLEN (2 * MAXPGPATH)
1098 : char exec_path[MAXPGPATH];
1099 : PQExpBuffer postgres_cmd;
1100 :
1101 : /* locate postgres binary */
1102 20 : if ((ret = find_other_exec(argv0, "postgres",
1103 : PG_BACKEND_VERSIONSTR,
1104 : exec_path)) < 0)
1105 : {
1106 : char full_path[MAXPGPATH];
1107 :
1108 0 : if (find_my_exec(argv0, full_path) < 0)
1109 0 : strlcpy(full_path, progname, sizeof(full_path));
1110 :
1111 0 : if (ret == -1)
1112 0 : pg_fatal("program \"%s\" is needed by %s but was not found in the same directory as \"%s\"",
1113 : "postgres", progname, full_path);
1114 : else
1115 0 : pg_fatal("program \"%s\" was found by \"%s\" but was not the same version as %s",
1116 : "postgres", full_path, progname);
1117 : }
1118 :
1119 20 : pg_log_info("executing \"%s\" for target server to complete crash recovery",
1120 : exec_path);
1121 :
1122 : /*
1123 : * Skip processing if requested, but only after ensuring presence of
1124 : * postgres.
1125 : */
1126 20 : if (dry_run)
1127 0 : return;
1128 :
1129 : /*
1130 : * Finally run postgres in single-user mode. There is no need to use
1131 : * fsync here. This makes the recovery faster, and the target data folder
1132 : * is synced at the end anyway.
1133 : */
1134 20 : postgres_cmd = createPQExpBuffer();
1135 :
1136 : /* path to postgres, properly quoted */
1137 20 : appendShellString(postgres_cmd, exec_path);
1138 :
1139 : /* add set of options with properly quoted data directory */
1140 20 : appendPQExpBufferStr(postgres_cmd, " --single -F -D ");
1141 20 : appendShellString(postgres_cmd, datadir_target);
1142 :
1143 : /* add custom configuration file only if requested */
1144 20 : if (config_file != NULL)
1145 : {
1146 18 : appendPQExpBufferStr(postgres_cmd, " -c config_file=");
1147 18 : appendShellString(postgres_cmd, config_file);
1148 : }
1149 :
1150 : /* finish with the database name, and a properly quoted redirection */
1151 20 : appendPQExpBufferStr(postgres_cmd, " template1 < ");
1152 20 : appendShellString(postgres_cmd, DEVNULL);
1153 :
1154 20 : if (system(postgres_cmd->data) != 0)
1155 : {
1156 2 : pg_log_error("postgres single-user mode in target cluster failed");
1157 2 : pg_log_error_detail("Command was: %s", postgres_cmd->data);
1158 2 : exit(1);
1159 : }
1160 :
1161 18 : destroyPQExpBuffer(postgres_cmd);
1162 : }
1163 :
1164 : static void
1165 34 : disconnect_atexit(void)
1166 : {
1167 34 : if (conn != NULL)
1168 0 : PQfinish(conn);
1169 34 : }
|