Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * filemap.c
4 : * A data structure for keeping track of files that have changed.
5 : *
6 : * This source file contains the logic to decide what to do with different
7 : * kinds of files, and the data structure to support it. Before modifying
8 : * anything, pg_rewind collects information about all the files and their
9 : * attributes in the target and source data directories. It also scans the
10 : * WAL log in the target, and collects information about data blocks that
11 : * were changed. All this information is stored in a hash table, using the
12 : * file path relative to the root of the data directory as the key.
13 : *
14 : * After collecting all the information required, the decide_file_actions()
15 : * function scans the hash table and decides what action needs to be taken
16 : * for each file. Finally, it sorts the array to the final order that the
17 : * actions should be executed in.
18 : *
19 : * Copyright (c) 2013-2025, PostgreSQL Global Development Group
20 : *
21 : *-------------------------------------------------------------------------
22 : */
23 :
24 : #include "postgres_fe.h"
25 :
26 : #include <sys/stat.h>
27 : #include <unistd.h>
28 :
29 : #include "access/xlog_internal.h"
30 : #include "catalog/pg_tablespace_d.h"
31 : #include "common/file_utils.h"
32 : #include "common/hashfn_unstable.h"
33 : #include "common/string.h"
34 : #include "datapagemap.h"
35 : #include "filemap.h"
36 : #include "pg_rewind.h"
37 :
/*
 * Define a hash table which we can use to store information about the files
 * appearing in source and target systems.
 *
 * The macros below parameterize lib/simplehash.h: elements are file_entry_t
 * structs keyed by their 'path' member, keys are hashed with hash_string()
 * and compared with strcmp(), and the generated filehash_* functions are
 * static inline.  Table memory is obtained through pg_malloc0().
 */
#define SH_PREFIX		filehash
#define SH_ELEMENT_TYPE	file_entry_t
#define SH_KEY_TYPE		const char *
#define SH_KEY			path
#define SH_HASH_KEY(tb, key)	hash_string(key)
#define SH_EQUAL(tb, a, b)		(strcmp(a, b) == 0)
#define SH_SCOPE		static inline
#define SH_RAW_ALLOCATOR	pg_malloc0
#define SH_DECLARE
#define SH_DEFINE
#include "lib/simplehash.h"

/* Size passed to filehash_create() when the table is first built. */
#define FILEHASH_INITIAL_SIZE	1000

/* The file map itself; set up by filehash_init(). */
static filehash_hash *filehash;

/* Path classification and path construction helpers, defined further down. */
static file_content_type_t getFileContentType(const char *path);
static char *datasegpath(RelFileLocator rlocator, ForkNumber forknum,
						 BlockNumber segno);

/* Thin wrappers around the generated filehash_insert()/filehash_lookup(). */
static file_entry_t *insert_filehash_entry(const char *path);
static file_entry_t *lookup_filehash_entry(const char *path);
64 :
/*
 * A separate hash table which tracks WAL files that must not be deleted.
 *
 * Entries are keyed by file path; membership alone is the information
 * (see keepwal_add_entry() and keepwal_entry_exists()).
 */
typedef struct keepwal_entry
{
	const char *path;			/* hash key; copied with pg_strdup() on insert */
	uint32		status;			/* NOTE(review): presumably the slot status
								 * field used by simplehash.h -- confirm */
} keepwal_entry;

/* Same simplehash.h parameterization as for filehash, with keepwal_entry. */
#define SH_PREFIX		keepwal
#define SH_ELEMENT_TYPE	keepwal_entry
#define SH_KEY_TYPE		const char *
#define SH_KEY			path
#define SH_HASH_KEY(tb, key)	hash_string(key)
#define SH_EQUAL(tb, a, b)		(strcmp(a, b) == 0)
#define SH_SCOPE		static inline
#define SH_RAW_ALLOCATOR	pg_malloc0
#define SH_DECLARE
#define SH_DEFINE
#include "lib/simplehash.h"

/* Size passed to keepwal_create() in keepwal_init(). */
#define KEEPWAL_INITIAL_SIZE	1000


/* NULL until keepwal_init() has been called. */
static keepwal_hash *keepwal = NULL;
static bool keepwal_entry_exists(const char *path);

/* qsort() comparator used by decide_file_actions(). */
static int	final_filemap_cmp(const void *a, const void *b);

static bool check_file_excluded(const char *path, bool is_source);
95 :
/*
 * Definition of one element part of an exclusion list, used to exclude
 * contents when rewinding.  "name" is the name of the file or path to
 * check for exclusion.  If "match_prefix" is true, any items matching
 * the name as prefix are excluded; otherwise the name must match exactly.
 */
struct exclude_list_item
{
	const char *name;			/* file name to compare against */
	bool		match_prefix;	/* true: prefix match, false: exact match */
};
107 :
/*
 * The contents of these directories are removed or recreated during server
 * start so they are not included in data processed by pg_rewind.
 *
 * check_file_excluded() treats a path as excluded when it begins with one of
 * these names followed by '/', so the directory entries themselves are not
 * matched, only what is inside them.  The array is NULL-terminated.
 *
 * Note: those lists should be kept in sync with what basebackup.c provides.
 * Some of the values, contrary to what basebackup.c uses, are hardcoded as
 * they are defined in backend-only headers.  So this list is maintained
 * with a best effort in mind.
 */
static const char *const excludeDirContents[] =
{
	/*
	 * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped
	 * because extensions like pg_stat_statements store data there.
	 */
	"pg_stat_tmp",				/* defined as PG_STAT_TMP_DIR */

	/*
	 * It is generally not useful to backup the contents of this directory
	 * even if the intention is to restore to another primary. See backup.sgml
	 * for a more detailed description.
	 */
	"pg_replslot",				/* defined as PG_REPLSLOT_DIR */

	/* Contents removed on startup, see dsm_cleanup_for_mmap(). */
	"pg_dynshmem",				/* defined as PG_DYNSHMEM_DIR */

	/* Contents removed on startup, see AsyncShmemInit(). */
	"pg_notify",

	/*
	 * Old contents are loaded for possible debugging but are not required for
	 * normal operation, see SerialInit().
	 */
	"pg_serial",

	/* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
	"pg_snapshots",

	/* Contents zeroed on startup, see StartupSUBTRANS(). */
	"pg_subtrans",

	/* end of list */
	NULL
};
153 :
/*
 * List of files excluded from filemap processing.
 *
 * check_file_excluded() compares each entry against the last component of a
 * path.  Entries with match_prefix set exclude every file whose name begins
 * with "name"; all other entries must match the file name exactly.  The
 * array is terminated by an entry whose name is NULL.
 */
static const struct exclude_list_item excludeFiles[] =
{
	/* Skip auto conf temporary file. */
	{"postgresql.auto.conf.tmp", false},	/* defined as PG_AUTOCONF_FILENAME */

	/* Skip current log file temporary file */
	{"current_logfiles.tmp", false},	/* defined as
										 * LOG_METAINFO_DATAFILE_TMP */

	/* Skip relation cache because it is rebuilt on startup */
	{"pg_internal.init", true}, /* defined as RELCACHE_INIT_FILENAME */

	/*
	 * If there is a backup_label or tablespace_map file, it indicates that a
	 * recovery failed and this cluster probably can't be rewound, but exclude
	 * them anyway if they are found.
	 */
	{"backup_label", false},	/* defined as BACKUP_LABEL_FILE */
	{"tablespace_map", false},	/* defined as TABLESPACE_MAP */

	/*
	 * If there's a backup_manifest, it belongs to a backup that was used to
	 * start this server. It is *not* correct for this backup. Our
	 * backup_manifest is injected into the backup separately if users want
	 * it.
	 */
	{"backup_manifest", false},

	/* Server runtime files; exact-name matches only. */
	{"postmaster.pid", false},
	{"postmaster.opts", false},

	/* end of list */
	{NULL, false}
};
192 :
193 : /*
194 : * Initialize the hash table for the file map.
195 : */
196 : void
197 30 : filehash_init(void)
198 : {
199 30 : filehash = filehash_create(FILEHASH_INITIAL_SIZE, NULL);
200 30 : }
201 :
202 : /* Look up entry for 'path', creating a new one if it doesn't exist */
203 : static file_entry_t *
204 67956 : insert_filehash_entry(const char *path)
205 : {
206 : file_entry_t *entry;
207 : bool found;
208 :
209 67956 : entry = filehash_insert(filehash, path, &found);
210 67956 : if (!found)
211 : {
212 35374 : entry->path = pg_strdup(path);
213 35374 : entry->content_type = getFileContentType(path);
214 :
215 35374 : entry->target_exists = false;
216 35374 : entry->target_type = FILE_TYPE_UNDEFINED;
217 35374 : entry->target_size = 0;
218 35374 : entry->target_link_target = NULL;
219 35374 : entry->target_pages_to_overwrite.bitmap = NULL;
220 35374 : entry->target_pages_to_overwrite.bitmapsize = 0;
221 :
222 35374 : entry->source_exists = false;
223 35374 : entry->source_type = FILE_TYPE_UNDEFINED;
224 35374 : entry->source_size = 0;
225 35374 : entry->source_link_target = NULL;
226 :
227 35374 : entry->action = FILE_ACTION_UNDECIDED;
228 : }
229 :
230 67956 : return entry;
231 : }
232 :
233 : static file_entry_t *
234 169074 : lookup_filehash_entry(const char *path)
235 : {
236 169074 : return filehash_lookup(filehash, path);
237 : }
238 :
239 : /*
240 : * Initialize a hash table to store WAL file names that must be kept.
241 : */
242 : void
243 30 : keepwal_init(void)
244 : {
245 : /* An initial hash size out of thin air */
246 30 : keepwal = keepwal_create(KEEPWAL_INITIAL_SIZE, NULL);
247 30 : }
248 :
249 : /* Mark the given file to prevent its removal */
250 : void
251 40 : keepwal_add_entry(const char *path)
252 : {
253 : keepwal_entry *entry;
254 : bool found;
255 :
256 : /* Should only be called with keepwal initialized */
257 : Assert(keepwal != NULL);
258 :
259 40 : entry = keepwal_insert(keepwal, path, &found);
260 :
261 40 : if (!found)
262 40 : entry->path = pg_strdup(path);
263 40 : }
264 :
265 : /* Return true if file is marked as not to be removed, false otherwise */
266 : static bool
267 1306 : keepwal_entry_exists(const char *path)
268 : {
269 1306 : return keepwal_lookup(keepwal, path) != NULL;
270 : }
271 :
272 : /*
273 : * Callback for processing source file list.
274 : *
275 : * This is called once for every file in the source server. We record the
276 : * type and size of the file, so that decide_file_action() can later decide what
277 : * to do with it.
278 : */
279 : void
280 34018 : process_source_file(const char *path, file_type_t type, size_t size,
281 : const char *link_target)
282 : {
283 : file_entry_t *entry;
284 :
285 : /*
286 : * Pretend that pg_wal is a directory, even if it's really a symlink. We
287 : * don't want to mess with the symlink itself, nor complain if it's a
288 : * symlink in source but not in target or vice versa.
289 : */
290 34018 : if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
291 0 : type = FILE_TYPE_DIRECTORY;
292 :
293 : /*
294 : * sanity check: a filename that looks like a data file better be a
295 : * regular file
296 : */
297 34018 : if (type != FILE_TYPE_REGULAR && getFileContentType(path) == FILE_CONTENT_TYPE_RELATION)
298 0 : pg_fatal("data file \"%s\" in source is not a regular file", path);
299 :
300 : /* Remember this source file */
301 34018 : entry = insert_filehash_entry(path);
302 34018 : if (entry->source_exists)
303 0 : pg_fatal("duplicate source file \"%s\"", path);
304 34018 : entry->source_exists = true;
305 34018 : entry->source_type = type;
306 34018 : entry->source_size = size;
307 34018 : entry->source_link_target = link_target ? pg_strdup(link_target) : NULL;
308 34018 : }
309 :
310 : /*
311 : * Callback for processing target file list.
312 : *
313 : * Record the type and size of the file, like process_source_file() does.
314 : */
315 : void
316 33938 : process_target_file(const char *path, file_type_t type, size_t size,
317 : const char *link_target)
318 : {
319 : file_entry_t *entry;
320 :
321 : /*
322 : * Do not apply any exclusion filters here. This has advantage to remove
323 : * from the target data folder all paths which have been filtered out from
324 : * the source data folder when processing the source files.
325 : */
326 :
327 : /*
328 : * Like in process_source_file, pretend that pg_wal is always a directory.
329 : */
330 33938 : if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
331 4 : type = FILE_TYPE_DIRECTORY;
332 :
333 : /* Remember this target file */
334 33938 : entry = insert_filehash_entry(path);
335 33938 : if (entry->target_exists)
336 0 : pg_fatal("duplicate source file \"%s\"", path);
337 33938 : entry->target_exists = true;
338 33938 : entry->target_type = type;
339 33938 : entry->target_size = size;
340 33938 : entry->target_link_target = link_target ? pg_strdup(link_target) : NULL;
341 33938 : }
342 :
343 : /*
344 : * This callback gets called while we read the WAL in the target, for every
345 : * block that has changed in the target system. It decides if the given
346 : * 'blkno' in the target relfile needs to be overwritten from the source, and
347 : * if so, records it in 'target_pages_to_overwrite' bitmap.
348 : *
349 : * NOTE: All the files on both systems must have already been added to the
350 : * hash table!
351 : */
352 : void
353 169074 : process_target_wal_block_change(ForkNumber forknum, RelFileLocator rlocator,
354 : BlockNumber blkno)
355 : {
356 : char *path;
357 : file_entry_t *entry;
358 : BlockNumber blkno_inseg;
359 : int segno;
360 :
361 169074 : segno = blkno / RELSEG_SIZE;
362 169074 : blkno_inseg = blkno % RELSEG_SIZE;
363 :
364 169074 : path = datasegpath(rlocator, forknum, segno);
365 169074 : entry = lookup_filehash_entry(path);
366 169074 : pfree(path);
367 :
368 : /*
369 : * If the block still exists in both systems, remember it. Otherwise we
370 : * can safely ignore it.
371 : *
372 : * If the block is beyond the EOF in the source system, or the file
373 : * doesn't exist in the source at all, we're going to truncate/remove it
374 : * away from the target anyway. Likewise, if it doesn't exist in the
375 : * target anymore, we will copy it over with the "tail" from the source
376 : * system, anyway.
377 : *
378 : * It is possible to find WAL for a file that doesn't exist on either
379 : * system anymore. It means that the relation was dropped later in the
380 : * target system, and independently on the source system too, or that it
381 : * was created and dropped in the target system and it never existed in
382 : * the source. Either way, we can safely ignore it.
383 : */
384 169074 : if (entry)
385 : {
386 : Assert(entry->content_type == FILE_CONTENT_TYPE_RELATION);
387 :
388 169074 : if (entry->target_exists)
389 : {
390 169066 : if (entry->target_type != FILE_TYPE_REGULAR)
391 0 : pg_fatal("unexpected page modification for non-regular file \"%s\"",
392 : entry->path);
393 :
394 169066 : if (entry->source_exists)
395 : {
396 : off_t end_offset;
397 :
398 165758 : end_offset = (blkno_inseg + 1) * BLCKSZ;
399 165758 : if (end_offset <= entry->source_size && end_offset <= entry->target_size)
400 5758 : datapagemap_add(&entry->target_pages_to_overwrite, blkno_inseg);
401 : }
402 : }
403 : }
404 169074 : }
405 :
406 : /*
407 : * Is this the path of file that pg_rewind can skip copying?
408 : */
409 : static bool
410 35340 : check_file_excluded(const char *path, bool is_source)
411 : {
412 : char localpath[MAXPGPATH];
413 : int excludeIdx;
414 : const char *filename;
415 :
416 : /*
417 : * Skip all temporary files, .../pgsql_tmp/... and .../pgsql_tmp.*
418 : */
419 35340 : if (strstr(path, "/" PG_TEMP_FILE_PREFIX) != NULL ||
420 35310 : strstr(path, "/" PG_TEMP_FILES_DIR "/") != NULL)
421 : {
422 30 : return true;
423 : }
424 :
425 : /* check individual files... */
426 317190 : for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
427 : {
428 282028 : int cmplen = strlen(excludeFiles[excludeIdx].name);
429 :
430 282028 : filename = last_dir_separator(path);
431 282028 : if (filename == NULL)
432 6200 : filename = path;
433 : else
434 275828 : filename++;
435 :
436 282028 : if (!excludeFiles[excludeIdx].match_prefix)
437 246718 : cmplen++;
438 282028 : if (strncmp(filename, excludeFiles[excludeIdx].name, cmplen) == 0)
439 : {
440 148 : if (is_source)
441 148 : pg_log_debug("entry \"%s\" excluded from source file list",
442 : path);
443 : else
444 0 : pg_log_debug("entry \"%s\" excluded from target file list",
445 : path);
446 148 : return true;
447 : }
448 : }
449 :
450 : /*
451 : * ... And check some directories. Note that this includes any contents
452 : * within the directories themselves.
453 : */
454 281266 : for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
455 : {
456 246134 : snprintf(localpath, sizeof(localpath), "%s/",
457 246134 : excludeDirContents[excludeIdx]);
458 246134 : if (strstr(path, localpath) == path)
459 : {
460 30 : if (is_source)
461 30 : pg_log_debug("entry \"%s\" excluded from source file list",
462 : path);
463 : else
464 0 : pg_log_debug("entry \"%s\" excluded from target file list",
465 : path);
466 30 : return true;
467 : }
468 : }
469 :
470 35132 : return false;
471 : }
472 :
473 : static const char *
474 11710 : action_to_str(file_action_t action)
475 : {
476 11710 : switch (action)
477 : {
478 866 : case FILE_ACTION_NONE:
479 866 : return "NONE";
480 9356 : case FILE_ACTION_COPY:
481 9356 : return "COPY";
482 8 : case FILE_ACTION_TRUNCATE:
483 8 : return "TRUNCATE";
484 10 : case FILE_ACTION_COPY_TAIL:
485 10 : return "COPY_TAIL";
486 18 : case FILE_ACTION_CREATE:
487 18 : return "CREATE";
488 1452 : case FILE_ACTION_REMOVE:
489 1452 : return "REMOVE";
490 :
491 0 : default:
492 0 : return "unknown";
493 : }
494 : }
495 :
496 : /*
497 : * Calculate the totals needed for progress reports.
498 : */
499 : void
500 0 : calculate_totals(filemap_t *filemap)
501 : {
502 : file_entry_t *entry;
503 : int i;
504 :
505 0 : filemap->total_size = 0;
506 0 : filemap->fetch_size = 0;
507 :
508 0 : for (i = 0; i < filemap->nentries; i++)
509 : {
510 0 : entry = filemap->entries[i];
511 :
512 0 : if (entry->source_type != FILE_TYPE_REGULAR)
513 0 : continue;
514 :
515 0 : filemap->total_size += entry->source_size;
516 :
517 0 : if (entry->action == FILE_ACTION_COPY)
518 : {
519 0 : filemap->fetch_size += entry->source_size;
520 0 : continue;
521 : }
522 :
523 0 : if (entry->action == FILE_ACTION_COPY_TAIL)
524 0 : filemap->fetch_size += (entry->source_size - entry->target_size);
525 :
526 0 : if (entry->target_pages_to_overwrite.bitmapsize > 0)
527 : {
528 : datapagemap_iterator_t *iter;
529 : BlockNumber blk;
530 :
531 0 : iter = datapagemap_iterate(&entry->target_pages_to_overwrite);
532 0 : while (datapagemap_next(iter, &blk))
533 0 : filemap->fetch_size += BLCKSZ;
534 :
535 0 : pg_free(iter);
536 : }
537 : }
538 0 : }
539 :
540 : void
541 30 : print_filemap(filemap_t *filemap)
542 : {
543 : file_entry_t *entry;
544 : int i;
545 :
546 35404 : for (i = 0; i < filemap->nentries; i++)
547 : {
548 35374 : entry = filemap->entries[i];
549 :
550 35374 : if (entry->action != FILE_ACTION_NONE ||
551 24530 : entry->content_type == FILE_CONTENT_TYPE_WAL ||
552 24494 : entry->target_pages_to_overwrite.bitmapsize > 0)
553 : {
554 11710 : pg_log_debug("%s (%s)", entry->path,
555 : action_to_str(entry->action));
556 :
557 11710 : if (entry->target_pages_to_overwrite.bitmapsize > 0)
558 846 : datapagemap_print(&entry->target_pages_to_overwrite);
559 : }
560 : }
561 30 : fflush(stdout);
562 30 : }
563 :
564 : /*
565 : * Determine what kind of file this one looks like.
566 : */
567 : static file_content_type_t
568 36212 : getFileContentType(const char *path)
569 : {
570 : RelFileLocator rlocator;
571 : unsigned int segNo;
572 : int nmatch;
573 36212 : file_content_type_t result = FILE_CONTENT_TYPE_OTHER;
574 :
575 : /* Check if it is a WAL file. */
576 36212 : if (strncmp("pg_wal/", path, 7) == 0)
577 : {
578 316 : const char *filename = path + 7; /* Skip "pg_wal/" */
579 :
580 316 : if (IsXLogFileName(filename))
581 130 : return FILE_CONTENT_TYPE_WAL;
582 : else
583 186 : return FILE_CONTENT_TYPE_OTHER;
584 : }
585 :
586 : /*----
587 : * Does it look like a relation data file?
588 : *
589 : * For our purposes, only files belonging to the main fork are considered
590 : * relation files. Other forks are always copied in toto, because we
591 : * cannot reliably track changes to them, because WAL only contains block
592 : * references for the main fork.
593 : *
594 : * Relation data files can be in one of the following directories:
595 : *
596 : * global/
597 : * shared relations
598 : *
599 : * base/<db oid>/
600 : * regular relations, default tablespace
601 : *
602 : * pg_tblspc/<tblspc oid>/<tblspc version>/
603 : * within a non-default tablespace (the name of the directory
604 : * depends on version)
605 : *
606 : * And the relation data files themselves have a filename like:
607 : *
608 : * <oid>.<segment number>
609 : *
610 : *----
611 : */
612 35896 : rlocator.spcOid = InvalidOid;
613 35896 : rlocator.dbOid = InvalidOid;
614 35896 : rlocator.relNumber = InvalidRelFileNumber;
615 35896 : segNo = 0;
616 35896 : result = FILE_CONTENT_TYPE_OTHER;
617 :
618 35896 : nmatch = sscanf(path, "global/%u.%u", &rlocator.relNumber, &segNo);
619 35896 : if (nmatch == 1 || nmatch == 2)
620 : {
621 1680 : rlocator.spcOid = GLOBALTABLESPACE_OID;
622 1680 : rlocator.dbOid = 0;
623 1680 : result = FILE_CONTENT_TYPE_RELATION;
624 : }
625 : else
626 : {
627 34216 : nmatch = sscanf(path, "base/%u/%u.%u",
628 : &rlocator.dbOid, &rlocator.relNumber, &segNo);
629 34216 : if (nmatch == 2 || nmatch == 3)
630 : {
631 31764 : rlocator.spcOid = DEFAULTTABLESPACE_OID;
632 31764 : result = FILE_CONTENT_TYPE_RELATION;
633 : }
634 : else
635 : {
636 2452 : nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/%u.%u",
637 : &rlocator.spcOid, &rlocator.dbOid, &rlocator.relNumber,
638 : &segNo);
639 2452 : if (nmatch == 3 || nmatch == 4)
640 24 : result = FILE_CONTENT_TYPE_RELATION;
641 : }
642 : }
643 :
644 : /*
645 : * The sscanf tests above can match files that have extra characters at
646 : * the end. To eliminate such cases, cross-check that GetRelationPath
647 : * creates the exact same filename, when passed the RelFileLocator
648 : * information we extracted from the filename.
649 : */
650 35896 : if (result == FILE_CONTENT_TYPE_RELATION)
651 : {
652 33468 : char *check_path = datasegpath(rlocator, MAIN_FORKNUM, segNo);
653 :
654 33468 : if (strcmp(check_path, path) != 0)
655 8168 : result = FILE_CONTENT_TYPE_OTHER;
656 :
657 33468 : pfree(check_path);
658 : }
659 :
660 35896 : return result;
661 : }
662 :
663 : /*
664 : * A helper function to create the path of a relation file and segment.
665 : *
666 : * The returned path is palloc'd
667 : */
668 : static char *
669 202542 : datasegpath(RelFileLocator rlocator, ForkNumber forknum, BlockNumber segno)
670 : {
671 : RelPathStr path;
672 : char *segpath;
673 :
674 202542 : path = relpathperm(rlocator, forknum);
675 202542 : if (segno > 0)
676 : {
677 0 : segpath = psprintf("%s.%u", path.str, segno);
678 0 : return segpath;
679 : }
680 : else
681 202542 : return pstrdup(path.str);
682 : }
683 :
684 : /*
685 : * In the final stage, the filemap is sorted so that removals come last.
686 : * From disk space usage point of view, it would be better to do removals
687 : * first, but for now, safety first. If a whole directory is deleted, all
688 : * files and subdirectories inside it need to removed first. On creation,
689 : * parent directory needs to be created before files and directories inside
690 : * it. To achieve that, the file_action_t enum is ordered so that we can
691 : * just sort on that first. Furthermore, sort REMOVE entries in reverse
692 : * path order, so that "foo/bar" subdirectory is removed before "foo".
693 : */
694 : static int
695 375636 : final_filemap_cmp(const void *a, const void *b)
696 : {
697 375636 : file_entry_t *fa = *((file_entry_t **) a);
698 375636 : file_entry_t *fb = *((file_entry_t **) b);
699 :
700 375636 : if (fa->action > fb->action)
701 13910 : return 1;
702 361726 : if (fa->action < fb->action)
703 19208 : return -1;
704 :
705 342518 : if (fa->action == FILE_ACTION_REMOVE)
706 11800 : return strcmp(fb->path, fa->path);
707 : else
708 330718 : return strcmp(fa->path, fb->path);
709 : }
710 :
711 : /*
712 : * Decide what to do with a WAL segment file based on its position
713 : * relative to the point of divergence.
714 : *
715 : * Caller is responsible for ensuring that the file exists on both
716 : * source and target servers.
717 : */
718 : static file_action_t
719 64 : decide_wal_file_action(const char *fname, XLogSegNo last_common_segno,
720 : size_t source_size, size_t target_size)
721 : {
722 : TimeLineID file_tli;
723 : XLogSegNo file_segno;
724 :
725 : /* Get current WAL segment number given current segment file name */
726 64 : XLogFromFileName(fname, &file_tli, &file_segno, WalSegSz);
727 :
728 : /*
729 : * Avoid copying files before the last common segment.
730 : *
731 : * These files exist on the source and the target servers, so they should
732 : * be identical and located strictly before the segment that contains the
733 : * LSN where target and source servers have diverged.
734 : *
735 : * While we are on it, double-check the size of each file and copy the
736 : * file if they do not match, in case.
737 : */
738 64 : if (file_segno < last_common_segno &&
739 : source_size == target_size)
740 32 : return FILE_ACTION_NONE;
741 :
742 32 : return FILE_ACTION_COPY;
743 : }
744 :
745 : /*
746 : * Decide what action to perform to a file.
747 : */
748 : static file_action_t
749 35374 : decide_file_action(file_entry_t *entry, XLogSegNo last_common_segno)
750 : {
751 35374 : const char *path = entry->path;
752 :
753 : /*
754 : * Don't touch the control file. It is handled specially, after copying
755 : * all the other files.
756 : */
757 35374 : if (strcmp(path, XLOG_CONTROL_FILE) == 0)
758 30 : return FILE_ACTION_NONE;
759 :
760 : /* Skip macOS system files */
761 35344 : if (strstr(path, ".DS_Store") != NULL)
762 4 : return FILE_ACTION_NONE;
763 :
764 : /*
765 : * Remove all files matching the exclusion filters in the target.
766 : */
767 35340 : if (check_file_excluded(path, true))
768 : {
769 208 : if (entry->target_exists)
770 150 : return FILE_ACTION_REMOVE;
771 : else
772 58 : return FILE_ACTION_NONE;
773 : }
774 :
775 : /*
776 : * Handle cases where the file is missing from one of the systems.
777 : */
778 35132 : if (!entry->target_exists && entry->source_exists)
779 : {
780 : /*
781 : * File exists in source, but not in target. Copy it in toto. (If it's
782 : * a relation data file, WAL replay after rewinding should re-create
783 : * it anyway. But there's no harm in copying it now.)
784 : */
785 1374 : switch (entry->source_type)
786 : {
787 18 : case FILE_TYPE_DIRECTORY:
788 : case FILE_TYPE_SYMLINK:
789 18 : return FILE_ACTION_CREATE;
790 1356 : case FILE_TYPE_REGULAR:
791 1356 : return FILE_ACTION_COPY;
792 0 : case FILE_TYPE_UNDEFINED:
793 0 : pg_fatal("unknown file type for \"%s\"", entry->path);
794 : break;
795 : }
796 : }
797 33758 : else if (entry->target_exists && !entry->source_exists)
798 : {
799 : /*
800 : * For files that exist in target but not in source, we check the
801 : * keepwal hash table; any files listed therein must not be removed.
802 : */
803 1306 : if (keepwal_entry_exists(path))
804 : {
805 4 : pg_log_debug("Not removing file \"%s\" because it is required for recovery", path);
806 4 : return FILE_ACTION_NONE;
807 : }
808 1302 : return FILE_ACTION_REMOVE;
809 : }
810 32452 : else if (!entry->target_exists && !entry->source_exists)
811 : {
812 : /*
813 : * Doesn't exist in either server. Why does it have an entry in the
814 : * first place??
815 : */
816 : Assert(false);
817 0 : return FILE_ACTION_NONE;
818 : }
819 :
820 : /*
821 : * Otherwise, the file exists on both systems
822 : */
823 : Assert(entry->target_exists && entry->source_exists);
824 :
825 32452 : if (entry->source_type != entry->target_type)
826 : {
827 : /* But it's a different kind of object. Strange.. */
828 0 : pg_fatal("file \"%s\" is of different type in source and target", entry->path);
829 : }
830 :
831 : /*
832 : * PG_VERSION files should be identical on both systems, but avoid
833 : * overwriting them for paranoia.
834 : */
835 32452 : if (pg_str_endswith(entry->path, "PG_VERSION"))
836 128 : return FILE_ACTION_NONE;
837 :
838 32324 : switch (entry->source_type)
839 : {
840 816 : case FILE_TYPE_DIRECTORY:
841 816 : return FILE_ACTION_NONE;
842 :
843 0 : case FILE_TYPE_SYMLINK:
844 :
845 : /*
846 : * XXX: Should we check if it points to the same target?
847 : */
848 0 : return FILE_ACTION_NONE;
849 :
850 31508 : case FILE_TYPE_REGULAR:
851 31508 : if (entry->content_type == FILE_CONTENT_TYPE_WAL)
852 : {
853 : /* Handle WAL segment file */
854 64 : const char *filename = last_dir_separator(entry->path);
855 :
856 64 : if (filename == NULL)
857 0 : filename = entry->path;
858 : else
859 64 : filename++; /* Skip the separator */
860 :
861 64 : return decide_wal_file_action(filename, last_common_segno,
862 : entry->source_size,
863 : entry->target_size);
864 : }
865 31444 : else if (entry->content_type != FILE_CONTENT_TYPE_RELATION)
866 : {
867 : /*
868 : * It's a non-data file that we have no special processing
869 : * for. Copy it in toto.
870 : */
871 7968 : return FILE_ACTION_COPY;
872 : }
873 : else
874 : {
875 : /*
876 : * It's a data file that exists in both systems.
877 : *
878 : * If it's larger in target, we can truncate it. There will
879 : * also be a WAL record of the truncation in the source
880 : * system, so WAL replay would eventually truncate the target
881 : * too, but we might as well do it now.
882 : *
883 : * If it's smaller in the target, it means that it has been
884 : * truncated in the target, or enlarged in the source, or
885 : * both. If it was truncated in the target, we need to copy
886 : * the missing tail from the source system. If it was enlarged
887 : * in the source system, there will be WAL records in the
888 : * source system for the new blocks, so we wouldn't need to
889 : * copy them here. But we don't know which scenario we're
890 : * dealing with, and there's no harm in copying the missing
891 : * blocks now, so do it now.
892 : *
893 : * If it's the same size, do nothing here. Any blocks modified
894 : * in the target will be copied based on parsing the target
895 : * system's WAL, and any blocks modified in the source will be
896 : * updated after rewinding, when the source system's WAL is
897 : * replayed.
898 : */
899 23476 : if (entry->target_size < entry->source_size)
900 10 : return FILE_ACTION_COPY_TAIL;
901 23466 : else if (entry->target_size > entry->source_size)
902 8 : return FILE_ACTION_TRUNCATE;
903 : else
904 23458 : return FILE_ACTION_NONE;
905 : }
906 : break;
907 :
908 0 : case FILE_TYPE_UNDEFINED:
909 0 : pg_fatal("unknown file type for \"%s\"", path);
910 : break;
911 : }
912 :
913 : /* unreachable */
914 0 : pg_fatal("could not decide what to do with file \"%s\"", path);
915 : }
916 :
917 : /*
918 : * Decide what to do with each file.
919 : *
920 : * Returns a 'filemap' with the entries in the order that their actions
921 : * should be executed.
922 : */
923 : filemap_t *
924 30 : decide_file_actions(XLogSegNo last_common_segno)
925 : {
926 : int i;
927 : filehash_iterator it;
928 : file_entry_t *entry;
929 : filemap_t *filemap;
930 :
931 30 : filehash_start_iterate(filehash, &it);
932 35404 : while ((entry = filehash_iterate(filehash, &it)) != NULL)
933 : {
934 35374 : entry->action = decide_file_action(entry, last_common_segno);
935 : }
936 :
937 : /*
938 : * Turn the hash table into an array, and sort in the order that the
939 : * actions should be performed.
940 : */
941 30 : filemap = pg_malloc(offsetof(filemap_t, entries) +
942 30 : filehash->members * sizeof(file_entry_t *));
943 30 : filemap->nentries = filehash->members;
944 30 : filehash_start_iterate(filehash, &it);
945 30 : i = 0;
946 35404 : while ((entry = filehash_iterate(filehash, &it)) != NULL)
947 : {
948 35374 : filemap->entries[i++] = entry;
949 : }
950 :
951 30 : qsort(&filemap->entries, filemap->nentries, sizeof(file_entry_t *),
952 : final_filemap_cmp);
953 :
954 30 : return filemap;
955 : }
|