|           Line data    Source code 
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * filemap.c
       4             :  *    A data structure for keeping track of files that have changed.
       5             :  *
       6             :  * This source file contains the logic to decide what to do with different
       7             :  * kinds of files, and the data structure to support it.  Before modifying
       8             :  * anything, pg_rewind collects information about all the files and their
       9             :  * attributes in the target and source data directories.  It also scans the
      10             :  * WAL log in the target, and collects information about data blocks that
      11             :  * were changed.  All this information is stored in a hash table, using the
      12             :  * file path relative to the root of the data directory as the key.
      13             :  *
      14             :  * After collecting all the information required, the decide_file_actions()
      15             :  * function scans the hash table and decides what action needs to be taken
      16             :  * for each file.  Finally, it sorts the array to the final order that the
      17             :  * actions should be executed in.
      18             :  *
      19             :  * Copyright (c) 2013-2025, PostgreSQL Global Development Group
      20             :  *
      21             :  *-------------------------------------------------------------------------
      22             :  */
      23             : 
      24             : #include "postgres_fe.h"
      25             : 
      26             : #include <sys/stat.h>
      27             : #include <unistd.h>
      28             : 
      29             : #include "access/xlog_internal.h"
      30             : #include "catalog/pg_tablespace_d.h"
      31             : #include "common/file_utils.h"
      32             : #include "common/hashfn_unstable.h"
      33             : #include "common/string.h"
      34             : #include "datapagemap.h"
      35             : #include "filemap.h"
      36             : #include "pg_rewind.h"
      37             : 
      38             : /*
      39             :  * Define a hash table which we can use to store information about the files
      40             :  * appearing in source and target systems.
      41             :  */
      42             : #define SH_PREFIX               filehash
      43             : #define SH_ELEMENT_TYPE         file_entry_t
      44             : #define SH_KEY_TYPE             const char *
      45             : #define SH_KEY                  path
      46             : #define SH_HASH_KEY(tb, key)    hash_string(key)
      47             : #define SH_EQUAL(tb, a, b)      (strcmp(a, b) == 0)
      48             : #define SH_SCOPE                static inline
      49             : #define SH_RAW_ALLOCATOR        pg_malloc0
      50             : #define SH_DECLARE
      51             : #define SH_DEFINE
      52             : #include "lib/simplehash.h"
      53             : 
      54             : #define FILEHASH_INITIAL_SIZE   1000
      55             : 
      56             : static filehash_hash *filehash;
      57             : 
      58             : static file_content_type_t getFileContentType(const char *path);
      59             : static char *datasegpath(RelFileLocator rlocator, ForkNumber forknum,
      60             :                          BlockNumber segno);
      61             : 
      62             : static file_entry_t *insert_filehash_entry(const char *path);
      63             : static file_entry_t *lookup_filehash_entry(const char *path);
      64             : 
      65             : /*
      66             :  * A separate hash table which tracks WAL files that must not be deleted.
      67             :  */
      68             : typedef struct keepwal_entry
      69             : {
      70             :     const char *path;
      71             :     uint32      status;
      72             : } keepwal_entry;
      73             : 
      74             : #define SH_PREFIX               keepwal
      75             : #define SH_ELEMENT_TYPE         keepwal_entry
      76             : #define SH_KEY_TYPE             const char *
      77             : #define SH_KEY                  path
      78             : #define SH_HASH_KEY(tb, key)    hash_string(key)
      79             : #define SH_EQUAL(tb, a, b)      (strcmp(a, b) == 0)
      80             : #define SH_SCOPE                static inline
      81             : #define SH_RAW_ALLOCATOR        pg_malloc0
      82             : #define SH_DECLARE
      83             : #define SH_DEFINE
      84             : #include "lib/simplehash.h"
      85             : 
      86             : #define KEEPWAL_INITIAL_SIZE    1000
      87             : 
      88             : 
      89             : static keepwal_hash *keepwal = NULL;
      90             : static bool keepwal_entry_exists(const char *path);
      91             : 
      92             : static int  final_filemap_cmp(const void *a, const void *b);
      93             : 
      94             : static bool check_file_excluded(const char *path, bool is_source);
      95             : 
      96             : /*
      97             :  * Definition of one element part of an exclusion list, used to exclude
      98             :  * contents when rewinding.  "name" is the name of the file or path to
      99             :  * check for exclusion.  If "match_prefix" is true, any items matching
     100             :  * the name as prefix are excluded.
     101             :  */
     102             : struct exclude_list_item
     103             : {
     104             :     const char *name;
     105             :     bool        match_prefix;
     106             : };
     107             : 
     108             : /*
     109             :  * The contents of these directories are removed or recreated during server
     110             :  * start so they are not included in data processed by pg_rewind.
     111             :  *
     112             :  * Note: those lists should be kept in sync with what basebackup.c provides.
     113             :  * Some of the values, contrary to what basebackup.c uses, are hardcoded as
     114             :  * they are defined in backend-only headers.  So this list is maintained
     115             :  * with a best effort in mind.
     116             :  */
     117             : static const char *const excludeDirContents[] =
     118             : {
     119             :     /*
     120             :      * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped
     121             :      * because extensions like pg_stat_statements store data there.
     122             :      */
     123             :     "pg_stat_tmp",                /* defined as PG_STAT_TMP_DIR */
     124             : 
     125             :     /*
     126             :      * It is generally not useful to backup the contents of this directory
     127             :      * even if the intention is to restore to another primary. See backup.sgml
     128             :      * for a more detailed description.
     129             :      */
     130             :     "pg_replslot",                /* defined as PG_REPLSLOT_DIR */
     131             : 
     132             :     /* Contents removed on startup, see dsm_cleanup_for_mmap(). */
     133             :     "pg_dynshmem",                /* defined as PG_DYNSHMEM_DIR */
     134             : 
     135             :     /* Contents removed on startup, see AsyncShmemInit(). */
     136             :     "pg_notify",
     137             : 
     138             :     /*
     139             :      * Old contents are loaded for possible debugging but are not required for
     140             :      * normal operation, see SerialInit().
     141             :      */
     142             :     "pg_serial",
     143             : 
     144             :     /* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
     145             :     "pg_snapshots",
     146             : 
     147             :     /* Contents zeroed on startup, see StartupSUBTRANS(). */
     148             :     "pg_subtrans",
     149             : 
     150             :     /* end of list */
     151             :     NULL
     152             : };
     153             : 
     154             : /*
     155             :  * List of files excluded from filemap processing.   Files are excluded
     156             :  * if their prefix match.
     157             :  */
     158             : static const struct exclude_list_item excludeFiles[] =
     159             : {
     160             :     /* Skip auto conf temporary file. */
     161             :     {"postgresql.auto.conf.tmp", false},  /* defined as PG_AUTOCONF_FILENAME */
     162             : 
     163             :     /* Skip current log file temporary file */
     164             :     {"current_logfiles.tmp", false},  /* defined as
     165             :                                          * LOG_METAINFO_DATAFILE_TMP */
     166             : 
     167             :     /* Skip relation cache because it is rebuilt on startup */
     168             :     {"pg_internal.init", true}, /* defined as RELCACHE_INIT_FILENAME */
     169             : 
     170             :     /*
     171             :      * If there is a backup_label or tablespace_map file, it indicates that a
     172             :      * recovery failed and this cluster probably can't be rewound, but exclude
     173             :      * them anyway if they are found.
     174             :      */
     175             :     {"backup_label", false},  /* defined as BACKUP_LABEL_FILE */
     176             :     {"tablespace_map", false},    /* defined as TABLESPACE_MAP */
     177             : 
     178             :     /*
     179             :      * If there's a backup_manifest, it belongs to a backup that was used to
     180             :      * start this server. It is *not* correct for this backup. Our
     181             :      * backup_manifest is injected into the backup separately if users want
     182             :      * it.
     183             :      */
     184             :     {"backup_manifest", false},
     185             : 
     186             :     {"postmaster.pid", false},
     187             :     {"postmaster.opts", false},
     188             : 
     189             :     /* end of list */
     190             :     {NULL, false}
     191             : };
     192             : 
     193             : /*
     194             :  * Initialize the hash table for the file map.
     195             :  */
     196             : void
     197          30 : filehash_init(void)
     198             : {
     199          30 :     filehash = filehash_create(FILEHASH_INITIAL_SIZE, NULL);
     200          30 : }
     201             : 
     202             : /* Look up entry for 'path', creating a new one if it doesn't exist */
     203             : static file_entry_t *
     204       67968 : insert_filehash_entry(const char *path)
     205             : {
     206             :     file_entry_t *entry;
     207             :     bool        found;
     208             : 
     209       67968 :     entry = filehash_insert(filehash, path, &found);
     210       67968 :     if (!found)
     211             :     {
     212       35388 :         entry->path = pg_strdup(path);
     213       35388 :         entry->content_type = getFileContentType(path);
     214             : 
     215       35388 :         entry->target_exists = false;
     216       35388 :         entry->target_type = FILE_TYPE_UNDEFINED;
     217       35388 :         entry->target_size = 0;
     218       35388 :         entry->target_link_target = NULL;
     219       35388 :         entry->target_pages_to_overwrite.bitmap = NULL;
     220       35388 :         entry->target_pages_to_overwrite.bitmapsize = 0;
     221             : 
     222       35388 :         entry->source_exists = false;
     223       35388 :         entry->source_type = FILE_TYPE_UNDEFINED;
     224       35388 :         entry->source_size = 0;
     225       35388 :         entry->source_link_target = NULL;
     226             : 
     227       35388 :         entry->action = FILE_ACTION_UNDECIDED;
     228             :     }
     229             : 
     230       67968 :     return entry;
     231             : }
     232             : 
     233             : static file_entry_t *
     234      169074 : lookup_filehash_entry(const char *path)
     235             : {
     236      169074 :     return filehash_lookup(filehash, path);
     237             : }
     238             : 
     239             : /*
     240             :  * Initialize a hash table to store WAL file names that must be kept.
     241             :  */
     242             : void
     243          30 : keepwal_init(void)
     244             : {
     245             :     /* An initial hash size out of thin air */
     246          30 :     keepwal = keepwal_create(KEEPWAL_INITIAL_SIZE, NULL);
     247          30 : }
     248             : 
     249             : /* Mark the given file to prevent its removal */
     250             : void
     251          40 : keepwal_add_entry(const char *path)
     252             : {
     253             :     keepwal_entry *entry;
     254             :     bool        found;
     255             : 
     256             :     /* Should only be called with keepwal initialized */
     257             :     Assert(keepwal != NULL);
     258             : 
     259          40 :     entry = keepwal_insert(keepwal, path, &found);
     260             : 
     261          40 :     if (!found)
     262          40 :         entry->path = pg_strdup(path);
     263          40 : }
     264             : 
     265             : /* Return true if file is marked as not to be removed, false otherwise */
     266             : static bool
     267        1310 : keepwal_entry_exists(const char *path)
     268             : {
     269        1310 :     return keepwal_lookup(keepwal, path) != NULL;
     270             : }
     271             : 
     272             : /*
     273             :  * Callback for processing source file list.
     274             :  *
     275             :  * This is called once for every file in the source server.  We record the
     276             :  * type and size of the file, so that decide_file_action() can later decide what
     277             :  * to do with it.
     278             :  */
     279             : void
     280       34028 : process_source_file(const char *path, file_type_t type, size_t size,
     281             :                     const char *link_target)
     282             : {
     283             :     file_entry_t *entry;
     284             : 
     285             :     /*
     286             :      * Pretend that pg_wal is a directory, even if it's really a symlink. We
     287             :      * don't want to mess with the symlink itself, nor complain if it's a
     288             :      * symlink in source but not in target or vice versa.
     289             :      */
     290       34028 :     if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
     291           0 :         type = FILE_TYPE_DIRECTORY;
     292             : 
     293             :     /*
     294             :      * sanity check: a filename that looks like a data file better be a
     295             :      * regular file
     296             :      */
     297       34028 :     if (type != FILE_TYPE_REGULAR && getFileContentType(path) == FILE_CONTENT_TYPE_RELATION)
     298           0 :         pg_fatal("data file \"%s\" in source is not a regular file", path);
     299             : 
     300             :     /* Remember this source file */
     301       34028 :     entry = insert_filehash_entry(path);
     302       34028 :     if (entry->source_exists)
     303           0 :         pg_fatal("duplicate source file \"%s\"", path);
     304       34028 :     entry->source_exists = true;
     305       34028 :     entry->source_type = type;
     306       34028 :     entry->source_size = size;
     307       34028 :     entry->source_link_target = link_target ? pg_strdup(link_target) : NULL;
     308       34028 : }
     309             : 
     310             : /*
     311             :  * Callback for processing target file list.
     312             :  *
     313             :  * Record the type and size of the file, like process_source_file() does.
     314             :  */
     315             : void
     316       33940 : process_target_file(const char *path, file_type_t type, size_t size,
     317             :                     const char *link_target)
     318             : {
     319             :     file_entry_t *entry;
     320             : 
     321             :     /*
     322             :      * Do not apply any exclusion filters here.  This has advantage to remove
     323             :      * from the target data folder all paths which have been filtered out from
     324             :      * the source data folder when processing the source files.
     325             :      */
     326             : 
     327             :     /*
     328             :      * Like in process_source_file, pretend that pg_wal is always a directory.
     329             :      */
     330       33940 :     if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
     331           4 :         type = FILE_TYPE_DIRECTORY;
     332             : 
     333             :     /* Remember this target file */
     334       33940 :     entry = insert_filehash_entry(path);
     335       33940 :     if (entry->target_exists)
     336           0 :         pg_fatal("duplicate source file \"%s\"", path);
     337       33940 :     entry->target_exists = true;
     338       33940 :     entry->target_type = type;
     339       33940 :     entry->target_size = size;
     340       33940 :     entry->target_link_target = link_target ? pg_strdup(link_target) : NULL;
     341       33940 : }
     342             : 
     343             : /*
     344             :  * This callback gets called while we read the WAL in the target, for every
     345             :  * block that has changed in the target system.  It decides if the given
     346             :  * 'blkno' in the target relfile needs to be overwritten from the source, and
     347             :  * if so, records it in 'target_pages_to_overwrite' bitmap.
     348             :  *
     349             :  * NOTE: All the files on both systems must have already been added to the
     350             :  * hash table!
     351             :  */
     352             : void
     353      169074 : process_target_wal_block_change(ForkNumber forknum, RelFileLocator rlocator,
     354             :                                 BlockNumber blkno)
     355             : {
     356             :     char       *path;
     357             :     file_entry_t *entry;
     358             :     BlockNumber blkno_inseg;
     359             :     int         segno;
     360             : 
     361      169074 :     segno = blkno / RELSEG_SIZE;
     362      169074 :     blkno_inseg = blkno % RELSEG_SIZE;
     363             : 
     364      169074 :     path = datasegpath(rlocator, forknum, segno);
     365      169074 :     entry = lookup_filehash_entry(path);
     366      169074 :     pfree(path);
     367             : 
     368             :     /*
     369             :      * If the block still exists in both systems, remember it. Otherwise we
     370             :      * can safely ignore it.
     371             :      *
     372             :      * If the block is beyond the EOF in the source system, or the file
     373             :      * doesn't exist in the source at all, we're going to truncate/remove it
     374             :      * away from the target anyway. Likewise, if it doesn't exist in the
     375             :      * target anymore, we will copy it over with the "tail" from the source
     376             :      * system, anyway.
     377             :      *
     378             :      * It is possible to find WAL for a file that doesn't exist on either
     379             :      * system anymore. It means that the relation was dropped later in the
     380             :      * target system, and independently on the source system too, or that it
     381             :      * was created and dropped in the target system and it never existed in
     382             :      * the source. Either way, we can safely ignore it.
     383             :      */
     384      169074 :     if (entry)
     385             :     {
     386             :         Assert(entry->content_type == FILE_CONTENT_TYPE_RELATION);
     387             : 
     388      169074 :         if (entry->target_exists)
     389             :         {
     390      169066 :             if (entry->target_type != FILE_TYPE_REGULAR)
     391           0 :                 pg_fatal("unexpected page modification for non-regular file \"%s\"",
     392             :                          entry->path);
     393             : 
     394      169066 :             if (entry->source_exists)
     395             :             {
     396             :                 off_t       end_offset;
     397             : 
     398      165758 :                 end_offset = (blkno_inseg + 1) * BLCKSZ;
     399      165758 :                 if (end_offset <= entry->source_size && end_offset <= entry->target_size)
     400        5758 :                     datapagemap_add(&entry->target_pages_to_overwrite, blkno_inseg);
     401             :             }
     402             :         }
     403             :     }
     404      169074 : }
     405             : 
     406             : /*
     407             :  * Is this the path of file that pg_rewind can skip copying?
     408             :  */
     409             : static bool
     410       35354 : check_file_excluded(const char *path, bool is_source)
     411             : {
     412             :     char        localpath[MAXPGPATH];
     413             :     int         excludeIdx;
     414             :     const char *filename;
     415             : 
     416             :     /*
     417             :      * Skip all temporary files, .../pgsql_tmp/... and .../pgsql_tmp.*
     418             :      */
     419       35354 :     if (strstr(path, "/" PG_TEMP_FILE_PREFIX) != NULL ||
     420       35324 :         strstr(path, "/" PG_TEMP_FILES_DIR "/") != NULL)
     421             :     {
     422          30 :         return true;
     423             :     }
     424             : 
     425             :     /* check individual files... */
     426      317316 :     for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
     427             :     {
     428      282140 :         int         cmplen = strlen(excludeFiles[excludeIdx].name);
     429             : 
     430      282140 :         filename = last_dir_separator(path);
     431      282140 :         if (filename == NULL)
     432        6200 :             filename = path;
     433             :         else
     434      275940 :             filename++;
     435             : 
     436      282140 :         if (!excludeFiles[excludeIdx].match_prefix)
     437      246816 :             cmplen++;
     438      282140 :         if (strncmp(filename, excludeFiles[excludeIdx].name, cmplen) == 0)
     439             :         {
     440         148 :             if (is_source)
     441         148 :                 pg_log_debug("entry \"%s\" excluded from source file list",
     442             :                              path);
     443             :             else
     444           0 :                 pg_log_debug("entry \"%s\" excluded from target file list",
     445             :                              path);
     446         148 :             return true;
     447             :         }
     448             :     }
     449             : 
     450             :     /*
     451             :      * ... And check some directories.  Note that this includes any contents
     452             :      * within the directories themselves.
     453             :      */
     454      281378 :     for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
     455             :     {
     456      246232 :         snprintf(localpath, sizeof(localpath), "%s/",
     457      246232 :                  excludeDirContents[excludeIdx]);
     458      246232 :         if (strstr(path, localpath) == path)
     459             :         {
     460          30 :             if (is_source)
     461          30 :                 pg_log_debug("entry \"%s\" excluded from source file list",
     462             :                              path);
     463             :             else
     464           0 :                 pg_log_debug("entry \"%s\" excluded from target file list",
     465             :                              path);
     466          30 :             return true;
     467             :         }
     468             :     }
     469             : 
     470       35146 :     return false;
     471             : }
     472             : 
     473             : static const char *
     474       11724 : action_to_str(file_action_t action)
     475             : {
     476       11724 :     switch (action)
     477             :     {
     478         866 :         case FILE_ACTION_NONE:
     479         866 :             return "NONE";
     480        9366 :         case FILE_ACTION_COPY:
     481        9366 :             return "COPY";
     482           8 :         case FILE_ACTION_TRUNCATE:
     483           8 :             return "TRUNCATE";
     484          10 :         case FILE_ACTION_COPY_TAIL:
     485          10 :             return "COPY_TAIL";
     486          18 :         case FILE_ACTION_CREATE:
     487          18 :             return "CREATE";
     488        1456 :         case FILE_ACTION_REMOVE:
     489        1456 :             return "REMOVE";
     490             : 
     491           0 :         default:
     492           0 :             return "unknown";
     493             :     }
     494             : }
     495             : 
     496             : /*
     497             :  * Calculate the totals needed for progress reports.
     498             :  */
     499             : void
     500           0 : calculate_totals(filemap_t *filemap)
     501             : {
     502             :     file_entry_t *entry;
     503             :     int         i;
     504             : 
     505           0 :     filemap->total_size = 0;
     506           0 :     filemap->fetch_size = 0;
     507             : 
     508           0 :     for (i = 0; i < filemap->nentries; i++)
     509             :     {
     510           0 :         entry = filemap->entries[i];
     511             : 
     512           0 :         if (entry->source_type != FILE_TYPE_REGULAR)
     513           0 :             continue;
     514             : 
     515           0 :         filemap->total_size += entry->source_size;
     516             : 
     517           0 :         if (entry->action == FILE_ACTION_COPY)
     518             :         {
     519           0 :             filemap->fetch_size += entry->source_size;
     520           0 :             continue;
     521             :         }
     522             : 
     523           0 :         if (entry->action == FILE_ACTION_COPY_TAIL)
     524           0 :             filemap->fetch_size += (entry->source_size - entry->target_size);
     525             : 
     526           0 :         if (entry->target_pages_to_overwrite.bitmapsize > 0)
     527             :         {
     528             :             datapagemap_iterator_t *iter;
     529             :             BlockNumber blk;
     530             : 
     531           0 :             iter = datapagemap_iterate(&entry->target_pages_to_overwrite);
     532           0 :             while (datapagemap_next(iter, &blk))
     533           0 :                 filemap->fetch_size += BLCKSZ;
     534             : 
     535           0 :             pg_free(iter);
     536             :         }
     537             :     }
     538           0 : }
     539             : 
     540             : void
     541          30 : print_filemap(filemap_t *filemap)
     542             : {
     543             :     file_entry_t *entry;
     544             :     int         i;
     545             : 
     546       35418 :     for (i = 0; i < filemap->nentries; i++)
     547             :     {
     548       35388 :         entry = filemap->entries[i];
     549             : 
     550       35388 :         if (entry->action != FILE_ACTION_NONE ||
     551       24530 :             entry->content_type == FILE_CONTENT_TYPE_WAL ||
     552       24494 :             entry->target_pages_to_overwrite.bitmapsize > 0)
     553             :         {
     554       11724 :             pg_log_debug("%s (%s)", entry->path,
     555             :                          action_to_str(entry->action));
     556             : 
     557       11724 :             if (entry->target_pages_to_overwrite.bitmapsize > 0)
     558         846 :                 datapagemap_print(&entry->target_pages_to_overwrite);
     559             :         }
     560             :     }
     561          30 :     fflush(stdout);
     562          30 : }
     563             : 
     564             : /*
     565             :  * Determine what kind of file this one looks like.
     566             :  */
     567             : static file_content_type_t
     568       36226 : getFileContentType(const char *path)
     569             : {
     570             :     RelFileLocator rlocator;
     571             :     unsigned int segNo;
     572             :     int         nmatch;
     573       36226 :     file_content_type_t result = FILE_CONTENT_TYPE_OTHER;
     574             : 
     575             :     /* Check if it is a WAL file. */
     576       36226 :     if (strncmp("pg_wal/", path, 7) == 0)
     577             :     {
     578         330 :         const char *filename = path + 7;    /* Skip "pg_wal/" */
     579             : 
     580         330 :         if (IsXLogFileName(filename))
     581         130 :             return FILE_CONTENT_TYPE_WAL;
     582             :         else
     583         200 :             return FILE_CONTENT_TYPE_OTHER;
     584             :     }
     585             : 
     586             :     /*----
     587             :      * Does it look like a relation data file?
     588             :      *
     589             :      * For our purposes, only files belonging to the main fork are considered
     590             :      * relation files. Other forks are always copied in toto, because we
     591             :      * cannot reliably track changes to them, because WAL only contains block
     592             :      * references for the main fork.
     593             :      *
     594             :      * Relation data files can be in one of the following directories:
     595             :      *
     596             :      * global/
     597             :      *      shared relations
     598             :      *
     599             :      * base/<db oid>/
     600             :      *      regular relations, default tablespace
     601             :      *
     602             :      * pg_tblspc/<tblspc oid>/<tblspc version>/
     603             :      *      within a non-default tablespace (the name of the directory
     604             :      *      depends on version)
     605             :      *
     606             :      * And the relation data files themselves have a filename like:
     607             :      *
     608             :      * <oid>.<segment number>
     609             :      *
     610             :      *----
     611             :      */
     612       35896 :     rlocator.spcOid = InvalidOid;
     613       35896 :     rlocator.dbOid = InvalidOid;
     614       35896 :     rlocator.relNumber = InvalidRelFileNumber;
     615       35896 :     segNo = 0;
     616       35896 :     result = FILE_CONTENT_TYPE_OTHER;
     617             : 
     618       35896 :     nmatch = sscanf(path, "global/%u.%u", &rlocator.relNumber, &segNo);
     619       35896 :     if (nmatch == 1 || nmatch == 2)
     620             :     {
     621        1680 :         rlocator.spcOid = GLOBALTABLESPACE_OID;
     622        1680 :         rlocator.dbOid = 0;
     623        1680 :         result = FILE_CONTENT_TYPE_RELATION;
     624             :     }
     625             :     else
     626             :     {
     627       34216 :         nmatch = sscanf(path, "base/%u/%u.%u",
     628             :                         &rlocator.dbOid, &rlocator.relNumber, &segNo);
     629       34216 :         if (nmatch == 2 || nmatch == 3)
     630             :         {
     631       31764 :             rlocator.spcOid = DEFAULTTABLESPACE_OID;
     632       31764 :             result = FILE_CONTENT_TYPE_RELATION;
     633             :         }
     634             :         else
     635             :         {
     636        2452 :             nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/%u.%u",
     637             :                             &rlocator.spcOid, &rlocator.dbOid, &rlocator.relNumber,
     638             :                             &segNo);
     639        2452 :             if (nmatch == 3 || nmatch == 4)
     640          24 :                 result = FILE_CONTENT_TYPE_RELATION;
     641             :         }
     642             :     }
     643             : 
     644             :     /*
     645             :      * The sscanf tests above can match files that have extra characters at
     646             :      * the end. To eliminate such cases, cross-check that GetRelationPath
     647             :      * creates the exact same filename, when passed the RelFileLocator
     648             :      * information we extracted from the filename.
     649             :      */
     650       35896 :     if (result == FILE_CONTENT_TYPE_RELATION)
     651             :     {
     652       33468 :         char       *check_path = datasegpath(rlocator, MAIN_FORKNUM, segNo);
     653             : 
     654       33468 :         if (strcmp(check_path, path) != 0)
     655        8168 :             result = FILE_CONTENT_TYPE_OTHER;
     656             : 
     657       33468 :         pfree(check_path);
     658             :     }
     659             : 
     660       35896 :     return result;
     661             : }
     662             : 
     663             : /*
     664             :  * A helper function to create the path of a relation file and segment.
     665             :  *
     666             :  * The returned path is palloc'd
     667             :  */
     668             : static char *
     669      202542 : datasegpath(RelFileLocator rlocator, ForkNumber forknum, BlockNumber segno)
     670             : {
     671             :     RelPathStr  path;
     672             :     char       *segpath;
     673             : 
     674      202542 :     path = relpathperm(rlocator, forknum);
     675      202542 :     if (segno > 0)
     676             :     {
     677           0 :         segpath = psprintf("%s.%u", path.str, segno);
     678           0 :         return segpath;
     679             :     }
     680             :     else
     681      202542 :         return pstrdup(path.str);
     682             : }
     683             : 
     684             : /*
     685             :  * In the final stage, the filemap is sorted so that removals come last.
     686             :  * From disk space usage point of view, it would be better to do removals
     687             :  * first, but for now, safety first. If a whole directory is deleted, all
     688             :  * files and subdirectories inside it need to removed first. On creation,
     689             :  * parent directory needs to be created before files and directories inside
     690             :  * it. To achieve that, the file_action_t enum is ordered so that we can
     691             :  * just sort on that first. Furthermore, sort REMOVE entries in reverse
     692             :  * path order, so that "foo/bar" subdirectory is removed before "foo".
     693             :  */
     694             : static int
     695      376558 : final_filemap_cmp(const void *a, const void *b)
     696             : {
     697      376558 :     file_entry_t *fa = *((file_entry_t **) a);
     698      376558 :     file_entry_t *fb = *((file_entry_t **) b);
     699             : 
     700      376558 :     if (fa->action > fb->action)
     701       11414 :         return 1;
     702      365144 :     if (fa->action < fb->action)
     703       20592 :         return -1;
     704             : 
     705      344552 :     if (fa->action == FILE_ACTION_REMOVE)
     706       11990 :         return strcmp(fb->path, fa->path);
     707             :     else
     708      332562 :         return strcmp(fa->path, fb->path);
     709             : }
     710             : 
     711             : /*
     712             :  * Decide what to do with a WAL segment file based on its position
     713             :  * relative to the point of divergence.
     714             :  *
     715             :  * Caller is responsible for ensuring that the file exists on both
     716             :  * source and target servers.
     717             :  */
     718             : static file_action_t
     719          64 : decide_wal_file_action(const char *fname, XLogSegNo last_common_segno,
     720             :                        size_t source_size, size_t target_size)
     721             : {
     722             :     TimeLineID  file_tli;
     723             :     XLogSegNo   file_segno;
     724             : 
     725             :     /* Get current WAL segment number given current segment file name */
     726          64 :     XLogFromFileName(fname, &file_tli, &file_segno, WalSegSz);
     727             : 
     728             :     /*
     729             :      * Avoid copying files before the last common segment.
     730             :      *
     731             :      * These files exist on the source and the target servers, so they should
     732             :      * be identical and located strictly before the segment that contains the
     733             :      * LSN where target and source servers have diverged.
     734             :      *
     735             :      * While we are on it, double-check the size of each file and copy the
     736             :      * file if they do not match, in case.
     737             :      */
     738          64 :     if (file_segno < last_common_segno &&
     739             :         source_size == target_size)
     740          32 :         return FILE_ACTION_NONE;
     741             : 
     742          32 :     return FILE_ACTION_COPY;
     743             : }
     744             : 
     745             : /*
     746             :  * Decide what action to perform to a file.
     747             :  */
     748             : static file_action_t
     749       35388 : decide_file_action(file_entry_t *entry, XLogSegNo last_common_segno)
     750             : {
     751       35388 :     const char *path = entry->path;
     752             : 
     753             :     /*
     754             :      * Don't touch the control file. It is handled specially, after copying
     755             :      * all the other files.
     756             :      */
     757       35388 :     if (strcmp(path, XLOG_CONTROL_FILE) == 0)
     758          30 :         return FILE_ACTION_NONE;
     759             : 
     760             :     /* Skip macOS system files */
     761       35358 :     if (strstr(path, ".DS_Store") != NULL)
     762           4 :         return FILE_ACTION_NONE;
     763             : 
     764             :     /*
     765             :      * Remove all files matching the exclusion filters in the target.
     766             :      */
     767       35354 :     if (check_file_excluded(path, true))
     768             :     {
     769         208 :         if (entry->target_exists)
     770         150 :             return FILE_ACTION_REMOVE;
     771             :         else
     772          58 :             return FILE_ACTION_NONE;
     773             :     }
     774             : 
     775             :     /*
     776             :      * Handle cases where the file is missing from one of the systems.
     777             :      */
     778       35146 :     if (!entry->target_exists && entry->source_exists)
     779             :     {
     780             :         /*
     781             :          * File exists in source, but not in target. Copy it in toto. (If it's
     782             :          * a relation data file, WAL replay after rewinding should re-create
     783             :          * it anyway. But there's no harm in copying it now.)
     784             :          */
     785        1386 :         switch (entry->source_type)
     786             :         {
     787          18 :             case FILE_TYPE_DIRECTORY:
     788             :             case FILE_TYPE_SYMLINK:
     789          18 :                 return FILE_ACTION_CREATE;
     790        1368 :             case FILE_TYPE_REGULAR:
     791        1368 :                 return FILE_ACTION_COPY;
     792           0 :             case FILE_TYPE_UNDEFINED:
     793           0 :                 pg_fatal("unknown file type for \"%s\"", entry->path);
     794             :                 break;
     795             :         }
     796             :     }
     797       33760 :     else if (entry->target_exists && !entry->source_exists)
     798             :     {
     799             :         /*
     800             :          * For files that exist in target but not in source, we check the
     801             :          * keepwal hash table; any files listed therein must not be removed.
     802             :          */
     803        1310 :         if (keepwal_entry_exists(path))
     804             :         {
     805           4 :             pg_log_debug("Not removing file \"%s\" because it is required for recovery", path);
     806           4 :             return FILE_ACTION_NONE;
     807             :         }
     808        1306 :         return FILE_ACTION_REMOVE;
     809             :     }
     810       32450 :     else if (!entry->target_exists && !entry->source_exists)
     811             :     {
     812             :         /*
     813             :          * Doesn't exist in either server. Why does it have an entry in the
     814             :          * first place??
     815             :          */
     816             :         Assert(false);
     817           0 :         return FILE_ACTION_NONE;
     818             :     }
     819             : 
     820             :     /*
     821             :      * Otherwise, the file exists on both systems
     822             :      */
     823             :     Assert(entry->target_exists && entry->source_exists);
     824             : 
     825       32450 :     if (entry->source_type != entry->target_type)
     826             :     {
     827             :         /* But it's a different kind of object. Strange.. */
     828           0 :         pg_fatal("file \"%s\" is of different type in source and target", entry->path);
     829             :     }
     830             : 
     831             :     /*
     832             :      * PG_VERSION files should be identical on both systems, but avoid
     833             :      * overwriting them for paranoia.
     834             :      */
     835       32450 :     if (pg_str_endswith(entry->path, "PG_VERSION"))
     836         128 :         return FILE_ACTION_NONE;
     837             : 
     838       32322 :     switch (entry->source_type)
     839             :     {
     840         816 :         case FILE_TYPE_DIRECTORY:
     841         816 :             return FILE_ACTION_NONE;
     842             : 
     843           0 :         case FILE_TYPE_SYMLINK:
     844             : 
     845             :             /*
     846             :              * XXX: Should we check if it points to the same target?
     847             :              */
     848           0 :             return FILE_ACTION_NONE;
     849             : 
     850       31506 :         case FILE_TYPE_REGULAR:
     851       31506 :             if (entry->content_type == FILE_CONTENT_TYPE_WAL)
     852             :             {
     853             :                 /* Handle WAL segment file */
     854          64 :                 const char *filename = last_dir_separator(entry->path);
     855             : 
     856          64 :                 if (filename == NULL)
     857           0 :                     filename = entry->path;
     858             :                 else
     859          64 :                     filename++; /* Skip the separator */
     860             : 
     861          64 :                 return decide_wal_file_action(filename, last_common_segno,
     862             :                                               entry->source_size,
     863             :                                               entry->target_size);
     864             :             }
     865       31442 :             else if (entry->content_type != FILE_CONTENT_TYPE_RELATION)
     866             :             {
     867             :                 /*
     868             :                  * It's a non-data file that we have no special processing
     869             :                  * for. Copy it in toto.
     870             :                  */
     871        7966 :                 return FILE_ACTION_COPY;
     872             :             }
     873             :             else
     874             :             {
     875             :                 /*
     876             :                  * It's a data file that exists in both systems.
     877             :                  *
     878             :                  * If it's larger in target, we can truncate it. There will
     879             :                  * also be a WAL record of the truncation in the source
     880             :                  * system, so WAL replay would eventually truncate the target
     881             :                  * too, but we might as well do it now.
     882             :                  *
     883             :                  * If it's smaller in the target, it means that it has been
     884             :                  * truncated in the target, or enlarged in the source, or
     885             :                  * both. If it was truncated in the target, we need to copy
     886             :                  * the missing tail from the source system. If it was enlarged
     887             :                  * in the source system, there will be WAL records in the
     888             :                  * source system for the new blocks, so we wouldn't need to
     889             :                  * copy them here. But we don't know which scenario we're
     890             :                  * dealing with, and there's no harm in copying the missing
     891             :                  * blocks now, so do it now.
     892             :                  *
     893             :                  * If it's the same size, do nothing here. Any blocks modified
     894             :                  * in the target will be copied based on parsing the target
     895             :                  * system's WAL, and any blocks modified in the source will be
     896             :                  * updated after rewinding, when the source system's WAL is
     897             :                  * replayed.
     898             :                  */
     899       23476 :                 if (entry->target_size < entry->source_size)
     900          10 :                     return FILE_ACTION_COPY_TAIL;
     901       23466 :                 else if (entry->target_size > entry->source_size)
     902           8 :                     return FILE_ACTION_TRUNCATE;
     903             :                 else
     904       23458 :                     return FILE_ACTION_NONE;
     905             :             }
     906             :             break;
     907             : 
     908           0 :         case FILE_TYPE_UNDEFINED:
     909           0 :             pg_fatal("unknown file type for \"%s\"", path);
     910             :             break;
     911             :     }
     912             : 
     913             :     /* unreachable */
     914           0 :     pg_fatal("could not decide what to do with file \"%s\"", path);
     915             : }
     916             : 
     917             : /*
     918             :  * Decide what to do with each file.
     919             :  *
     920             :  * Returns a 'filemap' with the entries in the order that their actions
     921             :  * should be executed.
     922             :  */
     923             : filemap_t *
     924          30 : decide_file_actions(XLogSegNo last_common_segno)
     925             : {
     926             :     int         i;
     927             :     filehash_iterator it;
     928             :     file_entry_t *entry;
     929             :     filemap_t  *filemap;
     930             : 
     931          30 :     filehash_start_iterate(filehash, &it);
     932       35418 :     while ((entry = filehash_iterate(filehash, &it)) != NULL)
     933             :     {
     934       35388 :         entry->action = decide_file_action(entry, last_common_segno);
     935             :     }
     936             : 
     937             :     /*
     938             :      * Turn the hash table into an array, and sort in the order that the
     939             :      * actions should be performed.
     940             :      */
     941          30 :     filemap = pg_malloc(offsetof(filemap_t, entries) +
     942          30 :                         filehash->members * sizeof(file_entry_t *));
     943          30 :     filemap->nentries = filehash->members;
     944          30 :     filehash_start_iterate(filehash, &it);
     945          30 :     i = 0;
     946       35418 :     while ((entry = filehash_iterate(filehash, &it)) != NULL)
     947             :     {
     948       35388 :         filemap->entries[i++] = entry;
     949             :     }
     950             : 
     951          30 :     qsort(&filemap->entries, filemap->nentries, sizeof(file_entry_t *),
     952             :           final_filemap_cmp);
     953             : 
     954          30 :     return filemap;
     955             : }
 |