LCOV - code coverage report
Current view: top level - src/bin/pg_rewind - filemap.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 200 240 83.3 %
Date: 2025-01-18 04:15:08 Functions: 17 18 94.4 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * filemap.c
       4             :  *    A data structure for keeping track of files that have changed.
       5             :  *
       6             :  * This source file contains the logic to decide what to do with different
       7             :  * kinds of files, and the data structure to support it.  Before modifying
       8             :  * anything, pg_rewind collects information about all the files and their
       9             :  * attributes in the target and source data directories.  It also scans the
      10             :  * WAL log in the target, and collects information about data blocks that
      11             :  * were changed.  All this information is stored in a hash table, using the
      12             :  * file path relative to the root of the data directory as the key.
      13             :  *
      14             :  * After collecting all the information required, the decide_file_actions()
      15             :  * function scans the hash table and decides what action needs to be taken
      16             :  * for each file.  Finally, it sorts the array to the final order that the
      17             :  * actions should be executed in.
      18             :  *
      19             :  * Copyright (c) 2013-2025, PostgreSQL Global Development Group
      20             :  *
      21             :  *-------------------------------------------------------------------------
      22             :  */
      23             : 
      24             : #include "postgres_fe.h"
      25             : 
      26             : #include <sys/stat.h>
      27             : #include <unistd.h>
      28             : 
      29             : #include "catalog/pg_tablespace_d.h"
      30             : #include "common/file_utils.h"
      31             : #include "common/hashfn_unstable.h"
      32             : #include "common/string.h"
      33             : #include "datapagemap.h"
      34             : #include "filemap.h"
      35             : #include "pg_rewind.h"
      36             : 
      37             : /*
      38             :  * Define a hash table which we can use to store information about the files
      39             :  * appearing in source and target systems.
      40             :  */
      41             : #define SH_PREFIX               filehash
      42             : #define SH_ELEMENT_TYPE         file_entry_t
      43             : #define SH_KEY_TYPE             const char *
      44             : #define SH_KEY                  path
      45             : #define SH_HASH_KEY(tb, key)    hash_string(key)
      46             : #define SH_EQUAL(tb, a, b)      (strcmp(a, b) == 0)
      47             : #define SH_SCOPE                static inline
      48             : #define SH_RAW_ALLOCATOR        pg_malloc0
      49             : #define SH_DECLARE
      50             : #define SH_DEFINE
      51             : #include "lib/simplehash.h"
      52             : 
      53             : #define FILEHASH_INITIAL_SIZE   1000
      54             : 
      55             : static filehash_hash *filehash;
      56             : 
      57             : static bool isRelDataFile(const char *path);
      58             : static char *datasegpath(RelFileLocator rlocator, ForkNumber forknum,
      59             :                          BlockNumber segno);
      60             : 
      61             : static file_entry_t *insert_filehash_entry(const char *path);
      62             : static file_entry_t *lookup_filehash_entry(const char *path);
      63             : 
      64             : /*
      65             :  * A separate hash table which tracks WAL files that must not be deleted.
      66             :  */
      67             : typedef struct keepwal_entry
      68             : {
      69             :     const char *path;
      70             :     uint32      status;
      71             : } keepwal_entry;
      72             : 
      73             : #define SH_PREFIX               keepwal
      74             : #define SH_ELEMENT_TYPE         keepwal_entry
      75             : #define SH_KEY_TYPE             const char *
      76             : #define SH_KEY                  path
      77             : #define SH_HASH_KEY(tb, key)    hash_string(key)
      78             : #define SH_EQUAL(tb, a, b)      (strcmp(a, b) == 0)
      79             : #define SH_SCOPE                static inline
      80             : #define SH_RAW_ALLOCATOR        pg_malloc0
      81             : #define SH_DECLARE
      82             : #define SH_DEFINE
      83             : #include "lib/simplehash.h"
      84             : 
      85             : #define KEEPWAL_INITIAL_SIZE    1000
      86             : 
      87             : 
      88             : static keepwal_hash *keepwal = NULL;
      89             : static bool keepwal_entry_exists(const char *path);
      90             : 
      91             : static int  final_filemap_cmp(const void *a, const void *b);
      92             : 
      93             : static bool check_file_excluded(const char *path, bool is_source);
      94             : 
      95             : /*
      96             :  * Definition of one element of an exclusion list, used to exclude
      97             :  * contents when rewinding.  "name" is the name of the file or path to
      98             :  * check for exclusion.  If "match_prefix" is true, any items matching
      99             :  * the name as prefix are excluded.
     100             :  */
     101             : struct exclude_list_item
     102             : {
     103             :     const char *name;
     104             :     bool        match_prefix;
     105             : };
     106             : 
     107             : /*
     108             :  * The contents of these directories are removed or recreated during server
     109             :  * start so they are not included in data processed by pg_rewind.
     110             :  *
     111             :  * Note: these lists should be kept in sync with what basebackup.c provides.
     112             :  * Some of the values, contrary to what basebackup.c uses, are hardcoded as
     113             :  * they are defined in backend-only headers.  So this list is maintained
     114             :  * with a best effort in mind.
     115             :  */
     116             : static const char *const excludeDirContents[] =
     117             : {
     118             :     /*
     119             :      * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped
     120             :      * because extensions like pg_stat_statements store data there.
     121             :      */
     122             :     "pg_stat_tmp",                /* defined as PG_STAT_TMP_DIR */
     123             : 
     124             :     /*
     125             :      * It is generally not useful to backup the contents of this directory
     126             :      * even if the intention is to restore to another primary. See backup.sgml
     127             :      * for a more detailed description.
     128             :      */
     129             :     "pg_replslot",                /* defined as PG_REPLSLOT_DIR */
     130             : 
     131             :     /* Contents removed on startup, see dsm_cleanup_for_mmap(). */
     132             :     "pg_dynshmem",                /* defined as PG_DYNSHMEM_DIR */
     133             : 
     134             :     /* Contents removed on startup, see AsyncShmemInit(). */
     135             :     "pg_notify",
     136             : 
     137             :     /*
     138             :      * Old contents are loaded for possible debugging but are not required for
     139             :      * normal operation, see SerialInit().
     140             :      */
     141             :     "pg_serial",
     142             : 
     143             :     /* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
     144             :     "pg_snapshots",
     145             : 
     146             :     /* Contents zeroed on startup, see StartupSUBTRANS(). */
     147             :     "pg_subtrans",
     148             : 
     149             :     /* end of list */
     150             :     NULL
     151             : };
     152             : 
     153             : /*
     154             :  * List of files excluded from filemap processing.  A file is excluded if its
     155             :  * name matches an entry, either exactly or as a prefix ("match_prefix").
     156             :  */
     157             : static const struct exclude_list_item excludeFiles[] =
     158             : {
     159             :     /* Skip auto conf temporary file. */
     160             :     {"postgresql.auto.conf.tmp", false},  /* defined as PG_AUTOCONF_FILENAME */
     161             : 
     162             :     /* Skip current log file temporary file */
     163             :     {"current_logfiles.tmp", false},  /* defined as
     164             :                                          * LOG_METAINFO_DATAFILE_TMP */
     165             : 
     166             :     /* Skip relation cache because it is rebuilt on startup */
     167             :     {"pg_internal.init", true}, /* defined as RELCACHE_INIT_FILENAME */
     168             : 
     169             :     /*
     170             :      * If there is a backup_label or tablespace_map file, it indicates that a
     171             :      * recovery failed and this cluster probably can't be rewound, but exclude
     172             :      * them anyway if they are found.
     173             :      */
     174             :     {"backup_label", false},  /* defined as BACKUP_LABEL_FILE */
     175             :     {"tablespace_map", false},    /* defined as TABLESPACE_MAP */
     176             : 
     177             :     /*
     178             :      * If there's a backup_manifest, it belongs to a backup that was used to
     179             :      * start this server. It is *not* correct for this backup. Our
     180             :      * backup_manifest is injected into the backup separately if users want
     181             :      * it.
     182             :      */
     183             :     {"backup_manifest", false},
     184             : 
     185             :     {"postmaster.pid", false},
     186             :     {"postmaster.opts", false},
     187             : 
     188             :     /* end of list */
     189             :     {NULL, false}
     190             : };
     191             : 
     192             : /*
     193             :  * Initialize the hash table for the file map.
     194             :  */
     195             : void
     196          28 : filehash_init(void)
     197             : {
     198          28 :     filehash = filehash_create(FILEHASH_INITIAL_SIZE, NULL);
     199          28 : }
     200             : 
     201             : /* Look up entry for 'path', creating a new one if it doesn't exist */
     202             : static file_entry_t *
     203       64028 : insert_filehash_entry(const char *path)
     204             : {
     205             :     file_entry_t *entry;
     206             :     bool        found;
     207             : 
     208       64028 :     entry = filehash_insert(filehash, path, &found);
     209       64028 :     if (!found)
     210             :     {
     211       33400 :         entry->path = pg_strdup(path);
     212       33400 :         entry->isrelfile = isRelDataFile(path);
     213             : 
     214       33400 :         entry->target_exists = false;
     215       33400 :         entry->target_type = FILE_TYPE_UNDEFINED;
     216       33400 :         entry->target_size = 0;
     217       33400 :         entry->target_link_target = NULL;
     218       33400 :         entry->target_pages_to_overwrite.bitmap = NULL;
     219       33400 :         entry->target_pages_to_overwrite.bitmapsize = 0;
     220             : 
     221       33400 :         entry->source_exists = false;
     222       33400 :         entry->source_type = FILE_TYPE_UNDEFINED;
     223       33400 :         entry->source_size = 0;
     224       33400 :         entry->source_link_target = NULL;
     225             : 
     226       33400 :         entry->action = FILE_ACTION_UNDECIDED;
     227             :     }
     228             : 
     229       64028 :     return entry;
     230             : }
     231             : 
     232             : static file_entry_t *
     233      169040 : lookup_filehash_entry(const char *path)
     234             : {
     235      169040 :     return filehash_lookup(filehash, path);
     236             : }
     237             : 
     238             : /*
     239             :  * Initialize a hash table to store WAL file names that must be kept.
     240             :  */
     241             : void
     242          28 : keepwal_init(void)
     243             : {
     244             :     /* An initial hash size out of thin air */
     245          28 :     keepwal = keepwal_create(KEEPWAL_INITIAL_SIZE, NULL);
     246          28 : }
     247             : 
     248             : /* Mark the given file to prevent its removal */
     249             : void
     250          38 : keepwal_add_entry(const char *path)
     251             : {
     252             :     keepwal_entry *entry;
     253             :     bool        found;
     254             : 
     255             :     /* Should only be called with keepwal initialized */
     256             :     Assert(keepwal != NULL);
     257             : 
     258          38 :     entry = keepwal_insert(keepwal, path, &found);
     259             : 
     260          38 :     if (!found)
     261          38 :         entry->path = pg_strdup(path);
     262          38 : }
     263             : 
     264             : /* Return true if file is marked as not to be removed, false otherwise */
     265             : static bool
     266        1304 : keepwal_entry_exists(const char *path)
     267             : {
     268        1304 :     return keepwal_lookup(keepwal, path) != NULL;
     269             : }
     270             : 
     271             : /*
     272             :  * Callback for processing source file list.
     273             :  *
     274             :  * This is called once for every file in the source server.  We record the
     275             :  * type and size of the file, so that decide_file_action() can later decide what
     276             :  * to do with it.
     277             :  */
     278             : void
     279       32048 : process_source_file(const char *path, file_type_t type, size_t size,
     280             :                     const char *link_target)
     281             : {
     282             :     file_entry_t *entry;
     283             : 
     284             :     /*
     285             :      * Pretend that pg_wal is a directory, even if it's really a symlink. We
     286             :      * don't want to mess with the symlink itself, nor complain if it's a
     287             :      * symlink in source but not in target or vice versa.
     288             :      */
     289       32048 :     if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
     290           0 :         type = FILE_TYPE_DIRECTORY;
     291             : 
     292             :     /*
     293             :      * sanity check: a filename that looks like a data file better be a
     294             :      * regular file
     295             :      */
     296       32048 :     if (type != FILE_TYPE_REGULAR && isRelDataFile(path))
     297           0 :         pg_fatal("data file \"%s\" in source is not a regular file", path);
     298             : 
     299             :     /* Remember this source file */
     300       32048 :     entry = insert_filehash_entry(path);
     301       32048 :     if (entry->source_exists)
     302           0 :         pg_fatal("duplicate source file \"%s\"", path);
     303       32048 :     entry->source_exists = true;
     304       32048 :     entry->source_type = type;
     305       32048 :     entry->source_size = size;
     306       32048 :     entry->source_link_target = link_target ? pg_strdup(link_target) : NULL;
     307       32048 : }
     308             : 
     309             : /*
     310             :  * Callback for processing target file list.
     311             :  *
     312             :  * Record the type and size of the file, like process_source_file() does.
     313             :  */
     314             : void
     315       31980 : process_target_file(const char *path, file_type_t type, size_t size,
     316             :                     const char *link_target)
     317             : {
     318             :     file_entry_t *entry;
     319             : 
     320             :     /*
     321             :      * Do not apply any exclusion filters here.  This has the advantage of
     322             :      * removing from the target data folder all paths that were filtered out
     323             :      * of the source data folder when processing the source files.
     324             :      */
     325             : 
     326             :     /*
     327             :      * Like in process_source_file, pretend that pg_wal is always a directory.
     328             :      */
     329       31980 :     if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
     330           4 :         type = FILE_TYPE_DIRECTORY;
     331             : 
     332             :     /* Remember this target file */
     333       31980 :     entry = insert_filehash_entry(path);
     334       31980 :     if (entry->target_exists)
     335           0 :         pg_fatal("duplicate source file \"%s\"", path);
     336       31980 :     entry->target_exists = true;
     337       31980 :     entry->target_type = type;
     338       31980 :     entry->target_size = size;
     339       31980 :     entry->target_link_target = link_target ? pg_strdup(link_target) : NULL;
     340       31980 : }
     341             : 
     342             : /*
     343             :  * This callback gets called while we read the WAL in the target, for every
     344             :  * block that has changed in the target system.  It decides if the given
     345             :  * 'blkno' in the target relfile needs to be overwritten from the source, and
     346             :  * if so, records it in 'target_pages_to_overwrite' bitmap.
     347             :  *
     348             :  * NOTE: All the files on both systems must have already been added to the
     349             :  * hash table!
     350             :  */
     351             : void
     352      169040 : process_target_wal_block_change(ForkNumber forknum, RelFileLocator rlocator,
     353             :                                 BlockNumber blkno)
     354             : {
     355             :     char       *path;
     356             :     file_entry_t *entry;
     357             :     BlockNumber blkno_inseg;
     358             :     int         segno;
     359             : 
     360      169040 :     segno = blkno / RELSEG_SIZE;
     361      169040 :     blkno_inseg = blkno % RELSEG_SIZE;
     362             : 
     363      169040 :     path = datasegpath(rlocator, forknum, segno);
     364      169040 :     entry = lookup_filehash_entry(path);
     365      169040 :     pfree(path);
     366             : 
     367             :     /*
     368             :      * If the block still exists in both systems, remember it. Otherwise we
     369             :      * can safely ignore it.
     370             :      *
     371             :      * If the block is beyond the EOF in the source system, or the file
     372             :      * doesn't exist in the source at all, we're going to truncate/remove it
     373             :      * away from the target anyway. Likewise, if it doesn't exist in the
     374             :      * target anymore, we will copy it over with the "tail" from the source
     375             :      * system, anyway.
     376             :      *
     377             :      * It is possible to find WAL for a file that doesn't exist on either
     378             :      * system anymore. It means that the relation was dropped later in the
     379             :      * target system, and independently on the source system too, or that it
     380             :      * was created and dropped in the target system and it never existed in
     381             :      * the source. Either way, we can safely ignore it.
     382             :      */
     383      169040 :     if (entry)
     384             :     {
     385             :         Assert(entry->isrelfile);
     386             : 
     387      169040 :         if (entry->target_exists)
     388             :         {
     389      169032 :             if (entry->target_type != FILE_TYPE_REGULAR)
     390           0 :                 pg_fatal("unexpected page modification for non-regular file \"%s\"",
     391             :                          entry->path);
     392             : 
     393      169032 :             if (entry->source_exists)
     394             :             {
     395             :                 off_t       end_offset;
     396             : 
     397      165748 :                 end_offset = (blkno_inseg + 1) * BLCKSZ;
     398      165748 :                 if (end_offset <= entry->source_size && end_offset <= entry->target_size)
     399        5748 :                     datapagemap_add(&entry->target_pages_to_overwrite, blkno_inseg);
     400             :             }
     401             :         }
     402             :     }
     403      169040 : }
     404             : 
     405             : /*
     406             :  * Is this the path of a file that pg_rewind can skip copying?
     407             :  */
     408             : static bool
     409       33368 : check_file_excluded(const char *path, bool is_source)
     410             : {
     411             :     char        localpath[MAXPGPATH];
     412             :     int         excludeIdx;
     413             :     const char *filename;
     414             : 
     415             :     /*
     416             :      * Skip all temporary files, .../pgsql_tmp/... and .../pgsql_tmp.*
     417             :      */
     418       33368 :     if (strstr(path, "/" PG_TEMP_FILE_PREFIX) != NULL ||
     419       33340 :         strstr(path, "/" PG_TEMP_FILES_DIR "/") != NULL)
     420             :     {
     421          28 :         return true;
     422             :     }
     423             : 
     424             :     /* check individual files... */
     425      299492 :     for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
     426             :     {
     427      266292 :         int         cmplen = strlen(excludeFiles[excludeIdx].name);
     428             : 
     429      266292 :         filename = last_dir_separator(path);
     430      266292 :         if (filename == NULL)
     431        5804 :             filename = path;
     432             :         else
     433      260488 :             filename++;
     434             : 
     435      266292 :         if (!excludeFiles[excludeIdx].match_prefix)
     436      232952 :             cmplen++;
     437      266292 :         if (strncmp(filename, excludeFiles[excludeIdx].name, cmplen) == 0)
     438             :         {
     439         140 :             if (is_source)
     440         140 :                 pg_log_debug("entry \"%s\" excluded from source file list",
     441             :                              path);
     442             :             else
     443           0 :                 pg_log_debug("entry \"%s\" excluded from target file list",
     444             :                              path);
     445         140 :             return true;
     446             :         }
     447             :     }
     448             : 
     449             :     /*
     450             :      * ... And check some directories.  Note that this includes any contents
     451             :      * within the directories themselves.
     452             :      */
     453      265572 :     for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
     454             :     {
     455      232400 :         snprintf(localpath, sizeof(localpath), "%s/",
     456             :                  excludeDirContents[excludeIdx]);
     457      232400 :         if (strstr(path, localpath) == path)
     458             :         {
     459          28 :             if (is_source)
     460          28 :                 pg_log_debug("entry \"%s\" excluded from source file list",
     461             :                              path);
     462             :             else
     463           0 :                 pg_log_debug("entry \"%s\" excluded from target file list",
     464             :                              path);
     465          28 :             return true;
     466             :         }
     467             :     }
     468             : 
     469       33172 :     return false;
     470             : }
     471             : 
     472             : static const char *
     473       11176 : action_to_str(file_action_t action)
     474             : {
     475       11176 :     switch (action)
     476             :     {
     477         830 :         case FILE_ACTION_NONE:
     478         830 :             return "NONE";
     479        8872 :         case FILE_ACTION_COPY:
     480        8872 :             return "COPY";
     481           8 :         case FILE_ACTION_TRUNCATE:
     482           8 :             return "TRUNCATE";
     483          10 :         case FILE_ACTION_COPY_TAIL:
     484          10 :             return "COPY_TAIL";
     485          18 :         case FILE_ACTION_CREATE:
     486          18 :             return "CREATE";
     487        1438 :         case FILE_ACTION_REMOVE:
     488        1438 :             return "REMOVE";
     489             : 
     490           0 :         default:
     491           0 :             return "unknown";
     492             :     }
     493             : }
     494             : 
     495             : /*
     496             :  * Calculate the totals needed for progress reports.
     497             :  */
     498             : void
     499           0 : calculate_totals(filemap_t *filemap)
     500             : {
     501             :     file_entry_t *entry;
     502             :     int         i;
     503             : 
     504           0 :     filemap->total_size = 0;
     505           0 :     filemap->fetch_size = 0;
     506             : 
     507           0 :     for (i = 0; i < filemap->nentries; i++)
     508             :     {
     509           0 :         entry = filemap->entries[i];
     510             : 
     511           0 :         if (entry->source_type != FILE_TYPE_REGULAR)
     512           0 :             continue;
     513             : 
     514           0 :         filemap->total_size += entry->source_size;
     515             : 
     516           0 :         if (entry->action == FILE_ACTION_COPY)
     517             :         {
     518           0 :             filemap->fetch_size += entry->source_size;
     519           0 :             continue;
     520             :         }
     521             : 
     522           0 :         if (entry->action == FILE_ACTION_COPY_TAIL)
     523           0 :             filemap->fetch_size += (entry->source_size - entry->target_size);
     524             : 
     525           0 :         if (entry->target_pages_to_overwrite.bitmapsize > 0)
     526             :         {
     527             :             datapagemap_iterator_t *iter;
     528             :             BlockNumber blk;
     529             : 
     530           0 :             iter = datapagemap_iterate(&entry->target_pages_to_overwrite);
     531           0 :             while (datapagemap_next(iter, &blk))
     532           0 :                 filemap->fetch_size += BLCKSZ;
     533             : 
     534           0 :             pg_free(iter);
     535             :         }
     536             :     }
     537           0 : }
     538             : 
     539             : void
     540          28 : print_filemap(filemap_t *filemap)
     541             : {
     542             :     file_entry_t *entry;
     543             :     int         i;
     544             : 
     545       33428 :     for (i = 0; i < filemap->nentries; i++)
     546             :     {
     547       33400 :         entry = filemap->entries[i];
     548       33400 :         if (entry->action != FILE_ACTION_NONE ||
     549       23054 :             entry->target_pages_to_overwrite.bitmapsize > 0)
     550             :         {
     551       11176 :             pg_log_debug("%s (%s)", entry->path,
     552             :                          action_to_str(entry->action));
     553             : 
     554       11176 :             if (entry->target_pages_to_overwrite.bitmapsize > 0)
     555         846 :                 datapagemap_print(&entry->target_pages_to_overwrite);
     556             :         }
     557             :     }
     558          28 :     fflush(stdout);
     559          28 : }
     560             : 
     561             : /*
     562             :  * Does it look like a relation data file?
     563             :  *
     564             :  * For our purposes, only files belonging to the main fork are considered
     565             :  * relation files. Other forks are always copied in toto: we cannot reliably
     566             :  * track changes to them, since WAL only contains block references for the
     567             :  * main fork.
     568             :  */
     569             : static bool
     570       34186 : isRelDataFile(const char *path)
     571             : {
     572             :     RelFileLocator rlocator;
     573             :     unsigned int segNo;
     574             :     int         nmatch;
     575             :     bool        matched;
     576             : 
     577             :     /*----
     578             :      * Relation data files can be in one of the following directories:
     579             :      *
     580             :      * global/
     581             :      *      shared relations
     582             :      *
     583             :      * base/<db oid>/
     584             :      *      regular relations, default tablespace
     585             :      *
     586             :      * pg_tblspc/<tblspc oid>/<tblspc version>/
     587             :      *      within a non-default tablespace (the name of the directory
     588             :      *      depends on version)
     589             :      *
     590             :      * And the relation data files themselves have a filename like:
     591             :      *
     592             :      * <oid>.<segment number>
     593             :      *
     594             :      *----
     595             :      */
     596       34186 :     rlocator.spcOid = InvalidOid;
     597       34186 :     rlocator.dbOid = InvalidOid;
     598       34186 :     rlocator.relNumber = InvalidRelFileNumber;
     599       34186 :     segNo = 0;
     600       34186 :     matched = false;
     601             : 
     602       34186 :     nmatch = sscanf(path, "global/%u.%u", &rlocator.relNumber, &segNo);
     603       34186 :     if (nmatch == 1 || nmatch == 2)
     604             :     {
     605        1624 :         rlocator.spcOid = GLOBALTABLESPACE_OID;
     606        1624 :         rlocator.dbOid = 0;
     607        1624 :         matched = true;
     608             :     }
     609             :     else
     610             :     {
     611       32562 :         nmatch = sscanf(path, "base/%u/%u.%u",
     612             :                         &rlocator.dbOid, &rlocator.relNumber, &segNo);
     613       32562 :         if (nmatch == 2 || nmatch == 3)
     614             :         {
     615       29974 :             rlocator.spcOid = DEFAULTTABLESPACE_OID;
     616       29974 :             matched = true;
     617             :         }
     618             :         else
     619             :         {
     620        2588 :             nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/%u.%u",
     621             :                             &rlocator.spcOid, &rlocator.dbOid, &rlocator.relNumber,
     622             :                             &segNo);
     623        2588 :             if (nmatch == 3 || nmatch == 4)
     624          24 :                 matched = true;
     625             :         }
     626             :     }
     627             : 
     628             :     /*
     629             :      * The sscanf tests above can match files that have extra characters at
     630             :      * the end. To eliminate such cases, cross-check that GetRelationPath
     631             :      * creates the exact same filename, when passed the RelFileLocator
     632             :      * information we extracted from the filename.
     633             :      */
     634       34186 :     if (matched)
     635             :     {
     636       31622 :         char       *check_path = datasegpath(rlocator, MAIN_FORKNUM, segNo);
     637             : 
     638       31622 :         if (strcmp(check_path, path) != 0)
     639        7704 :             matched = false;
     640             : 
     641       31622 :         pfree(check_path);
     642             :     }
     643             : 
     644       34186 :     return matched;
     645             : }
     646             : 
     647             : /*
     648             :  * A helper function to create the path of a relation file and segment.
     649             :  * Segment 0 is the bare relation path; later segments get a ".<segno>" suffix.
     650             :  * The returned path is palloc'd; the caller should pfree it when done.
     651             :  */
     652             : static char *
     653      200662 : datasegpath(RelFileLocator rlocator, ForkNumber forknum, BlockNumber segno)
     654             : {
     655             :     char       *path;
     656             :     char       *segpath;
     657             : 
     658      200662 :     path = relpathperm(rlocator, forknum);
     659      200662 :     if (segno > 0)
     660             :     {
     661           0 :         segpath = psprintf("%s.%u", path, segno);
     662           0 :         pfree(path);   /* its contents were copied into segpath */
     663           0 :         return segpath;
     664             :     }
     665             :     else
     666      200662 :         return path;
     667             : }
     668             : 
     669             : /*
     670             :  * In the final stage, the filemap is sorted so that removals come last.
     671             :  * From disk space usage point of view, it would be better to do removals
     672             :  * first, but for now, safety first. If a whole directory is deleted, all
     673             :  * files and subdirectories inside it need to be removed first. On creation,
     674             :  * parent directory needs to be created before files and directories inside
     675             :  * it. To achieve that, the file_action_t enum is ordered so that we can
     676             :  * just sort on that first. Furthermore, sort REMOVE entries in reverse
     677             :  * path order, so that "foo/bar" subdirectory is removed before "foo".
     678             :  */
     679             : static int
     680      354464 : final_filemap_cmp(const void *a, const void *b)
     681             : {
     682      354464 :     file_entry_t *fa = *((file_entry_t **) a);   /* array elements are entry pointers */
     683      354464 :     file_entry_t *fb = *((file_entry_t **) b);
     684             : 
     685      354464 :     if (fa->action > fb->action)
     686        9584 :         return 1;
     687      344880 :     if (fa->action < fb->action)
     688       20862 :         return -1;
     689             : 
     690      324018 :     if (fa->action == FILE_ACTION_REMOVE)
     691       11264 :         return strcmp(fb->path, fa->path);   /* operands swapped: reverse path order */
     692             :     else
     693      312754 :         return strcmp(fa->path, fb->path);
     694             : }
     695             : 
     696             : /*
     697             :  * Decide which file_action_t to perform on a file; pg_fatal() on unknown or mismatched file types.
     698             :  */
     699             : static file_action_t
     700       33400 : decide_file_action(file_entry_t *entry)
     701             : {
     702       33400 :     const char *path = entry->path;
     703             : 
     704             :     /*
     705             :      * Don't touch the control file. It is handled specially, after copying
     706             :      * all the other files.
     707             :      */
     708       33400 :     if (strcmp(path, "global/pg_control") == 0)
     709          28 :         return FILE_ACTION_NONE;
     710             : 
     711             :     /* Skip macOS system files (".DS_Store" anywhere in the path) */
     712       33372 :     if (strstr(path, ".DS_Store") != NULL)
     713           4 :         return FILE_ACTION_NONE;
     714             : 
     715             :     /*
     716             :      * Remove all files matching the exclusion filters in the target.
     717             :      */
     718       33368 :     if (check_file_excluded(path, true))
     719             :     {
     720         196 :         if (entry->target_exists)
     721         140 :             return FILE_ACTION_REMOVE;
     722             :         else
     723          56 :             return FILE_ACTION_NONE;
     724             :     }
     725             : 
     726             :     /*
     727             :      * Handle cases where the file is missing from one of the systems.
     728             :      */
     729       33172 :     if (!entry->target_exists && entry->source_exists)
     730             :     {
     731             :         /*
     732             :          * File exists in source, but not in target. Copy it in toto. (If it's
     733             :          * a relation data file, WAL replay after rewinding should re-create
     734             :          * it anyway. But there's no harm in copying it now.)
     735             :          */
     736        1360 :         switch (entry->source_type)
     737             :         {
     738          18 :             case FILE_TYPE_DIRECTORY:
     739             :             case FILE_TYPE_SYMLINK:
     740          18 :                 return FILE_ACTION_CREATE;
     741        1342 :             case FILE_TYPE_REGULAR:
     742        1342 :                 return FILE_ACTION_COPY;
     743           0 :             case FILE_TYPE_UNDEFINED:
     744           0 :                 pg_fatal("unknown file type for \"%s\"", entry->path);
     745             :                 break;
     746             :         }
     747             :     }
     748       31812 :     else if (entry->target_exists && !entry->source_exists)
     749             :     {
     750             :         /*
     751             :          * For files that exist in target but not in source, we check the
     752             :          * keepwal hash table; any files listed therein must not be removed.
     753             :          */
     754        1304 :         if (keepwal_entry_exists(path))
     755             :         {
     756           6 :             pg_log_debug("Not removing file \"%s\" because it is required for recovery", path);
     757           6 :             return FILE_ACTION_NONE;
     758             :         }
     759        1298 :         return FILE_ACTION_REMOVE;
     760             :     }
     761       30508 :     else if (!entry->target_exists && !entry->source_exists)
     762             :     {
     763             :         /*
     764             :          * Doesn't exist in either server. Why does it have an entry in the
     765             :          * first place??
     766             :          */
     767             :         Assert(false);
     768           0 :         return FILE_ACTION_NONE;
     769             :     }
     770             : 
     771             :     /*
     772             :      * Otherwise, the file exists on both systems
     773             :      */
     774             :     Assert(entry->target_exists && entry->source_exists);
     775             : 
     776       30508 :     if (entry->source_type != entry->target_type)
     777             :     {
     778             :         /* But it's a different kind of object. Strange.. */
     779           0 :         pg_fatal("file \"%s\" is of different type in source and target", entry->path);
     780             :     }
     781             : 
     782             :     /*
     783             :      * PG_VERSION files should be identical on both systems, but avoid
     784             :      * overwriting them for paranoia.
     785             :      */
     786       30508 :     if (pg_str_endswith(entry->path, "PG_VERSION"))
     787         120 :         return FILE_ACTION_NONE;
     788             : 
     789       30388 :     switch (entry->source_type)
     790             :     {
     791         764 :         case FILE_TYPE_DIRECTORY:
     792         764 :             return FILE_ACTION_NONE;
     793             : 
     794           0 :         case FILE_TYPE_SYMLINK:
     795             : 
     796             :             /*
     797             :              * XXX: Should we check if it points to the same target?
     798             :              */
     799           0 :             return FILE_ACTION_NONE;
     800             : 
     801       29624 :         case FILE_TYPE_REGULAR:
     802       29624 :             if (!entry->isrelfile)
     803             :             {
     804             :                 /*
     805             :                  * It's a non-data file that we have no special processing
     806             :                  * for. Copy it in toto.
     807             :                  */
     808        7530 :                 return FILE_ACTION_COPY;
     809             :             }
     810             :             else
     811             :             {
     812             :                 /*
     813             :                  * It's a data file that exists in both systems.
     814             :                  *
     815             :                  * If it's larger in target, we can truncate it. There will
     816             :                  * also be a WAL record of the truncation in the source
     817             :                  * system, so WAL replay would eventually truncate the target
     818             :                  * too, but we might as well do it now.
     819             :                  *
     820             :                  * If it's smaller in the target, it means that it has been
     821             :                  * truncated in the target, or enlarged in the source, or
     822             :                  * both. If it was truncated in the target, we need to copy
     823             :                  * the missing tail from the source system. If it was enlarged
     824             :                  * in the source system, there will be WAL records in the
     825             :                  * source system for the new blocks, so we wouldn't need to
     826             :                  * copy them here. But we don't know which scenario we're
     827             :                  * dealing with, and there's no harm in copying the missing
     828             :                  * blocks now, so do it now.
     829             :                  *
     830             :                  * If it's the same size, do nothing here. Any blocks modified
     831             :                  * in the target will be copied based on parsing the target
     832             :                  * system's WAL, and any blocks modified in the source will be
     833             :                  * updated after rewinding, when the source system's WAL is
     834             :                  * replayed.
     835             :                  */
     836       22094 :                 if (entry->target_size < entry->source_size)
     837          10 :                     return FILE_ACTION_COPY_TAIL;
     838       22084 :                 else if (entry->target_size > entry->source_size)
     839           8 :                     return FILE_ACTION_TRUNCATE;
     840             :                 else
     841       22076 :                     return FILE_ACTION_NONE;
     842             :             }
     843             :             break;
     844             : 
     845           0 :         case FILE_TYPE_UNDEFINED:
     846           0 :             pg_fatal("unknown file type for \"%s\"", path);
     847             :             break;
     848             :     }
     849             : 
     850             :     /* unreachable: every case of the switch above returns or calls pg_fatal() */
     851           0 :     pg_fatal("could not decide what to do with file \"%s\"", path);
     852             : }
     853             : 
     854             : /*
     855             :  * Decide what to do with each file in the file hash table.
     856             :  *
     857             :  * Returns a pg_malloc'd 'filemap' whose entries point at the hash table's
     858             :  * entries, sorted in the order that their actions should be executed.
     859             :  */
     860             : filemap_t *
     861          28 : decide_file_actions(void)
     862             : {
     863             :     int         i;
     864             :     filehash_iterator it;
     865             :     file_entry_t *entry;
     866             :     filemap_t  *filemap;
     867             : 
     868          28 :     filehash_start_iterate(filehash, &it);
     869       33428 :     while ((entry = filehash_iterate(filehash, &it)) != NULL)
     870             :     {
     871       33400 :         entry->action = decide_file_action(entry);
     872             :     }
     873             : 
     874             :     /*
     875             :      * Turn the hash table into an array, and sort in the order that the
     876             :      * actions should be performed.
     877             :      */
     878          28 :     filemap = pg_malloc(offsetof(filemap_t, entries) +
     879          28 :                         filehash->members * sizeof(file_entry_t *));
     880          28 :     filemap->nentries = filehash->members;
     881          28 :     filehash_start_iterate(filehash, &it);
     882          28 :     i = 0;
     883       33428 :     while ((entry = filehash_iterate(filehash, &it)) != NULL)
     884             :     {
     885       33400 :         filemap->entries[i++] = entry;   /* pointer only; the entry is not copied */
     886             :     }
     887             : 
     888          28 :     qsort(&filemap->entries, filemap->nentries, sizeof(file_entry_t *),
     889             :           final_filemap_cmp);   /* by action, then by path */
     890             : 
     891          28 :     return filemap;
     892             : }

Generated by: LCOV version 1.14