LCOV - code coverage report
Current view: top level - src/bin/pg_rewind - filemap.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 190 230 82.6 %
Date: 2024-03-29 00:11:46 Functions: 15 16 93.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * filemap.c
       4             :  *    A data structure for keeping track of files that have changed.
       5             :  *
       6             :  * This source file contains the logic to decide what to do with different
       7             :  * kinds of files, and the data structure to support it.  Before modifying
       8             :  * anything, pg_rewind collects information about all the files and their
       9             :  * attributes in the target and source data directories.  It also scans the
      10             :  * WAL log in the target, and collects information about data blocks that
      11             :  * were changed.  All this information is stored in a hash table, using the
      12             :  * file path relative to the root of the data directory as the key.
      13             :  *
      14             :  * After collecting all the information required, the decide_file_actions()
      15             :  * function scans the hash table and decides what action needs to be taken
      16             :  * for each file.  Finally, it sorts the array to the final order that the
      17             :  * actions should be executed in.
      18             :  *
      19             :  * Copyright (c) 2013-2024, PostgreSQL Global Development Group
      20             :  *
      21             :  *-------------------------------------------------------------------------
      22             :  */
      23             : 
      24             : #include "postgres_fe.h"
      25             : 
      26             : #include <sys/stat.h>
      27             : #include <unistd.h>
      28             : 
      29             : #include "catalog/pg_tablespace_d.h"
      30             : #include "common/file_utils.h"
      31             : #include "common/hashfn.h"
      32             : #include "common/string.h"
      33             : #include "datapagemap.h"
      34             : #include "filemap.h"
      35             : #include "pg_rewind.h"
      36             : 
      37             : /*
      38             :  * Define a hash table which we can use to store information about the files
      39             :  * appearing in source and target systems.
      40             :  */
      41             : static uint32 hash_string_pointer(const char *s);
      42             : #define SH_PREFIX       filehash
      43             : #define SH_ELEMENT_TYPE file_entry_t
      44             : #define SH_KEY_TYPE     const char *
      45             : #define SH_KEY          path
      46             : #define SH_HASH_KEY(tb, key)    hash_string_pointer(key)
      47             : #define SH_EQUAL(tb, a, b)      (strcmp(a, b) == 0)
      48             : #define SH_SCOPE        static inline
      49             : #define SH_RAW_ALLOCATOR    pg_malloc0
      50             : #define SH_DECLARE
      51             : #define SH_DEFINE
      52             : #include "lib/simplehash.h"
      53             : 
      54             : #define FILEHASH_INITIAL_SIZE   1000
      55             : 
      56             : static filehash_hash *filehash;
      57             : 
      58             : static bool isRelDataFile(const char *path);
      59             : static char *datasegpath(RelFileLocator rlocator, ForkNumber forknum,
      60             :                          BlockNumber segno);
      61             : 
      62             : static file_entry_t *insert_filehash_entry(const char *path);
      63             : static file_entry_t *lookup_filehash_entry(const char *path);
      64             : static int  final_filemap_cmp(const void *a, const void *b);
      65             : static bool check_file_excluded(const char *path, bool is_source);
      66             : 
      67             : /*
      68             :  * Definition of one element of an exclusion list, used to exclude
      69             :  * contents when rewinding.  "name" is the name of the file or path to
      70             :  * check for exclusion.  If "match_prefix" is true, any items matching
      71             :  * the name as prefix are excluded.
      72             :  */
      73             : struct exclude_list_item
      74             : {
      75             :     const char *name;
      76             :     bool        match_prefix;
      77             : };
      78             : 
      79             : /*
      80             :  * The contents of these directories are removed or recreated during server
      81             :  * start so they are not included in data processed by pg_rewind.
      82             :  *
      83             :  * Note: those lists should be kept in sync with what basebackup.c provides.
      84             :  * Some of the values, contrary to what basebackup.c uses, are hardcoded as
      85             :  * they are defined in backend-only headers.  So this list is maintained
      86             :  * with a best effort in mind.
      87             :  */
      88             : static const char *const excludeDirContents[] =
      89             : {
      90             :     /*
      91             :      * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped
      92             :      * because extensions like pg_stat_statements store data there.
      93             :      */
      94             :     "pg_stat_tmp",                /* defined as PG_STAT_TMP_DIR */
      95             : 
      96             :     /*
      97             :      * It is generally not useful to back up the contents of this directory
      98             :      * even if the intention is to restore to another primary. See backup.sgml
      99             :      * for a more detailed description.
     100             :      */
     101             :     "pg_replslot",
     102             : 
     103             :     /* Contents removed on startup, see dsm_cleanup_for_mmap(). */
     104             :     "pg_dynshmem",                /* defined as PG_DYNSHMEM_DIR */
     105             : 
     106             :     /* Contents removed on startup, see AsyncShmemInit(). */
     107             :     "pg_notify",
     108             : 
     109             :     /*
     110             :      * Old contents are loaded for possible debugging but are not required for
     111             :      * normal operation, see SerialInit().
     112             :      */
     113             :     "pg_serial",
     114             : 
     115             :     /* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
     116             :     "pg_snapshots",
     117             : 
     118             :     /* Contents zeroed on startup, see StartupSUBTRANS(). */
     119             :     "pg_subtrans",
     120             : 
     121             :     /* end of list */
     122             :     NULL
     123             : };
     124             : 
     125             : /*
     126             :  * List of files excluded from filemap processing.  A file is excluded if
     127             :  * its name equals an entry, or starts with it when match_prefix is set.
     128             :  */
     129             : static const struct exclude_list_item excludeFiles[] =
     130             : {
     131             :     /* Skip auto conf temporary file. */
     132             :     {"postgresql.auto.conf.tmp", false},  /* defined as PG_AUTOCONF_FILENAME */
     133             : 
     134             :     /* Skip current log file temporary file */
     135             :     {"current_logfiles.tmp", false},  /* defined as
     136             :                                          * LOG_METAINFO_DATAFILE_TMP */
     137             : 
     138             :     /* Skip relation cache because it is rebuilt on startup */
     139             :     {"pg_internal.init", true}, /* defined as RELCACHE_INIT_FILENAME */
     140             : 
     141             :     /*
     142             :      * If there is a backup_label or tablespace_map file, it indicates that a
     143             :      * recovery failed and this cluster probably can't be rewound, but exclude
     144             :      * them anyway if they are found.
     145             :      */
     146             :     {"backup_label", false},  /* defined as BACKUP_LABEL_FILE */
     147             :     {"tablespace_map", false},    /* defined as TABLESPACE_MAP */
     148             : 
     149             :     /*
     150             :      * If there's a backup_manifest, it belongs to a backup that was used to
     151             :      * start this server. It is *not* correct for this backup. Our
     152             :      * backup_manifest is injected into the backup separately if users want
     153             :      * it.
     154             :      */
     155             :     {"backup_manifest", false},
     156             : 
     157             :     {"postmaster.pid", false},
     158             :     {"postmaster.opts", false},
     159             : 
     160             :     /* end of list */
     161             :     {NULL, false}
     162             : };
     163             : 
     164             : /*
     165             :  * Initialize the hash table for the file map.
     166             :  */
     167             : void
     168          26 : filehash_init(void)
     169             : {
     170          26 :     filehash = filehash_create(FILEHASH_INITIAL_SIZE, NULL);
     171          26 : }
     172             : 
     173             : /* Look up entry for 'path', creating a new one if it doesn't exist */
     174             : static file_entry_t *
     175       59724 : insert_filehash_entry(const char *path)
     176             : {
     177             :     file_entry_t *entry;
     178             :     bool        found;
     179             : 
     180       59724 :     entry = filehash_insert(filehash, path, &found);
     181       59724 :     if (!found)
     182             :     {
     183       31220 :         entry->path = pg_strdup(path);
     184       31220 :         entry->isrelfile = isRelDataFile(path);
     185             : 
     186       31220 :         entry->target_exists = false;
     187       31220 :         entry->target_type = FILE_TYPE_UNDEFINED;
     188       31220 :         entry->target_size = 0;
     189       31220 :         entry->target_link_target = NULL;
     190       31220 :         entry->target_pages_to_overwrite.bitmap = NULL;
     191       31220 :         entry->target_pages_to_overwrite.bitmapsize = 0;
     192             : 
     193       31220 :         entry->source_exists = false;
     194       31220 :         entry->source_type = FILE_TYPE_UNDEFINED;
     195       31220 :         entry->source_size = 0;
     196       31220 :         entry->source_link_target = NULL;
     197             : 
     198       31220 :         entry->action = FILE_ACTION_UNDECIDED;
     199             :     }
     200             : 
     201       59724 :     return entry;
     202             : }
     203             : 
     204             : static file_entry_t *
     205      168800 : lookup_filehash_entry(const char *path)
     206             : {
     207      168800 :     return filehash_lookup(filehash, path);
     208             : }
     209             : 
     210             : /*
     211             :  * Callback for processing source file list.
     212             :  *
     213             :  * This is called once for every file in the source server.  We record the
     214             :  * type and size of the file, so that decide_file_actions() can later decide what
     215             :  * to do with it.
     216             :  */
     217             : void
     218       29894 : process_source_file(const char *path, file_type_t type, size_t size,
     219             :                     const char *link_target)
     220             : {
     221             :     file_entry_t *entry;
     222             : 
     223             :     /*
     224             :      * Pretend that pg_wal is a directory, even if it's really a symlink. We
     225             :      * don't want to mess with the symlink itself, nor complain if it's a
     226             :      * symlink in source but not in target or vice versa.
     227             :      */
     228       29894 :     if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
     229           0 :         type = FILE_TYPE_DIRECTORY;
     230             : 
     231             :     /*
     232             :      * sanity check: a filename that looks like a data file better be a
     233             :      * regular file
     234             :      */
     235       29894 :     if (type != FILE_TYPE_REGULAR && isRelDataFile(path))
     236           0 :         pg_fatal("data file \"%s\" in source is not a regular file", path);
     237             : 
     238             :     /* Remember this source file */
     239       29894 :     entry = insert_filehash_entry(path);
     240       29894 :     if (entry->source_exists)
     241           0 :         pg_fatal("duplicate source file \"%s\"", path);
     242       29894 :     entry->source_exists = true;
     243       29894 :     entry->source_type = type;
     244       29894 :     entry->source_size = size;
     245       29894 :     entry->source_link_target = link_target ? pg_strdup(link_target) : NULL;
     246       29894 : }
     247             : 
     248             : /*
     249             :  * Callback for processing target file list.
     250             :  *
     251             :  * Record the type and size of the file, like process_source_file() does.
     252             :  */
     253             : void
     254       29830 : process_target_file(const char *path, file_type_t type, size_t size,
     255             :                     const char *link_target)
     256             : {
     257             :     file_entry_t *entry;
     258             : 
     259             :     /*
     260             :      * Do not apply any exclusion filters here.  This has the advantage of
     261             :      * removing from the target data folder all paths which have been filtered
     262             :      * out from the source data folder when processing the source files.
     263             :      */
     264             : 
     265             :     /*
     266             :      * Like in process_source_file, pretend that pg_wal is always a directory.
     267             :      */
     268       29830 :     if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
     269           4 :         type = FILE_TYPE_DIRECTORY;
     270             : 
     271             :     /* Remember this target file */
     272       29830 :     entry = insert_filehash_entry(path);
     273       29830 :     if (entry->target_exists)
     274           0 :         pg_fatal("duplicate source file \"%s\"", path);
     275       29830 :     entry->target_exists = true;
     276       29830 :     entry->target_type = type;
     277       29830 :     entry->target_size = size;
     278       29830 :     entry->target_link_target = link_target ? pg_strdup(link_target) : NULL;
     279       29830 : }
     280             : 
     281             : /*
     282             :  * This callback gets called while we read the WAL in the target, for every
     283             :  * block that has changed in the target system.  It decides if the given
     284             :  * 'blkno' in the target relfile needs to be overwritten from the source, and
     285             :  * if so, records it in 'target_pages_to_overwrite' bitmap.
     286             :  *
     287             :  * NOTE: All the files on both systems must have already been added to the
     288             :  * hash table!
     289             :  */
     290             : void
     291      168800 : process_target_wal_block_change(ForkNumber forknum, RelFileLocator rlocator,
     292             :                                 BlockNumber blkno)
     293             : {
     294             :     char       *path;
     295             :     file_entry_t *entry;
     296             :     BlockNumber blkno_inseg;
     297             :     int         segno;
     298             : 
     299      168800 :     segno = blkno / RELSEG_SIZE;
     300      168800 :     blkno_inseg = blkno % RELSEG_SIZE;
     301             : 
     302      168800 :     path = datasegpath(rlocator, forknum, segno);
     303      168800 :     entry = lookup_filehash_entry(path);
     304      168800 :     pfree(path);
     305             : 
     306             :     /*
     307             :      * If the block still exists in both systems, remember it. Otherwise we
     308             :      * can safely ignore it.
     309             :      *
     310             :      * If the block is beyond the EOF in the source system, or the file
     311             :      * doesn't exist in the source at all, we're going to truncate/remove it
     312             :      * away from the target anyway. Likewise, if it doesn't exist in the
     313             :      * target anymore, we will copy it over with the "tail" from the source
     314             :      * system, anyway.
     315             :      *
     316             :      * It is possible to find WAL for a file that doesn't exist on either
     317             :      * system anymore. It means that the relation was dropped later in the
     318             :      * target system, and independently on the source system too, or that it
     319             :      * was created and dropped in the target system and it never existed in
     320             :      * the source. Either way, we can safely ignore it.
     321             :      */
     322      168800 :     if (entry)
     323             :     {
     324             :         Assert(entry->isrelfile);
     325             : 
     326      168800 :         if (entry->target_exists)
     327             :         {
     328      168792 :             if (entry->target_type != FILE_TYPE_REGULAR)
     329           0 :                 pg_fatal("unexpected page modification for non-regular file \"%s\"",
     330             :                          entry->path);
     331             : 
     332      168792 :             if (entry->source_exists)
     333             :             {
     334             :                 off_t       end_offset;
     335             : 
     336      165588 :                 end_offset = (blkno_inseg + 1) * BLCKSZ;
     337      165588 :                 if (end_offset <= entry->source_size && end_offset <= entry->target_size)
     338        5588 :                     datapagemap_add(&entry->target_pages_to_overwrite, blkno_inseg);
     339             :             }
     340             :         }
     341             :     }
     342      168800 : }
     343             : 
     344             : /*
     345             :  * Is this the path of a file that pg_rewind can skip copying?
     346             :  */
     347             : static bool
     348       31190 : check_file_excluded(const char *path, bool is_source)
     349             : {
     350             :     char        localpath[MAXPGPATH];
     351             :     int         excludeIdx;
     352             :     const char *filename;
     353             : 
     354             :     /*
     355             :      * Skip all temporary files, .../pgsql_tmp/... and .../pgsql_tmp.*
     356             :      */
     357       31190 :     if (strstr(path, "/" PG_TEMP_FILE_PREFIX) != NULL ||
     358       31164 :         strstr(path, "/" PG_TEMP_FILES_DIR "/") != NULL)
     359             :     {
     360          26 :         return true;
     361             :     }
     362             : 
     363             :     /* check individual files... */
     364      279940 :     for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
     365             :     {
     366      248908 :         int         cmplen = strlen(excludeFiles[excludeIdx].name);
     367             : 
     368      248908 :         filename = last_dir_separator(path);
     369      248908 :         if (filename == NULL)
     370        5392 :             filename = path;
     371             :         else
     372      243516 :             filename++;
     373             : 
     374      248908 :         if (!excludeFiles[excludeIdx].match_prefix)
     375      217744 :             cmplen++;
     376      248908 :         if (strncmp(filename, excludeFiles[excludeIdx].name, cmplen) == 0)
     377             :         {
     378         132 :             if (is_source)
     379         132 :                 pg_log_debug("entry \"%s\" excluded from source file list",
     380             :                              path);
     381             :             else
     382           0 :                 pg_log_debug("entry \"%s\" excluded from target file list",
     383             :                              path);
     384         132 :             return true;
     385             :         }
     386             :     }
     387             : 
     388             :     /*
     389             :      * ... And check some directories.  Note that this includes any contents
     390             :      * within the directories themselves.
     391             :      */
     392      248230 :     for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
     393             :     {
     394      217224 :         snprintf(localpath, sizeof(localpath), "%s/",
     395             :                  excludeDirContents[excludeIdx]);
     396      217224 :         if (strstr(path, localpath) == path)
     397             :         {
     398          26 :             if (is_source)
     399          26 :                 pg_log_debug("entry \"%s\" excluded from source file list",
     400             :                              path);
     401             :             else
     402           0 :                 pg_log_debug("entry \"%s\" excluded from target file list",
     403             :                              path);
     404          26 :             return true;
     405             :         }
     406             :     }
     407             : 
     408       31006 :     return false;
     409             : }
     410             : 
     411             : static const char *
     412       10594 : action_to_str(file_action_t action)
     413             : {
     414       10594 :     switch (action)
     415             :     {
     416         796 :         case FILE_ACTION_NONE:
     417         796 :             return "NONE";
     418        8352 :         case FILE_ACTION_COPY:
     419        8352 :             return "COPY";
     420           8 :         case FILE_ACTION_TRUNCATE:
     421           8 :             return "TRUNCATE";
     422          10 :         case FILE_ACTION_COPY_TAIL:
     423          10 :             return "COPY_TAIL";
     424          18 :         case FILE_ACTION_CREATE:
     425          18 :             return "CREATE";
     426        1410 :         case FILE_ACTION_REMOVE:
     427        1410 :             return "REMOVE";
     428             : 
     429           0 :         default:
     430           0 :             return "unknown";
     431             :     }
     432             : }
     433             : 
     434             : /*
     435             :  * Calculate the totals needed for progress reports.
     436             :  */
     437             : void
     438           0 : calculate_totals(filemap_t *filemap)
     439             : {
     440             :     file_entry_t *entry;
     441             :     int         i;
     442             : 
     443           0 :     filemap->total_size = 0;
     444           0 :     filemap->fetch_size = 0;
     445             : 
     446           0 :     for (i = 0; i < filemap->nentries; i++)
     447             :     {
     448           0 :         entry = filemap->entries[i];
     449             : 
     450           0 :         if (entry->source_type != FILE_TYPE_REGULAR)
     451           0 :             continue;
     452             : 
     453           0 :         filemap->total_size += entry->source_size;
     454             : 
     455           0 :         if (entry->action == FILE_ACTION_COPY)
     456             :         {
     457           0 :             filemap->fetch_size += entry->source_size;
     458           0 :             continue;
     459             :         }
     460             : 
     461           0 :         if (entry->action == FILE_ACTION_COPY_TAIL)
     462           0 :             filemap->fetch_size += (entry->source_size - entry->target_size);
     463             : 
     464           0 :         if (entry->target_pages_to_overwrite.bitmapsize > 0)
     465             :         {
     466             :             datapagemap_iterator_t *iter;
     467             :             BlockNumber blk;
     468             : 
     469           0 :             iter = datapagemap_iterate(&entry->target_pages_to_overwrite);
     470           0 :             while (datapagemap_next(iter, &blk))
     471           0 :                 filemap->fetch_size += BLCKSZ;
     472             : 
     473           0 :             pg_free(iter);
     474             :         }
     475             :     }
     476           0 : }
     477             : 
     478             : void
     479          26 : print_filemap(filemap_t *filemap)
     480             : {
     481             :     file_entry_t *entry;
     482             :     int         i;
     483             : 
     484       31246 :     for (i = 0; i < filemap->nentries; i++)
     485             :     {
     486       31220 :         entry = filemap->entries[i];
     487       31220 :         if (entry->action != FILE_ACTION_NONE ||
     488       21422 :             entry->target_pages_to_overwrite.bitmapsize > 0)
     489             :         {
     490       10594 :             pg_log_debug("%s (%s)", entry->path,
     491             :                          action_to_str(entry->action));
     492             : 
     493       10594 :             if (entry->target_pages_to_overwrite.bitmapsize > 0)
     494         812 :                 datapagemap_print(&entry->target_pages_to_overwrite);
     495             :         }
     496             :     }
     497          26 :     fflush(stdout);
     498          26 : }
     499             : 
     500             : /*
     501             :  * Does it look like a relation data file?
     502             :  *
     503             :  * For our purposes, only files belonging to the main fork are considered
     504             :  * relation files. Other forks are always copied in toto, because we cannot
     505             :  * reliably track changes to them, because WAL only contains block references
     506             :  * for the main fork.
     507             :  */
     508             : static bool
     509       31954 : isRelDataFile(const char *path)
     510             : {
     511             :     RelFileLocator rlocator;
     512             :     unsigned int segNo;
     513             :     int         nmatch;
     514             :     bool        matched;
     515             : 
     516             :     /*----
     517             :      * Relation data files can be in one of the following directories:
     518             :      *
     519             :      * global/
     520             :      *      shared relations
     521             :      *
     522             :      * base/<db oid>/
     523             :      *      regular relations, default tablespace
     524             :      *
     525             :      * pg_tblspc/<tblspc oid>/<tblspc version>/
     526             :      *      within a non-default tablespace (the name of the directory
     527             :      *      depends on version)
     528             :      *
     529             :      * And the relation data files themselves have a filename like:
     530             :      *
     531             :      * <oid>.<segment number>
     532             :      *
     533             :      *----
     534             :      */
     535       31954 :     rlocator.spcOid = InvalidOid;
     536       31954 :     rlocator.dbOid = InvalidOid;
     537       31954 :     rlocator.relNumber = InvalidRelFileNumber;
     538       31954 :     segNo = 0;
     539       31954 :     matched = false;
     540             : 
     541       31954 :     nmatch = sscanf(path, "global/%u.%u", &rlocator.relNumber, &segNo);
     542       31954 :     if (nmatch == 1 || nmatch == 2)
     543             :     {
     544        1560 :         rlocator.spcOid = GLOBALTABLESPACE_OID;
     545        1560 :         rlocator.dbOid = 0;
     546        1560 :         matched = true;
     547             :     }
     548             :     else
     549             :     {
     550       30394 :         nmatch = sscanf(path, "base/%u/%u.%u",
     551             :                         &rlocator.dbOid, &rlocator.relNumber, &segNo);
     552       30394 :         if (nmatch == 2 || nmatch == 3)
     553             :         {
     554       27996 :             rlocator.spcOid = DEFAULTTABLESPACE_OID;
     555       27996 :             matched = true;
     556             :         }
     557             :         else
     558             :         {
     559        2398 :             nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/%u.%u",
     560             :                             &rlocator.spcOid, &rlocator.dbOid, &rlocator.relNumber,
     561             :                             &segNo);
     562        2398 :             if (nmatch == 3 || nmatch == 4)
     563          24 :                 matched = true;
     564             :         }
     565             :     }
     566             : 
     567             :     /*
     568             :      * The sscanf tests above can match files that have extra characters at
     569             :      * the end. To eliminate such cases, cross-check that GetRelationPath
     570             :      * creates the exact same filename, when passed the RelFileLocator
     571             :      * information we extracted from the filename.
     572             :      */
     573       31954 :     if (matched)
     574             :     {
     575       29580 :         char       *check_path = datasegpath(rlocator, MAIN_FORKNUM, segNo);
     576             : 
     577       29580 :         if (strcmp(check_path, path) != 0)
     578        7240 :             matched = false;
     579             : 
     580       29580 :         pfree(check_path);
     581             :     }
     582             : 
     583       31954 :     return matched;
     584             : }
     585             : 
     586             : /*
     587             :  * A helper function to create the path of a relation file and segment.
     588             :  *
     589             :  * The returned path is palloc'd
     590             :  */
     591             : static char *
     592      198380 : datasegpath(RelFileLocator rlocator, ForkNumber forknum, BlockNumber segno)
     593             : {
     594             :     char       *path;
     595             :     char       *segpath;
     596             : 
     597      198380 :     path = relpathperm(rlocator, forknum);
     598      198380 :     if (segno > 0)
     599             :     {
     600           0 :         segpath = psprintf("%s.%u", path, segno);
     601           0 :         pfree(path);
     602           0 :         return segpath;
     603             :     }
     604             :     else
     605      198380 :         return path;
     606             : }
     607             : 
     608             : /*
     609             :  * In the final stage, the filemap is sorted so that removals come last.
     610             :  * From disk space usage point of view, it would be better to do removals
     611             :  * first, but for now, safety first. If a whole directory is deleted, all
     612             :  * files and subdirectories inside it need to removed first. On creation,
     613             :  * parent directory needs to be created before files and directories inside
     614             :  * it. To achieve that, the file_action_t enum is ordered so that we can
     615             :  * just sort on that first. Furthermore, sort REMOVE entries in reverse
     616             :  * path order, so that "foo/bar" subdirectory is removed before "foo".
     617             :  */
     618             : static int
     619      331450 : final_filemap_cmp(const void *a, const void *b)
     620             : {
     621      331450 :     file_entry_t *fa = *((file_entry_t **) a);
     622      331450 :     file_entry_t *fb = *((file_entry_t **) b);
     623             : 
     624      331450 :     if (fa->action > fb->action)
     625       11526 :         return 1;
     626      319924 :     if (fa->action < fb->action)
     627       15678 :         return -1;
     628             : 
     629      304246 :     if (fa->action == FILE_ACTION_REMOVE)
     630       10896 :         return strcmp(fb->path, fa->path);
     631             :     else
     632      293350 :         return strcmp(fa->path, fb->path);
     633             : }
     634             : 
     635             : /*
     636             :  * Decide what action to perform to a file.
     637             :  */
     638             : static file_action_t
     639       31220 : decide_file_action(file_entry_t *entry)
     640             : {
     641       31220 :     const char *path = entry->path;
     642             : 
     643             :     /*
     644             :      * Don't touch the control file. It is handled specially, after copying
     645             :      * all the other files.
     646             :      */
     647       31220 :     if (strcmp(path, "global/pg_control") == 0)
     648          26 :         return FILE_ACTION_NONE;
     649             : 
     650             :     /* Skip macOS system files */
     651       31194 :     if (strstr(path, ".DS_Store") != NULL)
     652           4 :         return FILE_ACTION_NONE;
     653             : 
     654             :     /*
     655             :      * Remove all files matching the exclusion filters in the target.
     656             :      */
     657       31190 :     if (check_file_excluded(path, true))
     658             :     {
     659         184 :         if (entry->target_exists)
     660         130 :             return FILE_ACTION_REMOVE;
     661             :         else
     662          54 :             return FILE_ACTION_NONE;
     663             :     }
     664             : 
     665             :     /*
     666             :      * Handle cases where the file is missing from one of the systems.
     667             :      */
     668       31006 :     if (!entry->target_exists && entry->source_exists)
     669             :     {
     670             :         /*
     671             :          * File exists in source, but not in target. Copy it in toto. (If it's
     672             :          * a relation data file, WAL replay after rewinding should re-create
     673             :          * it anyway. But there's no harm in copying it now.)
     674             :          */
     675        1332 :         switch (entry->source_type)
     676             :         {
     677          18 :             case FILE_TYPE_DIRECTORY:
     678             :             case FILE_TYPE_SYMLINK:
     679          18 :                 return FILE_ACTION_CREATE;
     680        1314 :             case FILE_TYPE_REGULAR:
     681        1314 :                 return FILE_ACTION_COPY;
     682           0 :             case FILE_TYPE_UNDEFINED:
     683           0 :                 pg_fatal("unknown file type for \"%s\"", entry->path);
     684             :                 break;
     685             :         }
     686             :     }
     687       29674 :     else if (entry->target_exists && !entry->source_exists)
     688             :     {
     689             :         /* File exists in target, but not source. Remove it. */
     690        1280 :         return FILE_ACTION_REMOVE;
     691             :     }
     692       28394 :     else if (!entry->target_exists && !entry->source_exists)
     693             :     {
     694             :         /*
     695             :          * Doesn't exist in either server. Why does it have an entry in the
     696             :          * first place??
     697             :          */
     698             :         Assert(false);
     699           0 :         return FILE_ACTION_NONE;
     700             :     }
     701             : 
     702             :     /*
     703             :      * Otherwise, the file exists on both systems
     704             :      */
     705             :     Assert(entry->target_exists && entry->source_exists);
     706             : 
     707       28394 :     if (entry->source_type != entry->target_type)
     708             :     {
     709             :         /* But it's a different kind of object. Strange.. */
     710           0 :         pg_fatal("file \"%s\" is of different type in source and target", entry->path);
     711             :     }
     712             : 
     713             :     /*
     714             :      * PG_VERSION files should be identical on both systems, but avoid
     715             :      * overwriting them for paranoia.
     716             :      */
     717       28394 :     if (pg_str_endswith(entry->path, "PG_VERSION"))
     718         112 :         return FILE_ACTION_NONE;
     719             : 
     720       28282 :     switch (entry->source_type)
     721             :     {
     722         712 :         case FILE_TYPE_DIRECTORY:
     723         712 :             return FILE_ACTION_NONE;
     724             : 
     725           0 :         case FILE_TYPE_SYMLINK:
     726             : 
     727             :             /*
     728             :              * XXX: Should we check if it points to the same target?
     729             :              */
     730           0 :             return FILE_ACTION_NONE;
     731             : 
     732       27570 :         case FILE_TYPE_REGULAR:
     733       27570 :             if (!entry->isrelfile)
     734             :             {
     735             :                 /*
     736             :                  * It's a non-data file that we have no special processing
     737             :                  * for. Copy it in toto.
     738             :                  */
     739        7038 :                 return FILE_ACTION_COPY;
     740             :             }
     741             :             else
     742             :             {
     743             :                 /*
     744             :                  * It's a data file that exists in both systems.
     745             :                  *
     746             :                  * If it's larger in target, we can truncate it. There will
     747             :                  * also be a WAL record of the truncation in the source
     748             :                  * system, so WAL replay would eventually truncate the target
     749             :                  * too, but we might as well do it now.
     750             :                  *
     751             :                  * If it's smaller in the target, it means that it has been
     752             :                  * truncated in the target, or enlarged in the source, or
     753             :                  * both. If it was truncated in the target, we need to copy
     754             :                  * the missing tail from the source system. If it was enlarged
     755             :                  * in the source system, there will be WAL records in the
     756             :                  * source system for the new blocks, so we wouldn't need to
     757             :                  * copy them here. But we don't know which scenario we're
     758             :                  * dealing with, and there's no harm in copying the missing
     759             :                  * blocks now, so do it now.
     760             :                  *
     761             :                  * If it's the same size, do nothing here. Any blocks modified
     762             :                  * in the target will be copied based on parsing the target
     763             :                  * system's WAL, and any blocks modified in the source will be
     764             :                  * updated after rewinding, when the source system's WAL is
     765             :                  * replayed.
     766             :                  */
     767       20532 :                 if (entry->target_size < entry->source_size)
     768          10 :                     return FILE_ACTION_COPY_TAIL;
     769       20522 :                 else if (entry->target_size > entry->source_size)
     770           8 :                     return FILE_ACTION_TRUNCATE;
     771             :                 else
     772       20514 :                     return FILE_ACTION_NONE;
     773             :             }
     774             :             break;
     775             : 
     776           0 :         case FILE_TYPE_UNDEFINED:
     777           0 :             pg_fatal("unknown file type for \"%s\"", path);
     778             :             break;
     779             :     }
     780             : 
     781             :     /* unreachable */
     782           0 :     pg_fatal("could not decide what to do with file \"%s\"", path);
     783             : }
     784             : 
     785             : /*
     786             :  * Decide what to do with each file.
     787             :  *
     788             :  * Returns a 'filemap' with the entries in the order that their actions
     789             :  * should be executed.
     790             :  */
     791             : filemap_t *
     792          26 : decide_file_actions(void)
     793             : {
     794             :     int         i;
     795             :     filehash_iterator it;
     796             :     file_entry_t *entry;
     797             :     filemap_t  *filemap;
     798             : 
     799          26 :     filehash_start_iterate(filehash, &it);
     800       31246 :     while ((entry = filehash_iterate(filehash, &it)) != NULL)
     801             :     {
     802       31220 :         entry->action = decide_file_action(entry);
     803             :     }
     804             : 
     805             :     /*
     806             :      * Turn the hash table into an array, and sort in the order that the
     807             :      * actions should be performed.
     808             :      */
     809          26 :     filemap = pg_malloc(offsetof(filemap_t, entries) +
     810          26 :                         filehash->members * sizeof(file_entry_t *));
     811          26 :     filemap->nentries = filehash->members;
     812          26 :     filehash_start_iterate(filehash, &it);
     813          26 :     i = 0;
     814       31246 :     while ((entry = filehash_iterate(filehash, &it)) != NULL)
     815             :     {
     816       31220 :         filemap->entries[i++] = entry;
     817             :     }
     818             : 
     819          26 :     qsort(&filemap->entries, filemap->nentries, sizeof(file_entry_t *),
     820             :           final_filemap_cmp);
     821             : 
     822          26 :     return filemap;
     823             : }
     824             : 
     825             : 
     826             : /*
     827             :  * Helper function for filemap hash table.
     828             :  */
     829             : static uint32
     830      268626 : hash_string_pointer(const char *s)
     831             : {
     832      268626 :     unsigned char *ss = (unsigned char *) s;
     833             : 
     834      268626 :     return hash_bytes(ss, strlen(s));
     835             : }

Generated by: LCOV version 1.14