LCOV - code coverage report
Current view: top level - src/backend/storage/file - reinit.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 109 124 87.9 %
Date: 2025-01-18 03:14:54 Functions: 4 4 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * reinit.c
       4             :  *    Reinitialization of unlogged relations
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/storage/file/reinit.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : 
      15             : #include "postgres.h"
      16             : 
      17             : #include <unistd.h>
      18             : 
      19             : #include "common/relpath.h"
      20             : #include "postmaster/startup.h"
      21             : #include "storage/copydir.h"
      22             : #include "storage/fd.h"
      23             : #include "storage/reinit.h"
      24             : #include "utils/hsearch.h"
      25             : #include "utils/memutils.h"
      26             : 
      27             : static void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname,
      28             :                                                   int op);
      29             : static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname,
      30             :                                                int op);
      31             : 
      32             : typedef struct
      33             : {
      34             :     RelFileNumber relnumber;    /* hash key: relation file number parsed from a file name */
      35             : } unlogged_relation_entry;     /* entry of the cleanup hash table; it carries nothing but the key */
      36             : 
      37             : /*
      38             :  * Reset unlogged relations from before the last restart.
      39             :  *
      40             :  * If op includes UNLOGGED_RELATION_CLEANUP, we remove all forks of any
      41             :  * relation with an "init" fork, except for the "init" fork itself.
      42             :  *
      43             :  * If op includes UNLOGGED_RELATION_INIT, we copy the "init" fork to the main
      44             :  * fork.
                        *
                        * op is tested bit by bit, so one call may request either operation or
                        * both.  The default tablespace directory "base" is processed first,
                        * followed by the TABLESPACE_VERSION_DIRECTORY subdirectory of every entry
                        * found in PG_TBLSPC_DIR (other than "." and "..").
                        *
                        * All work is done in a private memory context that is deleted before
                        * returning; the caller's memory context is current again on return.
      45             :  */
      46             : void
      47         714 : ResetUnloggedRelations(int op)
      48             : {
                           /* Buffer for the PG_TBLSPC_DIR/<entry>/TABLESPACE_VERSION_DIRECTORY path built in the loop below. */
      49             :     char        temp_path[MAXPGPATH + sizeof(PG_TBLSPC_DIR) + sizeof(TABLESPACE_VERSION_DIRECTORY)];
      50             :     DIR        *spc_dir;
      51             :     struct dirent *spc_de;
      52             :     MemoryContext tmpctx,
      53             :                 oldctx;
      54             : 
      55             :     /* Log it. */
      56         714 :     elog(DEBUG1, "resetting unlogged relations: cleanup %d init %d",
      57             :          (op & UNLOGGED_RELATION_CLEANUP) != 0,
      58             :          (op & UNLOGGED_RELATION_INIT) != 0);
      59             : 
      60             :     /*
      61             :      * Just to be sure we don't leak any memory, let's create a temporary
      62             :      * memory context for this operation.
      63             :      */
      64         714 :     tmpctx = AllocSetContextCreate(CurrentMemoryContext,
      65             :                                    "ResetUnloggedRelations",
      66             :                                    ALLOCSET_DEFAULT_SIZES);
      67         714 :     oldctx = MemoryContextSwitchTo(tmpctx);
      68             : 
      69             :     /* Prepare to report progress resetting unlogged relations. */
      70         714 :     begin_startup_progress_phase();
      71             : 
      72             :     /*
      73             :      * First process unlogged files in pg_default ($PGDATA/base)
      74             :      */
      75         714 :     ResetUnloggedRelationsInTablespaceDir("base", op);
      76             : 
      77             :     /*
      78             :      * Cycle through directories for all non-default tablespaces.
      79             :      */
      80         714 :     spc_dir = AllocateDir(PG_TBLSPC_DIR);
      81             : 
      82        2316 :     while ((spc_de = ReadDir(spc_dir, PG_TBLSPC_DIR)) != NULL)
      83             :     {
                               /* Skip the self and parent directory entries. */
      84        1602 :         if (strcmp(spc_de->d_name, ".") == 0 ||
      85         888 :             strcmp(spc_de->d_name, "..") == 0)
      86        1428 :             continue;
      87             : 
                               /* Every other entry is handed on as a tablespace directory path. */
      88         174 :         snprintf(temp_path, sizeof(temp_path), "%s/%s/%s",
      89         174 :                  PG_TBLSPC_DIR, spc_de->d_name, TABLESPACE_VERSION_DIRECTORY);
      90         174 :         ResetUnloggedRelationsInTablespaceDir(temp_path, op);
      91             :     }
      92             : 
      93         714 :     FreeDir(spc_dir);
      94             : 
      95             :     /*
      96             :      * Restore memory context.
      97             :      */
      98         714 :     MemoryContextSwitchTo(oldctx);
      99         714 :     MemoryContextDelete(tmpctx);
     100         714 : }
     101             : 
     102             : /*
     103             :  * Process one tablespace directory for ResetUnloggedRelations
                        *
                        * tsdirname is the path of the directory to scan.  Every entry whose name
                        * consists solely of decimal digits is treated as a per-database
                        * directory: a startup progress report is issued for it and its path is
                        * handed to ResetUnloggedRelationsInDbspaceDir together with op.
     104             :  */
     105             : static void
     106         888 : ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
     107             : {
     108             :     DIR        *ts_dir;
     109             :     struct dirent *de;
     110             :     char        dbspace_path[MAXPGPATH * 2];
     111             : 
     112         888 :     ts_dir = AllocateDir(tsdirname);
     113             : 
     114             :     /*
     115             :      * If we get ENOENT on a tablespace directory, log it and return.  This
     116             :      * can happen if a previous DROP TABLESPACE crashed between removing the
     117             :      * tablespace directory and removing the symlink in pg_tblspc.  We don't
     118             :      * really want to prevent database startup in that scenario, so let it
     119             :      * pass instead.  Any other type of error will be reported by ReadDir
     120             :      * (causing a startup failure).
     121             :      */
     122         888 :     if (ts_dir == NULL && errno == ENOENT)
     123             :     {
     124           0 :         ereport(LOG,
     125             :                 (errcode_for_file_access(),
     126             :                  errmsg("could not open directory \"%s\": %m",
     127             :                         tsdirname)));
     128           0 :         return;
     129             :     }
     130             : 
     131        5246 :     while ((de = ReadDir(ts_dir, tsdirname)) != NULL)
     132             :     {
     133             :         /*
     134             :          * We're only interested in the per-database directories, which have
     135             :          * numeric names.  Note that this code will also (properly) ignore "."
     136             :          * and "..".
     137             :          */
     138        4358 :         if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
     139        1902 :             continue;
     140             : 
     141        2456 :         snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
     142        2456 :                  tsdirname, de->d_name);
     143             : 
                               /*
                                * Choose the progress message.  The init test comes first, so the
                                * init wording is used when both bits of op are set.
                                */
     144        2456 :         if (op & UNLOGGED_RELATION_INIT)
     145        1042 :             ereport_startup_progress("resetting unlogged relations (init), elapsed time: %ld.%02d s, current path: %s",
     146             :                                      dbspace_path);
     147        1414 :         else if (op & UNLOGGED_RELATION_CLEANUP)
     148        1414 :             ereport_startup_progress("resetting unlogged relations (cleanup), elapsed time: %ld.%02d s, current path: %s",
     149             :                                      dbspace_path);
     150             : 
     151        2456 :         ResetUnloggedRelationsInDbspaceDir(dbspace_path, op);
     152             :     }
     153             : 
     154         888 :     FreeDir(ts_dir);
     155             : }
     156             : 
     157             : /*
     158             :  * Process one per-dbspace directory for ResetUnloggedRelations
                        *
                        * dbspacedirname is the path of the directory to scan; op must include at
                        * least one of UNLOGGED_RELATION_CLEANUP and UNLOGGED_RELATION_INIT.
                        *
                        * Cleanup unlinks every non-init relation file whose relation file number
                        * also has an init fork in this directory, and raises ERROR if an unlink
                        * fails.  Init copies each init fork file to the matching main fork file
                        * name, fsyncs those files, and finally fsyncs the directory itself.
                        *
                        * If cleanup is requested and the directory holds no init forks at all,
                        * the function returns right after the first scan, so the init step is
                        * not run for this directory.
     159             :  */
     160             : static void
     161        2456 : ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
     162             : {
     163             :     DIR        *dbspace_dir;
     164             :     struct dirent *de;
     165             :     char        rm_path[MAXPGPATH * 2];
     166             : 
     167             :     /* Caller must specify at least one operation. */
     168             :     Assert((op & (UNLOGGED_RELATION_CLEANUP | UNLOGGED_RELATION_INIT)) != 0);
     169             : 
     170             :     /*
     171             :      * Cleanup is a two-pass operation.  First, we go through and identify all
     172             :      * the files with init forks.  Then, we go through again and nuke
     173             :      * everything with the same relation file number except the init fork.
     174             :      */
     175        2456 :     if ((op & UNLOGGED_RELATION_CLEANUP) != 0)
     176             :     {
     177             :         HTAB       *hash;
     178             :         HASHCTL     ctl;
     179             : 
     180             :         /*
     181             :          * It's possible that someone could create a ton of unlogged relations
     182             :          * in the same database & tablespace, so we'd better use a hash table
     183             :          * rather than an array or linked list to keep track of which files
     184             :          * need to be reset.  Otherwise, this cleanup operation would be
     185             :          * O(n^2).
     186             :          */
                               /*
                                * NOTE(review): the key field of unlogged_relation_entry is declared
                                * as RelFileNumber while keysize uses sizeof(Oid) -- presumably the
                                * two types have the same width; confirm.
                                */
     187        1414 :         ctl.keysize = sizeof(Oid);
     188        1414 :         ctl.entrysize = sizeof(unlogged_relation_entry);
     189        1414 :         ctl.hcxt = CurrentMemoryContext;
     190        1414 :         hash = hash_create("unlogged relation OIDs", 32, &ctl,
     191             :                            HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
     192             : 
     193             :         /* Scan the directory. */
     194        1414 :         dbspace_dir = AllocateDir(dbspacedirname);
     195      405706 :         while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
     196             :         {
     197             :             ForkNumber  forkNum;
     198             :             unsigned    segno;
     199             :             unlogged_relation_entry ent;
     200             : 
     201             :             /* Skip anything that doesn't look like a relation data file. */
     202      404292 :             if (!parse_filename_for_nontemp_relation(de->d_name,
     203             :                                                      &ent.relnumber,
     204             :                                                      &forkNum, &segno))
     205      404276 :                 continue;
     206             : 
     207             :             /* Also skip it unless this is the init fork. */
     208      398808 :             if (forkNum != INIT_FORKNUM)
     209      398792 :                 continue;
     210             : 
     211             :             /*
     212             :              * Put the RelFileNumber into the hash table, if it isn't already.
     213             :              */
     214          16 :             (void) hash_search(hash, &ent, HASH_ENTER, NULL);
     215             :         }
     216             : 
     217             :         /* Done with the first pass. */
     218        1414 :         FreeDir(dbspace_dir);
     219             : 
     220             :         /*
     221             :          * If we didn't find any init forks, there's no point in continuing;
     222             :          * we can bail out now.
     223             :          */
     224        1414 :         if (hash_get_num_entries(hash) == 0)
     225             :         {
     226        1406 :             hash_destroy(hash);
     227        1406 :             return;
     228             :         }
     229             : 
     230             :         /*
     231             :          * Now, make a second pass and remove anything that matches.
     232             :          */
     233           8 :         dbspace_dir = AllocateDir(dbspacedirname);
     234        1874 :         while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
     235             :         {
     236             :             ForkNumber  forkNum;
     237             :             unsigned    segno;
     238             :             unlogged_relation_entry ent;
     239             : 
     240             :             /* Skip anything that doesn't look like a relation data file. */
     241        1866 :             if (!parse_filename_for_nontemp_relation(de->d_name,
     242             :                                                      &ent.relnumber,
     243             :                                                      &forkNum, &segno))
     244          44 :                 continue;
     245             : 
     246             :             /* We never remove the init fork. */
     247        1838 :             if (forkNum == INIT_FORKNUM)
     248          16 :                 continue;
     249             : 
     250             :             /*
     251             :              * See whether the relation file number parsed from the name shows
     252             :              * up in the hash table.  If so, nuke it!
     253             :              */
     254        1822 :             if (hash_search(hash, &ent, HASH_FIND, NULL))
     255             :             {
     256          14 :                 snprintf(rm_path, sizeof(rm_path), "%s/%s",
     257          14 :                          dbspacedirname, de->d_name);
     258          14 :                 if (unlink(rm_path) < 0)
     259           0 :                     ereport(ERROR,
     260             :                             (errcode_for_file_access(),
     261             :                              errmsg("could not remove file \"%s\": %m",
     262             :                                     rm_path)));
     263             :                 else
     264          14 :                     elog(DEBUG2, "unlinked file \"%s\"", rm_path);
     265             :             }
     266             :         }
     267             : 
     268             :         /* Cleanup is complete. */
     269           8 :         FreeDir(dbspace_dir);
     270           8 :         hash_destroy(hash);
     271             :     }
     272             : 
     273             :     /*
     274             :      * Initialization happens after cleanup is complete: we copy each init
     275             :      * fork file to the corresponding main fork file.  Note that if we are
     276             :      * asked to do both cleanup and init, we may never get here: if the
     277             :      * cleanup code determines that there are no init forks in this dbspace,
     278             :      * it will return before we get to this point.
     279             :      */
     280        1050 :     if ((op & UNLOGGED_RELATION_INIT) != 0)
     281             :     {
     282             :         /* Scan the directory. */
     283        1042 :         dbspace_dir = AllocateDir(dbspacedirname);
     284      292812 :         while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
     285             :         {
     286             :             ForkNumber  forkNum;
     287             :             RelFileNumber relNumber;
     288             :             unsigned    segno;
     289             :             char        srcpath[MAXPGPATH * 2];
     290             :             char        dstpath[MAXPGPATH];
     291             : 
     292             :             /* Skip anything that doesn't look like a relation data file. */
     293      291770 :             if (!parse_filename_for_nontemp_relation(de->d_name, &relNumber,
     294             :                                                      &forkNum, &segno))
     295      291756 :                 continue;
     296             : 
     297             :             /* Also skip it unless this is the init fork. */
     298      287728 :             if (forkNum != INIT_FORKNUM)
     299      287714 :                 continue;
     300             : 
     301             :             /* Construct source pathname. */
     302          14 :             snprintf(srcpath, sizeof(srcpath), "%s/%s",
     303          14 :                      dbspacedirname, de->d_name);
     304             : 
     305             :             /* Construct destination pathname. */
                                   /*
                                    * The destination is the bare relation file number, with the
                                    * segment number appended after a dot only for segments other
                                    * than 0; a name without a fork suffix parses as the main fork.
                                    *
                                    * NOTE(review): the snprintf results are not checked, so a path
                                    * longer than the buffer would be truncated silently --
                                    * presumably paths never get that long; confirm.
                                    */
     306          14 :             if (segno == 0)
     307          14 :                 snprintf(dstpath, sizeof(dstpath), "%s/%u",
     308             :                          dbspacedirname, relNumber);
     309             :             else
     310           0 :                 snprintf(dstpath, sizeof(dstpath), "%s/%u.%u",
     311             :                          dbspacedirname, relNumber, segno);
     312             : 
     313             :             /* OK, we're ready to perform the actual copy. */
     314          14 :             elog(DEBUG2, "copying %s to %s", srcpath, dstpath);
     315          14 :             copy_file(srcpath, dstpath);
     316             :         }
     317             : 
     318        1042 :         FreeDir(dbspace_dir);
     319             : 
     320             :         /*
     321             :          * copy_file() above has already called pg_flush_data() on the files
     322             :          * it created. Now we need to fsync those files, because a checkpoint
     323             :          * won't do it for us while we're in recovery. We do this in a
     324             :          * separate pass to allow the kernel to perform all the flushes
     325             :          * (especially the metadata ones) at once.
     326             :          */
     327        1042 :         dbspace_dir = AllocateDir(dbspacedirname);
     328      292826 :         while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
     329             :         {
     330             :             RelFileNumber relNumber;
     331             :             ForkNumber  forkNum;
     332             :             unsigned    segno;
     333             :             char        mainpath[MAXPGPATH];
     334             : 
     335             :             /* Skip anything that doesn't look like a relation data file. */
     336      291784 :             if (!parse_filename_for_nontemp_relation(de->d_name, &relNumber,
     337             :                                                      &forkNum, &segno))
     338      291770 :                 continue;
     339             : 
     340             :             /* Also skip it unless this is the init fork. */
     341      287742 :             if (forkNum != INIT_FORKNUM)
     342      287728 :                 continue;
     343             : 
     344             :             /* Construct main fork pathname. */
                                   /* This rebuilds the same name the copy pass used as its destination. */
     345          14 :             if (segno == 0)
     346          14 :                 snprintf(mainpath, sizeof(mainpath), "%s/%u",
     347             :                          dbspacedirname, relNumber);
     348             :             else
     349           0 :                 snprintf(mainpath, sizeof(mainpath), "%s/%u.%u",
     350             :                          dbspacedirname, relNumber, segno);
     351             : 
     352          14 :             fsync_fname(mainpath, false);
     353             :         }
     354             : 
     355        1042 :         FreeDir(dbspace_dir);
     356             : 
     357             :         /*
     358             :          * Lastly, fsync the database directory itself, ensuring the
     359             :          * filesystem remembers the file creations and deletions we've done.
     360             :          * We don't bother with this during a call that does only
     361             :          * UNLOGGED_RELATION_CLEANUP, because if recovery crashes before we
     362             :          * get to doing UNLOGGED_RELATION_INIT, we'll redo the cleanup step
     363             :          * too at the next startup attempt.
     364             :          */
     365        1042 :         fsync_fname(dbspacedirname, true);
     366             :     }
     367             : }
     368             : 
     369             : /*
     370             :  * Basic parsing of putative relation filenames.
     371             :  *
     372             :  * This function returns true if the file appears to be in the correct format
     373             :  * for a non-temporary relation and false otherwise.
     374             :  *
     375             :  * If it returns true, it sets *relnumber, *fork, and *segno to the values
     376             :  * extracted from the filename. If it returns false, these values are set to
     377             :  * InvalidRelFileNumber, InvalidForkNumber, and 0, respectively.
                        *
                        * The accepted shape of name is: a decimal relation file number without a
                        * leading zero, optionally followed by an underscore and a fork name that
                        * forkname_chars recognizes (the main fork is assumed when there is no
                        * underscore), optionally followed by a dot and a nonzero decimal segment
                        * number without a leading zero.  Anything left over after that makes the
                        * name invalid.  Both numbers must fit in 32 bits.
     378             :  */
     379             : bool
     380     1598268 : parse_filename_for_nontemp_relation(const char *name, RelFileNumber *relnumber,
     381             :                                     ForkNumber *fork, unsigned *segno)
     382             : {
     383             :     unsigned long n,
     384             :                 s;
     385             :     ForkNumber  f;
     386             :     char       *endp;
     387             : 
                           /* Preset the outputs to their failure values; they are overwritten only on success. */
     388     1598268 :     *relnumber = InvalidRelFileNumber;
     389     1598268 :     *fork = InvalidForkNumber;
     390     1598268 :     *segno = 0;
     391             : 
     392             :     /*
     393             :      * Relation filenames should begin with a digit that is not a zero. By
     394             :      * rejecting cases involving leading zeroes, the caller can assume that
     395             :      * there's only one possible string of characters that could have produced
     396             :      * any given value for *relnumber.
     397             :      *
     398             :      * (To be clear, we don't expect files with names like 0017.3 to exist at
     399             :      * all -- but if 0017.3 does exist, it's a non-relation file, not part of
     400             :      * the main fork for relfilenode 17.)
     401             :      */
     402     1598268 :     if (name[0] < '1' || name[0] > '9')
     403       18702 :         return false;
     404             : 
     405             :     /*
     406             :      * Parse the leading digit string. If the value is out of range, we
     407             :      * conclude that this isn't a relation file at all.
                            *
                            * errno is cleared first so that an error reported by strtoul can be
                            * told apart from a stale value.  n is unsigned, so the n <= 0 test
                            * only rejects zero.
     408             :      */
     409     1579566 :     errno = 0;
     410     1579566 :     n = strtoul(name, &endp, 10);
     411     1579566 :     if (errno || name == endp || n <= 0 || n > PG_UINT32_MAX)
     412           0 :         return false;
     413     1579566 :     name = endp;
     414             : 
     415             :     /* Check for a fork name. */
     416     1579566 :     if (*name != '_')
     417     1191294 :         f = MAIN_FORKNUM;
     418             :     else
     419             :     {
     420             :         int         forkchar;
     421             : 
                               /*
                                * The forkname_chars result is used as the number of characters to
                                * skip after the underscore; a non-positive result is treated as
                                * no fork name matched.
                                */
     422      388272 :         forkchar = forkname_chars(name + 1, &f);
     423      388272 :         if (forkchar <= 0)
     424           0 :             return false;
     425      388272 :         name += forkchar + 1;
     426             :     }
     427             : 
     428             :     /* Check for a segment number. */
     429     1579566 :     if (*name != '.')
     430     1579566 :         s = 0;
     431             :     else
     432             :     {
     433             :         /* Reject leading zeroes, just like we do for RelFileNumber. */
     434           0 :         if (name[1] < '1' || name[1] > '9')
     435           0 :             return false;
     436             : 
                               /* Same errno and range checks as for the relation file number above. */
     437           0 :         errno = 0;
     438           0 :         s = strtoul(name + 1, &endp, 10);
     439           0 :         if (errno || name + 1 == endp || s <= 0 || s > PG_UINT32_MAX)
     440           0 :             return false;
     441           0 :         name = endp;
     442             :     }
     443             : 
     444             :     /* Now we should be at the end. */
     445     1579566 :     if (*name != '\0')
     446           0 :         return false;
     447             : 
     448             :     /* Set out parameters and return. */
     449     1579566 :     *relnumber = (RelFileNumber) n;
     450     1579566 :     *fork = f;
     451     1579566 :     *segno = (unsigned) s;
     452     1579566 :     return true;
     453             : }

Generated by: LCOV version 1.14