LCOV - code coverage report
Current view: top level - src/backend/storage/file - reinit.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 87.9 % 124 109
Test Date: 2026-02-17 17:20:33 Functions: 100.0 % 4 4
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * reinit.c
       4              :  *    Reinitialization of unlogged relations
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  * IDENTIFICATION
      10              :  *    src/backend/storage/file/reinit.c
      11              :  *
      12              :  *-------------------------------------------------------------------------
      13              :  */
      14              : 
      15              : #include "postgres.h"
      16              : 
      17              : #include <unistd.h>
      18              : 
      19              : #include "common/relpath.h"
      20              : #include "postmaster/startup.h"
      21              : #include "storage/copydir.h"
      22              : #include "storage/fd.h"
      23              : #include "storage/reinit.h"
      24              : #include "utils/hsearch.h"
      25              : #include "utils/memutils.h"
      26              : 
      27              : static void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname,
      28              :                                                   int op);
      29              : static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname,
      30              :                                                int op);
      31              : 
      32              : typedef struct
      33              : {
      34              :     RelFileNumber relnumber;    /* hash key */
      35              : } unlogged_relation_entry;
      36              : 
      37              : /*
      38              :  * Reset unlogged relations from before the last restart.
      39              :  *
      40              :  * If op includes UNLOGGED_RELATION_CLEANUP, we remove all forks of any
      41              :  * relation with an "init" fork, except for the "init" fork itself.
      42              :  *
      43              :  * If op includes UNLOGGED_RELATION_INIT, we copy the "init" fork to the main
      44              :  * fork.
      45              :  */
      46              : void
      47          387 : ResetUnloggedRelations(int op)
      48              : {
      49              :     char        temp_path[MAXPGPATH + sizeof(PG_TBLSPC_DIR) + sizeof(TABLESPACE_VERSION_DIRECTORY)];
      50              :     DIR        *spc_dir;
      51              :     struct dirent *spc_de;
      52              :     MemoryContext tmpctx,
      53              :                 oldctx;
      54              : 
      55              :     /* Log it. */
      56          387 :     elog(DEBUG1, "resetting unlogged relations: cleanup %d init %d",
      57              :          (op & UNLOGGED_RELATION_CLEANUP) != 0,
      58              :          (op & UNLOGGED_RELATION_INIT) != 0);
      59              : 
      60              :     /*
      61              :      * Just to be sure we don't leak any memory, let's create a temporary
      62              :      * memory context for this operation.
      63              :      */
      64          387 :     tmpctx = AllocSetContextCreate(CurrentMemoryContext,
      65              :                                    "ResetUnloggedRelations",
      66              :                                    ALLOCSET_DEFAULT_SIZES);
      67          387 :     oldctx = MemoryContextSwitchTo(tmpctx);
      68              : 
      69              :     /* Prepare to report progress resetting unlogged relations. */
      70          387 :     begin_startup_progress_phase();
      71              : 
      72              :     /*
      73              :      * First process unlogged files in pg_default ($PGDATA/base)
      74              :      */
      75          387 :     ResetUnloggedRelationsInTablespaceDir("base", op);
      76              : 
      77              :     /*
      78              :      * Cycle through directories for all non-default tablespaces.
      79              :      */
      80          387 :     spc_dir = AllocateDir(PG_TBLSPC_DIR);
      81              : 
      82         1248 :     while ((spc_de = ReadDir(spc_dir, PG_TBLSPC_DIR)) != NULL)
      83              :     {
      84          861 :         if (strcmp(spc_de->d_name, ".") == 0 ||
      85          474 :             strcmp(spc_de->d_name, "..") == 0)
      86          774 :             continue;
      87              : 
      88           87 :         snprintf(temp_path, sizeof(temp_path), "%s/%s/%s",
      89           87 :                  PG_TBLSPC_DIR, spc_de->d_name, TABLESPACE_VERSION_DIRECTORY);
      90           87 :         ResetUnloggedRelationsInTablespaceDir(temp_path, op);
      91              :     }
      92              : 
      93          387 :     FreeDir(spc_dir);
      94              : 
      95              :     /*
      96              :      * Restore memory context.
      97              :      */
      98          387 :     MemoryContextSwitchTo(oldctx);
      99          387 :     MemoryContextDelete(tmpctx);
     100          387 : }
     101              : 
     102              : /*
     103              :  * Process one tablespace directory for ResetUnloggedRelations
     104              :  */
     105              : static void
     106          474 : ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
     107              : {
     108              :     DIR        *ts_dir;
     109              :     struct dirent *de;
     110              :     char        dbspace_path[MAXPGPATH * 2];
     111              : 
     112          474 :     ts_dir = AllocateDir(tsdirname);
     113              : 
     114              :     /*
     115              :      * If we get ENOENT on a tablespace directory, log it and return.  This
     116              :      * can happen if a previous DROP TABLESPACE crashed between removing the
     117              :      * tablespace directory and removing the symlink in pg_tblspc.  We don't
     118              :      * really want to prevent database startup in that scenario, so let it
     119              :      * pass instead.  Any other type of error will be reported by ReadDir
     120              :      * (causing a startup failure).
     121              :      */
     122          474 :     if (ts_dir == NULL && errno == ENOENT)
     123              :     {
     124            0 :         ereport(LOG,
     125              :                 (errcode_for_file_access(),
     126              :                  errmsg("could not open directory \"%s\": %m",
     127              :                         tsdirname)));
     128            0 :         return;
     129              :     }
     130              : 
     131         2813 :     while ((de = ReadDir(ts_dir, tsdirname)) != NULL)
     132              :     {
     133              :         /*
     134              :          * We're only interested in the per-database directories, which have
     135              :          * numeric names.  Note that this code will also (properly) ignore "."
     136              :          * and "..".
     137              :          */
     138         2339 :         if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
     139         1014 :             continue;
     140              : 
     141         1325 :         snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
     142         1325 :                  tsdirname, de->d_name);
     143              : 
     144         1325 :         if (op & UNLOGGED_RELATION_INIT)
     145          554 :             ereport_startup_progress("resetting unlogged relations (init), elapsed time: %ld.%02d s, current path: %s",
     146              :                                      dbspace_path);
     147          771 :         else if (op & UNLOGGED_RELATION_CLEANUP)
     148          771 :             ereport_startup_progress("resetting unlogged relations (cleanup), elapsed time: %ld.%02d s, current path: %s",
     149              :                                      dbspace_path);
     150              : 
     151         1325 :         ResetUnloggedRelationsInDbspaceDir(dbspace_path, op);
     152              :     }
     153              : 
     154          474 :     FreeDir(ts_dir);
     155              : }
     156              : 
     157              : /*
     158              :  * Process one per-dbspace directory for ResetUnloggedRelations
     159              :  */
     160              : static void
     161         1325 : ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
     162              : {
     163              :     DIR        *dbspace_dir;
     164              :     struct dirent *de;
     165              :     char        rm_path[MAXPGPATH * 2];
     166              : 
     167              :     /* Caller must specify at least one operation. */
     168              :     Assert((op & (UNLOGGED_RELATION_CLEANUP | UNLOGGED_RELATION_INIT)) != 0);
     169              : 
     170              :     /*
     171              :      * Cleanup is a two-pass operation.  First, we go through and identify all
     172              :      * the files with init forks.  Then, we go through again and nuke
     173              :      * everything with the same OID except the init fork.
     174              :      */
     175         1325 :     if ((op & UNLOGGED_RELATION_CLEANUP) != 0)
     176              :     {
     177              :         HTAB       *hash;
     178              :         HASHCTL     ctl;
     179              : 
     180              :         /*
     181              :          * It's possible that someone could create a ton of unlogged relations
     182              :          * in the same database & tablespace, so we'd better use a hash table
     183              :          * rather than an array or linked list to keep track of which files
     184              :          * need to be reset.  Otherwise, this cleanup operation would be
     185              :          * O(n^2).
     186              :          */
     187          771 :         ctl.keysize = sizeof(Oid);
     188          771 :         ctl.entrysize = sizeof(unlogged_relation_entry);
     189          771 :         ctl.hcxt = CurrentMemoryContext;
     190          771 :         hash = hash_create("unlogged relation OIDs", 32, &ctl,
     191              :                            HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
     192              : 
     193              :         /* Scan the directory. */
     194          771 :         dbspace_dir = AllocateDir(dbspacedirname);
     195       222270 :         while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
     196              :         {
     197              :             ForkNumber  forkNum;
     198              :             unsigned    segno;
     199              :             unlogged_relation_entry ent;
     200              : 
     201              :             /* Skip anything that doesn't look like a relation data file. */
     202       221499 :             if (!parse_filename_for_nontemp_relation(de->d_name,
     203              :                                                      &ent.relnumber,
     204              :                                                      &forkNum, &segno))
     205       221491 :                 continue;
     206              : 
     207              :             /* Also skip it unless this is the init fork. */
     208       218501 :             if (forkNum != INIT_FORKNUM)
     209       218493 :                 continue;
     210              : 
     211              :             /*
     212              :              * Put the RelFileNumber into the hash table, if it isn't already.
     213              :              */
     214            8 :             (void) hash_search(hash, &ent, HASH_ENTER, NULL);
     215              :         }
     216              : 
     217              :         /* Done with the first pass. */
     218          771 :         FreeDir(dbspace_dir);
     219              : 
     220              :         /*
     221              :          * If we didn't find any init forks, there's no point in continuing;
     222              :          * we can bail out now.
     223              :          */
     224          771 :         if (hash_get_num_entries(hash) == 0)
     225              :         {
     226          767 :             hash_destroy(hash);
     227          767 :             return;
     228              :         }
     229              : 
     230              :         /*
     231              :          * Now, make a second pass and remove anything that matches.
     232              :          */
     233            4 :         dbspace_dir = AllocateDir(dbspacedirname);
     234          937 :         while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
     235              :         {
     236              :             ForkNumber  forkNum;
     237              :             unsigned    segno;
     238              :             unlogged_relation_entry ent;
     239              : 
     240              :             /* Skip anything that doesn't look like a relation data file. */
     241          933 :             if (!parse_filename_for_nontemp_relation(de->d_name,
     242              :                                                      &ent.relnumber,
     243              :                                                      &forkNum, &segno))
     244           22 :                 continue;
     245              : 
     246              :             /* We never remove the init fork. */
     247          919 :             if (forkNum == INIT_FORKNUM)
     248            8 :                 continue;
     249              : 
     250              :             /*
     251              :              * See whether the OID portion of the name shows up in the hash
     252              :              * table.  If so, nuke it!
     253              :              */
     254          911 :             if (hash_search(hash, &ent, HASH_FIND, NULL))
     255              :             {
     256            7 :                 snprintf(rm_path, sizeof(rm_path), "%s/%s",
     257            7 :                          dbspacedirname, de->d_name);
     258            7 :                 if (unlink(rm_path) < 0)
     259            0 :                     ereport(ERROR,
     260              :                             (errcode_for_file_access(),
     261              :                              errmsg("could not remove file \"%s\": %m",
     262              :                                     rm_path)));
     263              :                 else
     264            7 :                     elog(DEBUG2, "unlinked file \"%s\"", rm_path);
     265              :             }
     266              :         }
     267              : 
     268              :         /* Cleanup is complete. */
     269            4 :         FreeDir(dbspace_dir);
     270            4 :         hash_destroy(hash);
     271              :     }
     272              : 
     273              :     /*
     274              :      * Initialization happens after cleanup is complete: we copy each init
     275              :      * fork file to the corresponding main fork file.  Note that if we are
     276              :      * asked to do both cleanup and init, we may never get here: if the
     277              :      * cleanup code determines that there are no init forks in this dbspace,
     278              :      * it will return before we get to this point.
     279              :      */
     280          558 :     if ((op & UNLOGGED_RELATION_INIT) != 0)
     281              :     {
     282              :         /* Scan the directory. */
     283          554 :         dbspace_dir = AllocateDir(dbspacedirname);
     284       156415 :         while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
     285              :         {
     286              :             ForkNumber  forkNum;
     287              :             RelFileNumber relNumber;
     288              :             unsigned    segno;
     289              :             char        srcpath[MAXPGPATH * 2];
     290              :             char        dstpath[MAXPGPATH];
     291              : 
     292              :             /* Skip anything that doesn't look like a relation data file. */
     293       155861 :             if (!parse_filename_for_nontemp_relation(de->d_name, &relNumber,
     294              :                                                      &forkNum, &segno))
     295       155854 :                 continue;
     296              : 
     297              :             /* Also skip it unless this is the init fork. */
     298       153705 :             if (forkNum != INIT_FORKNUM)
     299       153698 :                 continue;
     300              : 
     301              :             /* Construct source pathname. */
     302            7 :             snprintf(srcpath, sizeof(srcpath), "%s/%s",
     303            7 :                      dbspacedirname, de->d_name);
     304              : 
     305              :             /* Construct destination pathname. */
     306            7 :             if (segno == 0)
     307            7 :                 snprintf(dstpath, sizeof(dstpath), "%s/%u",
     308              :                          dbspacedirname, relNumber);
     309              :             else
     310            0 :                 snprintf(dstpath, sizeof(dstpath), "%s/%u.%u",
     311              :                          dbspacedirname, relNumber, segno);
     312              : 
     313              :             /* OK, we're ready to perform the actual copy. */
     314            7 :             elog(DEBUG2, "copying %s to %s", srcpath, dstpath);
     315            7 :             copy_file(srcpath, dstpath);
     316              :         }
     317              : 
     318          554 :         FreeDir(dbspace_dir);
     319              : 
     320              :         /*
     321              :          * copy_file() above has already called pg_flush_data() on the files
     322              :          * it created. Now we need to fsync those files, because a checkpoint
     323              :          * won't do it for us while we're in recovery. We do this in a
     324              :          * separate pass to allow the kernel to perform all the flushes
     325              :          * (especially the metadata ones) at once.
     326              :          */
     327          554 :         dbspace_dir = AllocateDir(dbspacedirname);
     328       156422 :         while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
     329              :         {
     330              :             RelFileNumber relNumber;
     331              :             ForkNumber  forkNum;
     332              :             unsigned    segno;
     333              :             char        mainpath[MAXPGPATH];
     334              : 
     335              :             /* Skip anything that doesn't look like a relation data file. */
     336       155868 :             if (!parse_filename_for_nontemp_relation(de->d_name, &relNumber,
     337              :                                                      &forkNum, &segno))
     338       155861 :                 continue;
     339              : 
     340              :             /* Also skip it unless this is the init fork. */
     341       153712 :             if (forkNum != INIT_FORKNUM)
     342       153705 :                 continue;
     343              : 
     344              :             /* Construct main fork pathname. */
     345            7 :             if (segno == 0)
     346            7 :                 snprintf(mainpath, sizeof(mainpath), "%s/%u",
     347              :                          dbspacedirname, relNumber);
     348              :             else
     349            0 :                 snprintf(mainpath, sizeof(mainpath), "%s/%u.%u",
     350              :                          dbspacedirname, relNumber, segno);
     351              : 
     352            7 :             fsync_fname(mainpath, false);
     353              :         }
     354              : 
     355          554 :         FreeDir(dbspace_dir);
     356              : 
     357              :         /*
     358              :          * Lastly, fsync the database directory itself, ensuring the
     359              :          * filesystem remembers the file creations and deletions we've done.
     360              :          * We don't bother with this during a call that does only
     361              :          * UNLOGGED_RELATION_CLEANUP, because if recovery crashes before we
     362              :          * get to doing UNLOGGED_RELATION_INIT, we'll redo the cleanup step
     363              :          * too at the next startup attempt.
     364              :          */
     365          554 :         fsync_fname(dbspacedirname, true);
     366              :     }
     367              : }
     368              : 
     369              : /*
     370              :  * Basic parsing of putative relation filenames.
     371              :  *
     372              :  * This function returns true if the file appears to be in the correct format
     373              :  * for a non-temporary relation and false otherwise.
     374              :  *
     375              :  * If it returns true, it sets *relnumber, *fork, and *segno to the values
     376              :  * extracted from the filename. If it returns false, these values are set to
     377              :  * InvalidRelFileNumber, InvalidForkNumber, and 0, respectively.
     378              :  */
     379              : bool
     380       864701 : parse_filename_for_nontemp_relation(const char *name, RelFileNumber *relnumber,
     381              :                                     ForkNumber *fork, unsigned *segno)
     382              : {
     383              :     unsigned long n,
     384              :                 s;
     385              :     ForkNumber  f;
     386              :     char       *endp;
     387              : 
     388       864701 :     *relnumber = InvalidRelFileNumber;
     389       864701 :     *fork = InvalidForkNumber;
     390       864701 :     *segno = 0;
     391              : 
     392              :     /*
     393              :      * Relation filenames should begin with a digit that is not a zero. By
     394              :      * rejecting cases involving leading zeroes, the caller can assume that
     395              :      * there's only one possible string of characters that could have produced
     396              :      * any given value for *relnumber.
     397              :      *
     398              :      * (To be clear, we don't expect files with names like 0017.3 to exist at
     399              :      * all -- but if 0017.3 does exist, it's a non-relation file, not part of
     400              :      * the main fork for relfilenode 17.)
     401              :      */
     402       864701 :     if (name[0] < '1' || name[0] > '9')
     403        10098 :         return false;
     404              : 
     405              :     /*
     406              :      * Parse the leading digit string. If the value is out of range, we
     407              :      * conclude that this isn't a relation file at all.
     408              :      */
     409       854603 :     errno = 0;
     410       854603 :     n = strtoul(name, &endp, 10);
     411       854603 :     if (errno || name == endp || n <= 0 || n > PG_UINT32_MAX)
     412            0 :         return false;
     413       854603 :     name = endp;
     414              : 
     415              :     /* Check for a fork name. */
     416       854603 :     if (*name != '_')
     417       644327 :         f = MAIN_FORKNUM;
     418              :     else
     419              :     {
     420              :         int         forkchar;
     421              : 
     422       210276 :         forkchar = forkname_chars(name + 1, &f);
     423       210276 :         if (forkchar <= 0)
     424            0 :             return false;
     425       210276 :         name += forkchar + 1;
     426              :     }
     427              : 
     428              :     /* Check for a segment number. */
     429       854603 :     if (*name != '.')
     430       854603 :         s = 0;
     431              :     else
     432              :     {
     433              :         /* Reject leading zeroes, just like we do for RelFileNumber. */
     434            0 :         if (name[1] < '1' || name[1] > '9')
     435            0 :             return false;
     436              : 
     437            0 :         errno = 0;
     438            0 :         s = strtoul(name + 1, &endp, 10);
     439            0 :         if (errno || name + 1 == endp || s <= 0 || s > PG_UINT32_MAX)
     440            0 :             return false;
     441            0 :         name = endp;
     442              :     }
     443              : 
     444              :     /* Now we should be at the end. */
     445       854603 :     if (*name != '\0')
     446            0 :         return false;
     447              : 
     448              :     /* Set out parameters and return. */
     449       854603 :     *relnumber = (RelFileNumber) n;
     450       854603 :     *fork = f;
     451       854603 :     *segno = (unsigned) s;
     452       854603 :     return true;
     453              : }
        

Generated by: LCOV version 2.0-1