LCOV - code coverage report
Current view: top level - src/backend/storage/file - reinit.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 106 115 92.2 %
Date: 2019-09-19 02:07:14 Functions: 4 4 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * reinit.c
       4             :  *    Reinitialization of unlogged relations
       5             :  *
       6             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/storage/file/reinit.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : 
      15             : #include "postgres.h"
      16             : 
      17             : #include <unistd.h>
      18             : 
      19             : #include "common/relpath.h"
      20             : #include "storage/copydir.h"
      21             : #include "storage/fd.h"
      22             : #include "storage/reinit.h"
      23             : #include "utils/hsearch.h"
      24             : #include "utils/memutils.h"
      25             : 
      26             : static void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname,
      27             :                                                   int op);
      28             : static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname,
      29             :                                                int op);
      30             : 
      31             : typedef struct
      32             : {
      33             :     char        oid[OIDCHARS + 1];
      34             : } unlogged_relation_entry;
      35             : 
      36             : /*
      37             :  * Reset unlogged relations from before the last restart.
      38             :  *
      39             :  * If op includes UNLOGGED_RELATION_CLEANUP, we remove all forks of any
      40             :  * relation with an "init" fork, except for the "init" fork itself.
      41             :  *
      42             :  * If op includes UNLOGGED_RELATION_INIT, we copy the "init" fork to the main
      43             :  * fork.
      44             :  */
      45             : void
      46         166 : ResetUnloggedRelations(int op)
      47             : {
      48             :     char        temp_path[MAXPGPATH + 10 + sizeof(TABLESPACE_VERSION_DIRECTORY)];
      49             :     DIR        *spc_dir;
      50             :     struct dirent *spc_de;
      51             :     MemoryContext tmpctx,
      52             :                 oldctx;
      53             : 
      54             :     /* Log it. */
      55         166 :     elog(DEBUG1, "resetting unlogged relations: cleanup %d init %d",
      56             :          (op & UNLOGGED_RELATION_CLEANUP) != 0,
      57             :          (op & UNLOGGED_RELATION_INIT) != 0);
      58             : 
      59             :     /*
      60             :      * Just to be sure we don't leak any memory, let's create a temporary
      61             :      * memory context for this operation.
      62             :      */
      63         166 :     tmpctx = AllocSetContextCreate(CurrentMemoryContext,
      64             :                                    "ResetUnloggedRelations",
      65             :                                    ALLOCSET_DEFAULT_SIZES);
      66         166 :     oldctx = MemoryContextSwitchTo(tmpctx);
      67             : 
      68             :     /*
      69             :      * First process unlogged files in pg_default ($PGDATA/base)
      70             :      */
      71         166 :     ResetUnloggedRelationsInTablespaceDir("base", op);
      72             : 
      73             :     /*
      74             :      * Cycle through directories for all non-default tablespaces.
      75             :      */
      76         166 :     spc_dir = AllocateDir("pg_tblspc");
      77             : 
      78         668 :     while ((spc_de = ReadDir(spc_dir, "pg_tblspc")) != NULL)
      79             :     {
      80         506 :         if (strcmp(spc_de->d_name, ".") == 0 ||
      81         170 :             strcmp(spc_de->d_name, "..") == 0)
      82         332 :             continue;
      83             : 
      84           4 :         snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s",
      85           4 :                  spc_de->d_name, TABLESPACE_VERSION_DIRECTORY);
      86           4 :         ResetUnloggedRelationsInTablespaceDir(temp_path, op);
      87             :     }
      88             : 
      89         166 :     FreeDir(spc_dir);
      90             : 
      91             :     /*
      92             :      * Restore memory context.
      93             :      */
      94         166 :     MemoryContextSwitchTo(oldctx);
      95         166 :     MemoryContextDelete(tmpctx);
      96         166 : }
      97             : 
      98             : /*
      99             :  * Process one tablespace directory for ResetUnloggedRelations
     100             :  */
     101             : static void
     102         170 : ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
     103             : {
     104             :     DIR        *ts_dir;
     105             :     struct dirent *de;
     106             :     char        dbspace_path[MAXPGPATH * 2];
     107             : 
     108         170 :     ts_dir = AllocateDir(tsdirname);
     109             : 
     110             :     /*
     111             :      * If we get ENOENT on a tablespace directory, log it and return.  This
     112             :      * can happen if a previous DROP TABLESPACE crashed between removing the
     113             :      * tablespace directory and removing the symlink in pg_tblspc.  We don't
     114             :      * really want to prevent database startup in that scenario, so let it
     115             :      * pass instead.  Any other type of error will be reported by ReadDir
     116             :      * (causing a startup failure).
     117             :      */
     118         170 :     if (ts_dir == NULL && errno == ENOENT)
     119             :     {
     120           0 :         ereport(LOG,
     121             :                 (errcode_for_file_access(),
     122             :                  errmsg("could not open directory \"%s\": %m",
     123             :                         tsdirname)));
     124           0 :         return;
     125             :     }
     126             : 
     127        1196 :     while ((de = ReadDir(ts_dir, tsdirname)) != NULL)
     128             :     {
     129             :         /*
     130             :          * We're only interested in the per-database directories, which have
     131             :          * numeric names.  Note that this code will also (properly) ignore "."
     132             :          * and "..".
     133             :          */
     134         856 :         if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
     135         340 :             continue;
     136             : 
     137         516 :         snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
     138         516 :                  tsdirname, de->d_name);
     139         516 :         ResetUnloggedRelationsInDbspaceDir(dbspace_path, op);
     140             :     }
     141             : 
     142         170 :     FreeDir(ts_dir);
     143             : }
     144             : 
     145             : /*
     146             :  * Process one per-dbspace directory for ResetUnloggedRelations
     147             :  */
     148             : static void
     149         516 : ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
     150             : {
     151             :     DIR        *dbspace_dir;
     152             :     struct dirent *de;
     153             :     char        rm_path[MAXPGPATH * 2];
     154             : 
     155             :     /* Caller must specify at least one operation. */
     156             :     Assert((op & (UNLOGGED_RELATION_CLEANUP | UNLOGGED_RELATION_INIT)) != 0);
     157             : 
     158             :     /*
     159             :      * Cleanup is a two-pass operation.  First, we go through and identify all
     160             :      * the files with init forks.  Then, we go through again and nuke
     161             :      * everything with the same OID except the init fork.
     162             :      */
     163         516 :     if ((op & UNLOGGED_RELATION_CLEANUP) != 0)
     164             :     {
     165             :         HTAB       *hash;
     166             :         HASHCTL     ctl;
     167             : 
     168             :         /*
     169             :          * It's possible that someone could create a ton of unlogged relations
     170             :          * in the same database & tablespace, so we'd better use a hash table
     171             :          * rather than an array or linked list to keep track of which files
     172             :          * need to be reset.  Otherwise, this cleanup operation would be
     173             :          * O(n^2).
     174             :          */
     175         290 :         memset(&ctl, 0, sizeof(ctl));
     176         290 :         ctl.keysize = sizeof(unlogged_relation_entry);
     177         290 :         ctl.entrysize = sizeof(unlogged_relation_entry);
     178         290 :         hash = hash_create("unlogged hash", 32, &ctl, HASH_ELEM);
     179             : 
     180             :         /* Scan the directory. */
     181         290 :         dbspace_dir = AllocateDir(dbspacedirname);
     182       88324 :         while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
     183             :         {
     184             :             ForkNumber  forkNum;
     185             :             int         oidchars;
     186             :             unlogged_relation_entry ent;
     187             : 
     188             :             /* Skip anything that doesn't look like a relation data file. */
     189       87744 :             if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
     190             :                                                      &forkNum))
     191       88896 :                 continue;
     192             : 
     193             :             /* Also skip it unless this is the init fork. */
     194       86588 :             if (forkNum != INIT_FORKNUM)
     195       86584 :                 continue;
     196             : 
     197             :             /*
     198             :              * Put the OID portion of the name into the hash table, if it
     199             :              * isn't already.
     200             :              */
     201           4 :             memset(ent.oid, 0, sizeof(ent.oid));
     202           4 :             memcpy(ent.oid, de->d_name, oidchars);
     203           4 :             hash_search(hash, &ent, HASH_ENTER, NULL);
     204             :         }
     205             : 
     206             :         /* Done with the first pass. */
     207         290 :         FreeDir(dbspace_dir);
     208             : 
     209             :         /*
     210             :          * If we didn't find any init forks, there's no point in continuing;
     211             :          * we can bail out now.
     212             :          */
     213         290 :         if (hash_get_num_entries(hash) == 0)
     214             :         {
     215         286 :             hash_destroy(hash);
     216         286 :             return;
     217             :         }
     218             : 
     219             :         /*
     220             :          * Now, make a second pass and remove anything that matches.
     221             :          */
     222           4 :         dbspace_dir = AllocateDir(dbspacedirname);
     223         632 :         while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
     224             :         {
     225             :             ForkNumber  forkNum;
     226             :             int         oidchars;
     227             :             bool        found;
     228             :             unlogged_relation_entry ent;
     229             : 
     230             :             /* Skip anything that doesn't look like a relation data file. */
     231         624 :             if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
     232             :                                                      &forkNum))
     233          28 :                 continue;
     234             : 
     235             :             /* We never remove the init fork. */
     236         612 :             if (forkNum == INIT_FORKNUM)
     237           4 :                 continue;
     238             : 
     239             :             /*
     240             :              * See whether the OID portion of the name shows up in the hash
     241             :              * table.
     242             :              */
     243         608 :             memset(ent.oid, 0, sizeof(ent.oid));
     244         608 :             memcpy(ent.oid, de->d_name, oidchars);
     245         608 :             hash_search(hash, &ent, HASH_FIND, &found);
     246             : 
     247             :             /* If so, nuke it! */
     248         608 :             if (found)
     249             :             {
     250           8 :                 snprintf(rm_path, sizeof(rm_path), "%s/%s",
     251           8 :                          dbspacedirname, de->d_name);
     252           8 :                 if (unlink(rm_path) < 0)
     253           0 :                     ereport(ERROR,
     254             :                             (errcode_for_file_access(),
     255             :                              errmsg("could not remove file \"%s\": %m",
     256             :                                     rm_path)));
     257             :                 else
     258           8 :                     elog(DEBUG2, "unlinked file \"%s\"", rm_path);
     259             :             }
     260             :         }
     261             : 
     262             :         /* Cleanup is complete. */
     263           4 :         FreeDir(dbspace_dir);
     264           4 :         hash_destroy(hash);
     265             :     }
     266             : 
     267             :     /*
     268             :      * Initialization happens after cleanup is complete: we copy each init
     269             :      * fork file to the corresponding main fork file.  Note that if we are
     270             :      * asked to do both cleanup and init, we may never get here: if the
     271             :      * cleanup code determines that there are no init forks in this dbspace,
     272             :      * it will return before we get to this point.
     273             :      */
     274         230 :     if ((op & UNLOGGED_RELATION_INIT) != 0)
     275             :     {
     276             :         /* Scan the directory. */
     277         226 :         dbspace_dir = AllocateDir(dbspacedirname);
     278       68786 :         while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
     279             :         {
     280             :             ForkNumber  forkNum;
     281             :             int         oidchars;
     282             :             char        oidbuf[OIDCHARS + 1];
     283             :             char        srcpath[MAXPGPATH * 2];
     284             :             char        dstpath[MAXPGPATH];
     285             : 
     286             :             /* Skip anything that doesn't look like a relation data file. */
     287       68334 :             if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
     288             :                                                      &forkNum))
     289       69252 :                 continue;
     290             : 
     291             :             /* Also skip it unless this is the init fork. */
     292       67412 :             if (forkNum != INIT_FORKNUM)
     293       67408 :                 continue;
     294             : 
     295             :             /* Construct source pathname. */
     296           4 :             snprintf(srcpath, sizeof(srcpath), "%s/%s",
     297           4 :                      dbspacedirname, de->d_name);
     298             : 
     299             :             /* Construct destination pathname. */
     300           4 :             memcpy(oidbuf, de->d_name, oidchars);
     301           4 :             oidbuf[oidchars] = '\0';
     302           4 :             snprintf(dstpath, sizeof(dstpath), "%s/%s%s",
     303           8 :                      dbspacedirname, oidbuf, de->d_name + oidchars + 1 +
     304           4 :                      strlen(forkNames[INIT_FORKNUM]));
     305             : 
     306             :             /* OK, we're ready to perform the actual copy. */
     307           4 :             elog(DEBUG2, "copying %s to %s", srcpath, dstpath);
     308           4 :             copy_file(srcpath, dstpath);
     309             :         }
     310             : 
     311         226 :         FreeDir(dbspace_dir);
     312             : 
     313             :         /*
     314             :          * copy_file() above has already called pg_flush_data() on the files
     315             :          * it created. Now we need to fsync those files, because a checkpoint
     316             :          * won't do it for us while we're in recovery. We do this in a
     317             :          * separate pass to allow the kernel to perform all the flushes
     318             :          * (especially the metadata ones) at once.
     319             :          */
     320         226 :         dbspace_dir = AllocateDir(dbspacedirname);
     321       68790 :         while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
     322             :         {
     323             :             ForkNumber  forkNum;
     324             :             int         oidchars;
     325             :             char        oidbuf[OIDCHARS + 1];
     326             :             char        mainpath[MAXPGPATH];
     327             : 
     328             :             /* Skip anything that doesn't look like a relation data file. */
     329       68338 :             if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
     330             :                                                      &forkNum))
     331       69256 :                 continue;
     332             : 
     333             :             /* Also skip it unless this is the init fork. */
     334       67416 :             if (forkNum != INIT_FORKNUM)
     335       67412 :                 continue;
     336             : 
     337             :             /* Construct main fork pathname. */
     338           4 :             memcpy(oidbuf, de->d_name, oidchars);
     339           4 :             oidbuf[oidchars] = '\0';
     340           4 :             snprintf(mainpath, sizeof(mainpath), "%s/%s%s",
     341           8 :                      dbspacedirname, oidbuf, de->d_name + oidchars + 1 +
     342           4 :                      strlen(forkNames[INIT_FORKNUM]));
     343             : 
     344           4 :             fsync_fname(mainpath, false);
     345             :         }
     346             : 
     347         226 :         FreeDir(dbspace_dir);
     348             : 
     349             :         /*
     350             :          * Lastly, fsync the database directory itself, ensuring the
     351             :          * filesystem remembers the file creations and deletions we've done.
     352             :          * We don't bother with this during a call that does only
     353             :          * UNLOGGED_RELATION_CLEANUP, because if recovery crashes before we
     354             :          * get to doing UNLOGGED_RELATION_INIT, we'll redo the cleanup step
     355             :          * too at the next startup attempt.
     356             :          */
     357         226 :         fsync_fname(dbspacedirname, true);
     358             :     }
     359             : }
     360             : 
     361             : /*
     362             :  * Basic parsing of putative relation filenames.
     363             :  *
     364             :  * This function returns true if the file appears to be in the correct format
     365             :  * for a non-temporary relation and false otherwise.
     366             :  *
     367             :  * NB: If this function returns true, the caller is entitled to assume that
     368             :  * *oidchars has been set to a value no more than OIDCHARS, and thus
     369             :  * that a buffer of OIDCHARS+1 characters is sufficient to hold the OID
     370             :  * portion of the filename.  This is critical to protect against a possible
     371             :  * buffer overrun.
     372             :  */
     373             : bool
     374      309802 : parse_filename_for_nontemp_relation(const char *name, int *oidchars,
     375             :                                     ForkNumber *fork)
     376             : {
     377             :     int         pos;
     378             : 
     379             :     /* Look for a non-empty string of digits (that isn't too long). */
     380      309802 :     for (pos = 0; isdigit((unsigned char) name[pos]); ++pos)
     381             :         ;
     382      309802 :     if (pos == 0 || pos > OIDCHARS)
     383        3604 :         return false;
     384      306198 :     *oidchars = pos;
     385             : 
     386             :     /* Check for a fork name. */
     387      306198 :     if (name[pos] != '_')
     388      228708 :         *fork = MAIN_FORKNUM;
     389             :     else
     390             :     {
     391             :         int         forkchar;
     392             : 
     393       77490 :         forkchar = forkname_chars(&name[pos + 1], fork);
     394       77490 :         if (forkchar <= 0)
     395           0 :             return false;
     396       77490 :         pos += forkchar + 1;
     397             :     }
     398             : 
     399             :     /* Check for a segment number. */
     400      306198 :     if (name[pos] == '.')
     401             :     {
     402             :         int         segchar;
     403             : 
     404           0 :         for (segchar = 1; isdigit((unsigned char) name[pos + segchar]); ++segchar)
     405             :             ;
     406           0 :         if (segchar <= 1)
     407           0 :             return false;
     408           0 :         pos += segchar;
     409             :     }
     410             : 
     411             :     /* Now we should be at the end. */
     412      306198 :     if (name[pos] != '\0')
     413           0 :         return false;
     414      306198 :     return true;
     415             : }

Generated by: LCOV version 1.13