LCOV - code coverage report
Current view: top level - src/common - file_utils.c (source / functions)
Test: PostgreSQL 14devel
Date: 2021-01-26 03:06:49

              Hit    Total    Coverage
Lines:         88      124      71.0 %
Functions:      8        8     100.0 %

Legend: Lines: hit | not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * File-processing utility routines.
       4             :  *
       5             :  * Assorted utility functions to work on files.
       6             :  *
       7             :  *
       8             :  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
       9             :  * Portions Copyright (c) 1994, Regents of the University of California
      10             :  *
      11             :  * src/common/file_utils.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : 
      16             : #ifndef FRONTEND
      17             : #include "postgres.h"
      18             : #else
      19             : #include "postgres_fe.h"
      20             : #endif
      21             : 
      22             : #include <dirent.h>
      23             : #include <fcntl.h>
      24             : #include <sys/stat.h>
      25             : #include <unistd.h>
      26             : 
      27             : #include "common/file_utils.h"
      28             : #ifdef FRONTEND
      29             : #include "common/logging.h"
      30             : #endif
      31             : 
      32             : #ifdef FRONTEND
      33             : 
      34             : /* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */
      35             : #if defined(HAVE_SYNC_FILE_RANGE)
      36             : #define PG_FLUSH_DATA_WORKS 1
      37             : #elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
      38             : #define PG_FLUSH_DATA_WORKS 1
      39             : #endif
      40             : 
      41             : /*
      42             :  * pg_xlog has been renamed to pg_wal in version 10.
      43             :  */
      44             : #define MINIMUM_VERSION_FOR_PG_WAL  100000
      45             : 
      46             : #ifdef PG_FLUSH_DATA_WORKS
      47             : static int  pre_sync_fname(const char *fname, bool isdir);
      48             : #endif
      49             : static void walkdir(const char *path,
      50             :                     int (*action) (const char *fname, bool isdir),
      51             :                     bool process_symlinks);
      52             : 
      53             : /*
      54             :  * Issue fsync recursively on PGDATA and all its contents.
      55             :  *
      56             :  * We fsync regular files and directories wherever they are, but we follow
      57             :  * symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc.
      58             :  * Other symlinks are presumed to point at files we're not responsible for
      59             :  * fsyncing, and might not have privileges to write at all.
      60             :  *
      61             :  * serverVersion indicates the version of the server to be fsync'd.
      62             :  */
      63             : void
      64          36 : fsync_pgdata(const char *pg_data,
      65             :              int serverVersion)
      66             : {
      67             :     bool        xlog_is_symlink;
      68             :     char        pg_wal[MAXPGPATH];
      69             :     char        pg_tblspc[MAXPGPATH];
      70             : 
      71             :     /* handle renaming of pg_xlog to pg_wal in v10 and later clusters */
      72          36 :     snprintf(pg_wal, MAXPGPATH, "%s/%s", pg_data,
      73             :              serverVersion < MINIMUM_VERSION_FOR_PG_WAL ? "pg_xlog" : "pg_wal");
      74          36 :     snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data);
      75             : 
      76             :     /*
      77             :      * If pg_wal is a symlink, we'll need to recurse into it separately,
      78             :      * because the first walkdir below will ignore it.
      79             :      */
      80          36 :     xlog_is_symlink = false;
      81             : 
      82             : #ifndef WIN32
      83             :     {
      84             :         struct stat st;
      85             : 
      86          36 :         if (lstat(pg_wal, &st) < 0)
      87           0 :             pg_log_error("could not stat file \"%s\": %m", pg_wal);
      88          36 :         else if (S_ISLNK(st.st_mode))
      89           4 :             xlog_is_symlink = true;
      90             :     }
      91             : #else
      92             :     if (pgwin32_is_junction(pg_wal))
      93             :         xlog_is_symlink = true;
      94             : #endif
      95             : 
      96             :     /*
      97             :      * If possible, hint to the kernel that we're soon going to fsync the data
      98             :      * directory and its contents.
      99             :      */
     100             : #ifdef PG_FLUSH_DATA_WORKS
     101          36 :     walkdir(pg_data, pre_sync_fname, false);
     102          36 :     if (xlog_is_symlink)
     103           4 :         walkdir(pg_wal, pre_sync_fname, false);
     104          36 :     walkdir(pg_tblspc, pre_sync_fname, true);
     105             : #endif
     106             : 
     107             :     /*
     108             :      * Now we do the fsync()s in the same order.
     109             :      *
     110             :      * The main call ignores symlinks, so in addition to specially processing
     111             :      * pg_wal if it's a symlink, pg_tblspc has to be visited separately with
     112             :      * process_symlinks = true.  Note that if there are any plain directories
     113             :      * in pg_tblspc, they'll get fsync'd twice.  That's not an expected case
     114             :      * so we don't worry about optimizing it.
     115             :      */
     116          36 :     walkdir(pg_data, fsync_fname, false);
     117          36 :     if (xlog_is_symlink)
     118           4 :         walkdir(pg_wal, fsync_fname, false);
     119          36 :     walkdir(pg_tblspc, fsync_fname, true);
     120          36 : }
     121             : 
     122             : /*
     123             :  * Issue fsync recursively on the given directory and all its contents.
     124             :  *
     125             :  * This is a convenient wrapper on top of walkdir().
     126             :  */
     127             : void
     128          12 : fsync_dir_recurse(const char *dir)
     129             : {
     130             :     /*
     131             :      * If possible, hint to the kernel that we're soon going to fsync the
     132             :      * given directory and its contents.
     133             :      */
     134             : #ifdef PG_FLUSH_DATA_WORKS
     135          12 :     walkdir(dir, pre_sync_fname, false);
     136             : #endif
     137             : 
     138          12 :     walkdir(dir, fsync_fname, false);
     139          12 : }
     140             : 
     141             : /*
     142             :  * walkdir: recursively walk a directory, applying the action to each
     143             :  * regular file and directory (including the named directory itself).
     144             :  *
     145             :  * If process_symlinks is true, the action and recursion are also applied
     146             :  * to regular files and directories that are pointed to by symlinks in the
     147             :  * given directory; otherwise symlinks are ignored.  Symlinks are always
     148             :  * ignored in subdirectories, ie we intentionally don't pass down the
     149             :  * process_symlinks flag to recursive calls.
     150             :  *
     151             :  * Errors are reported but not considered fatal.
     152             :  *
     153             :  * See also walkdir in fd.c, which is a backend version of this logic.
     154             :  */
     155             : static void
     156        2008 : walkdir(const char *path,
     157             :         int (*action) (const char *fname, bool isdir),
     158             :         bool process_symlinks)
     159             : {
     160             :     DIR        *dir;
     161             :     struct dirent *de;
     162             : 
     163        2008 :     dir = opendir(path);
     164        2008 :     if (dir == NULL)
     165             :     {
     166           0 :         pg_log_error("could not open directory \"%s\": %m", path);
     167           0 :         return;
     168             :     }
     169             : 
     170       86504 :     while (errno = 0, (de = readdir(dir)) != NULL)
     171             :     {
     172             :         char        subpath[MAXPGPATH * 2];
     173             : 
     174       84496 :         if (strcmp(de->d_name, ".") == 0 ||
     175       82488 :             strcmp(de->d_name, "..") == 0)
     176        4016 :             continue;
     177             : 
     178       80480 :         snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
     179             : 
     180       80480 :         switch (get_dirent_type(subpath, de, process_symlinks, PG_LOG_ERROR))
     181             :         {
     182       78632 :             case PGFILETYPE_REG:
     183       78632 :                 (*action) (subpath, false);
     184       78632 :                 break;
     185        1832 :             case PGFILETYPE_DIR:
     186        1832 :                 walkdir(subpath, action, false);
     187        1832 :                 break;
     188          16 :             default:
     189             : 
     190             :                 /*
     191             :                  * Errors are already reported directly by get_dirent_type(),
     192             :                  * and any remaining symlinks and unknown file types are
     193             :                  * ignored.
     194             :                  */
     195          16 :                 break;
     196             :         }
     197             :     }
     198             : 
     199        2008 :     if (errno)
     200           0 :         pg_log_error("could not read directory \"%s\": %m", path);
     201             : 
     202        2008 :     (void) closedir(dir);
     203             : 
     204             :     /*
     205             :      * It's important to fsync the destination directory itself as individual
     206             :      * file fsyncs don't guarantee that the directory entry for the file is
     207             :      * synced.  Recent versions of ext4 have made the window much wider but
     208             :      * it's been an issue for ext3 and other filesystems in the past.
     209             :      */
     210        2008 :     (*action) (path, true);
     211             : }
     212             : 
     213             : /*
     214             :  * Hint to the OS that it should get ready to fsync() this file.
     215             :  *
     216             :  * Ignores errors trying to open unreadable files, and reports other errors
     217             :  * non-fatally.
     218             :  */
     219             : #ifdef PG_FLUSH_DATA_WORKS
     220             : 
     221             : static int
     222       40320 : pre_sync_fname(const char *fname, bool isdir)
     223             : {
     224             :     int         fd;
     225             : 
     226       40320 :     fd = open(fname, O_RDONLY | PG_BINARY, 0);
     227             : 
     228       40320 :     if (fd < 0)
     229             :     {
     230           0 :         if (errno == EACCES || (isdir && errno == EISDIR))
     231           0 :             return 0;
     232           0 :         pg_log_error("could not open file \"%s\": %m", fname);
     233           0 :         return -1;
     234             :     }
     235             : 
     236             :     /*
     237             :      * We do what pg_flush_data() would do in the backend: prefer to use
     238             :      * sync_file_range, but fall back to posix_fadvise.  We ignore errors
     239             :      * because this is only a hint.
     240             :      */
     241             : #if defined(HAVE_SYNC_FILE_RANGE)
     242       40320 :     (void) sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE);
     243             : #elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
     244             :     (void) posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
     245             : #else
     246             : #error PG_FLUSH_DATA_WORKS should not have been defined
     247             : #endif
     248             : 
     249       40320 :     (void) close(fd);
     250       40320 :     return 0;
     251             : }
     252             : 
     253             : #endif                          /* PG_FLUSH_DATA_WORKS */
     254             : 
     255             : /*
     256             :  * fsync_fname -- Try to fsync a file or directory
     257             :  *
     258             :  * Ignores errors trying to open unreadable files, or trying to fsync
     259             :  * directories on systems where that isn't allowed/required.  Other open
     260             :  * failures are logged and return -1; any other fsync failure is fatal.
     261             :  */
     262             : int
     263       40764 : fsync_fname(const char *fname, bool isdir)
     264             : {
     265             :     int         fd;
     266             :     int         flags;
     267             :     int         returncode;
     268             : 
     269             :     /*
     270             :      * Some OSs require directories to be opened read-only whereas other
     271             :      * systems don't allow us to fsync files opened read-only; so we need both
     272             :      * cases here.  Using O_RDWR will cause us to fail to fsync files that are
     273             :      * not writable by our userid, but we assume that's OK.
     274             :      */
     275       40764 :     flags = PG_BINARY;
     276       40764 :     if (!isdir)
     277       39616 :         flags |= O_RDWR;
     278             :     else
     279        1148 :         flags |= O_RDONLY;
     280             : 
     281             :     /*
     282             :      * Open the file, silently ignoring errors about unreadable files (or
     283             :      * unsupported operations, e.g. opening a directory under Windows), and
     284             :      * logging others.
     285             :      */
     286       40764 :     fd = open(fname, flags, 0);
     287       40764 :     if (fd < 0)
     288             :     {
     289           0 :         if (errno == EACCES || (isdir && errno == EISDIR))
     290           0 :             return 0;
     291           0 :         pg_log_error("could not open file \"%s\": %m", fname);
     292           0 :         return -1;
     293             :     }
     294             : 
     295       40764 :     returncode = fsync(fd);
     296             : 
     297             :     /*
     298             :      * Some OSes don't allow us to fsync directories at all, so we can ignore
     299             :      * those errors. Anything else needs to be reported.
     300             :      */
     301       40764 :     if (returncode != 0 && !(isdir && (errno == EBADF || errno == EINVAL)))
     302             :     {
     303           0 :         pg_log_fatal("could not fsync file \"%s\": %m", fname);
     304           0 :         (void) close(fd);
     305           0 :         exit(EXIT_FAILURE);
     306             :     }
     307             : 
     308       40764 :     (void) close(fd);
     309       40764 :     return 0;
     310             : }
     311             : 
     312             : /*
     313             :  * fsync_parent_path -- fsync the parent path of a file or directory
     314             :  *
     315             :  * This is aimed at making file operations persistent on disk in case of
     316             :  * an OS crash or power failure.
     317             :  */
     318             : int
     319         142 : fsync_parent_path(const char *fname)
     320             : {
     321             :     char        parentpath[MAXPGPATH];
     322             : 
     323         142 :     strlcpy(parentpath, fname, MAXPGPATH);
     324         142 :     get_parent_directory(parentpath);
     325             : 
     326             :     /*
     327             :      * get_parent_directory() returns an empty string if the input argument is
     328             :      * just a file name (see comments in path.c), so handle that as being the
     329             :      * current directory.
     330             :      */
     331         142 :     if (strlen(parentpath) == 0)
     332           0 :         strlcpy(parentpath, ".", MAXPGPATH);
     333             : 
     334         142 :     if (fsync_fname(parentpath, true) != 0)
     335           0 :         return -1;
     336             : 
     337         142 :     return 0;
     338             : }
     339             : 
     340             : /*
     341             :  * durable_rename -- rename(2) wrapper, issuing fsyncs required for durability
     342             :  *
     343             :  * Wrapper around rename, similar to the backend version.
     344             :  */
     345             : int
     346         138 : durable_rename(const char *oldfile, const char *newfile)
     347             : {
     348             :     int         fd;
     349             : 
     350             :     /*
     351             :      * First fsync the old and target path (if it exists), to ensure that they
     352             :      * are properly persistent on disk. Syncing the target file is not
     353             :      * strictly necessary, but it makes it easier to reason about crashes;
     354             :      * because it's then guaranteed that either source or target file exists
     355             :      * after a crash.
     356             :      */
     357         138 :     if (fsync_fname(oldfile, false) != 0)
     358           0 :         return -1;
     359             : 
     360         138 :     fd = open(newfile, PG_BINARY | O_RDWR, 0);
     361         138 :     if (fd < 0)
     362             :     {
     363         138 :         if (errno != ENOENT)
     364             :         {
     365           0 :             pg_log_error("could not open file \"%s\": %m", newfile);
     366           0 :             return -1;
     367             :         }
     368             :     }
     369             :     else
     370             :     {
     371           0 :         if (fsync(fd) != 0)
     372             :         {
     373           0 :             pg_log_fatal("could not fsync file \"%s\": %m", newfile);
     374           0 :             close(fd);
     375           0 :             exit(EXIT_FAILURE);
     376             :         }
     377           0 :         close(fd);
     378             :     }
     379             : 
     380             :     /* Time to do the real deal... */
     381         138 :     if (rename(oldfile, newfile) != 0)
     382             :     {
     383           0 :         pg_log_error("could not rename file \"%s\" to \"%s\": %m",
     384             :                      oldfile, newfile);
     385           0 :         return -1;
     386             :     }
     387             : 
     388             :     /*
     389             :      * To guarantee renaming the file is persistent, fsync the file with its
     390             :      * new name, and its containing directory.
     391             :      */
     392         138 :     if (fsync_fname(newfile, false) != 0)
     393           0 :         return -1;
     394             : 
     395         138 :     if (fsync_parent_path(newfile) != 0)
     396           0 :         return -1;
     397             : 
     398         138 :     return 0;
     399             : }
     400             : 
     401             : #endif                          /* FRONTEND */
     402             : 
     403             : /*
     404             :  * Return the type of a directory entry.
     405             :  *
     406             :  * In frontend code, elevel should be a level from logging.h; in backend code
     407             :  * it should be a level from elog.h.
     408             :  */
     409             : PGFileType
     410       85750 : get_dirent_type(const char *path,
     411             :                 const struct dirent *de,
     412             :                 bool look_through_symlinks,
     413             :                 int elevel)
     414             : {
     415             :     PGFileType  result;
     416             : 
     417             :     /*
     418             :      * Some systems tell us the type directly in the dirent struct, but that's
     419             :      * a BSD and Linux extension not required by POSIX.  Even when the
     420             :      * interface is present, sometimes the type is unknown, depending on the
     421             :      * filesystem.
     422             :      */
     423             : #if defined(DT_REG) && defined(DT_DIR) && defined(DT_LNK)
     424       85750 :     if (de->d_type == DT_REG)
     425       83802 :         result = PGFILETYPE_REG;
     426        1948 :     else if (de->d_type == DT_DIR)
     427        1924 :         result = PGFILETYPE_DIR;
     428          24 :     else if (de->d_type == DT_LNK && !look_through_symlinks)
     429          16 :         result = PGFILETYPE_LNK;
     430             :     else
     431           8 :         result = PGFILETYPE_UNKNOWN;
     432             : #else
     433             :     result = PGFILETYPE_UNKNOWN;
     434             : #endif
     435             : 
     436       85750 :     if (result == PGFILETYPE_UNKNOWN)
     437             :     {
     438             :         struct stat fst;
     439             :         int         sret;
     440             : 
     441             : 
     442           8 :         if (look_through_symlinks)
     443           8 :             sret = stat(path, &fst);
     444             :         else
     445           0 :             sret = lstat(path, &fst);
     446             : 
     447           8 :         if (sret < 0)
     448             :         {
     449           0 :             result = PGFILETYPE_ERROR;
     450             : #ifdef FRONTEND
     451           0 :             pg_log_generic(elevel, "could not stat file \"%s\": %m", path);
     452             : #else
     453           0 :             ereport(elevel,
     454             :                     (errcode_for_file_access(),
     455             :                      errmsg("could not stat file \"%s\": %m", path)));
     456             : #endif
     457             :         }
     458           8 :         else if (S_ISREG(fst.st_mode))
     459           0 :             result = PGFILETYPE_REG;
     460           8 :         else if (S_ISDIR(fst.st_mode))
     461           8 :             result = PGFILETYPE_DIR;
     462             : #ifdef S_ISLNK
     463           0 :         else if (S_ISLNK(fst.st_mode))
     464           0 :             result = PGFILETYPE_LNK;
     465             : #endif
     466             :     }
     467             : 
     468       85750 :     return result;
     469             : }

Generated by: LCOV version 1.13