LCOV - code coverage report
Current view: top level - src/common - file_utils.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13beta1 Lines: 71 102 69.6 %
Date: 2020-06-03 11:07:14 Functions: 7 7 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * File-processing utility routines.
       4             :  *
       5             :  * Assorted utility functions to work on files.
       6             :  *
       7             :  *
       8             :  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
       9             :  * Portions Copyright (c) 1994, Regents of the University of California
      10             :  *
      11             :  * src/common/file_utils.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres_fe.h"
      16             : 
      17             : #include <dirent.h>
      18             : #include <fcntl.h>
      19             : #include <sys/stat.h>
      20             : #include <unistd.h>
      21             : 
      22             : #include "common/file_utils.h"
      23             : #include "common/logging.h"
      24             : 
      25             : 
      26             : /* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */
      27             : #if defined(HAVE_SYNC_FILE_RANGE)
      28             : #define PG_FLUSH_DATA_WORKS 1
      29             : #elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
      30             : #define PG_FLUSH_DATA_WORKS 1
      31             : #endif
      32             : 
      33             : /*
      34             :  * pg_xlog has been renamed to pg_wal in version 10.
      35             :  */
      36             : #define MINIMUM_VERSION_FOR_PG_WAL  100000
      37             : 
      38             : #ifdef PG_FLUSH_DATA_WORKS
      39             : static int  pre_sync_fname(const char *fname, bool isdir);
      40             : #endif
      41             : static void walkdir(const char *path,
      42             :                     int (*action) (const char *fname, bool isdir),
      43             :                     bool process_symlinks);
      44             : 
      45             : /*
      46             :  * Issue fsync recursively on PGDATA and all its contents.
      47             :  *
      48             :  * We fsync regular files and directories wherever they are, but we follow
      49             :  * symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc.
      50             :  * Other symlinks are presumed to point at files we're not responsible for
      51             :  * fsyncing, and might not have privileges to write at all.
      52             :  *
      53             :  * serverVersion indicates the version of the server to be fsync'd.
      54             :  */
      55             : void
      56          34 : fsync_pgdata(const char *pg_data,
      57             :              int serverVersion)
      58             : {
      59             :     bool        xlog_is_symlink;
      60             :     char        pg_wal[MAXPGPATH];
      61             :     char        pg_tblspc[MAXPGPATH];
      62             : 
      63             :     /* handle renaming of pg_xlog to pg_wal in v10 and later clusters */
      64          34 :     snprintf(pg_wal, MAXPGPATH, "%s/%s", pg_data,
      65             :              serverVersion < MINIMUM_VERSION_FOR_PG_WAL ? "pg_xlog" : "pg_wal");
      66          34 :     snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data);
      67             : 
      68             :     /*
      69             :      * If pg_wal is a symlink, we'll need to recurse into it separately,
      70             :      * because the first walkdir below will ignore it.
      71             :      */
      72          34 :     xlog_is_symlink = false;
      73             : 
      74             : #ifndef WIN32
      75             :     {
      76             :         struct stat st;
      77             : 
      78          34 :         if (lstat(pg_wal, &st) < 0)
      79           0 :             pg_log_error("could not stat file \"%s\": %m", pg_wal);
      80          34 :         else if (S_ISLNK(st.st_mode))
      81           4 :             xlog_is_symlink = true;
      82             :     }
      83             : #else
      84             :     if (pgwin32_is_junction(pg_wal))
      85             :         xlog_is_symlink = true;
      86             : #endif
      87             : 
      88             :     /*
      89             :      * If possible, hint to the kernel that we're soon going to fsync the data
      90             :      * directory and its contents.
      91             :      */
      92             : #ifdef PG_FLUSH_DATA_WORKS
      93          34 :     walkdir(pg_data, pre_sync_fname, false);
      94          34 :     if (xlog_is_symlink)
      95           4 :         walkdir(pg_wal, pre_sync_fname, false);
      96          34 :     walkdir(pg_tblspc, pre_sync_fname, true);
      97             : #endif
      98             : 
      99             :     /*
     100             :      * Now we do the fsync()s in the same order.
     101             :      *
     102             :      * The main call ignores symlinks, so in addition to specially processing
     103             :      * pg_wal if it's a symlink, pg_tblspc has to be visited separately with
     104             :      * process_symlinks = true.  Note that if there are any plain directories
     105             :      * in pg_tblspc, they'll get fsync'd twice.  That's not an expected case
     106             :      * so we don't worry about optimizing it.
     107             :      */
     108          34 :     walkdir(pg_data, fsync_fname, false);
     109          34 :     if (xlog_is_symlink)
     110           4 :         walkdir(pg_wal, fsync_fname, false);
     111          34 :     walkdir(pg_tblspc, fsync_fname, true);
     112          34 : }
     113             : 
     114             : /*
     115             :  * Issue fsync recursively on the given directory and all its contents.
     116             :  *
     117             :  * This is a convenient wrapper on top of walkdir().
     118             :  */
     119             : void
     120          12 : fsync_dir_recurse(const char *dir)
     121             : {
     122             :     /*
     123             :      * If possible, hint to the kernel that we're soon going to fsync the data
     124             :      * directory and its contents.
     125             :      */
     126             : #ifdef PG_FLUSH_DATA_WORKS
     127          12 :     walkdir(dir, pre_sync_fname, false);
     128             : #endif
     129             : 
     130          12 :     walkdir(dir, fsync_fname, false);
     131          12 : }
     132             : 
     133             : /*
     134             :  * walkdir: recursively walk a directory, applying the action to each
     135             :  * regular file and directory (including the named directory itself).
     136             :  *
     137             :  * If process_symlinks is true, the action and recursion are also applied
     138             :  * to regular files and directories that are pointed to by symlinks in the
     139             :  * given directory; otherwise symlinks are ignored.  Symlinks are always
     140             :  * ignored in subdirectories, ie we intentionally don't pass down the
     141             :  * process_symlinks flag to recursive calls.
     142             :  *
     143             :  * Errors while walking are reported, not fatal; action results are ignored.
     144             :  *
     145             :  * See also walkdir in fd.c, which is a backend version of this logic.
     146             :  */
     147             : static void
     148        1900 : walkdir(const char *path,
     149             :         int (*action) (const char *fname, bool isdir),
     150             :         bool process_symlinks)
     151             : {
     152             :     DIR        *dir;
     153             :     struct dirent *de;
     154             : 
     155        1900 :     dir = opendir(path);
     156        1900 :     if (dir == NULL)
     157             :     {
     158           0 :         pg_log_error("could not open directory \"%s\": %m", path);
     159           0 :         return;
     160             :     }
     161             : 
     162       81968 :     while (errno = 0, (de = readdir(dir)) != NULL)
     163             :     {
     164             :         char        subpath[MAXPGPATH * 2];
     165             :         struct stat fst;
     166             :         int         sret;
     167             : 
     168       80068 :         if (strcmp(de->d_name, ".") == 0 ||
     169       78168 :             strcmp(de->d_name, "..") == 0)
     170        3800 :             continue;
     171             : 
     172       76268 :         snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
     173             : 
     174       76268 :         if (process_symlinks)
     175           8 :             sret = stat(subpath, &fst);
     176             :         else
     177       76260 :             sret = lstat(subpath, &fst);
     178             : 
     179       76268 :         if (sret < 0)
     180             :         {
     181           0 :             pg_log_error("could not stat file \"%s\": %m", subpath);
     182           0 :             continue;
     183             :         }
     184             : 
     185       76268 :         if (S_ISREG(fst.st_mode))
     186       74520 :             (*action) (subpath, false);
     187        1748 :         else if (S_ISDIR(fst.st_mode))
     188        1732 :             walkdir(subpath, action, false);
     189             :     }
     190             : 
     191        1900 :     if (errno)
     192           0 :         pg_log_error("could not read directory \"%s\": %m", path);
     193             : 
     194        1900 :     (void) closedir(dir);
     195             : 
     196             :     /*
     197             :      * It's important to fsync the destination directory itself as individual
     198             :      * file fsyncs don't guarantee that the directory entry for the file is
     199             :      * synced.  Recent versions of ext4 have made the window much wider but
     200             :      * it's been an issue for ext3 and other filesystems in the past.
     201             :      */
     202        1900 :     (*action) (path, true);
     203             : }
     204             : 
     205             : /*
     206             :  * Hint to the OS that it should get ready to fsync() this file.
     207             :  *
     208             :  * Ignores errors trying to open unreadable files, and reports other errors
     209             :  * non-fatally.
     210             :  */
     211             : #ifdef PG_FLUSH_DATA_WORKS
     212             : 
     213             : static int
     214       38210 : pre_sync_fname(const char *fname, bool isdir)
     215             : {
     216             :     int         fd;
     217             : 
     218       38210 :     fd = open(fname, O_RDONLY | PG_BINARY, 0);
     219             : 
     220       38210 :     if (fd < 0)
     221             :     {
     222           0 :         if (errno == EACCES || (isdir && errno == EISDIR))
     223           0 :             return 0;
     224           0 :         pg_log_error("could not open file \"%s\": %m", fname);
     225           0 :         return -1;
     226             :     }
     227             : 
     228             :     /*
     229             :      * We do what pg_flush_data() would do in the backend: prefer to use
     230             :      * sync_file_range, but fall back to posix_fadvise.  We ignore errors
     231             :      * because this is only a hint.
     232             :      */
     233             : #if defined(HAVE_SYNC_FILE_RANGE)
     234       38210 :     (void) sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE);
     235             : #elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
     236             :     (void) posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
     237             : #else
     238             : #error PG_FLUSH_DATA_WORKS should not have been defined
     239             : #endif
     240             : 
     241       38210 :     (void) close(fd);
     242       38210 :     return 0;
     243             : }
     244             : 
     245             : #endif                          /* PG_FLUSH_DATA_WORKS */
     246             : 
     247             : /*
     248             :  * fsync_fname -- Try to fsync a file or directory
     249             :  *
     250             :  * Ignores errors trying to open unreadable files, or trying to fsync
     251             :  * directories on systems where that isn't allowed/required.  Other open
     252             :  * errors are logged and return -1; other fsync errors are fatal.
     253             :  */
     254             : int
     255       38624 : fsync_fname(const char *fname, bool isdir)
     256             : {
     257             :     int         fd;
     258             :     int         flags;
     259             :     int         returncode;
     260             : 
     261             :     /*
     262             :      * Some OSs require directories to be opened read-only whereas other
     263             :      * systems don't allow us to fsync files opened read-only; so we need both
     264             :      * cases here.  Using O_RDWR will cause us to fail to fsync files that are
     265             :      * not writable by our userid, but we assume that's OK.
     266             :      */
     267       38624 :     flags = PG_BINARY;
     268       38624 :     if (!isdir)
     269       37540 :         flags |= O_RDWR;
     270             :     else
     271        1084 :         flags |= O_RDONLY;
     272             : 
     273             :     /*
     274             :      * Open the file, silently ignoring errors about unreadable files (or
     275             :      * unsupported operations, e.g. opening a directory under Windows), and
     276             :      * logging others.
     277             :      */
     278       38624 :     fd = open(fname, flags, 0);
     279       38624 :     if (fd < 0)
     280             :     {
     281           0 :         if (errno == EACCES || (isdir && errno == EISDIR))
     282           0 :             return 0;
     283           0 :         pg_log_error("could not open file \"%s\": %m", fname);
     284           0 :         return -1;
     285             :     }
     286             : 
     287       38624 :     returncode = fsync(fd);
     288             : 
     289             :     /*
     290             :      * Some OSes don't allow us to fsync directories at all, so we can ignore
     291             :      * those errors. Anything else needs to be reported.
     292             :      */
     293       38624 :     if (returncode != 0 && !(isdir && (errno == EBADF || errno == EINVAL)))
     294             :     {
     295           0 :         pg_log_fatal("could not fsync file \"%s\": %m", fname);
     296           0 :         (void) close(fd);
     297           0 :         exit(EXIT_FAILURE);
     298             :     }
     299             : 
     300       38624 :     (void) close(fd);
     301       38624 :     return 0;
     302             : }
     303             : 
     304             : /*
     305             :  * fsync_parent_path -- fsync the parent path of a file or directory
     306             :  *
     307             :  * This is aimed at making file operations persistent on disk in case of
     308             :  * an OS crash or power failure.
     309             :  */
     310             : int
     311         132 : fsync_parent_path(const char *fname)
     312             : {
     313             :     char        parentpath[MAXPGPATH];
     314             : 
     315         132 :     strlcpy(parentpath, fname, MAXPGPATH);
     316         132 :     get_parent_directory(parentpath);
     317             : 
     318             :     /*
     319             :      * get_parent_directory() returns an empty string if the input argument is
     320             :      * just a file name (see comments in path.c), so handle that as being the
     321             :      * current directory.
     322             :      */
     323         132 :     if (strlen(parentpath) == 0)
     324           0 :         strlcpy(parentpath, ".", MAXPGPATH);
     325             : 
     326         132 :     if (fsync_fname(parentpath, true) != 0)
     327           0 :         return -1;
     328             : 
     329         132 :     return 0;
     330             : }
     331             : 
     332             : /*
     333             :  * durable_rename -- rename(2) wrapper, issuing fsyncs required for durability
     334             :  *
     335             :  * Wrapper around rename, similar to the backend version.
     336             :  */
     337             : int
     338         128 : durable_rename(const char *oldfile, const char *newfile)
     339             : {
     340             :     int         fd;
     341             : 
     342             :     /*
     343             :      * First fsync the old and target path (if it exists), to ensure that they
     344             :      * are properly persistent on disk. Syncing the target file is not
     345             :  * strictly necessary, but it makes it easier to reason about crashes,
     346             :      * because it's then guaranteed that either source or target file exists
     347             :      * after a crash.
     348             :      */
     349         128 :     if (fsync_fname(oldfile, false) != 0)
     350           0 :         return -1;
     351             : 
     352         128 :     fd = open(newfile, PG_BINARY | O_RDWR, 0);
     353         128 :     if (fd < 0)
     354             :     {
     355         128 :         if (errno != ENOENT)
     356             :         {
     357           0 :             pg_log_error("could not open file \"%s\": %m", newfile);
     358           0 :             return -1;
     359             :         }
     360             :     }
     361             :     else
     362             :     {
     363           0 :         if (fsync(fd) != 0)
     364             :         {
     365           0 :             pg_log_fatal("could not fsync file \"%s\": %m", newfile);
     366           0 :             close(fd);
     367           0 :             exit(EXIT_FAILURE);
     368             :         }
     369           0 :         close(fd);
     370             :     }
     371             : 
     372             :     /* Time to do the real deal... */
     373         128 :     if (rename(oldfile, newfile) != 0)
     374             :     {
     375           0 :         pg_log_error("could not rename file \"%s\" to \"%s\": %m",
     376             :                      oldfile, newfile);
     377           0 :         return -1;
     378             :     }
     379             : 
     380             :     /*
     381             :      * To guarantee renaming the file is persistent, fsync the file with its
     382             :      * new name, and its containing directory.
     383             :      */
     384         128 :     if (fsync_fname(newfile, false) != 0)
     385           0 :         return -1;
     386             : 
     387         128 :     if (fsync_parent_path(newfile) != 0)
     388           0 :         return -1;
     389             : 
     390         128 :     return 0;
     391             : }

Generated by: LCOV version 1.13