LCOV - code coverage report
Current view: top level - src/bin/pg_upgrade - file.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 15 139 10.8 %
Date: 2025-01-18 05:15:39 Functions: 1 8 12.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  *  file.c
       3             :  *
       4             :  *  file system operations
       5             :  *
       6             :  *  Copyright (c) 2010-2025, PostgreSQL Global Development Group
       7             :  *  src/bin/pg_upgrade/file.c
       8             :  */
       9             : 
      10             : #include "postgres_fe.h"
      11             : 
      12             : #include <sys/stat.h>
      13             : #include <limits.h>
      14             : #include <fcntl.h>
      15             : #ifdef HAVE_COPYFILE_H
      16             : #include <copyfile.h>
      17             : #endif
      18             : #ifdef __linux__
      19             : #include <sys/ioctl.h>
      20             : #include <linux/fs.h>
      21             : #endif
      22             : 
      23             : #include "access/visibilitymapdefs.h"
      24             : #include "common/file_perm.h"
      25             : #include "pg_upgrade.h"
      26             : #include "storage/bufpage.h"
      27             : #include "storage/checksum.h"
      28             : #include "storage/checksum_impl.h"
      29             : 
      30             : 
      31             : /*
      32             :  * cloneFile()
      33             :  *
      34             :  * Clones/reflinks a relation file from src to dst.
      35             :  *
      36             :  * schemaName/relName are the relation's SQL name (used for error messages only).
      37             :  */
      38             : void
      39           0 : cloneFile(const char *src, const char *dst,
      40             :           const char *schemaName, const char *relName)
      41             : {
      42             : #if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
      43             :     if (copyfile(src, dst, NULL, COPYFILE_CLONE_FORCE) < 0)
      44             :         pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %m",
      45             :                  schemaName, relName, src, dst);
      46             : #elif defined(__linux__) && defined(FICLONE)
      47             :     int         src_fd;
      48             :     int         dest_fd;
      49             : 
      50           0 :     if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
      51           0 :         pg_fatal("error while cloning relation \"%s.%s\": could not open file \"%s\": %m",
      52             :                  schemaName, relName, src);
      53             : 
      54           0 :     if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
      55             :                         pg_file_create_mode)) < 0)
      56           0 :         pg_fatal("error while cloning relation \"%s.%s\": could not create file \"%s\": %m",
      57             :                  schemaName, relName, dst);
      58             : 
      59           0 :     if (ioctl(dest_fd, FICLONE, src_fd) < 0)
      60             :     {
      61           0 :         int         save_errno = errno;
      62             : 
      63           0 :         unlink(dst);
      64             : 
      65           0 :         pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s",
      66             :                  schemaName, relName, src, dst, strerror(save_errno));
      67             :     }
      68             : 
      69           0 :     close(src_fd);
      70           0 :     close(dest_fd);
      71             : #endif
      72           0 : }
      73             : 
      74             : 
      75             : /*
      76             :  * copyFile()
      77             :  *
      78             :  * Copies a relation file from src to dst.
      79             :  * schemaName/relName are the relation's SQL name (used for error messages only).
      80             :  */
      81             : void
      82        3344 : copyFile(const char *src, const char *dst,
      83             :          const char *schemaName, const char *relName)
      84             : {
      85             : #ifndef WIN32
      86             :     int         src_fd;
      87             :     int         dest_fd;
      88             :     char       *buffer;
      89             : 
      90        3344 :     if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
      91           0 :         pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %m",
      92             :                  schemaName, relName, src);
      93             : 
      94        3344 :     if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
      95             :                         pg_file_create_mode)) < 0)
      96           0 :         pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %m",
      97             :                  schemaName, relName, dst);
      98             : 
      99             :     /* copy in fairly large chunks for best efficiency */
     100             : #define COPY_BUF_SIZE (50 * BLCKSZ)
     101             : 
     102        3344 :     buffer = (char *) pg_malloc(COPY_BUF_SIZE);
     103             : 
     104             :     /* perform data copying, i.e., read from source, write to destination */
     105             :     while (true)
     106        2704 :     {
     107        6048 :         ssize_t     nbytes = read(src_fd, buffer, COPY_BUF_SIZE);
     108             : 
     109        6048 :         if (nbytes < 0)
     110           0 :             pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %m",
     111             :                      schemaName, relName, src);
     112             : 
     113        6048 :         if (nbytes == 0)
     114        3344 :             break;
     115             : 
     116        2704 :         errno = 0;
     117        2704 :         if (write(dest_fd, buffer, nbytes) != nbytes)
     118             :         {
     119             :             /* if write didn't set errno, assume problem is no disk space */
     120           0 :             if (errno == 0)
     121           0 :                 errno = ENOSPC;
     122           0 :             pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %m",
     123             :                      schemaName, relName, dst);
     124             :         }
     125             :     }
     126             : 
     127        3344 :     pg_free(buffer);
     128        3344 :     close(src_fd);
     129        3344 :     close(dest_fd);
     130             : 
     131             : #else                           /* WIN32 */
     132             : 
     133             :     if (CopyFile(src, dst, true) == 0)
     134             :     {
     135             :         _dosmaperr(GetLastError());
     136             :         pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %m",
     137             :                  schemaName, relName, src, dst);
     138             :     }
     139             : 
     140             : #endif                          /* WIN32 */
     141        3344 : }
     142             : 
     143             : 
     144             : /*
     145             :  * copyFileByRange()
     146             :  *
     147             :  * Copies a relation file from src to dst using copy_file_range().
     148             :  * schemaName/relName are the relation's SQL name (used for error messages only).
     149             :  */
     150             : void
     151           0 : copyFileByRange(const char *src, const char *dst,
     152             :                 const char *schemaName, const char *relName)
     153             : {
     154             : #ifdef HAVE_COPY_FILE_RANGE
     155             :     int         src_fd;
     156             :     int         dest_fd;
     157             :     ssize_t     nbytes;
     158             : 
     159           0 :     if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
     160           0 :         pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %m",
     161             :                  schemaName, relName, src);
     162             : 
     163           0 :     if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
     164             :                         pg_file_create_mode)) < 0)
     165           0 :         pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %m",
     166             :                  schemaName, relName, dst);
     167             : 
     168             :     do
     169             :     {
     170           0 :         nbytes = copy_file_range(src_fd, NULL, dest_fd, NULL, SSIZE_MAX, 0);
     171           0 :         if (nbytes < 0)
     172           0 :             pg_fatal("error while copying relation \"%s.%s\": could not copy file range from \"%s\" to \"%s\": %m",
     173             :                      schemaName, relName, src, dst);
     174             :     }
     175           0 :     while (nbytes > 0);
     176             : 
     177           0 :     close(src_fd);
     178           0 :     close(dest_fd);
     179             : #endif
     180           0 : }
     181             : 
     182             : 
     183             : /*
     184             :  * linkFile()
     185             :  *
     186             :  * Hard-links a relation file from src to dst.
     187             :  * schemaName/relName are the relation's SQL name (used for error messages only).
     188             :  */
     189             : void
     190           0 : linkFile(const char *src, const char *dst,
     191             :          const char *schemaName, const char *relName)
     192             : {
     193           0 :     if (link(src, dst) < 0)
     194           0 :         pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %m",
     195             :                  schemaName, relName, src, dst);
     196           0 : }
     197             : 
     198             : 
     199             : /*
     200             :  * rewriteVisibilityMap()
     201             :  *
     202             :  * Transform a visibility map file, copying from src to dst.
     203             :  * schemaName/relName are the relation's SQL name (used for error messages only).
     204             :  *
     205             :  * In versions of PostgreSQL prior to catversion 201603011, PostgreSQL's
     206             :  * visibility map included one bit per heap page; it now includes two.
     207             :  * When upgrading a cluster from before that time to a current PostgreSQL
     208             :  * version, we could refuse to copy visibility maps from the old cluster
     209             :  * to the new cluster; the next VACUUM would recreate them, but at the
     210             :  * price of scanning the entire table.  So, instead, we rewrite the old
     211             :  * visibility maps in the new format.  That way, the all-visible bits
     212             :  * remain set for the pages for which they were set previously.  The
     213             :  * all-frozen bits are never set by this conversion; we leave that to VACUUM.
     214             :  */
     215             : void
     216           0 : rewriteVisibilityMap(const char *fromfile, const char *tofile,
     217             :                      const char *schemaName, const char *relName)
     218             : {
     219             :     int         src_fd;
     220             :     int         dst_fd;
     221             :     PGIOAlignedBlock buffer;
     222             :     PGIOAlignedBlock new_vmbuf;
     223           0 :     ssize_t     totalBytesRead = 0;
     224             :     ssize_t     src_filesize;
     225             :     int         rewriteVmBytesPerPage;
     226           0 :     BlockNumber new_blkno = 0;
     227             :     struct stat statbuf;
     228             : 
     229             :     /* Compute number of old-format bytes per new page */
     230           0 :     rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2;
     231             : 
     232           0 :     if ((src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0)
     233           0 :         pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %m",
     234             :                  schemaName, relName, fromfile);
     235             : 
     236           0 :     if (fstat(src_fd, &statbuf) != 0)
     237           0 :         pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %m",
     238             :                  schemaName, relName, fromfile);
     239             : 
     240           0 :     if ((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
     241             :                        pg_file_create_mode)) < 0)
     242           0 :         pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %m",
     243             :                  schemaName, relName, tofile);
     244             : 
     245             :     /* Save old file size */
     246           0 :     src_filesize = statbuf.st_size;
     247             : 
     248             :     /*
     249             :      * Turn each visibility map page into 2 pages one by one. Each new page
     250             :      * has the same page header as the old one.  If the last section of the
     251             :      * last page is empty, we skip it, mostly to avoid turning one-page
     252             :      * visibility maps for small relations into two pages needlessly.
     253             :      */
     254           0 :     while (totalBytesRead < src_filesize)
     255             :     {
     256             :         ssize_t     bytesRead;
     257             :         char       *old_cur;
     258             :         char       *old_break;
     259             :         char       *old_blkend;
     260             :         PageHeaderData pageheader;
     261             :         bool        old_lastblk;
     262             : 
     263           0 :         if ((bytesRead = read(src_fd, buffer.data, BLCKSZ)) != BLCKSZ)
     264             :         {
     265           0 :             if (bytesRead < 0)
     266           0 :                 pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %m",
     267             :                          schemaName, relName, fromfile);
     268             :             else
     269           0 :                 pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"",
     270             :                          schemaName, relName, fromfile);
     271             :         }
     272             : 
     273           0 :         totalBytesRead += BLCKSZ;
     274           0 :         old_lastblk = (totalBytesRead == src_filesize);
     275             : 
     276             :         /* Save the page header data */
     277           0 :         memcpy(&pageheader, buffer.data, SizeOfPageHeaderData);
     278             : 
     279             :         /*
     280             :          * These old_* variables point into the old visibility map page.
     281             :          * old_cur is the current position on the old page; old_blkend is
     282             :          * the end of the old block; old_break is the end+1 position on the
     283             :          * old page for the data that will be transferred to the current new page.
     284             :          */
     285           0 :         old_cur = buffer.data + SizeOfPageHeaderData;
     286           0 :         old_blkend = buffer.data + bytesRead;
     287           0 :         old_break = old_cur + rewriteVmBytesPerPage;
     288             : 
     289           0 :         while (old_break <= old_blkend)
     290             :         {
     291             :             char       *new_cur;
     292           0 :             bool        empty = true;
     293             :             bool        old_lastpart;
     294             : 
     295             :             /* First, copy old page header to new page */
     296           0 :             memcpy(new_vmbuf.data, &pageheader, SizeOfPageHeaderData);
     297             : 
     298             :             /* Rewriting the last part of the last old page? */
     299           0 :             old_lastpart = old_lastblk && (old_break == old_blkend);
     300             : 
     301           0 :             new_cur = new_vmbuf.data + SizeOfPageHeaderData;
     302             : 
     303             :             /* Process old page bytes one by one, and turn them into the new page. */
     304           0 :             while (old_cur < old_break)
     305             :             {
     306           0 :                 uint8       byte = *(uint8 *) old_cur;
     307           0 :                 uint16      new_vmbits = 0;
     308             :                 int         i;
     309             : 
     310             :                 /* Generate new format bits while keeping old information */
     311           0 :                 for (i = 0; i < BITS_PER_BYTE; i++)
     312             :                 {
     313           0 :                     if (byte & (1 << i))
     314             :                     {
     315           0 :                         empty = false;
     316           0 :                         new_vmbits |=
     317           0 :                             VISIBILITYMAP_ALL_VISIBLE << (BITS_PER_HEAPBLOCK * i);
     318             :                     }
     319             :                 }
     320             : 
     321             :                 /* Copy new visibility map bytes to new-format page */
     322           0 :                 new_cur[0] = (char) (new_vmbits & 0xFF);
     323           0 :                 new_cur[1] = (char) (new_vmbits >> 8);
     324             : 
     325           0 :                 old_cur++;
     326           0 :                 new_cur += BITS_PER_HEAPBLOCK;
     327             :             }
     328             : 
     329             :             /* If the last part of the last page is empty, skip writing it */
     330           0 :             if (old_lastpart && empty)
     331           0 :                 break;
     332             : 
     333             :             /* Set new checksum for visibility map page, if enabled */
     334           0 :             if (new_cluster.controldata.data_checksum_version != 0)
     335           0 :                 ((PageHeader) new_vmbuf.data)->pd_checksum =
     336           0 :                     pg_checksum_page(new_vmbuf.data, new_blkno);
     337             : 
     338           0 :             errno = 0;
     339           0 :             if (write(dst_fd, new_vmbuf.data, BLCKSZ) != BLCKSZ)
     340             :             {
     341             :                 /* if write didn't set errno, assume problem is no disk space */
     342           0 :                 if (errno == 0)
     343           0 :                     errno = ENOSPC;
     344           0 :                 pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %m",
     345             :                          schemaName, relName, tofile);
     346             :             }
     347             : 
     348             :             /* Advance for next new page */
     349           0 :             old_break += rewriteVmBytesPerPage;
     350           0 :             new_blkno++;
     351             :         }
     352             :     }
     353             : 
     354             :     /* Clean up */
     355           0 :     close(dst_fd);
     356           0 :     close(src_fd);
     357           0 : }
     358             : 
     359             : void
     360           0 : check_file_clone(void)
     361             : {
     362             :     char        existing_file[MAXPGPATH];
     363             :     char        new_link_file[MAXPGPATH];
     364             : 
     365           0 :     snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
     366           0 :     snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.clonetest", new_cluster.pgdata);
     367           0 :     unlink(new_link_file);      /* might fail */
     368             : 
     369             : #if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
     370             :     if (copyfile(existing_file, new_link_file, NULL, COPYFILE_CLONE_FORCE) < 0)
     371             :         pg_fatal("could not clone file between old and new data directories: %m");
     372             : #elif defined(__linux__) && defined(FICLONE)
     373             :     {
     374             :         int         src_fd;
     375             :         int         dest_fd;
     376             : 
     377           0 :         if ((src_fd = open(existing_file, O_RDONLY | PG_BINARY, 0)) < 0)
     378           0 :             pg_fatal("could not open file \"%s\": %m",
     379             :                      existing_file);
     380             : 
     381           0 :         if ((dest_fd = open(new_link_file, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
     382             :                             pg_file_create_mode)) < 0)
     383           0 :             pg_fatal("could not create file \"%s\": %m",
     384             :                      new_link_file);
     385             : 
     386           0 :         if (ioctl(dest_fd, FICLONE, src_fd) < 0)
     387           0 :             pg_fatal("could not clone file between old and new data directories: %m");
     388             : 
     389           0 :         close(src_fd);
     390           0 :         close(dest_fd);
     391             :     }
     392             : #else
     393             :     pg_fatal("file cloning not supported on this platform");
     394             : #endif
     395             : 
     396           0 :     unlink(new_link_file);
     397           0 : }
     398             : 
     399             : void
     400           0 : check_copy_file_range(void)
     401             : {
     402             :     char        existing_file[MAXPGPATH];
     403             :     char        new_link_file[MAXPGPATH];
     404             : 
     405           0 :     snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
     406           0 :     snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.copy_file_range_test", new_cluster.pgdata);
     407           0 :     unlink(new_link_file);      /* might fail */
     408             : 
     409             : #if defined(HAVE_COPY_FILE_RANGE)
     410             :     {
     411             :         int         src_fd;
     412             :         int         dest_fd;
     413             : 
     414           0 :         if ((src_fd = open(existing_file, O_RDONLY | PG_BINARY, 0)) < 0)
     415           0 :             pg_fatal("could not open file \"%s\": %m",
     416             :                      existing_file);
     417             : 
     418           0 :         if ((dest_fd = open(new_link_file, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
     419             :                             pg_file_create_mode)) < 0)
     420           0 :             pg_fatal("could not create file \"%s\": %m",
     421             :                      new_link_file);
     422             : 
     423           0 :         if (copy_file_range(src_fd, NULL, dest_fd, NULL, SSIZE_MAX, 0) < 0)
     424           0 :             pg_fatal("could not copy file range between old and new data directories: %m");
     425             : 
     426           0 :         close(src_fd);
     427           0 :         close(dest_fd);
     428             :     }
     429             : #else
     430             :     pg_fatal("copy_file_range not supported on this platform");
     431             : #endif
     432             : 
     433           0 :     unlink(new_link_file);
     434           0 : }
     435             : 
     436             : void
     437           0 : check_hard_link(void)
     438             : {
     439             :     char        existing_file[MAXPGPATH];
     440             :     char        new_link_file[MAXPGPATH];
     441             : 
     442           0 :     snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
     443           0 :     snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.linktest", new_cluster.pgdata);
     444           0 :     unlink(new_link_file);      /* might fail */
     445             : 
     446           0 :     if (link(existing_file, new_link_file) < 0)
     447           0 :         pg_fatal("could not create hard link between old and new data directories: %m\n"
     448             :                  "In link mode the old and new data directories must be on the same file system.");
     449             : 
     450           0 :     unlink(new_link_file);
     451           0 : }

Generated by: LCOV version 1.14