LCOV - code coverage report
Current view: top level - src/backend/storage/file - buffile.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 75.6 % 312 236
Test Date: 2026-03-23 16:15:52 Functions: 92.0 % 25 23
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * buffile.c
       4              :  *    Management of large buffered temporary files.
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  * IDENTIFICATION
      10              :  *    src/backend/storage/file/buffile.c
      11              :  *
      12              :  * NOTES:
      13              :  *
      14              :  * BufFiles provide a very incomplete emulation of stdio atop virtual Files
      15              :  * (as managed by fd.c).  Currently, we only support the buffered-I/O
      16              :  * aspect of stdio: a read or write of the low-level File occurs only
      17              :  * when the buffer is filled or emptied.  This is an even bigger win
      18              :  * for virtual Files than for ordinary kernel files, since reducing the
      19              :  * frequency with which a virtual File is touched reduces "thrashing"
      20              :  * of opening/closing file descriptors.
      21              :  *
      22              :  * Note that BufFile structs are allocated with palloc(), and therefore
      23              :  * will go away automatically at query/transaction end.  Since the underlying
      24              :  * virtual Files are made with OpenTemporaryFile, all resources for
      25              :  * the file are certain to be cleaned up even if processing is aborted
      26              :  * by ereport(ERROR).  The data structures required are made in the
      27              :  * palloc context that was current when the BufFile was created, and
      28              :  * any external resources such as temp files are owned by the ResourceOwner
      29              :  * that was current at that time.
      30              :  *
      31              :  * BufFile also supports temporary files that exceed the OS file size limit
      32              :  * (by opening multiple fd.c temporary files).  This is an essential feature
      33              :  * for sorts and hashjoins on large amounts of data.
      34              :  *
      35              :  * BufFile supports temporary files that can be shared with other backends, as
      36              :  * infrastructure for parallel execution.  Such files need to be created as a
      37              :  * member of a SharedFileSet that all participants are attached to.
      38              :  *
      39              :  * BufFile also supports temporary files that can be used by a single backend
      40              :  * when the corresponding files need to survive across transactions and
      41              :  * need to be opened and closed multiple times.  Such files need to be created
      42              :  * as a member of a FileSet.
      43              :  *-------------------------------------------------------------------------
      44              :  */
      45              : 
      46              : #include "postgres.h"
      47              : 
      48              : #include "commands/tablespace.h"
      49              : #include "executor/instrument.h"
      50              : #include "miscadmin.h"
      51              : #include "pgstat.h"
      52              : #include "storage/buffile.h"
      53              : #include "storage/bufmgr.h"
      54              : #include "storage/fd.h"
      55              : #include "utils/resowner.h"
      56              : #include "utils/wait_event.h"
      57              : 
      58              : /*
      59              :  * We break BufFiles into gigabyte-sized segments, regardless of RELSEG_SIZE.
      60              :  * The reason is that we'd like large BufFiles to be spread across multiple
      61              :  * tablespaces when available.
      62              :  */
      63              : #define MAX_PHYSICAL_FILESIZE   0x40000000
      64              : #define BUFFILE_SEG_SIZE        (MAX_PHYSICAL_FILESIZE / BLCKSZ)
      65              : 
      66              : /*
      67              :  * This data structure represents a buffered file that consists of one or
      68              :  * more physical files (each accessed through a virtual file descriptor
      69              :  * managed by fd.c).
      70              :  */
      71              : struct BufFile
      72              : {
      73              :     int         numFiles;       /* number of physical files in set */
      74              :     /* all files except the last have length exactly MAX_PHYSICAL_FILESIZE */
      75              :     File       *files;          /* palloc'd array with numFiles entries */
      76              : 
      77              :     bool        isInterXact;    /* keep open over transactions? */
      78              :     bool        dirty;          /* does buffer need to be written? */
      79              :     bool        readOnly;       /* has the file been set to read only? */
      80              : 
      81              :     FileSet    *fileset;        /* space for fileset based segment files */
      82              :     const char *name;           /* name of fileset based BufFile */
      83              : 
      84              :     /*
      85              :      * resowner is the ResourceOwner to use for underlying temp files.  (We
      86              :      * don't need to remember the memory context we're using explicitly,
      87              :      * because after creation we only repalloc our arrays larger.)
      88              :      */
      89              :     ResourceOwner resowner;
      90              : 
      91              :     /*
      92              :      * "current pos" is position of start of buffer within the logical file.
      93              :      * Position as seen by user of BufFile is (curFile, curOffset + pos).
      94              :      */
      95              :     int         curFile;        /* file index (0..n) part of current pos */
      96              :     pgoff_t     curOffset;      /* offset part of current pos */
      97              :     int64       pos;            /* next read/write position in buffer */
      98              :     int64       nbytes;         /* total # of valid bytes in buffer */
      99              : 
     100              :     /*
     101              :      * XXX Should ideally use PGIOAlignedBlock, but might need a way to avoid
     102              :      * wasting per-file alignment padding when some users create many files.
     103              :      */
     104              :     PGAlignedBlock buffer;
     105              : };
     106              : 
     107              : static BufFile *makeBufFileCommon(int nfiles);
     108              : static BufFile *makeBufFile(File firstfile);
     109              : static void extendBufFile(BufFile *file);
     110              : static void BufFileLoadBuffer(BufFile *file);
     111              : static void BufFileDumpBuffer(BufFile *file);
     112              : static void BufFileFlush(BufFile *file);
     113              : static File MakeNewFileSetSegment(BufFile *buffile, int segment);
     114              : 
     115              : /*
     116              :  * Create BufFile and perform the common initialization.
     117              :  */
     118              : static BufFile *
     119         6275 : makeBufFileCommon(int nfiles)
     120              : {
     121         6275 :     BufFile    *file = palloc_object(BufFile);
     122              : 
     123         6275 :     file->numFiles = nfiles;
     124         6275 :     file->isInterXact = false;
     125         6275 :     file->dirty = false;
     126         6275 :     file->resowner = CurrentResourceOwner;
     127         6275 :     file->curFile = 0;
     128         6275 :     file->curOffset = 0;
     129         6275 :     file->pos = 0;
     130         6275 :     file->nbytes = 0;
     131              : 
     132         6275 :     return file;
     133              : }
     134              : 
     135              : /*
     136              :  * Create a BufFile given the first underlying physical file.
     137              :  * NOTE: caller must set isInterXact if appropriate.
     138              :  */
     139              : static BufFile *
     140         1962 : makeBufFile(File firstfile)
     141              : {
     142         1962 :     BufFile    *file = makeBufFileCommon(1);
     143              : 
     144         1962 :     file->files = palloc_object(File);
     145         1962 :     file->files[0] = firstfile;
     146         1962 :     file->readOnly = false;
     147         1962 :     file->fileset = NULL;
     148         1962 :     file->name = NULL;
     149              : 
     150         1962 :     return file;
     151              : }
     152              : 
     153              : /*
     154              :  * Add another component temp file.
     155              :  */
     156              : static void
     157            0 : extendBufFile(BufFile *file)
     158              : {
     159              :     File        pfile;
     160              :     ResourceOwner oldowner;
     161              : 
     162              :     /* Be sure to associate the file with the BufFile's resource owner */
     163            0 :     oldowner = CurrentResourceOwner;
     164            0 :     CurrentResourceOwner = file->resowner;
     165              : 
     166            0 :     if (file->fileset == NULL)
     167            0 :         pfile = OpenTemporaryFile(file->isInterXact);
     168              :     else
     169            0 :         pfile = MakeNewFileSetSegment(file, file->numFiles);
     170              : 
     171              :     Assert(pfile >= 0);
     172              : 
     173            0 :     CurrentResourceOwner = oldowner;
     174              : 
     175            0 :     file->files = (File *) repalloc(file->files,
     176            0 :                                     (file->numFiles + 1) * sizeof(File));
     177            0 :     file->files[file->numFiles] = pfile;
     178            0 :     file->numFiles++;
     179            0 : }
     180              : 
     181              : /*
     182              :  * Create a BufFile for a new temporary file (which will expand to become
     183              :  * multiple temporary files if more than MAX_PHYSICAL_FILESIZE bytes are
     184              :  * written to it).
     185              :  *
     186              :  * If interXact is true, the temp file will not be automatically deleted
     187              :  * at end of transaction.
     188              :  *
     189              :  * Note: if interXact is true, the caller had better be calling us in a
     190              :  * memory context, and with a resource owner, that will survive across
     191              :  * transaction boundaries.
     192              :  */
     193              : BufFile *
     194         1962 : BufFileCreateTemp(bool interXact)
     195              : {
     196              :     BufFile    *file;
     197              :     File        pfile;
     198              : 
     199              :     /*
     200              :      * Ensure that temp tablespaces are set up for OpenTemporaryFile to use.
     201              :      * Possibly the caller will have done this already, but it seems useful to
     202              :      * double-check here.  Failure to do this at all would result in the temp
     203              :      * files always getting placed in the default tablespace, which is a
     204              :      * pretty hard-to-detect bug.  Callers may prefer to do it earlier if they
     205              :      * want to be sure that any required catalog access is done in some other
     206              :      * resource context.
     207              :      */
     208         1962 :     PrepareTempTablespaces();
     209              : 
     210         1962 :     pfile = OpenTemporaryFile(interXact);
     211              :     Assert(pfile >= 0);
     212              : 
     213         1962 :     file = makeBufFile(pfile);
     214         1962 :     file->isInterXact = interXact;
     215              : 
     216         1962 :     return file;
     217              : }
     218              : 
     219              : /*
     220              :  * Build the name for a given segment of a given BufFile.
     221              :  */
     222              : static void
     223         9290 : FileSetSegmentName(char *name, const char *buffile_name, int segment)
     224              : {
     225         9290 :     snprintf(name, MAXPGPATH, "%s.%d", buffile_name, segment);
     226         9290 : }
     227              : 
     228              : /*
     229              :  * Create a new segment file backing a fileset based BufFile.
     230              :  */
     231              : static File
     232         1943 : MakeNewFileSetSegment(BufFile *buffile, int segment)
     233              : {
     234              :     char        name[MAXPGPATH];
     235              :     File        file;
     236              : 
     237              :     /*
     238              :      * It is possible that there are files left over from before a crash
     239              :      * restart with the same name.  In order for BufFileOpenFileSet() not to
     240              :      * get confused about how many segments there are, we'll unlink the next
     241              :      * segment number if it already exists.
     242              :      */
     243         1943 :     FileSetSegmentName(name, buffile->name, segment + 1);
     244         1943 :     FileSetDelete(buffile->fileset, name, true);
     245              : 
     246              :     /* Create the new segment. */
     247         1943 :     FileSetSegmentName(name, buffile->name, segment);
     248         1943 :     file = FileSetCreate(buffile->fileset, name);
     249              : 
     250              :     /* FileSetCreate would've errored out */
     251              :     Assert(file > 0);
     252              : 
     253         1943 :     return file;
     254              : }
     255              : 
     256              : /*
     257              :  * Create a BufFile that can be discovered and opened read-only by other
     258              :  * backends that are attached to the same SharedFileSet using the same name.
     259              :  *
     260              :  * The naming scheme for fileset based BufFiles is left up to the calling code.
     261              :  * The name will appear as part of one or more filenames on disk, and might
     262              :  * provide clues to administrators about which subsystem is generating
     263              :  * temporary file data.  Since each SharedFileSet object is backed by one or
     264              :  * more uniquely named temporary directories, names don't conflict with
     265              :  * unrelated SharedFileSet objects.
     266              :  */
     267              : BufFile *
     268         1943 : BufFileCreateFileSet(FileSet *fileset, const char *name)
     269              : {
     270              :     BufFile    *file;
     271              : 
     272         1943 :     file = makeBufFileCommon(1);
     273         1943 :     file->fileset = fileset;
     274         1943 :     file->name = pstrdup(name);
     275         1943 :     file->files = palloc_object(File);
     276         1943 :     file->files[0] = MakeNewFileSetSegment(file, 0);
     277         1943 :     file->readOnly = false;
     278              : 
     279         1943 :     return file;
     280              : }
     281              : 
     282              : /*
     283              :  * Open a file that was previously created in another backend (or this one)
     284              :  * with BufFileCreateFileSet in the same FileSet using the same name.
     285              :  * The backend that created the file must have called BufFileClose() or
     286              :  * BufFileExportFileSet() to make sure that it is ready to be opened by other
     287              :  * backends and render it read-only.  If missing_ok is true, which indicates
     288              :  * that missing files can be safely ignored, then return NULL if the BufFile
     289              :  * with the given name is not found, otherwise, throw an error.
     290              :  */
     291              : BufFile *
     292         2643 : BufFileOpenFileSet(FileSet *fileset, const char *name, int mode,
     293              :                    bool missing_ok)
     294              : {
     295              :     BufFile    *file;
     296              :     char        segment_name[MAXPGPATH];
     297         2643 :     Size        capacity = 16;
     298              :     File       *files;
     299         2643 :     int         nfiles = 0;
     300              : 
     301         2643 :     files = palloc_array(File, capacity);
     302              : 
     303              :     /*
     304              :      * We don't know how many segments there are, so we'll probe the
     305              :      * filesystem to find out.
     306              :      */
     307              :     for (;;)
     308              :     {
     309              :         /* See if we need to expand our file segment array. */
     310         5013 :         if (nfiles + 1 > capacity)
     311              :         {
     312            0 :             capacity *= 2;
     313            0 :             files = repalloc_array(files, File, capacity);
     314              :         }
     315              :         /* Try to load a segment. */
     316         5013 :         FileSetSegmentName(segment_name, name, nfiles);
     317         5013 :         files[nfiles] = FileSetOpen(fileset, segment_name, mode);
     318         5013 :         if (files[nfiles] <= 0)
     319         2643 :             break;
     320         2370 :         ++nfiles;
     321              : 
     322         2370 :         CHECK_FOR_INTERRUPTS();
     323              :     }
     324              : 
     325              :     /*
     326              :      * If we didn't find any files at all, then no BufFile exists with this
     327              :      * name.
     328              :      */
     329         2643 :     if (nfiles == 0)
     330              :     {
     331              :         /* free the memory */
     332          273 :         pfree(files);
     333              : 
     334          273 :         if (missing_ok)
     335          273 :             return NULL;
     336              : 
     337            0 :         ereport(ERROR,
     338              :                 (errcode_for_file_access(),
     339              :                  errmsg("could not open temporary file \"%s\" from BufFile \"%s\": %m",
     340              :                         segment_name, name)));
     341              :     }
     342              : 
     343         2370 :     file = makeBufFileCommon(nfiles);
     344         2370 :     file->files = files;
     345         2370 :     file->readOnly = (mode == O_RDONLY);
     346         2370 :     file->fileset = fileset;
     347         2370 :     file->name = pstrdup(name);
     348              : 
     349         2370 :     return file;
     350              : }
     351              : 
     352              : /*
     353              :  * Delete a BufFile that was created by BufFileCreateFileSet in the given
     354              :  * FileSet using the given name.
     355              :  *
     356              :  * It is not necessary to delete files explicitly with this function.  It is
     357              :  * provided only as a way to delete files proactively, rather than waiting for
     358              :  * the FileSet to be cleaned up.
     359              :  *
     360              :  * Only one backend should attempt to delete a given name, and should know
     361              :  * that it exists and has been exported or closed; otherwise, missing_ok
     362              :  * should be passed as true.
     363              :  */
     364              : void
     365          352 : BufFileDeleteFileSet(FileSet *fileset, const char *name, bool missing_ok)
     366              : {
     367              :     char        segment_name[MAXPGPATH];
     368          352 :     int         segment = 0;
     369          352 :     bool        found = false;
     370              : 
     371              :     /*
     372              :      * We don't know how many segments the file has.  We'll keep deleting
     373              :      * until we run out.  If we don't manage to find even an initial segment,
     374              :      * raise an error unless missing_ok is true.
     375              :      */
     376              :     for (;;)
     377              :     {
     378          391 :         FileSetSegmentName(segment_name, name, segment);
     379          391 :         if (!FileSetDelete(fileset, segment_name, true))
     380          352 :             break;
     381           39 :         found = true;
     382           39 :         ++segment;
     383              : 
     384           39 :         CHECK_FOR_INTERRUPTS();
     385              :     }
     386              : 
     387          352 :     if (!found && !missing_ok)
     388            0 :         elog(ERROR, "could not delete unknown BufFile \"%s\"", name);
     389          352 : }
     390              : 
     391              : /*
     392              :  * BufFileExportFileSet --- flush and make read-only, in preparation for sharing.
     393              :  */
     394              : void
     395          379 : BufFileExportFileSet(BufFile *file)
     396              : {
     397              :     /* Must be a file belonging to a FileSet. */
     398              :     Assert(file->fileset != NULL);
     399              : 
     400              :     /* It's probably a bug if someone calls this twice. */
     401              :     Assert(!file->readOnly);
     402              : 
     403          379 :     BufFileFlush(file);
     404          379 :     file->readOnly = true;
     405          379 : }
     406              : 
     407              : /*
     408              :  * Close a BufFile
     409              :  *
     410              :  * Like fclose(), this also implicitly FileCloses the underlying File.
     411              :  */
     412              : void
     413         6113 : BufFileClose(BufFile *file)
     414              : {
     415              :     int         i;
     416              : 
     417              :     /* flush any unwritten data */
     418         6113 :     BufFileFlush(file);
     419              :     /* close and delete the underlying file(s) */
     420        12379 :     for (i = 0; i < file->numFiles; i++)
     421         6266 :         FileClose(file->files[i]);
     422              :     /* release the buffer space */
     423         6113 :     pfree(file->files);
     424         6113 :     pfree(file);
     425         6113 : }
     426              : 
     427              : /*
     428              :  * BufFileLoadBuffer
     429              :  *
     430              :  * Load some data into buffer, if possible, starting from curOffset.
     431              :  * At call, must have dirty = false, pos and nbytes = 0.
     432              :  * On exit, nbytes is number of bytes loaded.
     433              :  */
     434              : static void
     435        63075 : BufFileLoadBuffer(BufFile *file)
     436              : {
     437              :     File        thisfile;
     438              :     instr_time  io_start;
     439              :     instr_time  io_time;
     440              : 
     441              :     /*
     442              :      * Advance to next component file if necessary and possible.
     443              :      */
     444        63075 :     if (file->curOffset >= MAX_PHYSICAL_FILESIZE &&
     445            0 :         file->curFile + 1 < file->numFiles)
     446              :     {
     447            0 :         file->curFile++;
     448            0 :         file->curOffset = 0;
     449              :     }
     450              : 
     451        63075 :     thisfile = file->files[file->curFile];
     452              : 
     453        63075 :     if (track_io_timing)
     454            0 :         INSTR_TIME_SET_CURRENT(io_start);
     455              :     else
     456        63075 :         INSTR_TIME_SET_ZERO(io_start);
     457              : 
     458              :     /*
     459              :      * Read whatever we can get, up to a full bufferload.
     460              :      */
     461       126150 :     file->nbytes = FileRead(thisfile,
     462        63075 :                             file->buffer.data,
     463              :                             sizeof(file->buffer.data),
     464              :                             file->curOffset,
     465              :                             WAIT_EVENT_BUFFILE_READ);
     466        63075 :     if (file->nbytes < 0)
     467              :     {
     468            0 :         file->nbytes = 0;
     469            0 :         ereport(ERROR,
     470              :                 (errcode_for_file_access(),
     471              :                  errmsg("could not read file \"%s\": %m",
     472              :                         FilePathName(thisfile))));
     473              :     }
     474              : 
     475        63075 :     if (track_io_timing)
     476              :     {
     477            0 :         INSTR_TIME_SET_CURRENT(io_time);
     478            0 :         INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_read_time, io_time, io_start);
     479              :     }
     480              : 
     481              :     /* we choose not to advance curOffset here */
     482              : 
     483        63075 :     if (file->nbytes > 0)
     484        61388 :         pgBufferUsage.temp_blks_read++;
     485        63075 : }
     486              : 
     487              : /*
     488              :  * BufFileDumpBuffer
     489              :  *
     490              :  * Dump buffer contents starting at curOffset.
     491              :  * At call, should have dirty = true, nbytes > 0.
     492              :  * On exit, dirty is cleared if successful write, and curOffset is advanced.
     493              :  */
     494              : static void
     495        72521 : BufFileDumpBuffer(BufFile *file)
     496              : {
     497        72521 :     int64       wpos = 0;
     498              :     int64       bytestowrite;
     499              :     File        thisfile;
     500              : 
     501              :     /*
     502              :      * Unlike BufFileLoadBuffer, we must dump the whole buffer even if it
     503              :      * crosses a component-file boundary; so we need a loop.
     504              :      */
     505       145042 :     while (wpos < file->nbytes)
     506              :     {
     507              :         int64       availbytes;
     508              :         instr_time  io_start;
     509              :         instr_time  io_time;
     510              : 
     511              :         /*
     512              :          * Advance to next component file if necessary and possible.
     513              :          */
     514        72521 :         if (file->curOffset >= MAX_PHYSICAL_FILESIZE)
     515              :         {
     516            0 :             while (file->curFile + 1 >= file->numFiles)
     517            0 :                 extendBufFile(file);
     518            0 :             file->curFile++;
     519            0 :             file->curOffset = 0;
     520              :         }
     521              : 
     522              :         /*
     523              :          * Determine how much we need to write into this file.
     524              :          */
     525        72521 :         bytestowrite = file->nbytes - wpos;
     526        72521 :         availbytes = MAX_PHYSICAL_FILESIZE - file->curOffset;
     527              : 
     528        72521 :         if (bytestowrite > availbytes)
     529            0 :             bytestowrite = availbytes;
     530              : 
     531        72521 :         thisfile = file->files[file->curFile];
     532              : 
     533        72521 :         if (track_io_timing)
     534            0 :             INSTR_TIME_SET_CURRENT(io_start);
     535              :         else
     536        72521 :             INSTR_TIME_SET_ZERO(io_start);
     537              : 
     538        72521 :         bytestowrite = FileWrite(thisfile,
     539        72521 :                                  file->buffer.data + wpos,
     540              :                                  bytestowrite,
     541              :                                  file->curOffset,
     542              :                                  WAIT_EVENT_BUFFILE_WRITE);
     543        72521 :         if (bytestowrite <= 0)
     544            0 :             ereport(ERROR,
     545              :                     (errcode_for_file_access(),
     546              :                      errmsg("could not write to file \"%s\": %m",
     547              :                             FilePathName(thisfile))));
     548              : 
     549        72521 :         if (track_io_timing)
     550              :         {
     551            0 :             INSTR_TIME_SET_CURRENT(io_time);
     552            0 :             INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_write_time, io_time, io_start);
     553              :         }
     554              : 
     555        72521 :         file->curOffset += bytestowrite;
     556        72521 :         wpos += bytestowrite;
     557              : 
     558        72521 :         pgBufferUsage.temp_blks_written++;
     559              :     }
     560        72521 :     file->dirty = false;
     561              : 
     562              :     /*
     563              :      * At this point, curOffset has been advanced to the end of the buffer,
     564              :      * ie, its original value + nbytes.  We need to make it point to the
     565              :      * logical file position, ie, original value + pos, in case that is less
     566              :      * (as could happen due to a small backwards seek in a dirty buffer!)
     567              :      */
     568        72521 :     file->curOffset -= (file->nbytes - file->pos);
     569        72521 :     if (file->curOffset < 0)  /* handle possible segment crossing */
     570              :     {
     571            0 :         file->curFile--;
     572              :         Assert(file->curFile >= 0);
     573            0 :         file->curOffset += MAX_PHYSICAL_FILESIZE;
     574              :     }
     575              : 
     576              :     /*
     577              :      * Now we can set the buffer empty without changing the logical position
     578              :      */
     579        72521 :     file->pos = 0;
     580        72521 :     file->nbytes = 0;
     581        72521 : }
     582              : 
     583              : /*
     584              :  * BufFileReadCommon --- shared implementation of the BufFileRead variants
     585              :  *
     586              :  * Like fread() except we assume 1-byte element size and report I/O errors via
     587              :  * ereport().  Returns the number of bytes actually copied into 'ptr'.
     588              :  *
     589              :  * If 'exact' is true, then an error is also raised if the number of bytes
     590              :  * read is not exactly 'size' (no short reads).  If 'exact' and 'eofOK' are
     591              :  * both true, then reading zero bytes (immediate EOF) is also accepted.
     592              :  */
     593              : static size_t
     594     15785618 : BufFileReadCommon(BufFile *file, void *ptr, size_t size, bool exact, bool eofOK)
     595              : {
     596     15785618 :     size_t      start_size = size;  /* original request, for the error check */
     597     15785618 :     size_t      nread = 0;
     598              :     size_t      nthistime;
     599              : 
     600     15785618 :     BufFileFlush(file);     /* dump any dirty buffer before reading */
     601              : 
     602     31585555 :     while (size > 0)
     603              :     {
     604     15801624 :         if (file->pos >= file->nbytes)
     605              :         {
     606              :             /* Buffer exhausted: step past it and try to load more data. */
     607        63075 :             file->curOffset += file->pos;
     608        63075 :             file->pos = 0;
     609        63075 :             file->nbytes = 0;
     610        63075 :             BufFileLoadBuffer(file);
     611        63075 :             if (file->nbytes <= 0)
     612         1687 :                 break;          /* no more data available */
     613              :         }
     614              : 
     615     15799937 :         nthistime = file->nbytes - file->pos;   /* bytes left in buffer */
     616     15799937 :         if (nthistime > size)
     617     15740563 :             nthistime = size;
     618              :         Assert(nthistime > 0);
     619              : 
     620     15799937 :         memcpy(ptr, file->buffer.data + file->pos, nthistime);
     621              : 
     622     15799937 :         file->pos += nthistime;
     623     15799937 :         ptr = (char *) ptr + nthistime;
     624     15799937 :         size -= nthistime;
     625     15799937 :         nread += nthistime;
     626              :     }
     627              : 
     628     15785618 :     if (exact &&
     629         1687 :         (nread != start_size && !(nread == 0 && eofOK)))    /* short read that is not a permitted EOF */
     630            0 :         ereport(ERROR,
     631              :                 errcode_for_file_access(),
     632              :                 file->name ?
     633              :                 errmsg("could not read from file set \"%s\": read only %zu of %zu bytes",
     634              :                        file->name, nread, start_size) :
     635              :                 errmsg("could not read from temporary file: read only %zu of %zu bytes",
     636              :                        nread, start_size));
     637              : 
     638     15785618 :     return nread;
     639              : }
     640              : 
     641              : /*
     642              :  * BufFileRead --- legacy interface; returns the number of bytes read, and
     643              :  * the caller must check that result for end of file or short reads.
     644              :  */
     645              : size_t
     646            0 : BufFileRead(BufFile *file, void *ptr, size_t size)
     647              : {
     648            0 :     return BufFileReadCommon(file, ptr, size, false, false);
     649              : }
     650              : 
     651              : /*
     652              :  * BufFileReadExact --- read exactly 'size' bytes; anything less is an ERROR.
     653              :  */
     654              : void
     655     10350039 : BufFileReadExact(BufFile *file, void *ptr, size_t size)
     656              : {
     657     10350039 :     BufFileReadCommon(file, ptr, size, true, false);
     658     10350039 : }
     659              : 
     660              : /*
     661              :  * BufFileReadMaybeEOF --- require read of exactly the specified size, but if
     662              :  * 'eofOK' is true also allow end of file (in which case 0 is returned).
     663              :  */
     664              : size_t
     665      5435579 : BufFileReadMaybeEOF(BufFile *file, void *ptr, size_t size, bool eofOK)
     666              : {
     667      5435579 :     return BufFileReadCommon(file, ptr, size, true, eofOK);
     668              : }
     669              : 
     670              : /*
     671              :  * BufFileWrite
     672              :  *
     673              :  * Like fwrite() except we assume 1-byte element size and report errors via
     674              :  * ereport().  Data is copied into the buffer, which is dumped once it fills.
     675              :  */
     676              : void
     677     15431677 : BufFileWrite(BufFile *file, const void *ptr, size_t size)
     678              : {
     679              :     size_t      nthistime;
     680              : 
     681              :     Assert(!file->readOnly);
     682              : 
     683     30891247 :     while (size > 0)
     684              :     {
     685     15459570 :         if (file->pos >= BLCKSZ)
     686              :         {
     687              :             /* Buffer full, dump it out */
     688        45892 :             if (file->dirty)
     689        45576 :                 BufFileDumpBuffer(file);
     690              :             else
     691              :             {
     692              :                 /* Clean buffer (we were reading): discard it, step past it */
     693          316 :                 file->curOffset += file->pos;
     694          316 :                 file->pos = 0;
     695          316 :                 file->nbytes = 0;
     696              :             }
     697              :         }
     698              : 
     699     15459570 :         nthistime = BLCKSZ - file->pos;     /* free space left in buffer */
     700     15459570 :         if (nthistime > size)
     701     15389313 :             nthistime = size;
     702              :         Assert(nthistime > 0);
     703              : 
     704     15459570 :         memcpy(file->buffer.data + file->pos, ptr, nthistime);
     705              : 
     706     15459570 :         file->dirty = true;
     707     15459570 :         file->pos += nthistime;
     708     15459570 :         if (file->nbytes < file->pos)   /* extend valid length if we wrote past it */
     709     15456994 :             file->nbytes = file->pos;
     710     15459570 :         ptr = (const char *) ptr + nthistime;
     711     15459570 :         size -= nthistime;
     712              :     }
     713     15431677 : }
     714              : 
     715              : /*
     716              :  * BufFileFlush
     717              :  *
     718              :  * Like fflush(), except that I/O errors are reported with ereport().
     719              :  */
     720              : static void
     721     15827283 : BufFileFlush(BufFile *file)
     722              : {
     723     15827283 :     if (file->dirty)        /* nothing to write out if buffer is clean */
     724        26945 :         BufFileDumpBuffer(file);
     725              : 
     726              :     Assert(!file->dirty);   /* BufFileDumpBuffer resets the dirty flag */
     727     15827283 : }
     728              : 
     729              : /*
     730              :  * BufFileSeek
     731              :  *
     732              :  * Like fseek(), except that target position needs two values ('fileno' and
     733              :  * 'offset') in order to work when logical filesize exceeds the maximum value
     734              :  * representable by pgoff_t.  We do not support relative seeks across more
     735              :  * than that, however.  I/O errors are reported by ereport().
     736              :  *
     737              :  * Result is 0 if OK, EOF if not.  Logical position is not moved if an
     738              :  * impossible seek is attempted.
     739              :  */
     740              : int
     741        71592 : BufFileSeek(BufFile *file, int fileno, pgoff_t offset, int whence)
     742              : {
     743              :     int         newFile;
     744              :     pgoff_t     newOffset;
     745              : 
     746        71592 :     switch (whence)
     747              :     {
     748        71257 :         case SEEK_SET:
     749        71257 :             if (fileno < 0)
     750            0 :                 return EOF;
     751        71257 :             newFile = fileno;
     752        71257 :             newOffset = offset;
     753        71257 :             break;
     754            0 :         case SEEK_CUR:
     755              : 
     756              :             /*
     757              :              * Relative seek considers only the signed offset, ignoring
     758              :              * fileno.
     759              :              */
     760            0 :             newFile = file->curFile;
     761            0 :             newOffset = (file->curOffset + file->pos) + offset;
     762            0 :             break;
     763          335 :         case SEEK_END:
     764              : 
     765              :             /*
     766              :              * Seek to the end of the last file, as given by its size; the
     767              :              * fileno and offset arguments are not used in this case.
     768              :              */
     769          335 :             newFile = file->numFiles - 1;
     770          335 :             newOffset = FileSize(file->files[file->numFiles - 1]);
     771          335 :             if (newOffset < 0)
     772            0 :                 ereport(ERROR,
     773              :                         (errcode_for_file_access(),
     774              :                          errmsg("could not determine size of temporary file \"%s\" from BufFile \"%s\": %m",
     775              :                                 FilePathName(file->files[file->numFiles - 1]),
     776              :                                 file->name)));
     777          335 :             break;
     778            0 :         default:
     779            0 :             elog(ERROR, "invalid whence: %d", whence);
     780              :             return EOF;
     781              :     }
     782        71592 :     while (newOffset < 0)   /* negative offset: back up into earlier segments */
     783              :     {
     784            0 :         if (--newFile < 0)
     785            0 :             return EOF;
     786            0 :         newOffset += MAX_PHYSICAL_FILESIZE;
     787              :     }
     788        71592 :     if (newFile == file->curFile &&
     789        71439 :         newOffset >= file->curOffset &&
     790        52239 :         newOffset <= file->curOffset + file->nbytes)
     791              :     {
     792              :         /*
     793              :          * Seek is to a point within existing buffer; we can just adjust
     794              :          * pos-within-buffer, without flushing buffer.  Note this is OK
     795              :          * whether reading or writing, but buffer remains dirty if we were
     796              :          * writing.
     797              :          */
     798        36419 :         file->pos = (int64) (newOffset - file->curOffset);
     799        36419 :         return 0;
     800              :     }
     801              :     /* Otherwise, must reposition buffer, so flush any dirty data */
     802        35173 :     BufFileFlush(file);
     803              : 
     804              :     /*
     805              :      * At this point and no sooner, check for seek past last segment. The
     806              :      * above flush could have created a new segment, so checking sooner would
     807              :      * not work (at least not with this code).
     808              :      */
     809              : 
     810              :     /* convert seek to "start of next seg" to "end of last seg" */
     811        35173 :     if (newFile == file->numFiles && newOffset == 0)
     812              :     {
     813            0 :         newFile--;
     814            0 :         newOffset = MAX_PHYSICAL_FILESIZE;
     815              :     }
     816        35173 :     while (newOffset > MAX_PHYSICAL_FILESIZE)   /* oversized offset: carry into later segments */
     817              :     {
     818            0 :         if (++newFile >= file->numFiles)
     819            0 :             return EOF;
     820            0 :         newOffset -= MAX_PHYSICAL_FILESIZE;
     821              :     }
     822        35173 :     if (newFile >= file->numFiles)  /* target segment does not exist */
     823            0 :         return EOF;
     824              :     /* Seek is OK!  Adopt the new position with an empty buffer. */
     825        35173 :     file->curFile = newFile;
     826        35173 :     file->curOffset = newOffset;
     827        35173 :     file->pos = 0;
     828        35173 :     file->nbytes = 0;
     829        35173 :     return 0;
     830              : }
     831              : /* BufFileTell --- return the current logical position in *fileno and *offset. */
     832              : void
     833        88672 : BufFileTell(BufFile *file, int *fileno, pgoff_t *offset)
     834              : {
     835        88672 :     *fileno = file->curFile;
     836        88672 :     *offset = file->curOffset + file->pos;  /* buffer start plus position within buffer */
     837        88672 : }
     838              : 
     839              : /*
     840              :  * BufFileSeekBlock --- block-oriented seek
     841              :  *
     842              :  * Performs absolute seek to the start of the n'th BLCKSZ-sized block of
     843              :  * the file.  Note that users of this interface will fail if their files
     844              :  * exceed BLCKSZ * PG_INT64_MAX bytes, but that is quite a lot; we don't
     845              :  * work with tables bigger than that, either...
     846              :  *
     847              :  * Result is 0 if OK, EOF if not.  Logical position is not moved if an
     848              :  * impossible seek is attempted.
     849              :  */
     850              : int
     851        69412 : BufFileSeekBlock(BufFile *file, int64 blknum)
     852              : {
     853       138824 :     return BufFileSeek(file,
     854        69412 :                        (int) (blknum / BUFFILE_SEG_SIZE),   /* fileno: segment holding the block */
     855        69412 :                        (pgoff_t) (blknum % BUFFILE_SEG_SIZE) * BLCKSZ,  /* byte offset in that segment */
     856              :                        SEEK_SET);
     857              : }
     858              : 
     859              : /*
     860              :  * BufFileSize --- returns the amount of data in the given BufFile, in bytes.
     861              :  *
     862              :  * Returned value includes the size of any holes left behind by BufFileAppend.
     863              :  * ereport()s on failure.
     864              :  */
     865              : int64
     866          291 : BufFileSize(BufFile *file)
     867              : {
     868              :     int64       lastFileSize;
     869              : 
     870              :     /* Get the size of the last physical file. */
     871          291 :     lastFileSize = FileSize(file->files[file->numFiles - 1]);
     872          291 :     if (lastFileSize < 0)
     873            0 :         ereport(ERROR,
     874              :                 (errcode_for_file_access(),
     875              :                  errmsg("could not determine size of temporary file \"%s\" from BufFile \"%s\": %m",
     876              :                         FilePathName(file->files[file->numFiles - 1]),
     877              :                         file->name)));
     878              : 
     879          291 :     return ((file->numFiles - 1) * (int64) MAX_PHYSICAL_FILESIZE) + /* all earlier files count as full-sized */
     880              :         lastFileSize;
     881              : }
     882              : 
     883              : /*
     884              :  * Append the contents of the source file to the end of the target file.
     885              :  *
     886              :  * Note that this operation takes over ownership of underlying resources from
     887              :  * "source".  Caller should never call BufFileClose against source having
     888              :  * called here first.  Resource owners for source and target must match,
     889              :  * too.
     890              :  *
     891              :  * This operation works by manipulating lists of segment files, so the
     892              :  * file content is always appended at a MAX_PHYSICAL_FILESIZE-aligned
     893              :  * boundary, typically creating empty holes before the boundary.  These
     894              :  * areas do not contain any interesting data, and cannot be read from by
     895              :  * caller.
     896              :  *
     897              :  * Returns the block number within target where the contents of source
     898              :  * begins.  Caller should apply this as an offset when working off block
     899              :  * positions that are in terms of the original BufFile space.
     900              :  */
     901              : int64
     902          153 : BufFileAppend(BufFile *target, BufFile *source)
     903              : {
     904          153 :     int64       startBlock = (int64) target->numFiles * BUFFILE_SEG_SIZE;   /* computed before numFiles grows */
     905          153 :     int         newNumFiles = target->numFiles + source->numFiles;
     906              :     int         i;
     907              : 
     908              :     Assert(source->readOnly);
     909              :     Assert(!source->dirty);
     910              : 
     911          153 :     if (target->resowner != source->resowner)
     912            0 :         elog(ERROR, "could not append BufFile with non-matching resource owner");
     913              : 
     914          153 :     target->files = (File *)
     915          153 :         repalloc(target->files, sizeof(File) * newNumFiles);    /* make room for source's files */
     916          306 :     for (i = target->numFiles; i < newNumFiles; i++)
     917          153 :         target->files[i] = source->files[i - target->numFiles]; /* copy source's File entries after target's */
     918          153 :     target->numFiles = newNumFiles;
     919              : 
     920          153 :     return startBlock;
     921              : }
     922              : 
     923              : /*
     924              :  * BufFileTruncateFileSet --- truncate a BufFile created by
     925              :  * BufFileCreateFileSet so that it ends at the given fileno and offset.
     926              :  */
     927              : void
     928            9 : BufFileTruncateFileSet(BufFile *file, int fileno, pgoff_t offset)
     929              : {
     930            9 :     int         numFiles = file->numFiles;
     931            9 :     int         newFile = fileno;
     932            9 :     pgoff_t     newOffset = file->curOffset;
     933              :     char        segment_name[MAXPGPATH];
     934              :     int         i;
     935              : 
     936              :     /*
     937              :      * Loop backwards over the segment files down to the given fileno.  Remove
     938              :      * every file beyond fileno, and truncate the given file to the offset.
     939              :      * If the offset is 0 we remove the given file too, unless it is the first
     940              :      * file (segment 0), which is always truncated rather than removed.
     941              :      */
     942           18 :     for (i = file->numFiles - 1; i >= fileno; i--)
     943              :     {
     944            9 :         if ((i != fileno || offset == 0) && i != 0)
     945              :         {
     946            0 :             FileSetSegmentName(segment_name, file->name, i);
     947            0 :             FileClose(file->files[i]);
     948            0 :             if (!FileSetDelete(file->fileset, segment_name, true))
     949            0 :                 ereport(ERROR,
     950              :                         (errcode_for_file_access(),
     951              :                          errmsg("could not delete fileset \"%s\": %m",
     952              :                                 segment_name)));
     953            0 :             numFiles--;
     954            0 :             newOffset = MAX_PHYSICAL_FILESIZE;  /* new end is the end of the preceding segment */
     955              : 
     956              :             /*
     957              :              * This is required to indicate that we have deleted the given
     958              :              * fileno.
     959              :              */
     960            0 :             if (i == fileno)
     961            0 :                 newFile--;
     962              :         }
     963              :         else
     964              :         {
     965            9 :             if (FileTruncate(file->files[i], offset,
     966              :                              WAIT_EVENT_BUFFILE_TRUNCATE) < 0)
     967            0 :                 ereport(ERROR,
     968              :                         (errcode_for_file_access(),
     969              :                          errmsg("could not truncate file \"%s\": %m",
     970              :                                 FilePathName(file->files[i]))));
     971            9 :             newOffset = offset;
     972              :         }
     973              :     }
     974              : 
     975            9 :     file->numFiles = numFiles;
     976              : 
     977              :     /*
     978              :      * If the truncate point is within existing buffer then we can just adjust
     979              :      * pos within buffer.
     980              :      */
     981            9 :     if (newFile == file->curFile &&
     982            9 :         newOffset >= file->curOffset &&
     983            9 :         newOffset <= file->curOffset + file->nbytes)
     984              :     {
     985              :         /* Pull pos back only if it now lies at or beyond the new end. */
     986            0 :         if (newOffset <= file->curOffset + file->pos)
     987            0 :             file->pos = (int64) newOffset - file->curOffset;
     988              : 
     989              :         /* Adjust the nbytes for the current buffer. */
     990            0 :         file->nbytes = (int64) newOffset - file->curOffset;
     991              :     }
     992            9 :     else if (newFile == file->curFile &&
     993            9 :              newOffset < file->curOffset)
     994              :     {
     995              :         /*
     996              :          * The truncate point is within the existing file but prior to the
     997              :          * current position, so we can forget the current buffer and reset the
     998              :          * current position.
     999              :          */
    1000            0 :         file->curOffset = newOffset;
    1001            0 :         file->pos = 0;
    1002            0 :         file->nbytes = 0;
    1003              :     }
    1004            9 :     else if (newFile < file->curFile)
    1005              :     {
    1006              :         /*
    1007              :          * The truncate point is prior to the current file, so need to reset
    1008              :          * the current position accordingly.
    1009              :          */
    1010            0 :         file->curFile = newFile;
    1011            0 :         file->curOffset = newOffset;
    1012            0 :         file->pos = 0;
    1013            0 :         file->nbytes = 0;
    1014              :     }
    1015              :     /* Nothing to do, if the truncate point is beyond current file. */
    1016            9 : }
        

Generated by: LCOV version 2.0-1