LCOV - code coverage report
Current view: top level - src/backend/storage/file - buffile.c (source / functions) Hit Total Coverage
Test: PostgreSQL 15devel Lines: 226 289 78.2 %
Date: 2021-09-17 15:07:27 Functions: 21 22 95.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * buffile.c
       4             :  *    Management of large buffered temporary files.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/storage/file/buffile.c
      11             :  *
      12             :  * NOTES:
      13             :  *
      14             :  * BufFiles provide a very incomplete emulation of stdio atop virtual Files
      15             :  * (as managed by fd.c).  Currently, we only support the buffered-I/O
      16             :  * aspect of stdio: a read or write of the low-level File occurs only
      17             :  * when the buffer is filled or emptied.  This is an even bigger win
      18             :  * for virtual Files than for ordinary kernel files, since reducing the
      19             :  * frequency with which a virtual File is touched reduces "thrashing"
      20             :  * of opening/closing file descriptors.
      21             :  *
      22             :  * Note that BufFile structs are allocated with palloc(), and therefore
      23             :  * will go away automatically at query/transaction end.  Since the underlying
      24             :  * virtual Files are made with OpenTemporaryFile, all resources for
      25             :  * the file are certain to be cleaned up even if processing is aborted
      26             :  * by ereport(ERROR).  The data structures required are made in the
      27             :  * palloc context that was current when the BufFile was created, and
      28             :  * any external resources such as temp files are owned by the ResourceOwner
      29             :  * that was current at that time.
      30             :  *
      31             :  * BufFile also supports temporary files that exceed the OS file size limit
      32             :  * (by opening multiple fd.c temporary files).  This is an essential feature
      33             :  * for sorts and hashjoins on large amounts of data.
      34             :  *
      35             :  * BufFile supports temporary files that can be shared with other backends, as
      36             :  * infrastructure for parallel execution.  Such files need to be created as a
      37             :  * member of a SharedFileSet that all participants are attached to.
      38             :  *
      39             :  * BufFile also supports temporary files that are used by a single backend
      40             :  * when the corresponding files need to survive across transactions and
      41             :  * need to be opened and closed multiple times.  Such files need to be created
      42             :  * as a member of a FileSet.
      43             :  *-------------------------------------------------------------------------
      44             :  */
      45             : 
      46             : #include "postgres.h"
      47             : 
      48             : #include "commands/tablespace.h"
      49             : #include "executor/instrument.h"
      50             : #include "miscadmin.h"
      51             : #include "pgstat.h"
      52             : #include "storage/buf_internals.h"
      53             : #include "storage/buffile.h"
      54             : #include "storage/fd.h"
      55             : #include "utils/resowner.h"
      56             : 
      57             : /*
      58             :  * We break BufFiles into gigabyte-sized segments, regardless of RELSEG_SIZE.
      59             :  * The reason is that we'd like large BufFiles to be spread across multiple
      60             :  * tablespaces when available.
      61             :  */
      62             : #define MAX_PHYSICAL_FILESIZE   0x40000000  /* 1 GB: size limit of one segment file, in bytes */
      63             : #define BUFFILE_SEG_SIZE        (MAX_PHYSICAL_FILESIZE / BLCKSZ)    /* the same limit, in BLCKSZ units */
      64             : 
      65             : /*
      66             :  * This data structure represents a buffered file that consists of one or
      67             :  * more physical files (each accessed through a virtual file descriptor
      68             :  * managed by fd.c).
      69             :  */
      70             : struct BufFile
      71             : {
      72             :     int         numFiles;       /* number of physical (segment) files in files[] */
      73             :     /* all files except the last have length exactly MAX_PHYSICAL_FILESIZE */
      74             :     File       *files;          /* palloc'd array with numFiles entries */
      75             : 
      76             :     bool        isInterXact;    /* keep temp files past end of transaction? */
      77             :     bool        dirty;          /* does buffer hold data not yet written out? */
      78             :     bool        readOnly;       /* writes disallowed (exported or opened O_RDONLY)? */
      79             : 
      80             :     FileSet    *fileset;        /* FileSet holding the segment files; NULL if not fileset based */
      81             :     const char *name;           /* name within the fileset; NULL if not fileset based */
      82             : 
      83             :     /*
      84             :      * resowner is the ResourceOwner to use for underlying temp files.  (We
      85             :      * don't need to remember the memory context we're using explicitly,
      86             :      * because after creation we only repalloc our arrays larger.)
      87             :      */
      88             :     ResourceOwner resowner;
      89             : 
      90             :     /*
      91             :      * "current pos" is the position of the start of the buffer within the
      92             :      * logical file.  Position as seen by user of BufFile is (curFile, curOffset + pos).
      93             :      */
      94             :     int         curFile;        /* index into files[] (0..numFiles-1) part of current pos */
      95             :     off_t       curOffset;      /* offset within that file, part of current pos */
      96             :     int         pos;            /* next read/write position in buffer */
      97             :     int         nbytes;         /* total # of valid bytes in buffer */
      98             :     PGAlignedBlock buffer;      /* the I/O buffer; buffer.data[0 .. nbytes-1] is valid */
      99             : };
     100             : /* Forward declarations of the file-local (static) helper routines. */
     101             : static BufFile *makeBufFileCommon(int nfiles);
     102             : static BufFile *makeBufFile(File firstfile);
     103             : static void extendBufFile(BufFile *file);
     104             : static void BufFileLoadBuffer(BufFile *file);
     105             : static void BufFileDumpBuffer(BufFile *file);
     106             : static void BufFileFlush(BufFile *file);
     107             : static File MakeNewFileSetSegment(BufFile *file, int segment);
     108             : 
     109             : /*
     110             :  * Allocate a BufFile and initialize the fields that are common to all callers.
     111             :  */
     112             : static BufFile *
     113        5280 : makeBufFileCommon(int nfiles)
     114             : {
     115        5280 :     BufFile    *file = (BufFile *) palloc(sizeof(BufFile));
     116             :     /* files, readOnly, fileset and name are not set here; each caller fills them in */
     117        5280 :     file->numFiles = nfiles;
     118        5280 :     file->isInterXact = false;
     119        5280 :     file->dirty = false;
     120        5280 :     file->resowner = CurrentResourceOwner;  /* owner to use for the underlying temp files */
     121        5280 :     file->curFile = 0;
     122        5280 :     file->curOffset = 0L;
     123        5280 :     file->pos = 0;
     124        5280 :     file->nbytes = 0;
     125             : 
     126        5280 :     return file;
     127             : }
     128             : 
     129             : /*
     130             :  * Create a BufFile (not fileset based) given its first underlying physical file.
     131             :  * NOTE: isInterXact is left false; the caller must set it if appropriate.
     132             :  */
     133             : static BufFile *
     134        1866 : makeBufFile(File firstfile)
     135             : {
     136        1866 :     BufFile    *file = makeBufFileCommon(1);
     137             :     /* a one-element files[] array holds the initial segment */
     138        1866 :     file->files = (File *) palloc(sizeof(File));
     139        1866 :     file->files[0] = firstfile;
     140        1866 :     file->readOnly = false;
     141        1866 :     file->fileset = NULL;       /* marks the BufFile as not fileset based */
     142        1866 :     file->name = NULL;
     143             : 
     144        1866 :     return file;
     145             : }
     146             : 
     147             : /*
     148             :  * Add another component temp file, growing the files[] array by one entry.
     149             :  */
     150             : static void
     151           0 : extendBufFile(BufFile *file)
     152             : {
     153             :     File        pfile;
     154             :     ResourceOwner oldowner;
     155             : 
     156             :     /* Be sure to associate the file with the BufFile's resource owner */
     157           0 :     oldowner = CurrentResourceOwner;
     158           0 :     CurrentResourceOwner = file->resowner;
     159             :     /* plain temp file, or else the next numbered segment in the FileSet */
     160           0 :     if (file->fileset == NULL)
     161           0 :         pfile = OpenTemporaryFile(file->isInterXact);
     162             :     else
     163           0 :         pfile = MakeNewFileSetSegment(file, file->numFiles);
     164             : 
     165             :     Assert(pfile >= 0);
     166             : 
     167           0 :     CurrentResourceOwner = oldowner;    /* restore the caller's resource owner */
     168             :     /* make room for one more entry and record the new segment as the last one */
     169           0 :     file->files = (File *) repalloc(file->files,
     170           0 :                                     (file->numFiles + 1) * sizeof(File));
     171           0 :     file->files[file->numFiles] = pfile;
     172           0 :     file->numFiles++;
     173           0 : }
     174             : 
     175             : /*
     176             :  * Create a BufFile for a new temporary file (which will expand to become
     177             :  * multiple temporary files if more than MAX_PHYSICAL_FILESIZE bytes are
     178             :  * written to it).
     179             :  *
     180             :  * If interXact is true, the temp file will not be automatically deleted
     181             :  * at end of transaction.
     182             :  *
     183             :  * Note: if interXact is true, the caller must be calling us in a memory
     184             :  * context, and with a resource owner, that will survive across
     185             :  * transaction boundaries.
     186             :  */
     187             : BufFile *
     188        1866 : BufFileCreateTemp(bool interXact)
     189             : {
     190             :     BufFile    *file;
     191             :     File        pfile;
     192             : 
     193             :     /*
     194             :      * Ensure that temp tablespaces are set up for OpenTemporaryFile to use.
     195             :      * Possibly the caller will have done this already, but it seems useful to
     196             :      * double-check here.  Failure to do this at all would result in the temp
     197             :      * files always getting placed in the default tablespace, which is a
     198             :      * pretty hard-to-detect bug.  Callers may prefer to do it earlier if they
     199             :      * want to be sure that any required catalog access is done in some other
     200             :      * resource context.
     201             :      */
     202        1866 :     PrepareTempTablespaces();
     203             : 
     204        1866 :     pfile = OpenTemporaryFile(interXact);
     205             :     Assert(pfile >= 0);
     206             :     /* makeBufFile leaves isInterXact false, so record the real value afterwards */
     207        1866 :     file = makeBufFile(pfile);
     208        1866 :     file->isInterXact = interXact;
     209             : 
     210        1866 :     return file;
     211             : }
     212             : 
     213             : /*
     214             :  * Build the segment file name "<buffile_name>.<segment>" into name (MAXPGPATH bytes).
     215             :  */
     216             : static void
     217        8010 : FileSetSegmentName(char *name, const char *buffile_name, int segment)
     218             : {
     219        8010 :     snprintf(name, MAXPGPATH, "%s.%d", buffile_name, segment);  /* output is bounded by MAXPGPATH */
     220        8010 : }
     221             : 
     222             : /*
     223             :  * Create segment number "segment" of a fileset based BufFile and return its File.
     224             :  */
     225             : static File
     226        1330 : MakeNewFileSetSegment(BufFile *buffile, int segment)
     227             : {
     228             :     char        name[MAXPGPATH];
     229             :     File        file;
     230             : 
     231             :     /*
     232             :      * It is possible that there are files left over from before a crash
     233             :      * restart with the same name.  In order for BufFileOpenFileSet() not to
     234             :      * get confused about how many segments there are, we'll unlink the next
     235             :      * segment number if it already exists.
     236             :      */
     237        1330 :     FileSetSegmentName(name, buffile->name, segment + 1);
     238        1330 :     FileSetDelete(buffile->fileset, name, true);
     239             : 
     240             :     /* Create the new segment, reusing the name buffer for its own name. */
     241        1330 :     FileSetSegmentName(name, buffile->name, segment);
     242        1330 :     file = FileSetCreate(buffile->fileset, name);
     243             : 
     244             :     /* FileSetCreate would've errored out */
     245             :     Assert(file > 0);
     246             : 
     247        1330 :     return file;
     248             : }
     249             : 
     250             : /*
     251             :  * Create a BufFile that can be discovered and opened read-only by other
     252             :  * backends that are attached to the same SharedFileSet using the same name.
     253             :  *
     254             :  * The naming scheme for fileset based BufFiles is left up to the calling code.
     255             :  * The name will appear as part of one or more filenames on disk, and might
     256             :  * provide clues to administrators about which subsystem is generating
     257             :  * temporary file data.  Since each SharedFileSet object is backed by one or
     258             :  * more uniquely named temporary directories, names don't conflict with
     259             :  * unrelated SharedFileSet objects.
     260             :  */
     261             : BufFile *
     262        1330 : BufFileCreateFileSet(FileSet *fileset, const char *name)
     263             : {
     264             :     BufFile    *file;
     265             : 
     266        1330 :     file = makeBufFileCommon(1);
     267        1330 :     file->fileset = fileset;
     268        1330 :     file->name = pstrdup(name);     /* the BufFile keeps its own copy of the name */
     269        1330 :     file->files = (File *) palloc(sizeof(File));
     270        1330 :     file->files[0] = MakeNewFileSetSegment(file, 0);    /* reads file->fileset and file->name */
     271        1330 :     file->readOnly = false;
     272             : 
     273        1330 :     return file;
     274             : }
     275             : 
     276             : /*
     277             :  * Open a file that was previously created in another backend (or this one)
     278             :  * with BufFileCreateFileSet in the same FileSet using the same name.
     279             :  * The backend that created the file must have called BufFileClose() or
     280             :  * BufFileExportFileSet() to make sure that it is ready to be opened by other
     281             :  * backends and render it read-only.  The result is marked read-only only if
     282             :  * mode is O_RDONLY.  If missing_ok is true, return NULL when no BufFile with
     283             :  * the given name is found; otherwise, throw an error in that case.
     284             :  */
     285             : BufFile *
     286        2602 : BufFileOpenFileSet(FileSet *fileset, const char *name, int mode,
     287             :                    bool missing_ok)
     288             : {
     289             :     BufFile    *file;
     290             :     char        segment_name[MAXPGPATH];
     291        2602 :     Size        capacity = 16;
     292             :     File       *files;
     293        2602 :     int         nfiles = 0;
     294             : 
     295        2602 :     files = palloc(sizeof(File) * capacity);
     296             : 
     297             :     /*
     298             :      * We don't know how many segments there are, so we'll probe the
     299             :      * filesystem to find out.
     300             :      */
     301             :     for (;;)
     302             :     {
     303             :         /* Grow the segment array (by doubling) if the next entry would not fit. */
     304        4686 :         if (nfiles + 1 > capacity)
     305             :         {
     306           0 :             capacity *= 2;
     307           0 :             files = repalloc(files, sizeof(File) * capacity);
     308             :         }
     309             :         /* Try to open the next segment; the first failure ends the probe. */
     310        4686 :         FileSetSegmentName(segment_name, name, nfiles);
     311        4686 :         files[nfiles] = FileSetOpen(fileset, segment_name, mode);
     312        4686 :         if (files[nfiles] <= 0)
     313        2602 :             break;
     314        2084 :         ++nfiles;
     315             : 
     316        2084 :         CHECK_FOR_INTERRUPTS();
     317             :     }
     318             : 
     319             :     /*
     320             :      * If we didn't find any files at all, then no BufFile exists with this
     321             :      * name.
     322             :      */
     323        2602 :     if (nfiles == 0)
     324             :     {
     325             :         /* free the (unused) segment array */
     326         518 :         pfree(files);
     327             : 
     328         518 :         if (missing_ok)
     329         518 :             return NULL;
     330             :         /* segment_name still holds the name of segment 0, whose open failed */
     331           0 :         ereport(ERROR,
     332             :                 (errcode_for_file_access(),
     333             :                  errmsg("could not open temporary file \"%s\" from BufFile \"%s\": %m",
     334             :                         segment_name, name)));
     335             :     }
     336             :     /* the probed files[] array is handed to the BufFile as-is, not copied */
     337        2084 :     file = makeBufFileCommon(nfiles);
     338        2084 :     file->files = files;
     339        2084 :     file->readOnly = (mode == O_RDONLY);
     340        2084 :     file->fileset = fileset;
     341        2084 :     file->name = pstrdup(name);
     342             : 
     343        2084 :     return file;
     344             : }
     345             : 
     346             : /*
     347             :  * Delete a BufFile that was created by BufFileCreateFileSet in the given
     348             :  * FileSet using the given name.
     349             :  *
     350             :  * It is not necessary to delete files explicitly with this function.  It is
     351             :  * provided only as a way to delete files proactively, rather than waiting for
     352             :  * the FileSet to be cleaned up.
     353             :  *
     354             :  * Only one backend should attempt to delete a given name, and it should know
     355             :  * that the file exists and has been exported or closed; otherwise, missing_ok
     356             :  * should be passed true.
     357             :  */
     358             : void
     359         608 : BufFileDeleteFileSet(FileSet *fileset, const char *name, bool missing_ok)
     360             : {
     361             :     char        segment_name[MAXPGPATH];
     362         608 :     int         segment = 0;
     363         608 :     bool        found = false;
     364             : 
     365             :     /*
     366             :      * We don't know how many segments the file has.  We'll keep deleting
     367             :      * until we run out.  If we don't manage to find even an initial segment,
     368             :      * raise an error unless missing_ok is true.
     369             :      */
     370             :     for (;;)
     371             :     {
     372         664 :         FileSetSegmentName(segment_name, name, segment);
     373         664 :         if (!FileSetDelete(fileset, segment_name, true))
     374         608 :             break;              /* FileSetDelete returned false: no further segments */
     375          56 :         found = true;
     376          56 :         ++segment;
     377             : 
     378          56 :         CHECK_FOR_INTERRUPTS();
     379             :     }
     380             : 
     381         608 :     if (!found && !missing_ok)
     382           0 :         elog(ERROR, "could not delete unknown BufFile \"%s\"", name);
     383         608 : }
     384             : 
     385             : /*
     386             :  * BufFileExportFileSet --- flush and make read-only, in preparation for sharing.
     387             :  */
     388             : void
     389         280 : BufFileExportFileSet(BufFile *file)
     390             : {
     391             :     /* Must be a file belonging to a FileSet. */
     392             :     Assert(file->fileset != NULL);
     393             : 
     394             :     /* It's probably a bug if someone calls this twice. */
     395             :     Assert(!file->readOnly);
     396             : 
     397         280 :     BufFileFlush(file);
     398         280 :     file->readOnly = true;      /* further BufFileWrite calls would trip its Assert */
     399         280 : }
     400             : 
     401             : /*
     402             :  * Close a BufFile.  Like fclose(), this flushes unwritten data and implicitly
     403             :  * FileCloses every underlying File.  NOTE(review): file->name (pstrdup'd for
     404             :  * fileset based BufFiles) is not pfree'd here -- confirm that is intentional.
     405             :  */
     406             : void
     407        5176 : BufFileClose(BufFile *file)
     408             : {
     409             :     int         i;
     410             : 
     411             :     /* flush any unwritten data */
     412        5176 :     BufFileFlush(file);
     413             :     /* close the underlying file(s) */
     414       10448 :     for (i = 0; i < file->numFiles; i++)
     415        5272 :         FileClose(file->files[i]);
     416             :     /* release the segment array and the BufFile itself (the buffer is embedded in it) */
     417        5176 :     pfree(file->files);
     418        5176 :     pfree(file);
     419        5176 : }
     420             : 
     421             : /*
     422             :  * BufFileLoadBuffer
     423             :  *
     424             :  * Load some data into buffer, if possible, starting from curOffset.
     425             :  * At call, must have dirty = false, pos and nbytes = 0.
     426             :  * On exit, nbytes is number of bytes loaded (0 if nothing could be read).
     427             :  */
     428             : static void
     429       70372 : BufFileLoadBuffer(BufFile *file)
     430             : {
     431             :     File        thisfile;
     432             : 
     433             :     /*
     434             :      * Advance to next component file if necessary and possible (i.e. one exists).
     435             :      */
     436       70372 :     if (file->curOffset >= MAX_PHYSICAL_FILESIZE &&
     437           0 :         file->curFile + 1 < file->numFiles)
     438             :     {
     439           0 :         file->curFile++;
     440           0 :         file->curOffset = 0L;
     441             :     }
     442             : 
     443             :     /*
     444             :      * Read whatever we can get, up to a full bufferload.
     445             :      */
     446       70372 :     thisfile = file->files[file->curFile];
     447      211116 :     file->nbytes = FileRead(thisfile,
     448       70372 :                             file->buffer.data,
     449             :                             sizeof(file->buffer),
     450             :                             file->curOffset,
     451             :                             WAIT_EVENT_BUFFILE_READ);
     452       70372 :     if (file->nbytes < 0)
     453             :     {
     454           0 :         file->nbytes = 0;       /* don't leave a negative byte count behind */
     455           0 :         ereport(ERROR,
     456             :                 (errcode_for_file_access(),
     457             :                  errmsg("could not read file \"%s\": %m",
     458             :                         FilePathName(thisfile))));
     459             :     }
     460             : 
     461             :     /* we choose not to advance curOffset here; BufFileRead adds pos to it later */
     462             :     /* count a temp block read only if we actually got some data */
     463       70372 :     if (file->nbytes > 0)
     464       68766 :         pgBufferUsage.temp_blks_read++;
     465       70372 : }
     466             : 
     467             : /*
     468             :  * BufFileDumpBuffer
     469             :  *
     470             :  * Dump buffer contents starting at curOffset.
     471             :  * At call, should have dirty = true, nbytes > 0.
     472             :  * On successful write, dirty is cleared, curOffset is advanced, pos = nbytes = 0.
     473             :  */
     474             : static void
     475       85874 : BufFileDumpBuffer(BufFile *file)
     476             : {
     477       85874 :     int         wpos = 0;
     478             :     int         bytestowrite;
     479             :     File        thisfile;
     480             : 
     481             :     /*
     482             :      * Unlike BufFileLoadBuffer, we must dump the whole buffer even if it
     483             :      * crosses a component-file boundary; so we need a loop.
     484             :      */
     485      171748 :     while (wpos < file->nbytes)
     486             :     {
     487             :         off_t       availbytes;
     488             : 
     489             :         /*
     490             :          * Advance to next component file if necessary, creating it if needed.
     491             :          */
     492       85874 :         if (file->curOffset >= MAX_PHYSICAL_FILESIZE)
     493             :         {
     494           0 :             while (file->curFile + 1 >= file->numFiles)
     495           0 :                 extendBufFile(file);
     496           0 :             file->curFile++;
     497           0 :             file->curOffset = 0L;
     498             :         }
     499             : 
     500             :         /*
     501             :          * Determine how much we need to write into this file.
     502             :          */
     503       85874 :         bytestowrite = file->nbytes - wpos;
     504       85874 :         availbytes = MAX_PHYSICAL_FILESIZE - file->curOffset;
     505             :         /* clamp the write so it does not run past the segment size limit */
     506       85874 :         if ((off_t) bytestowrite > availbytes)
     507           0 :             bytestowrite = (int) availbytes;
     508             : 
     509       85874 :         thisfile = file->files[file->curFile];
     510      171748 :         bytestowrite = FileWrite(thisfile,
     511       85874 :                                  file->buffer.data + wpos,
     512             :                                  bytestowrite,
     513             :                                  file->curOffset,
     514             :                                  WAIT_EVENT_BUFFILE_WRITE);
     515       85874 :         if (bytestowrite <= 0)  /* bytestowrite now holds FileWrite's result */
     516           0 :             ereport(ERROR,
     517             :                     (errcode_for_file_access(),
     518             :                      errmsg("could not write to file \"%s\": %m",
     519             :                             FilePathName(thisfile))));
     520       85874 :         file->curOffset += bytestowrite;
     521       85874 :         wpos += bytestowrite;
     522             :         /* the counter is bumped once per FileWrite call */
     523       85874 :         pgBufferUsage.temp_blks_written++;
     524             :     }
     525       85874 :     file->dirty = false;
     526             : 
     527             :     /*
     528             :      * At this point, curOffset has been advanced to the end of the buffer,
     529             :      * ie, its original value + nbytes.  We need to make it point to the
     530             :      * logical file position, ie, original value + pos, in case that is less
     531             :      * (as could happen due to a small backwards seek in a dirty buffer!)
     532             :      */
     533       85874 :     file->curOffset -= (file->nbytes - file->pos);
     534       85874 :     if (file->curOffset < 0)  /* handle possible segment crossing */
     535             :     {
     536           0 :         file->curFile--;
     537             :         Assert(file->curFile >= 0);
     538           0 :         file->curOffset += MAX_PHYSICAL_FILESIZE;
     539             :     }
     540             : 
     541             :     /*
     542             :      * Now we can set the buffer empty without changing the logical position
     543             :      */
     544       85874 :     file->pos = 0;
     545       85874 :     file->nbytes = 0;
     546       85874 : }
     547             : 
     548             : /*
     549             :  * BufFileRead
     550             :  *
     551             :  * Like fread() except we assume 1-byte element size and report I/O errors via
     552             :  * ereport().  Returns the number of bytes read; fewer than size if data ran out.
     553             :  */
     554             : size_t
     555    21679460 : BufFileRead(BufFile *file, void *ptr, size_t size)
     556             : {
     557    21679460 :     size_t      nread = 0;
     558             :     size_t      nthistime;
     559             :     /* NOTE(review): presumably writes out a dirty buffer first -- see BufFileFlush */
     560    21679460 :     BufFileFlush(file);
     561             : 
     562    43374780 :     while (size > 0)
     563             :     {
     564    21696926 :         if (file->pos >= file->nbytes)
     565             :         {
     566             :             /* Buffer exhausted: step past it and try to load more data. */
     567       70372 :             file->curOffset += file->pos;
     568       70372 :             file->pos = 0;
     569       70372 :             file->nbytes = 0;
     570       70372 :             BufFileLoadBuffer(file);
     571       70372 :             if (file->nbytes <= 0)
     572        1606 :                 break;          /* no more data available */
     573             :         }
     574             :         /* copy what the buffer still holds, but no more than was requested */
     575    21695320 :         nthistime = file->nbytes - file->pos;
     576    21695320 :         if (nthistime > size)
     577    21628466 :             nthistime = size;
     578             :         Assert(nthistime > 0);
     579             : 
     580    21695320 :         memcpy(ptr, file->buffer.data + file->pos, nthistime);
     581             : 
     582    21695320 :         file->pos += nthistime;
     583    21695320 :         ptr = (void *) ((char *) ptr + nthistime);
     584    21695320 :         size -= nthistime;
     585    21695320 :         nread += nthistime;
     586             :     }
     587             : 
     588    21679460 :     return nread;
     589             : }
     590             : 
     591             : /*
     592             :  * BufFileWrite
     593             :  *
     594             :  * Like fwrite() except we assume 1-byte element size and report errors via
     595             :  * ereport().  Data is buffered; a dirty buffer is dumped once pos reaches BLCKSZ.
     596             :  */
     597             : void
     598    27754296 : BufFileWrite(BufFile *file, void *ptr, size_t size)
     599             : {
     600             :     size_t      nthistime;
     601             : 
     602             :     Assert(!file->readOnly);
     603             : 
     604    55543168 :     while (size > 0)
     605             :     {
     606    27788872 :         if (file->pos >= BLCKSZ)
     607             :         {
     608             :             /* Buffer full, dump it out */
     609       57968 :             if (file->dirty)
     610       57596 :                 BufFileDumpBuffer(file);
     611             :             else
     612             :             {
     613             :                 /* Hmm, went directly from reading to writing?  Just step past the clean buffer. */
     614         372 :                 file->curOffset += file->pos;
     615         372 :                 file->pos = 0;
     616         372 :                 file->nbytes = 0;
     617             :             }
     618             :         }
     619             :         /* copy as much as fits in the rest of the buffer, but no more than is left */
     620    27788872 :         nthistime = BLCKSZ - file->pos;
     621    27788872 :         if (nthistime > size)
     622    27705490 :             nthistime = size;
     623             :         Assert(nthistime > 0);
     624             : 
     625    27788872 :         memcpy(file->buffer.data + file->pos, ptr, nthistime);
     626             : 
     627    27788872 :         file->dirty = true;
     628    27788872 :         file->pos += nthistime;
     629    27788872 :         if (file->nbytes < file->pos)   /* extend the valid-data mark if we wrote past it */
     630    27786340 :             file->nbytes = file->pos;
     631    27788872 :         ptr = (void *) ((char *) ptr + nthistime);
     632    27788872 :         size -= nthistime;
     633             :     }
     634    27754296 : }
     635             : 
     636             : /*
     637             :  * BufFileFlush
     638             :  *
     639             :  * Like fflush(), except that I/O errors are reported with ereport().
     640             :  */
     641             : static void
     642    21724028 : BufFileFlush(BufFile *file)
     643             : {
     644    21724028 :     if (file->dirty)
     645       28278 :         BufFileDumpBuffer(file);
     646             : 
     647             :     Assert(!file->dirty);
     648    21724028 : }
     649             : 
     650             : /*
     651             :  * BufFileSeek
     652             :  *
     653             :  * Like fseek(), except that target position needs two values in order to
     654             :  * work when logical filesize exceeds maximum value representable by off_t.
     655             :  * We do not support relative seeks across more than that, however.
     656             :  * I/O errors are reported by ereport().
     657             :  *
     658             :  * Result is 0 if OK, EOF if not.  Logical position is not moved if an
     659             :  * impossible seek is attempted.
     660             :  */
     661             : int
     662       80456 : BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
     663             : {
     664             :     int         newFile;
     665             :     off_t       newOffset;
     666             : 
     667       80456 :     switch (whence)
     668             :     {
     669       79840 :         case SEEK_SET:
     670       79840 :             if (fileno < 0)
     671           0 :                 return EOF;
     672       79840 :             newFile = fileno;
     673       79840 :             newOffset = offset;
     674       79840 :             break;
     675          20 :         case SEEK_CUR:
     676             : 
     677             :             /*
     678             :              * Relative seek considers only the signed offset, ignoring
     679             :              * fileno. Note that large offsets (> 1 GB) risk overflow in this
     680             :              * add, unless we have 64-bit off_t.
     681             :              */
     682          20 :             newFile = file->curFile;
     683          20 :             newOffset = (file->curOffset + file->pos) + offset;
     684          20 :             break;
     685         596 :         case SEEK_END:
     686             : 
     687             :             /*
     688             :              * The file size of the last file gives us the end offset of that
     689             :              * file.
     690             :              */
     691         596 :             newFile = file->numFiles - 1;
     692         596 :             newOffset = FileSize(file->files[file->numFiles - 1]);
     693         596 :             if (newOffset < 0)
     694           0 :                 ereport(ERROR,
     695             :                         (errcode_for_file_access(),
     696             :                          errmsg("could not determine size of temporary file \"%s\" from BufFile \"%s\": %m",
     697             :                                 FilePathName(file->files[file->numFiles - 1]),
     698             :                                 file->name)));
     699         596 :             break;
     700           0 :         default:
     701           0 :             elog(ERROR, "invalid whence: %d", whence);
     702             :             return EOF;
     703             :     }
     704       80456 :     while (newOffset < 0)
     705             :     {
     706           0 :         if (--newFile < 0)
     707           0 :             return EOF;
     708           0 :         newOffset += MAX_PHYSICAL_FILESIZE;
     709             :     }
     710       80456 :     if (newFile == file->curFile &&
     711       80360 :         newOffset >= file->curOffset &&
     712       59630 :         newOffset <= file->curOffset + file->nbytes)
     713             :     {
     714             :         /*
     715             :          * Seek is to a point within existing buffer; we can just adjust
     716             :          * pos-within-buffer, without flushing buffer.  Note this is OK
     717             :          * whether reading or writing, but buffer remains dirty if we were
     718             :          * writing.
     719             :          */
     720       41344 :         file->pos = (int) (newOffset - file->curOffset);
     721       41344 :         return 0;
     722             :     }
     723             :     /* Otherwise, must reposition buffer, so flush any dirty data */
     724       39112 :     BufFileFlush(file);
     725             : 
     726             :     /*
     727             :      * At this point and no sooner, check for seek past last segment. The
     728             :      * above flush could have created a new segment, so checking sooner would
     729             :      * not work (at least not with this code).
     730             :      */
     731             : 
     732             :     /* convert seek to "start of next seg" to "end of last seg" */
     733       39112 :     if (newFile == file->numFiles && newOffset == 0)
     734             :     {
     735           0 :         newFile--;
     736           0 :         newOffset = MAX_PHYSICAL_FILESIZE;
     737             :     }
     738       39112 :     while (newOffset > MAX_PHYSICAL_FILESIZE)
     739             :     {
     740           0 :         if (++newFile >= file->numFiles)
     741           0 :             return EOF;
     742           0 :         newOffset -= MAX_PHYSICAL_FILESIZE;
     743             :     }
     744       39112 :     if (newFile >= file->numFiles)
     745           0 :         return EOF;
     746             :     /* Seek is OK! */
     747       39112 :     file->curFile = newFile;
     748       39112 :     file->curOffset = newOffset;
     749       39112 :     file->pos = 0;
     750       39112 :     file->nbytes = 0;
     751       39112 :     return 0;
     752             : }
     753             : 
     754             : void
     755         200 : BufFileTell(BufFile *file, int *fileno, off_t *offset)
     756             : {
     757         200 :     *fileno = file->curFile;
     758         200 :     *offset = file->curOffset + file->pos;
     759         200 : }
     760             : 
     761             : /*
     762             :  * BufFileSeekBlock --- block-oriented seek
     763             :  *
     764             :  * Performs absolute seek to the start of the n'th BLCKSZ-sized block of
     765             :  * the file.  Note that users of this interface will fail if their files
     766             :  * exceed BLCKSZ * LONG_MAX bytes, but that is quite a lot; we don't work
     767             :  * with tables bigger than that, either...
     768             :  *
     769             :  * Result is 0 if OK, EOF if not.  Logical position is not moved if an
     770             :  * impossible seek is attempted.
     771             :  */
     772             : int
     773       78106 : BufFileSeekBlock(BufFile *file, long blknum)
     774             : {
     775      234318 :     return BufFileSeek(file,
     776       78106 :                        (int) (blknum / BUFFILE_SEG_SIZE),
     777       78106 :                        (off_t) (blknum % BUFFILE_SEG_SIZE) * BLCKSZ,
     778             :                        SEEK_SET);
     779             : }
     780             : 
     781             : #ifdef NOT_USED
     782             : /*
     783             :  * BufFileTellBlock --- block-oriented tell
     784             :  *
     785             :  * Any fractional part of a block in the current seek position is ignored.
     786             :  */
     787             : long
     788             : BufFileTellBlock(BufFile *file)
     789             : {
     790             :     long        blknum;
     791             : 
     792             :     blknum = (file->curOffset + file->pos) / BLCKSZ;
     793             :     blknum += file->curFile * BUFFILE_SEG_SIZE;
     794             :     return blknum;
     795             : }
     796             : 
     797             : #endif
     798             : 
     799             : /*
     800             :  * Return the current fileset based BufFile size.
     801             :  *
     802             :  * Counts any holes left behind by BufFileAppend as part of the size.
     803             :  * ereport()s on failure.
     804             :  */
     805             : int64
     806         192 : BufFileSize(BufFile *file)
     807             : {
     808             :     int64       lastFileSize;
     809             : 
     810             :     Assert(file->fileset != NULL);
     811             : 
     812             :     /* Get the size of the last physical file. */
     813         192 :     lastFileSize = FileSize(file->files[file->numFiles - 1]);
     814         192 :     if (lastFileSize < 0)
     815           0 :         ereport(ERROR,
     816             :                 (errcode_for_file_access(),
     817             :                  errmsg("could not determine size of temporary file \"%s\" from BufFile \"%s\": %m",
     818             :                         FilePathName(file->files[file->numFiles - 1]),
     819             :                         file->name)));
     820             : 
     821         192 :     return ((file->numFiles - 1) * (int64) MAX_PHYSICAL_FILESIZE) +
     822             :         lastFileSize;
     823             : }
     824             : 
     825             : /*
     826             :  * Append the contents of source file (managed within fileset) to
     827             :  * end of target file (managed within same fileset).
     828             :  *
     829             :  * Note that operation subsumes ownership of underlying resources from
     830             :  * "source".  Caller should never call BufFileClose against source having
     831             :  * called here first.  Resource owners for source and target must match,
     832             :  * too.
     833             :  *
     834             :  * This operation works by manipulating lists of segment files, so the
     835             :  * file content is always appended at a MAX_PHYSICAL_FILESIZE-aligned
     836             :  * boundary, typically creating empty holes before the boundary.  These
     837             :  * areas do not contain any interesting data, and cannot be read from by
     838             :  * caller.
     839             :  *
     840             :  * Returns the block number within target where the contents of source
     841             :  * begins.  Caller should apply this as an offset when working off block
     842             :  * positions that are in terms of the original BufFile space.
     843             :  */
     844             : long
     845          96 : BufFileAppend(BufFile *target, BufFile *source)
     846             : {
     847          96 :     long        startBlock = target->numFiles * BUFFILE_SEG_SIZE;
     848          96 :     int         newNumFiles = target->numFiles + source->numFiles;
     849             :     int         i;
     850             : 
     851             :     Assert(target->fileset != NULL);
     852             :     Assert(source->readOnly);
     853             :     Assert(!source->dirty);
     854             :     Assert(source->fileset != NULL);
     855             : 
     856          96 :     if (target->resowner != source->resowner)
     857           0 :         elog(ERROR, "could not append BufFile with non-matching resource owner");
     858             : 
     859          96 :     target->files = (File *)
     860          96 :         repalloc(target->files, sizeof(File) * newNumFiles);
     861         192 :     for (i = target->numFiles; i < newNumFiles; i++)
     862          96 :         target->files[i] = source->files[i - target->numFiles];
     863          96 :     target->numFiles = newNumFiles;
     864             : 
     865          96 :     return startBlock;
     866             : }
     867             : 
     868             : /*
     869             :  * Truncate a BufFile created by BufFileCreateFileSet up to the given fileno
     870             :  * and the offset.
     871             :  */
     872             : void
     873          18 : BufFileTruncateFileSet(BufFile *file, int fileno, off_t offset)
     874             : {
     875          18 :     int         numFiles = file->numFiles;
     876          18 :     int         newFile = fileno;
     877          18 :     off_t       newOffset = file->curOffset;
     878             :     char        segment_name[MAXPGPATH];
     879             :     int         i;
     880             : 
     881             :     /*
     882             :      * Loop over all the files up to the given fileno and remove the files
     883             :      * that are greater than the fileno and truncate the given file up to the
     884             :      * offset. Note that we also remove the given fileno if the offset is 0
     885             :      * provided it is not the first file in which we truncate it.
     886             :      */
     887          36 :     for (i = file->numFiles - 1; i >= fileno; i--)
     888             :     {
     889          18 :         if ((i != fileno || offset == 0) && i != 0)
     890             :         {
     891           0 :             FileSetSegmentName(segment_name, file->name, i);
     892           0 :             FileClose(file->files[i]);
     893           0 :             if (!FileSetDelete(file->fileset, segment_name, true))
     894           0 :                 ereport(ERROR,
     895             :                         (errcode_for_file_access(),
     896             :                          errmsg("could not delete fileset \"%s\": %m",
     897             :                                 segment_name)));
     898           0 :             numFiles--;
     899           0 :             newOffset = MAX_PHYSICAL_FILESIZE;
     900             : 
     901             :             /*
     902             :              * This is required to indicate that we have deleted the given
     903             :              * fileno.
     904             :              */
     905           0 :             if (i == fileno)
     906           0 :                 newFile--;
     907             :         }
     908             :         else
     909             :         {
     910          18 :             if (FileTruncate(file->files[i], offset,
     911             :                              WAIT_EVENT_BUFFILE_TRUNCATE) < 0)
     912           0 :                 ereport(ERROR,
     913             :                         (errcode_for_file_access(),
     914             :                          errmsg("could not truncate file \"%s\": %m",
     915             :                                 FilePathName(file->files[i]))));
     916          18 :             newOffset = offset;
     917             :         }
     918             :     }
     919             : 
     920          18 :     file->numFiles = numFiles;
     921             : 
     922             :     /*
     923             :      * If the truncate point is within existing buffer then we can just adjust
     924             :      * pos within buffer.
     925             :      */
     926          18 :     if (newFile == file->curFile &&
     927          18 :         newOffset >= file->curOffset &&
     928          18 :         newOffset <= file->curOffset + file->nbytes)
     929             :     {
     930             :         /* No need to reset the current pos if the new pos is greater. */
     931           0 :         if (newOffset <= file->curOffset + file->pos)
     932           0 :             file->pos = (int) (newOffset - file->curOffset);
     933             : 
     934             :         /* Adjust the nbytes for the current buffer. */
     935           0 :         file->nbytes = (int) (newOffset - file->curOffset);
     936             :     }
     937          18 :     else if (newFile == file->curFile &&
     938          18 :              newOffset < file->curOffset)
     939             :     {
     940             :         /*
     941             :          * The truncate point is within the existing file but prior to the
     942             :          * current position, so we can forget the current buffer and reset the
     943             :          * current position.
     944             :          */
     945           0 :         file->curOffset = newOffset;
     946           0 :         file->pos = 0;
     947           0 :         file->nbytes = 0;
     948             :     }
     949          18 :     else if (newFile < file->curFile)
     950             :     {
     951             :         /*
     952             :          * The truncate point is prior to the current file, so need to reset
     953             :          * the current position accordingly.
     954             :          */
     955           0 :         file->curFile = newFile;
     956           0 :         file->curOffset = newOffset;
     957           0 :         file->pos = 0;
     958           0 :         file->nbytes = 0;
     959             :     }
     960             :     /* Nothing to do, if the truncate point is beyond current file. */
     961          18 : }

Generated by: LCOV version 1.13