LCOV - code coverage report
Current view: top level - src/backend/storage/file - buffile.c (source / functions) Hit Total Coverage
Test: PostgreSQL 14devel Lines: 212 286 74.1 %
Date: 2021-01-26 21:06:51 Functions: 20 22 90.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * buffile.c
       4             :  *    Management of large buffered temporary files.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/storage/file/buffile.c
      11             :  *
      12             :  * NOTES:
      13             :  *
      14             :  * BufFiles provide a very incomplete emulation of stdio atop virtual Files
      15             :  * (as managed by fd.c).  Currently, we only support the buffered-I/O
      16             :  * aspect of stdio: a read or write of the low-level File occurs only
      17             :  * when the buffer is filled or emptied.  This is an even bigger win
      18             :  * for virtual Files than for ordinary kernel files, since reducing the
      19             :  * frequency with which a virtual File is touched reduces "thrashing"
      20             :  * of opening/closing file descriptors.
      21             :  *
      22             :  * Note that BufFile structs are allocated with palloc(), and therefore
      23             :  * will go away automatically at query/transaction end.  Since the underlying
      24             :  * virtual Files are made with OpenTemporaryFile, all resources for
      25             :  * the file are certain to be cleaned up even if processing is aborted
      26             :  * by ereport(ERROR).  The data structures required are made in the
      27             :  * palloc context that was current when the BufFile was created, and
      28             :  * any external resources such as temp files are owned by the ResourceOwner
      29             :  * that was current at that time.
      30             :  *
      31             :  * BufFile also supports temporary files that exceed the OS file size limit
      32             :  * (by opening multiple fd.c temporary files).  This is an essential feature
      33             :  * for sorts and hashjoins on large amounts of data.
      34             :  *
      35             :  * BufFile supports temporary files that can be shared with other backends, as
      36             :  * infrastructure for parallel execution.  Such files need to be created as a
      37             :  * member of a SharedFileSet that all participants are attached to.
      38             :  *
      39             :  * BufFile also supports temporary files that can be used by a single backend
      40             :  * when the corresponding files need to survive across transactions and
      41             :  * need to be opened and closed multiple times.  Such files need to be created
      42             :  * as a member of a SharedFileSet.
      43             :  *-------------------------------------------------------------------------
      44             :  */
      45             : 
      46             : #include "postgres.h"
      47             : 
      48             : #include "commands/tablespace.h"
      49             : #include "executor/instrument.h"
      50             : #include "miscadmin.h"
      51             : #include "pgstat.h"
      52             : #include "storage/buf_internals.h"
      53             : #include "storage/buffile.h"
      54             : #include "storage/fd.h"
      55             : #include "utils/resowner.h"
      56             : 
      57             : /*
      58             :  * We break BufFiles into gigabyte-sized segments, regardless of RELSEG_SIZE.
      59             :  * The reason is that we'd like large BufFiles to be spread across multiple
      60             :  * tablespaces when available.
      61             :  */
      62             : #define MAX_PHYSICAL_FILESIZE   0x40000000
      63             : #define BUFFILE_SEG_SIZE        (MAX_PHYSICAL_FILESIZE / BLCKSZ)
      64             : 
      65             : /*
      66             :  * This data structure represents a buffered file that consists of one or
      67             :  * more physical files (each accessed through a virtual file descriptor
      68             :  * managed by fd.c).
      69             :  */
      70             : struct BufFile
      71             : {
      72             :     int         numFiles;       /* number of physical files in set */
      73             :     /* all files except the last have length exactly MAX_PHYSICAL_FILESIZE */
      74             :     File       *files;          /* palloc'd array with numFiles entries */
      75             : 
      76             :     bool        isInterXact;    /* keep open over transactions? */
      77             :     bool        dirty;          /* does buffer need to be written? */
      78             :     bool        readOnly;       /* has the file been set to read only? */
      79             : 
      80             :     SharedFileSet *fileset;     /* space for segment files if shared */
      81             :     const char *name;           /* name of this BufFile if shared */
      82             : 
      83             :     /*
      84             :      * resowner is the ResourceOwner to use for underlying temp files.  (We
      85             :      * don't need to remember the memory context we're using explicitly,
      86             :      * because after creation we only repalloc our arrays larger.)
      87             :      */
      88             :     ResourceOwner resowner;
      89             : 
      90             :     /*
      91             :      * "current pos" is position of start of buffer within the logical file.
      92             :      * Position as seen by user of BufFile is (curFile, curOffset + pos).
      93             :      */
      94             :     int         curFile;        /* file index (0..n) part of current pos */
      95             :     off_t       curOffset;      /* offset part of current pos */
      96             :     int         pos;            /* next read/write position in buffer */
      97             :     int         nbytes;         /* total # of valid bytes in buffer */
      98             :     PGAlignedBlock buffer;
      99             : };
     100             : 
     101             : static BufFile *makeBufFileCommon(int nfiles);
     102             : static BufFile *makeBufFile(File firstfile);
     103             : static void extendBufFile(BufFile *file);
     104             : static void BufFileLoadBuffer(BufFile *file);
     105             : static void BufFileDumpBuffer(BufFile *file);
     106             : static void BufFileFlush(BufFile *file);
     107             : static File MakeNewSharedSegment(BufFile *file, int segment);
     108             : 
     109             : /*
     110             :  * Create BufFile and perform the common initialization.
     111             :  */
     112             : static BufFile *
     113        5036 : makeBufFileCommon(int nfiles)
     114             : {
     115        5036 :     BufFile    *file = (BufFile *) palloc(sizeof(BufFile));
     116             : 
     117        5036 :     file->numFiles = nfiles;
     118        5036 :     file->isInterXact = false;
     119        5036 :     file->dirty = false;
     120        5036 :     file->resowner = CurrentResourceOwner;
     121        5036 :     file->curFile = 0;
     122        5036 :     file->curOffset = 0L;
     123        5036 :     file->pos = 0;
     124        5036 :     file->nbytes = 0;
     125             : 
     126        5036 :     return file;
     127             : }
     128             : 
     129             : /*
     130             :  * Create a BufFile given the first underlying physical file.
     131             :  * NOTE: caller must set isInterXact if appropriate.
     132             :  */
     133             : static BufFile *
     134        1818 : makeBufFile(File firstfile)
     135             : {
     136        1818 :     BufFile    *file = makeBufFileCommon(1);
     137             : 
     138        1818 :     file->files = (File *) palloc(sizeof(File));
     139        1818 :     file->files[0] = firstfile;
     140        1818 :     file->readOnly = false;
     141        1818 :     file->fileset = NULL;
     142        1818 :     file->name = NULL;
     143             : 
     144        1818 :     return file;
     145             : }
     146             : 
     147             : /*
     148             :  * Add another component temp file.
     149             :  */
     150             : static void
     151           0 : extendBufFile(BufFile *file)
     152             : {
     153             :     File        pfile;
     154             :     ResourceOwner oldowner;
     155             : 
     156             :     /* Be sure to associate the file with the BufFile's resource owner */
     157           0 :     oldowner = CurrentResourceOwner;
     158           0 :     CurrentResourceOwner = file->resowner;
     159             : 
     160           0 :     if (file->fileset == NULL)
     161           0 :         pfile = OpenTemporaryFile(file->isInterXact);
     162             :     else
     163           0 :         pfile = MakeNewSharedSegment(file, file->numFiles);
     164             : 
     165             :     Assert(pfile >= 0);
     166             : 
     167           0 :     CurrentResourceOwner = oldowner;
     168             : 
     169           0 :     file->files = (File *) repalloc(file->files,
     170           0 :                                     (file->numFiles + 1) * sizeof(File));
     171           0 :     file->files[file->numFiles] = pfile;
     172           0 :     file->numFiles++;
     173           0 : }
     174             : 
     175             : /*
     176             :  * Create a BufFile for a new temporary file (which will expand to become
     177             :  * multiple temporary files if more than MAX_PHYSICAL_FILESIZE bytes are
     178             :  * written to it).
     179             :  *
     180             :  * If interXact is true, the temp file will not be automatically deleted
     181             :  * at end of transaction.
     182             :  *
     183             :  * Note: if interXact is true, the caller had better be calling us in a
     184             :  * memory context, and with a resource owner, that will survive across
     185             :  * transaction boundaries.
     186             :  */
     187             : BufFile *
     188        1818 : BufFileCreateTemp(bool interXact)
     189             : {
     190             :     BufFile    *file;
     191             :     File        pfile;
     192             : 
     193             :     /*
     194             :      * Ensure that temp tablespaces are set up for OpenTemporaryFile to use.
     195             :      * Possibly the caller will have done this already, but it seems useful to
     196             :      * double-check here.  Failure to do this at all would result in the temp
     197             :      * files always getting placed in the default tablespace, which is a
     198             :      * pretty hard-to-detect bug.  Callers may prefer to do it earlier if they
     199             :      * want to be sure that any required catalog access is done in some other
     200             :      * resource context.
     201             :      */
     202        1818 :     PrepareTempTablespaces();
     203             : 
     204        1818 :     pfile = OpenTemporaryFile(interXact);
     205             :     Assert(pfile >= 0);
     206             : 
     207        1818 :     file = makeBufFile(pfile);
     208        1818 :     file->isInterXact = interXact;
     209             : 
     210        1818 :     return file;
     211             : }
     212             : 
     213             : /*
     214             :  * Build the name for a given segment of a given BufFile.
     215             :  */
     216             : static void
     217        6436 : SharedSegmentName(char *name, const char *buffile_name, int segment)
     218             : {
     219        6436 :     snprintf(name, MAXPGPATH, "%s.%d", buffile_name, segment);
     220        6436 : }
     221             : 
     222             : /*
     223             :  * Create a new segment file backing a shared BufFile.
     224             :  */
     225             : static File
     226        1356 : MakeNewSharedSegment(BufFile *buffile, int segment)
     227             : {
     228             :     char        name[MAXPGPATH];
     229             :     File        file;
     230             : 
     231             :     /*
     232             :      * It is possible that there are files left over from before a crash
     233             :      * restart with the same name.  In order for BufFileOpenShared() not to
     234             :      * get confused about how many segments there are, we'll unlink the next
     235             :      * segment number if it already exists.
     236             :      */
     237        1356 :     SharedSegmentName(name, buffile->name, segment + 1);
     238        1356 :     SharedFileSetDelete(buffile->fileset, name, true);
     239             : 
     240             :     /* Create the new segment. */
     241        1356 :     SharedSegmentName(name, buffile->name, segment);
     242        1356 :     file = SharedFileSetCreate(buffile->fileset, name);
     243             : 
     244             :     /* SharedFileSetCreate would've errored out */
     245             :     Assert(file > 0);
     246             : 
     247        1356 :     return file;
     248             : }
     249             : 
     250             : /*
     251             :  * Create a BufFile that can be discovered and opened read-only by other
     252             :  * backends that are attached to the same SharedFileSet using the same name.
     253             :  *
     254             :  * The naming scheme for shared BufFiles is left up to the calling code.  The
     255             :  * name will appear as part of one or more filenames on disk, and might
     256             :  * provide clues to administrators about which subsystem is generating
     257             :  * temporary file data.  Since each SharedFileSet object is backed by one or
     258             :  * more uniquely named temporary directories, names don't conflict with
     259             :  * unrelated SharedFileSet objects.
     260             :  */
     261             : BufFile *
     262        1356 : BufFileCreateShared(SharedFileSet *fileset, const char *name)
     263             : {
     264             :     BufFile    *file;
     265             : 
     266        1356 :     file = makeBufFileCommon(1);
     267        1356 :     file->fileset = fileset;
     268        1356 :     file->name = pstrdup(name);
     269        1356 :     file->files = (File *) palloc(sizeof(File));
     270        1356 :     file->files[0] = MakeNewSharedSegment(file, 0);
     271        1356 :     file->readOnly = false;
     272             : 
     273        1356 :     return file;
     274             : }
     275             : 
     276             : /*
     277             :  * Open a file that was previously created in another backend (or this one)
     278             :  * with BufFileCreateShared in the same SharedFileSet using the same name.
     279             :  * The backend that created the file must have called BufFileClose() or
     280             :  * BufFileExportShared() to make sure that it is ready to be opened by other
     281             :  * backends and render it read-only.
     282             :  */
     283             : BufFile *
     284        1862 : BufFileOpenShared(SharedFileSet *fileset, const char *name, int mode)
     285             : {
     286             :     BufFile    *file;
     287             :     char        segment_name[MAXPGPATH];
     288        1862 :     Size        capacity = 16;
     289             :     File       *files;
     290        1862 :     int         nfiles = 0;
     291             : 
     292        1862 :     files = palloc(sizeof(File) * capacity);
     293             : 
     294             :     /*
     295             :      * We don't know how many segments there are, so we'll probe the
     296             :      * filesystem to find out.
     297             :      */
     298             :     for (;;)
     299             :     {
     300             :         /* See if we need to expand our file segment array. */
     301        3724 :         if (nfiles + 1 > capacity)
     302             :         {
     303           0 :             capacity *= 2;
     304           0 :             files = repalloc(files, sizeof(File) * capacity);
     305             :         }
     306             :         /* Try to load a segment. */
     307        3724 :         SharedSegmentName(segment_name, name, nfiles);
     308        3724 :         files[nfiles] = SharedFileSetOpen(fileset, segment_name, mode);
     309        3724 :         if (files[nfiles] <= 0)
     310        1862 :             break;
     311        1862 :         ++nfiles;
     312             : 
     313        1862 :         CHECK_FOR_INTERRUPTS();
     314             :     }
     315             : 
     316             :     /*
     317             :      * If we didn't find any files at all, then no BufFile exists with this
     318             :      * name.
     319             :      */
     320        1862 :     if (nfiles == 0)
     321           0 :         ereport(ERROR,
     322             :                 (errcode_for_file_access(),
     323             :                  errmsg("could not open temporary file \"%s\" from BufFile \"%s\": %m",
     324             :                         segment_name, name)));
     325             : 
     326        1862 :     file = makeBufFileCommon(nfiles);
     327        1862 :     file->files = files;
     328        1862 :     file->readOnly = (mode == O_RDONLY) ? true : false;
     329        1862 :     file->fileset = fileset;
     330        1862 :     file->name = pstrdup(name);
     331             : 
     332        1862 :     return file;
     333             : }
     334             : 
     335             : /*
     336             :  * Delete a BufFile that was created by BufFileCreateShared in the given
     337             :  * SharedFileSet using the given name.
     338             :  *
     339             :  * It is not necessary to delete files explicitly with this function.  It is
     340             :  * provided only as a way to delete files proactively, rather than waiting for
     341             :  * the SharedFileSet to be cleaned up.
     342             :  *
     343             :  * Only one backend should attempt to delete a given name, and should know
     344             :  * that it exists and has been exported or closed.
     345             :  */
     346             : void
     347           0 : BufFileDeleteShared(SharedFileSet *fileset, const char *name)
     348             : {
     349             :     char        segment_name[MAXPGPATH];
     350           0 :     int         segment = 0;
     351           0 :     bool        found = false;
     352             : 
     353             :     /*
     354             :      * We don't know how many segments the file has.  We'll keep deleting
     355             :      * until we run out.  If we don't manage to find even an initial segment,
     356             :      * raise an error.
     357             :      */
     358             :     for (;;)
     359             :     {
     360           0 :         SharedSegmentName(segment_name, name, segment);
     361           0 :         if (!SharedFileSetDelete(fileset, segment_name, true))
     362           0 :             break;
     363           0 :         found = true;
     364           0 :         ++segment;
     365             : 
     366           0 :         CHECK_FOR_INTERRUPTS();
     367             :     }
     368             : 
     369           0 :     if (!found)
     370           0 :         elog(ERROR, "could not delete unknown shared BufFile \"%s\"", name);
     371           0 : }
     372             : 
     373             : /*
     374             :  * BufFileExportShared --- flush and make read-only, in preparation for sharing.
     375             :  */
     376             : void
     377         248 : BufFileExportShared(BufFile *file)
     378             : {
     379             :     /* Must be a file belonging to a SharedFileSet. */
     380             :     Assert(file->fileset != NULL);
     381             : 
     382             :     /* It's probably a bug if someone calls this twice. */
     383             :     Assert(!file->readOnly);
     384             : 
     385         248 :     BufFileFlush(file);
     386         248 :     file->readOnly = true;
     387         248 : }
     388             : 
     389             : /*
     390             :  * Close a BufFile
     391             :  *
     392             :  * Like fclose(), this also implicitly FileCloses the underlying File.
     393             :  */
     394             : void
     395        4940 : BufFileClose(BufFile *file)
     396             : {
     397             :     int         i;
     398             : 
     399             :     /* flush any unwritten data */
     400        4940 :     BufFileFlush(file);
     401             :     /* close and delete the underlying file(s) */
     402        9968 :     for (i = 0; i < file->numFiles; i++)
     403        5028 :         FileClose(file->files[i]);
     404             :     /* release the buffer space */
     405        4940 :     pfree(file->files);
     406        4940 :     pfree(file);
     407        4940 : }
     408             : 
     409             : /*
     410             :  * BufFileLoadBuffer
     411             :  *
     412             :  * Load some data into buffer, if possible, starting from curOffset.
     413             :  * At call, must have dirty = false, pos and nbytes = 0.
     414             :  * On exit, nbytes is number of bytes loaded.
     415             :  */
     416             : static void
     417       69428 : BufFileLoadBuffer(BufFile *file)
     418             : {
     419             :     File        thisfile;
     420             : 
     421             :     /*
     422             :      * Advance to next component file if necessary and possible.
     423             :      */
     424       69428 :     if (file->curOffset >= MAX_PHYSICAL_FILESIZE &&
     425           0 :         file->curFile + 1 < file->numFiles)
     426             :     {
     427           0 :         file->curFile++;
     428           0 :         file->curOffset = 0L;
     429             :     }
     430             : 
     431             :     /*
     432             :      * Read whatever we can get, up to a full bufferload.
     433             :      */
     434       69428 :     thisfile = file->files[file->curFile];
     435      208284 :     file->nbytes = FileRead(thisfile,
     436       69428 :                             file->buffer.data,
     437             :                             sizeof(file->buffer),
     438             :                             file->curOffset,
     439             :                             WAIT_EVENT_BUFFILE_READ);
     440       69428 :     if (file->nbytes < 0)
     441             :     {
     442           0 :         file->nbytes = 0;
     443           0 :         ereport(ERROR,
     444             :                 (errcode_for_file_access(),
     445             :                  errmsg("could not read file \"%s\": %m",
     446             :                         FilePathName(thisfile))));
     447             :     }
     448             : 
     449             :     /* we choose not to advance curOffset here */
     450             : 
     451       69428 :     if (file->nbytes > 0)
     452       67872 :         pgBufferUsage.temp_blks_read++;
     453       69428 : }
     454             : 
     455             : /*
     456             :  * BufFileDumpBuffer
     457             :  *
     458             :  * Dump buffer contents starting at curOffset.
     459             :  * At call, should have dirty = true, nbytes > 0.
     460             :  * On exit, dirty is cleared if successful write, and curOffset is advanced.
     461             :  */
     462             : static void
     463       82484 : BufFileDumpBuffer(BufFile *file)
     464             : {
     465       82484 :     int         wpos = 0;
     466             :     int         bytestowrite;
     467             :     File        thisfile;
     468             : 
     469             :     /*
     470             :      * Unlike BufFileLoadBuffer, we must dump the whole buffer even if it
     471             :      * crosses a component-file boundary; so we need a loop.
     472             :      */
     473      164968 :     while (wpos < file->nbytes)
     474             :     {
     475             :         off_t       availbytes;
     476             : 
     477             :         /*
     478             :          * Advance to next component file if necessary and possible.
     479             :          */
     480       82484 :         if (file->curOffset >= MAX_PHYSICAL_FILESIZE)
     481             :         {
     482           0 :             while (file->curFile + 1 >= file->numFiles)
     483           0 :                 extendBufFile(file);
     484           0 :             file->curFile++;
     485           0 :             file->curOffset = 0L;
     486             :         }
     487             : 
     488             :         /*
     489             :          * Determine how much we need to write into this file.
     490             :          */
     491       82484 :         bytestowrite = file->nbytes - wpos;
     492       82484 :         availbytes = MAX_PHYSICAL_FILESIZE - file->curOffset;
     493             : 
     494       82484 :         if ((off_t) bytestowrite > availbytes)
     495           0 :             bytestowrite = (int) availbytes;
     496             : 
     497       82484 :         thisfile = file->files[file->curFile];
     498      164968 :         bytestowrite = FileWrite(thisfile,
     499       82484 :                                  file->buffer.data + wpos,
     500             :                                  bytestowrite,
     501             :                                  file->curOffset,
     502             :                                  WAIT_EVENT_BUFFILE_WRITE);
     503       82484 :         if (bytestowrite <= 0)
     504           0 :             ereport(ERROR,
     505             :                     (errcode_for_file_access(),
     506             :                      errmsg("could not write to file \"%s\": %m",
     507             :                             FilePathName(thisfile))));
     508       82484 :         file->curOffset += bytestowrite;
     509       82484 :         wpos += bytestowrite;
     510             : 
     511       82484 :         pgBufferUsage.temp_blks_written++;
     512             :     }
     513       82484 :     file->dirty = false;
     514             : 
     515             :     /*
     516             :      * At this point, curOffset has been advanced to the end of the buffer,
     517             :      * ie, its original value + nbytes.  We need to make it point to the
     518             :      * logical file position, ie, original value + pos, in case that is less
     519             :      * (as could happen due to a small backwards seek in a dirty buffer!)
     520             :      */
     521       82484 :     file->curOffset -= (file->nbytes - file->pos);
     522       82484 :     if (file->curOffset < 0)  /* handle possible segment crossing */
     523             :     {
     524           0 :         file->curFile--;
     525             :         Assert(file->curFile >= 0);
     526           0 :         file->curOffset += MAX_PHYSICAL_FILESIZE;
     527             :     }
     528             : 
     529             :     /*
     530             :      * Now we can set the buffer empty without changing the logical position
     531             :      */
     532       82484 :     file->pos = 0;
     533       82484 :     file->nbytes = 0;
     534       82484 : }
     535             : 
     536             : /*
     537             :  * BufFileRead
     538             :  *
     539             :  * Like fread() except we assume 1-byte element size and report I/O errors via
     540             :  * ereport().
     541             :  */
     542             : size_t
     543    21470326 : BufFileRead(BufFile *file, void *ptr, size_t size)
     544             : {
     545    21470326 :     size_t      nread = 0;
     546             :     size_t      nthistime;
     547             : 
     548    21470326 :     BufFileFlush(file);
     549             : 
     550    42955868 :     while (size > 0)
     551             :     {
     552    21487098 :         if (file->pos >= file->nbytes)
     553             :         {
     554             :             /* Try to load more data into buffer. */
     555       69428 :             file->curOffset += file->pos;
     556       69428 :             file->pos = 0;
     557       69428 :             file->nbytes = 0;
     558       69428 :             BufFileLoadBuffer(file);
     559       69428 :             if (file->nbytes <= 0)
     560        1556 :                 break;          /* no more data available */
     561             :         }
     562             : 
     563    21485542 :         nthistime = file->nbytes - file->pos;
     564    21485542 :         if (nthistime > size)
     565    21419656 :             nthistime = size;
     566             :         Assert(nthistime > 0);
     567             : 
     568    21485542 :         memcpy(ptr, file->buffer.data + file->pos, nthistime);
     569             : 
     570    21485542 :         file->pos += nthistime;
     571    21485542 :         ptr = (void *) ((char *) ptr + nthistime);
     572    21485542 :         size -= nthistime;
     573    21485542 :         nread += nthistime;
     574             :     }
     575             : 
     576    21470326 :     return nread;
     577             : }
     578             : 
     579             : /*
     580             :  * BufFileWrite
     581             :  *
     582             :  * Like fwrite() except we assume 1-byte element size and report errors via
     583             :  * ereport().
     584             :  */
     585             : void
     586    25941308 : BufFileWrite(BufFile *file, void *ptr, size_t size)
     587             : {
     588             :     size_t      nthistime;
     589             : 
     590             :     Assert(!file->readOnly);
     591             : 
     592    51914288 :     while (size > 0)
     593             :     {
     594    25972980 :         if (file->pos >= BLCKSZ)
     595             :         {
     596             :             /* Buffer full, dump it out */
     597       54794 :             if (file->dirty)
     598       54374 :                 BufFileDumpBuffer(file);
     599             :             else
     600             :             {
     601             :                 /* Hmm, went directly from reading to writing? */
     602         420 :                 file->curOffset += file->pos;
     603         420 :                 file->pos = 0;
     604         420 :                 file->nbytes = 0;
     605             :             }
     606             :         }
     607             : 
     608    25972980 :         nthistime = BLCKSZ - file->pos;
     609    25972980 :         if (nthistime > size)
     610    25892694 :             nthistime = size;
     611             :         Assert(nthistime > 0);
     612             : 
     613    25972980 :         memcpy(file->buffer.data + file->pos, ptr, nthistime);
     614             : 
     615    25972980 :         file->dirty = true;
     616    25972980 :         file->pos += nthistime;
     617    25972980 :         if (file->nbytes < file->pos)
     618    25970456 :             file->nbytes = file->pos;
     619    25972980 :         ptr = (void *) ((char *) ptr + nthistime);
     620    25972980 :         size -= nthistime;
     621             :     }
     622    25941308 : }
     623             : 
     624             : /*
     625             :  * BufFileFlush
     626             :  *
     627             :  * Like fflush(), except that I/O errors are reported with ereport().
     628             :  */
     629             : static void
     630    21514590 : BufFileFlush(BufFile *file)
     631             : {
     632    21514590 :     if (file->dirty)
     633       28110 :         BufFileDumpBuffer(file);
     634             : 
     635             :     Assert(!file->dirty);
     636    21514590 : }
     637             : 
     638             : /*
     639             :  * BufFileSeek
     640             :  *
     641             :  * Like fseek(), except that target position needs two values in order to
     642             :  * work when logical filesize exceeds maximum value representable by off_t.
     643             :  * We do not support relative seeks across more than that, however.
     644             :  * I/O errors are reported by ereport().
     645             :  *
     646             :  * Result is 0 if OK, EOF if not.  Logical position is not moved if an
     647             :  * impossible seek is attempted.
     648             :  */
     649             : int
     650       80406 : BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
     651             : {
     652             :     int         newFile;
     653             :     off_t       newOffset;
     654             : 
     655       80406 :     switch (whence)
     656             :     {
     657       79974 :         case SEEK_SET:
     658       79974 :             if (fileno < 0)
     659           0 :                 return EOF;
     660       79974 :             newFile = fileno;
     661       79974 :             newOffset = offset;
     662       79974 :             break;
     663          20 :         case SEEK_CUR:
     664             : 
     665             :             /*
     666             :              * Relative seek considers only the signed offset, ignoring
     667             :              * fileno. Note that large offsets (> 1 GB) risk overflow in this
     668             :              * add, unless we have 64-bit off_t.
     669             :              */
     670          20 :             newFile = file->curFile;
     671          20 :             newOffset = (file->curOffset + file->pos) + offset;
     672          20 :             break;
     673         412 :         case SEEK_END:
     674             : 
     675             :             /*
     676             :              * The file size of the last file gives us the end offset of that
     677             :              * file.
     678             :              */
     679         412 :             newFile = file->numFiles - 1;
     680         412 :             newOffset = FileSize(file->files[file->numFiles - 1]);
     681         412 :             if (newOffset < 0)
     682           0 :                 ereport(ERROR,
     683             :                         (errcode_for_file_access(),
     684             :                          errmsg("could not determine size of temporary file \"%s\" from BufFile \"%s\": %m",
     685             :                                 FilePathName(file->files[file->numFiles - 1]),
     686             :                                 file->name)));
     687         412 :             break;
     688           0 :         default:
     689           0 :             elog(ERROR, "invalid whence: %d", whence);
     690             :             return EOF;
     691             :     }
     692       80406 :     while (newOffset < 0)
     693             :     {
     694           0 :         if (--newFile < 0)
     695           0 :             return EOF;
     696           0 :         newOffset += MAX_PHYSICAL_FILESIZE;
     697             :     }
     698       80406 :     if (newFile == file->curFile &&
     699       80318 :         newOffset >= file->curOffset &&
     700       59536 :         newOffset <= file->curOffset + file->nbytes)
     701             :     {
     702             :         /*
     703             :          * Seek is to a point within existing buffer; we can just adjust
     704             :          * pos-within-buffer, without flushing buffer.  Note this is OK
     705             :          * whether reading or writing, but buffer remains dirty if we were
     706             :          * writing.
     707             :          */
     708       41330 :         file->pos = (int) (newOffset - file->curOffset);
     709       41330 :         return 0;
     710             :     }
     711             :     /* Otherwise, must reposition buffer, so flush any dirty data */
     712       39076 :     BufFileFlush(file);
     713             : 
     714             :     /*
     715             :      * At this point and no sooner, check for seek past last segment. The
     716             :      * above flush could have created a new segment, so checking sooner would
     717             :      * not work (at least not with this code).
     718             :      */
     719             : 
     720             :     /* convert seek to "start of next seg" to "end of last seg" */
     721       39076 :     if (newFile == file->numFiles && newOffset == 0)
     722             :     {
     723           0 :         newFile--;
     724           0 :         newOffset = MAX_PHYSICAL_FILESIZE;
     725             :     }
     726       39076 :     while (newOffset > MAX_PHYSICAL_FILESIZE)
     727             :     {
     728           0 :         if (++newFile >= file->numFiles)
     729           0 :             return EOF;
     730           0 :         newOffset -= MAX_PHYSICAL_FILESIZE;
     731             :     }
     732       39076 :     if (newFile >= file->numFiles)
     733           0 :         return EOF;
     734             :     /* Seek is OK! */
     735       39076 :     file->curFile = newFile;
     736       39076 :     file->curOffset = newOffset;
     737       39076 :     file->pos = 0;
     738       39076 :     file->nbytes = 0;
     739       39076 :     return 0;
     740             : }
     741             : 
     742             : void
     743         190 : BufFileTell(BufFile *file, int *fileno, off_t *offset)
     744             : {
     745         190 :     *fileno = file->curFile;
     746         190 :     *offset = file->curOffset + file->pos;
     747         190 : }
     748             : 
     749             : /*
     750             :  * BufFileSeekBlock --- block-oriented seek
     751             :  *
     752             :  * Performs absolute seek to the start of the n'th BLCKSZ-sized block of
     753             :  * the file.  Note that users of this interface will fail if their files
     754             :  * exceed BLCKSZ * LONG_MAX bytes, but that is quite a lot; we don't work
     755             :  * with tables bigger than that, either...
     756             :  *
     757             :  * Result is 0 if OK, EOF if not.  Logical position is not moved if an
     758             :  * impossible seek is attempted.
     759             :  */
     760             : int
     761       78288 : BufFileSeekBlock(BufFile *file, long blknum)
     762             : {
     763      234864 :     return BufFileSeek(file,
     764       78288 :                        (int) (blknum / BUFFILE_SEG_SIZE),
     765       78288 :                        (off_t) (blknum % BUFFILE_SEG_SIZE) * BLCKSZ,
     766             :                        SEEK_SET);
     767             : }
     768             : 
     769             : #ifdef NOT_USED
     770             : /*
     771             :  * BufFileTellBlock --- block-oriented tell
     772             :  *
     773             :  * Any fractional part of a block in the current seek position is ignored.
     774             :  */
     775             : long
     776             : BufFileTellBlock(BufFile *file)
     777             : {
     778             :     long        blknum;
     779             : 
     780             :     blknum = (file->curOffset + file->pos) / BLCKSZ;
     781             :     blknum += file->curFile * BUFFILE_SEG_SIZE;
     782             :     return blknum;
     783             : }
     784             : 
     785             : #endif
     786             : 
     787             : /*
     788             :  * Return the current shared BufFile size.
     789             :  *
     790             :  * Counts any holes left behind by BufFileAppend as part of the size.
     791             :  * ereport()s on failure.
     792             :  */
     793             : int64
     794         176 : BufFileSize(BufFile *file)
     795             : {
     796             :     int64       lastFileSize;
     797             : 
     798             :     Assert(file->fileset != NULL);
     799             : 
     800             :     /* Get the size of the last physical file. */
     801         176 :     lastFileSize = FileSize(file->files[file->numFiles - 1]);
     802         176 :     if (lastFileSize < 0)
     803           0 :         ereport(ERROR,
     804             :                 (errcode_for_file_access(),
     805             :                  errmsg("could not determine size of temporary file \"%s\" from BufFile \"%s\": %m",
     806             :                         FilePathName(file->files[file->numFiles - 1]),
     807             :                         file->name)));
     808             : 
     809         176 :     return ((file->numFiles - 1) * (int64) MAX_PHYSICAL_FILESIZE) +
     810             :         lastFileSize;
     811             : }
     812             : 
     813             : /*
     814             :  * Append the contents of source file (managed within shared fileset) to
     815             :  * end of target file (managed within same shared fileset).
     816             :  *
     817             :  * Note that operation subsumes ownership of underlying resources from
     818             :  * "source".  Caller should never call BufFileClose against source having
     819             :  * called here first.  Resource owners for source and target must match,
     820             :  * too.
     821             :  *
     822             :  * This operation works by manipulating lists of segment files, so the
     823             :  * file content is always appended at a MAX_PHYSICAL_FILESIZE-aligned
     824             :  * boundary, typically creating empty holes before the boundary.  These
     825             :  * areas do not contain any interesting data, and cannot be read from by
     826             :  * caller.
     827             :  *
     828             :  * Returns the block number within target where the contents of source
     829             :  * begins.  Caller should apply this as an offset when working off block
     830             :  * positions that are in terms of the original BufFile space.
     831             :  */
     832             : long
     833          88 : BufFileAppend(BufFile *target, BufFile *source)
     834             : {
     835          88 :     long        startBlock = target->numFiles * BUFFILE_SEG_SIZE;
     836          88 :     int         newNumFiles = target->numFiles + source->numFiles;
     837             :     int         i;
     838             : 
     839             :     Assert(target->fileset != NULL);
     840             :     Assert(source->readOnly);
     841             :     Assert(!source->dirty);
     842             :     Assert(source->fileset != NULL);
     843             : 
     844          88 :     if (target->resowner != source->resowner)
     845           0 :         elog(ERROR, "could not append BufFile with non-matching resource owner");
     846             : 
     847          88 :     target->files = (File *)
     848          88 :         repalloc(target->files, sizeof(File) * newNumFiles);
     849         176 :     for (i = target->numFiles; i < newNumFiles; i++)
     850          88 :         target->files[i] = source->files[i - target->numFiles];
     851          88 :     target->numFiles = newNumFiles;
     852             : 
     853          88 :     return startBlock;
     854             : }
     855             : 
     856             : /*
     857             :  * Truncate a BufFile created by BufFileCreateShared up to the given fileno and
     858             :  * the offset.
     859             :  */
     860             : void
     861          16 : BufFileTruncateShared(BufFile *file, int fileno, off_t offset)
     862             : {
     863          16 :     int         numFiles = file->numFiles;
     864          16 :     int         newFile = fileno;
     865          16 :     off_t       newOffset = file->curOffset;
     866             :     char        segment_name[MAXPGPATH];
     867             :     int         i;
     868             : 
     869             :     /*
     870             :      * Loop over all the files up to the given fileno and remove the files
     871             :      * that are greater than the fileno and truncate the given file up to the
     872             :      * offset. Note that we also remove the given fileno if the offset is 0
     873             :      * provided it is not the first file in which we truncate it.
     874             :      */
     875          32 :     for (i = file->numFiles - 1; i >= fileno; i--)
     876             :     {
     877          16 :         if ((i != fileno || offset == 0) && i != 0)
     878             :         {
     879           0 :             SharedSegmentName(segment_name, file->name, i);
     880           0 :             FileClose(file->files[i]);
     881           0 :             if (!SharedFileSetDelete(file->fileset, segment_name, true))
     882           0 :                 ereport(ERROR,
     883             :                         (errcode_for_file_access(),
     884             :                          errmsg("could not delete shared fileset \"%s\": %m",
     885             :                                 segment_name)));
     886           0 :             numFiles--;
     887           0 :             newOffset = MAX_PHYSICAL_FILESIZE;
     888             : 
     889             :             /*
     890             :              * This is required to indicate that we have deleted the given
     891             :              * fileno.
     892             :              */
     893           0 :             if (i == fileno)
     894           0 :                 newFile--;
     895             :         }
     896             :         else
     897             :         {
     898          16 :             if (FileTruncate(file->files[i], offset,
     899             :                              WAIT_EVENT_BUFFILE_TRUNCATE) < 0)
     900           0 :                 ereport(ERROR,
     901             :                         (errcode_for_file_access(),
     902             :                          errmsg("could not truncate file \"%s\": %m",
     903             :                                 FilePathName(file->files[i]))));
     904          16 :             newOffset = offset;
     905             :         }
     906             :     }
     907             : 
     908          16 :     file->numFiles = numFiles;
     909             : 
     910             :     /*
     911             :      * If the truncate point is within existing buffer then we can just adjust
     912             :      * pos within buffer.
     913             :      */
     914          16 :     if (newFile == file->curFile &&
     915          16 :         newOffset >= file->curOffset &&
     916          16 :         newOffset <= file->curOffset + file->nbytes)
     917             :     {
     918             :         /* No need to reset the current pos if the new pos is greater. */
     919           0 :         if (newOffset <= file->curOffset + file->pos)
     920           0 :             file->pos = (int) (newOffset - file->curOffset);
     921             : 
     922             :         /* Adjust the nbytes for the current buffer. */
     923           0 :         file->nbytes = (int) (newOffset - file->curOffset);
     924             :     }
     925          16 :     else if (newFile == file->curFile &&
     926          16 :              newOffset < file->curOffset)
     927             :     {
     928             :         /*
     929             :          * The truncate point is within the existing file but prior to the
     930             :          * current position, so we can forget the current buffer and reset the
     931             :          * current position.
     932             :          */
     933           0 :         file->curOffset = newOffset;
     934           0 :         file->pos = 0;
     935           0 :         file->nbytes = 0;
     936             :     }
     937          16 :     else if (newFile < file->curFile)
     938             :     {
     939             :         /*
     940             :          * The truncate point is prior to the current file, so need to reset
     941             :          * the current position accordingly.
     942             :          */
     943           0 :         file->curFile = newFile;
     944           0 :         file->curOffset = newOffset;
     945           0 :         file->pos = 0;
     946           0 :         file->nbytes = 0;
     947             :     }
     948             :     /* Nothing to do, if the truncate point is beyond current file. */
     949          16 : }

Generated by: LCOV version 1.13