LCOV - code coverage report
Current view: top level - src/backend/storage/sync - sync.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 76.6 % 124 95
Test Date: 2026-03-23 22:16:10 Functions: 100.0 % 6 6
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * sync.c
       4              :  *    File synchronization management code.
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/storage/sync/sync.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : #include "postgres.h"
      16              : 
      17              : #include <unistd.h>
      18              : #include <fcntl.h>
      19              : #include <sys/file.h>
      20              : 
      21              : #include "access/clog.h"
      22              : #include "access/commit_ts.h"
      23              : #include "access/multixact.h"
      24              : #include "access/xlog.h"
      25              : #include "miscadmin.h"
      26              : #include "pgstat.h"
      27              : #include "portability/instr_time.h"
      28              : #include "postmaster/bgwriter.h"
      29              : #include "storage/fd.h"
      30              : #include "storage/latch.h"
      31              : #include "storage/md.h"
      32              : #include "utils/hsearch.h"
      33              : #include "utils/memutils.h"
      34              : #include "utils/wait_event.h"
      35              : 
      36              : /*
      37              :  * In some contexts (currently, standalone backends and the checkpointer)
      38              :  * we keep track of pending fsync operations: we need to remember all relation
      39              :  * segments that have been written since the last checkpoint, so that we can
      40              :  * fsync them down to disk before completing the next checkpoint.  This hash
      41              :  * table remembers the pending operations.  We use a hash table mostly as
      42              :  * a convenient way of merging duplicate requests.
      43              :  *
      44              :  * We use a similar mechanism to remember no-longer-needed files that can
      45              :  * be deleted after the next checkpoint, but we use a linked list instead of
      46              :  * a hash table, because we don't expect there to be any duplicate requests.
      47              :  *
      48              :  * These mechanisms are only used for non-temp relations; we never fsync
      49              :  * temp rels, nor do we need to postpone their deletion (see comments in
      50              :  * mdunlink).
      51              :  *
      52              :  * (Regular backends do not track pending operations locally, but forward
      53              :  * them to the checkpointer.)
      54              :  */
      55              : typedef uint16 CycleCtr;        /* can be any convenient integer size */
      56              : 
      57              : typedef struct
      58              : {
      59              :     FileTag     tag;            /* identifies handler and file */
      60              :     CycleCtr    cycle_ctr;      /* sync_cycle_ctr of oldest request */
      61              :     bool        canceled;       /* canceled is true if we canceled "recently" */
      62              : } PendingFsyncEntry;
      63              : 
      64              : typedef struct
      65              : {
      66              :     FileTag     tag;            /* identifies handler and file */
      67              :     CycleCtr    cycle_ctr;      /* checkpoint_cycle_ctr when request was made */
      68              :     bool        canceled;       /* true if request has been canceled */
      69              : } PendingUnlinkEntry;
      70              : 
      71              : static HTAB *pendingOps = NULL;
      72              : static List *pendingUnlinks = NIL;
      73              : static MemoryContext pendingOpsCxt; /* context for the above  */
      74              : 
      75              : static CycleCtr sync_cycle_ctr = 0;
      76              : static CycleCtr checkpoint_cycle_ctr = 0;
      77              : 
      78              : /* Intervals for calling AbsorbSyncRequests */
      79              : #define FSYNCS_PER_ABSORB       10
      80              : #define UNLINKS_PER_ABSORB      10
      81              : 
      82              : /*
      83              :  * Function pointers for handling sync and unlink requests.
      84              :  */
      85              : typedef struct SyncOps
      86              : {
      87              :     int         (*sync_syncfiletag) (const FileTag *ftag, char *path);
      88              :     int         (*sync_unlinkfiletag) (const FileTag *ftag, char *path);
      89              :     bool        (*sync_filetagmatches) (const FileTag *ftag,
      90              :                                         const FileTag *candidate);
      91              : } SyncOps;
      92              : 
      93              : /*
      94              :  * These indexes must correspond to the values of the SyncRequestHandler enum.
      95              :  */
      96              : static const SyncOps syncsw[] = {
      97              :     /* magnetic disk */
      98              :     [SYNC_HANDLER_MD] = {
      99              :         .sync_syncfiletag = mdsyncfiletag,
     100              :         .sync_unlinkfiletag = mdunlinkfiletag,
     101              :         .sync_filetagmatches = mdfiletagmatches
     102              :     },
     103              :     /* pg_xact */
     104              :     [SYNC_HANDLER_CLOG] = {
     105              :         .sync_syncfiletag = clogsyncfiletag
     106              :     },
     107              :     /* pg_commit_ts */
     108              :     [SYNC_HANDLER_COMMIT_TS] = {
     109              :         .sync_syncfiletag = committssyncfiletag
     110              :     },
     111              :     /* pg_multixact/offsets */
     112              :     [SYNC_HANDLER_MULTIXACT_OFFSET] = {
     113              :         .sync_syncfiletag = multixactoffsetssyncfiletag
     114              :     },
     115              :     /* pg_multixact/members */
     116              :     [SYNC_HANDLER_MULTIXACT_MEMBER] = {
     117              :         .sync_syncfiletag = multixactmemberssyncfiletag
     118              :     }
     119              : };
     120              : 
     121              : /*
     122              :  * Initialize data structures for the file sync tracking.
     123              :  */
     124              : void
     125        24267 : InitSync(void)
     126              : {
     127              :     /*
     128              :      * Create pending-operations hashtable if we need it.  Currently, we need
     129              :      * it if we are standalone (not under a postmaster) or if we are a
     130              :      * checkpointer auxiliary process.
     131              :      */
     132        24267 :     if (!IsUnderPostmaster || AmCheckpointerProcess())
     133              :     {
     134              :         HASHCTL     hash_ctl;
     135              : 
     136              :         /*
     137              :          * XXX: The checkpointer needs to add entries to the pending ops table
     138              :          * when absorbing fsync requests.  That is done within a critical
     139              :          * section, which isn't usually allowed, but we make an exception. It
     140              :          * means that there's a theoretical possibility that you run out of
     141              :          * memory while absorbing fsync requests, which leads to a PANIC.
     142              :          * Fortunately the hash table is small so that's unlikely to happen in
     143              :          * practice.
     144              :          */
     145          728 :         pendingOpsCxt = AllocSetContextCreate(TopMemoryContext,
     146              :                                               "Pending ops context",
     147              :                                               ALLOCSET_DEFAULT_SIZES);
     148          728 :         MemoryContextAllowInCriticalSection(pendingOpsCxt, true);
     149              : 
     150          728 :         hash_ctl.keysize = sizeof(FileTag);
     151          728 :         hash_ctl.entrysize = sizeof(PendingFsyncEntry);
     152          728 :         hash_ctl.hcxt = pendingOpsCxt;
     153          728 :         pendingOps = hash_create("Pending Ops Table",
     154              :                                  100L,
     155              :                                  &hash_ctl,
     156              :                                  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
     157          728 :         pendingUnlinks = NIL;
     158              :     }
     159        24267 : }
     160              : 
     161              : /*
     162              :  * SyncPreCheckpoint() -- Do pre-checkpoint work
     163              :  *
     164              :  * To distinguish unlink requests that arrived before this checkpoint
     165              :  * started from those that arrived during the checkpoint, we use a cycle
     166              :  * counter similar to the one we use for fsync requests. That cycle
     167              :  * counter is incremented here.
     168              :  *
     169              :  * This must be called *before* the checkpoint REDO point is determined.
     170              :  * That ensures that we won't delete files too soon.  Since this calls
     171              :  * AbsorbSyncRequests(), which performs memory allocations, it cannot be
     172              :  * called within a critical section.
     173              :  *
     174              :  * Note that we can't do anything here that depends on the assumption
     175              :  * that the checkpoint will be completed.
     176              :  */
     177              : void
     178         1632 : SyncPreCheckpoint(void)
     179              : {
     180              :     /*
     181              :      * Operations such as DROP TABLESPACE assume that the next checkpoint will
     182              :      * process all recently forwarded unlink requests, but if they aren't
     183              :      * absorbed prior to advancing the cycle counter, they won't be processed
     184              :      * until a future checkpoint.  The following absorb ensures that any
     185              :      * unlink requests forwarded before the checkpoint began will be processed
     186              :      * in the current checkpoint.
     187              :      */
     188         1632 :     AbsorbSyncRequests();
     189              : 
     190              :     /*
     191              :      * Any unlink requests arriving after this point will be assigned the next
     192              :      * cycle counter, and won't be unlinked until next checkpoint.
     193              :      */
     194         1632 :     checkpoint_cycle_ctr++;
     195         1632 : }
     196              : 
     197              : /*
     198              :  * SyncPostCheckpoint() -- Do post-checkpoint work
     199              :  *
     200              :  * Remove any lingering files that can now be safely removed.
     201              :  */
     202              : void
     203         1630 : SyncPostCheckpoint(void)
     204              : {
     205              :     int         absorb_counter;
     206              :     ListCell   *lc;
     207              : 
     208         1630 :     absorb_counter = UNLINKS_PER_ABSORB;
     209        36963 :     foreach(lc, pendingUnlinks)
     210              :     {
     211        35475 :         PendingUnlinkEntry *entry = (PendingUnlinkEntry *) lfirst(lc);
     212              :         char        path[MAXPGPATH];
     213              : 
     214              :         /* Skip over any canceled entries */
     215        35475 :         if (entry->canceled)
     216            1 :             continue;
     217              : 
     218              :         /*
     219              :          * New entries are appended to the end, so if the entry is new we've
     220              :          * reached the end of old entries.
     221              :          *
     222              :          * Note: if just the right number of consecutive checkpoints fail, we
     223              :          * could be fooled here by cycle_ctr wraparound.  However, the only
     224              :          * consequence is that we'd delay unlinking for one more checkpoint,
     225              :          * which is perfectly tolerable.
     226              :          */
     227        35474 :         if (entry->cycle_ctr == checkpoint_cycle_ctr)
     228          142 :             break;
     229              : 
     230              :         /* Unlink the file */
     231        35332 :         if (syncsw[entry->tag.handler].sync_unlinkfiletag(&entry->tag,
     232              :                                                           path) < 0)
     233              :         {
     234              :             /*
     235              :              * There's a race condition, when the database is dropped at the
     236              :              * same time that we process the pending unlink requests. If the
     237              :              * DROP DATABASE deletes the file before we do, we will get ENOENT
     238              :              * here. rmtree() also has to ignore ENOENT errors, to deal with
     239              :              * the possibility that we delete the file first.
     240              :              */
     241            3 :             if (errno != ENOENT)
     242            0 :                 ereport(WARNING,
     243              :                         (errcode_for_file_access(),
     244              :                          errmsg("could not remove file \"%s\": %m", path)));
     245              :         }
     246              : 
     247              :         /* Mark the list entry as canceled, just in case */
     248        35332 :         entry->canceled = true;
     249              : 
     250              :         /*
     251              :          * As in ProcessSyncRequests, we don't want to stop absorbing fsync
     252              :          * requests for a long time when there are many deletions to be done.
     253              :          * We can safely call AbsorbSyncRequests() at this point in the loop.
     254              :          */
     255        35332 :         if (--absorb_counter <= 0)
     256              :         {
     257         3406 :             AbsorbSyncRequests();
     258         3406 :             absorb_counter = UNLINKS_PER_ABSORB;
     259              :         }
     260              :     }
     261              : 
     262              :     /*
     263              :      * If we reached the end of the list, we can just remove the whole list
     264              :      * (remembering to pfree all the PendingUnlinkEntry objects).  Otherwise,
     265              :      * we must keep the entries at or after "lc".
     266              :      */
     267         1630 :     if (lc == NULL)
     268              :     {
     269         1488 :         list_free_deep(pendingUnlinks);
     270         1488 :         pendingUnlinks = NIL;
     271              :     }
     272              :     else
     273              :     {
     274          142 :         int         ntodelete = list_cell_number(pendingUnlinks, lc);
     275              : 
     276        18977 :         for (int i = 0; i < ntodelete; i++)
     277        18835 :             pfree(list_nth(pendingUnlinks, i));
     278              : 
     279          142 :         pendingUnlinks = list_delete_first_n(pendingUnlinks, ntodelete);
     280              :     }
     281         1630 : }
     282              : 
     283              : /*
     284              :  *  ProcessSyncRequests() -- Process queued fsync requests.
     285              :  */
     286              : void
     287         1833 : ProcessSyncRequests(void)
     288              : {
     289              :     static bool sync_in_progress = false;
     290              : 
     291              :     HASH_SEQ_STATUS hstat;
     292              :     PendingFsyncEntry *entry;
     293              :     int         absorb_counter;
     294              : 
     295              :     /* Statistics on sync times */
     296         1833 :     int         processed = 0;
     297              :     instr_time  sync_start,
     298              :                 sync_end,
     299              :                 sync_diff;
     300              :     uint64      elapsed;
     301         1833 :     uint64      longest = 0;
     302         1833 :     uint64      total_elapsed = 0;
     303              : 
     304              :     /*
     305              :      * This is only called during checkpoints, and checkpoints should only
     306              :      * occur in processes that have created a pendingOps.
     307              :      */
     308         1833 :     if (!pendingOps)
     309            0 :         elog(ERROR, "cannot sync without a pendingOps table");
     310              : 
     311              :     /*
     312              :      * If we are in the checkpointer, the sync had better include all fsync
     313              :      * requests that were queued by backends up to this point.  The tightest
     314              :      * race condition that could occur is that a buffer that must be written
     315              :      * and fsync'd for the checkpoint could have been dumped by a backend just
     316              :      * before it was visited by BufferSync().  We know the backend will have
     317              :      * queued an fsync request before clearing the buffer's dirtybit, so we
     318              :      * are safe as long as we do an Absorb after completing BufferSync().
     319              :      */
     320         1833 :     AbsorbSyncRequests();
     321              : 
     322              :     /*
     323              :      * To avoid excess fsync'ing (in the worst case, maybe a never-terminating
     324              :      * checkpoint), we want to ignore fsync requests that are entered into the
     325              :      * hashtable after this point --- they should be processed next time,
     326              :      * instead.  We use sync_cycle_ctr to tell old entries apart from new
     327              :      * ones: new ones will have cycle_ctr equal to the incremented value of
     328              :      * sync_cycle_ctr.
     329              :      *
     330              :      * In normal circumstances, all entries present in the table at this point
     331              :      * will have cycle_ctr exactly equal to the current (about to be old)
     332              :      * value of sync_cycle_ctr.  However, if we fail partway through the
     333              :      * fsync'ing loop, then older values of cycle_ctr might remain when we
     334              :      * come back here to try again.  Repeated checkpoint failures would
     335              :      * eventually wrap the counter around to the point where an old entry
     336              :      * might appear new, causing us to skip it, possibly allowing a checkpoint
     337              :      * to succeed that should not have.  To forestall wraparound, any time the
     338              :      * previous ProcessSyncRequests() failed to complete, run through the
     339              :      * table and forcibly set cycle_ctr = sync_cycle_ctr.
     340              :      *
     341              :      * Think not to merge this loop with the main loop, as the problem is
     342              :      * exactly that that loop may fail before having visited all the entries.
     343              :      * From a performance point of view it doesn't matter anyway, as this path
     344              :      * will never be taken in a system that's functioning normally.
     345              :      */
     346         1833 :     if (sync_in_progress)
     347              :     {
     348              :         /* prior try failed, so update any stale cycle_ctr values */
     349            0 :         hash_seq_init(&hstat, pendingOps);
     350            0 :         while ((entry = (PendingFsyncEntry *) hash_seq_search(&hstat)) != NULL)
     351              :         {
     352            0 :             entry->cycle_ctr = sync_cycle_ctr;
     353              :         }
     354              :     }
     355              : 
     356              :     /* Advance counter so that new hashtable entries are distinguishable */
     357         1833 :     sync_cycle_ctr++;
     358              : 
     359              :     /* Set flag to detect failure if we don't reach the end of the loop */
     360         1833 :     sync_in_progress = true;
     361              : 
     362              :     /* Now scan the hashtable for fsync requests to process */
     363         1833 :     absorb_counter = FSYNCS_PER_ABSORB;
     364         1833 :     hash_seq_init(&hstat, pendingOps);
     365       197736 :     while ((entry = (PendingFsyncEntry *) hash_seq_search(&hstat)) != NULL)
     366              :     {
     367              :         int         failures;
     368              : 
     369              :         /*
     370              :          * If the entry is new then don't process it this time; it is new.
     371              :          * Note "continue" bypasses the hash-remove call at the bottom of the
     372              :          * loop.
     373              :          */
     374       195903 :         if (entry->cycle_ctr == sync_cycle_ctr)
     375            0 :             continue;
     376              : 
     377              :         /* Else assert we haven't missed it */
     378              :         Assert((CycleCtr) (entry->cycle_ctr + 1) == sync_cycle_ctr);
     379              : 
     380              :         /*
     381              :          * If fsync is off then we don't have to bother opening the file at
     382              :          * all.  (We delay checking until this point so that changing fsync on
     383              :          * the fly behaves sensibly.)
     384              :          */
     385       195903 :         if (enableFsync)
     386              :         {
     387              :             /*
     388              :              * If in checkpointer, we want to absorb pending requests every so
     389              :              * often to prevent overflow of the fsync request queue.  It is
     390              :              * unspecified whether newly-added entries will be visited by
     391              :              * hash_seq_search, but we don't care since we don't need to
     392              :              * process them anyway.
     393              :              */
     394            0 :             if (--absorb_counter <= 0)
     395              :             {
     396            0 :                 AbsorbSyncRequests();
     397            0 :                 absorb_counter = FSYNCS_PER_ABSORB;
     398              :             }
     399              : 
     400              :             /*
     401              :              * The fsync table could contain requests to fsync segments that
     402              :              * have been deleted (unlinked) by the time we get to them. Rather
     403              :              * than just hoping an ENOENT (or EACCES on Windows) error can be
     404              :              * ignored, what we do on error is absorb pending requests and
     405              :              * then retry. Since mdunlink() queues a "cancel" message before
     406              :              * actually unlinking, the fsync request is guaranteed to be
     407              :              * marked canceled after the absorb if it really was this case.
     408              :              * DROP DATABASE likewise has to tell us to forget fsync requests
     409              :              * before it starts deletions.
     410              :              */
     411            0 :             for (failures = 0; !entry->canceled; failures++)
     412              :             {
     413              :                 char        path[MAXPGPATH];
     414              : 
     415            0 :                 INSTR_TIME_SET_CURRENT(sync_start);
     416            0 :                 if (syncsw[entry->tag.handler].sync_syncfiletag(&entry->tag,
     417              :                                                                 path) == 0)
     418              :                 {
     419              :                     /* Success; update statistics about sync timing */
     420            0 :                     INSTR_TIME_SET_CURRENT(sync_end);
     421            0 :                     sync_diff = sync_end;
     422            0 :                     INSTR_TIME_SUBTRACT(sync_diff, sync_start);
     423            0 :                     elapsed = INSTR_TIME_GET_MICROSEC(sync_diff);
     424            0 :                     if (elapsed > longest)
     425            0 :                         longest = elapsed;
     426            0 :                     total_elapsed += elapsed;
     427            0 :                     processed++;
     428              : 
     429            0 :                     if (log_checkpoints)
     430            0 :                         elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f ms",
     431              :                              processed,
     432              :                              path,
     433              :                              (double) elapsed / 1000);
     434              : 
     435            0 :                     break;      /* out of retry loop */
     436              :                 }
     437              : 
     438              :                 /*
     439              :                  * It is possible that the relation has been dropped or
     440              :                  * truncated since the fsync request was entered. Therefore,
     441              :                  * allow ENOENT, but only if we didn't fail already on this
     442              :                  * file.
     443              :                  */
     444            0 :                 if (!FILE_POSSIBLY_DELETED(errno) || failures > 0)
     445            0 :                     ereport(data_sync_elevel(ERROR),
     446              :                             (errcode_for_file_access(),
     447              :                              errmsg("could not fsync file \"%s\": %m",
     448              :                                     path)));
     449              :                 else
     450            0 :                     ereport(DEBUG1,
     451              :                             (errcode_for_file_access(),
     452              :                              errmsg_internal("could not fsync file \"%s\" but retrying: %m",
     453              :                                              path)));
     454              : 
     455              :                 /*
     456              :                  * Absorb incoming requests and check to see if a cancel
     457              :                  * arrived for this relation fork.
     458              :                  */
     459            0 :                 AbsorbSyncRequests();
     460            0 :                 absorb_counter = FSYNCS_PER_ABSORB; /* might as well... */
     461              :             }                   /* end retry loop */
     462              :         }
     463              : 
     464              :         /* We are done with this entry, remove it */
     465       195903 :         if (hash_search(pendingOps, &entry->tag, HASH_REMOVE, NULL) == NULL)
     466            0 :             elog(ERROR, "pendingOps corrupted");
     467              :     }                           /* end loop over hashtable entries */
     468              : 
     469              :     /* Return sync performance metrics for report at checkpoint end */
     470         1833 :     CheckpointStats.ckpt_sync_rels = processed;
     471         1833 :     CheckpointStats.ckpt_longest_sync = longest;
     472         1833 :     CheckpointStats.ckpt_agg_sync_time = total_elapsed;
     473              : 
     474              :     /* Flag successful completion of ProcessSyncRequests */
     475         1833 :     sync_in_progress = false;
     476         1833 : }
     477              : 
     478              : /*
     479              :  * RememberSyncRequest() -- callback from checkpointer side of sync request
     480              :  *
     481              :  * We stuff fsync requests into the local hash table for execution
     482              :  * during the checkpointer's next checkpoint.  UNLINK requests go into a
     483              :  * separate linked list, however, because they get processed separately.
     484              :  *
     485              :  * See sync.h for more information on the types of sync requests supported.
     486              :  */
     487              : void
     488      1546696 : RememberSyncRequest(const FileTag *ftag, SyncRequestType type)
     489              : {
     490              :     Assert(pendingOps);
     491              : 
     492      1546696 :     if (type == SYNC_FORGET_REQUEST)
     493              :     {
     494              :         PendingFsyncEntry *entry;
     495              : 
     496              :         /* Cancel previously entered request */
     497       152118 :         entry = (PendingFsyncEntry *) hash_search(pendingOps,
     498              :                                                   ftag,
     499              :                                                   HASH_FIND,
     500              :                                                   NULL);
     501       152118 :         if (entry != NULL)
     502        21280 :             entry->canceled = true;
     503              :     }
     504      1394578 :     else if (type == SYNC_FILTER_REQUEST)
     505              :     {
     506              :         HASH_SEQ_STATUS hstat;
     507              :         PendingFsyncEntry *pfe;
     508              :         ListCell   *cell;
     509              : 
     510              :         /* Cancel matching fsync requests */
     511           43 :         hash_seq_init(&hstat, pendingOps);
     512         8384 :         while ((pfe = (PendingFsyncEntry *) hash_seq_search(&hstat)) != NULL)
     513              :         {
     514        16591 :             if (pfe->tag.handler == ftag->handler &&
     515         8293 :                 syncsw[ftag->handler].sync_filetagmatches(ftag, &pfe->tag))
     516         4847 :                 pfe->canceled = true;
     517              :         }
     518              : 
     519              :         /* Cancel matching unlink requests */
     520          164 :         foreach(cell, pendingUnlinks)
     521              :         {
     522          121 :             PendingUnlinkEntry *pue = (PendingUnlinkEntry *) lfirst(cell);
     523              : 
     524          242 :             if (pue->tag.handler == ftag->handler &&
     525          121 :                 syncsw[ftag->handler].sync_filetagmatches(ftag, &pue->tag))
     526            1 :                 pue->canceled = true;
     527              :         }
     528              :     }
     529      1394535 :     else if (type == SYNC_UNLINK_REQUEST)
     530              :     {
     531              :         /* Unlink request: put it in the linked list */
     532        35333 :         MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
     533              :         PendingUnlinkEntry *entry;
     534              : 
     535        35333 :         entry = palloc_object(PendingUnlinkEntry);
     536        35333 :         entry->tag = *ftag;
     537        35333 :         entry->cycle_ctr = checkpoint_cycle_ctr;
     538        35333 :         entry->canceled = false;
     539              : 
     540        35333 :         pendingUnlinks = lappend(pendingUnlinks, entry);
     541              : 
     542        35333 :         MemoryContextSwitchTo(oldcxt);
     543              :     }
     544              :     else
     545              :     {
     546              :         /* Normal case: enter a request to fsync this segment */
     547      1359202 :         MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
     548              :         PendingFsyncEntry *entry;
     549              :         bool        found;
     550              : 
     551              :         Assert(type == SYNC_REQUEST);
     552              : 
     553      1359202 :         entry = (PendingFsyncEntry *) hash_search(pendingOps,
     554              :                                                   ftag,
     555              :                                                   HASH_ENTER,
     556              :                                                   &found);
     557              :         /* if new entry, or was previously canceled, initialize it */
     558      1359202 :         if (!found || entry->canceled)
     559              :         {
     560       200194 :             entry->cycle_ctr = sync_cycle_ctr;
     561       200194 :             entry->canceled = false;
     562              :         }
     563              : 
     564              :         /*
     565              :          * NB: it's intentional that we don't change cycle_ctr if the entry
     566              :          * already exists.  The cycle_ctr must represent the oldest fsync
     567              :          * request that could be in the entry.
     568              :          */
     569              : 
     570      1359202 :         MemoryContextSwitchTo(oldcxt);
     571              :     }
     572      1546696 : }
     573              : 
     574              : /*
     575              :  * Register the sync request locally, or forward it to the checkpointer.
     576              :  *
     577              :  * If retryOnError is true, we'll keep trying if there is no space in the
     578              :  * queue.  Return true if we succeeded, or false if there wasn't space.
     579              :  */
     580              : bool
     581      1718209 : RegisterSyncRequest(const FileTag *ftag, SyncRequestType type,
     582              :                     bool retryOnError)
     583              : {
     584              :     bool        ret;
     585              : 
     586      1718209 :     if (pendingOps != NULL)
     587              :     {
     588              :         /* standalone backend or startup process: fsync state is local */
     589       364870 :         RememberSyncRequest(ftag, type);
     590       364870 :         return true;
     591              :     }
     592              : 
     593              :     for (;;)
     594              :     {
     595              :         /*
     596              :          * Notify the checkpointer about it.  If we fail to queue a message in
     597              :          * retryOnError mode, we have to sleep and try again ... ugly, but
     598              :          * hopefully won't happen often.
     599              :          *
     600              :          * XXX should we CHECK_FOR_INTERRUPTS in this loop?  Escaping with an
     601              :          * error in the case of SYNC_UNLINK_REQUEST would leave the
     602              :          * no-longer-used file still present on disk, which would be bad, so
     603              :          * I'm inclined to assume that the checkpointer will always empty the
     604              :          * queue soon.
     605              :          */
     606      1353353 :         ret = ForwardSyncRequest(ftag, type);
     607              : 
     608              :         /*
     609              :          * If we are successful in queueing the request, or we failed and were
     610              :          * instructed not to retry on error, break.
     611              :          */
     612      1353353 :         if (ret || (!ret && !retryOnError))
     613              :             break;
     614              : 
     615           14 :         WaitLatch(NULL, WL_EXIT_ON_PM_DEATH | WL_TIMEOUT, 10,
     616              :                   WAIT_EVENT_REGISTER_SYNC_REQUEST);
     617              :     }
     618              : 
     619      1353339 :     return ret;
     620              : }
        

Generated by: LCOV version 2.0-1