LCOV - code coverage report
Current view: top level - src/backend/storage/sync - sync.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 95 124 76.6 %
Date: 2023-10-01 22:11:05 Functions: 6 6 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * sync.c
       4             :  *    File synchronization management code.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/storage/sync/sync.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include <unistd.h>
      18             : #include <fcntl.h>
      19             : #include <sys/file.h>
      20             : 
      21             : #include "access/commit_ts.h"
      22             : #include "access/clog.h"
      23             : #include "access/multixact.h"
      24             : #include "access/xlog.h"
      25             : #include "access/xlogutils.h"
      26             : #include "commands/tablespace.h"
      27             : #include "miscadmin.h"
      28             : #include "pgstat.h"
      29             : #include "portability/instr_time.h"
      30             : #include "postmaster/bgwriter.h"
      31             : #include "storage/bufmgr.h"
      32             : #include "storage/fd.h"
      33             : #include "storage/ipc.h"
      34             : #include "storage/latch.h"
      35             : #include "storage/md.h"
      36             : #include "utils/hsearch.h"
      37             : #include "utils/inval.h"
      38             : #include "utils/memutils.h"
      39             : 
      40             : /*
      41             :  * In some contexts (currently, standalone backends and the checkpointer)
      42             :  * we keep track of pending fsync operations: we need to remember all relation
      43             :  * segments that have been written since the last checkpoint, so that we can
      44             :  * fsync them down to disk before completing the next checkpoint.  This hash
      45             :  * table remembers the pending operations.  We use a hash table mostly as
      46             :  * a convenient way of merging duplicate requests.
      47             :  *
      48             :  * We use a similar mechanism to remember no-longer-needed files that can
      49             :  * be deleted after the next checkpoint, but we use a linked list instead of
      50             :  * a hash table, because we don't expect there to be any duplicate requests.
      51             :  *
      52             :  * These mechanisms are only used for non-temp relations; we never fsync
      53             :  * temp rels, nor do we need to postpone their deletion (see comments in
      54             :  * mdunlink).
      55             :  *
      56             :  * (Regular backends do not track pending operations locally, but forward
      57             :  * them to the checkpointer.)
      58             :  */
      59             : typedef uint16 CycleCtr;        /* wrapping cycle counter; can be any convenient integer size */
      60             : 
      61             : typedef struct                  /* one pending fsync request; entry type of the pendingOps hash table */
      62             : {
      63             :     FileTag     tag;            /* identifies handler and file; hash key of pendingOps */
      64             :     CycleCtr    cycle_ctr;      /* sync_cycle_ctr of oldest request */
      65             :     bool        canceled;       /* true if a forget/filter request canceled this entry; ProcessSyncRequests then skips the fsync */
      66             : } PendingFsyncEntry;
      67             : 
      68             : typedef struct                  /* one pending unlink request; element type of the pendingUnlinks list */
      69             : {
      70             :     FileTag     tag;            /* identifies handler and file */
      71             :     CycleCtr    cycle_ctr;      /* checkpoint_cycle_ctr when request was made */
      72             :     bool        canceled;       /* true if request has been canceled, or was already processed by SyncPostCheckpoint */
      73             : } PendingUnlinkEntry;
      74             : 
      75             : static HTAB *pendingOps = NULL;     /* pending fsync requests keyed by FileTag; stays NULL unless InitSync creates it */
      76             : static List *pendingUnlinks = NIL;  /* list of PendingUnlinkEntry, in arrival order (new entries are appended) */
      77             : static MemoryContext pendingOpsCxt; /* memory context holding both of the above */
      78             : 
      79             : static CycleCtr sync_cycle_ctr = 0;         /* advanced once per ProcessSyncRequests() call */
      80             : static CycleCtr checkpoint_cycle_ctr = 0;   /* advanced once per SyncPreCheckpoint() call */
      81             : 
      82             : /* Number of fsyncs / unlinks to process between calls to AbsorbSyncRequests */
      83             : #define FSYNCS_PER_ABSORB       10
      84             : #define UNLINKS_PER_ABSORB      10
      85             : 
      86             : /*
      87             :  * Function pointers for handling sync and unlink requests, one set per handler.
      88             :  */
      89             : typedef struct SyncOps
      90             : {
      91             :     int         (*sync_syncfiletag) (const FileTag *ftag, char *path);    /* fsync the file; callers treat 0 as success and print path in messages */
      92             :     int         (*sync_unlinkfiletag) (const FileTag *ftag, char *path);  /* unlink the file; callers treat < 0 as failure and print path in messages */
      93             :     bool        (*sync_filetagmatches) (const FileTag *ftag,
      94             :                                         const FileTag *candidate);         /* true if candidate is matched by the filter tag ftag */
      95             : } SyncOps;
      96             : 
      97             : /*
      98             :  * These indexes must correspond to the values of the SyncRequestHandler enum.
      99             :  */
     100             : static const SyncOps syncsw[] = {
     101             :     /* magnetic disk: the only handler that provides all three callbacks */
     102             :     [SYNC_HANDLER_MD] = {
     103             :         .sync_syncfiletag = mdsyncfiletag,
     104             :         .sync_unlinkfiletag = mdunlinkfiletag,
     105             :         .sync_filetagmatches = mdfiletagmatches
     106             :     },
     107             :     /* pg_xact: sync only; unlink and match callbacks are left NULL */
     108             :     [SYNC_HANDLER_CLOG] = {
     109             :         .sync_syncfiletag = clogsyncfiletag
     110             :     },
     111             :     /* pg_commit_ts: sync only; unlink and match callbacks are left NULL */
     112             :     [SYNC_HANDLER_COMMIT_TS] = {
     113             :         .sync_syncfiletag = committssyncfiletag
     114             :     },
     115             :     /* pg_multixact/offsets: sync only; unlink and match callbacks are left NULL */
     116             :     [SYNC_HANDLER_MULTIXACT_OFFSET] = {
     117             :         .sync_syncfiletag = multixactoffsetssyncfiletag
     118             :     },
     119             :     /* pg_multixact/members: sync only; unlink and match callbacks are left NULL */
     120             :     [SYNC_HANDLER_MULTIXACT_MEMBER] = {
     121             :         .sync_syncfiletag = multixactmemberssyncfiletag
     122             :     }
     123             : };
     124             : 
     125             : /*
     126             :  * InitSync() -- Initialize data structures for the file sync tracking.
     127             :  */
     128             : void
     129       26932 : InitSync(void)
     130             : {
     131             :     /*
     132             :      * Create pending-operations hashtable if we need it.  Currently, we need
     133             :      * it if we are standalone (not under a postmaster) or if we are a
     134             :      * checkpointer auxiliary process.  In any other process we do nothing.
     135             :      */
     136       26932 :     if (!IsUnderPostmaster || AmCheckpointerProcess())
     137             :     {
     138             :         HASHCTL     hash_ctl;
     139             : 
     140             :         /*
     141             :          * XXX: The checkpointer needs to add entries to the pending ops table
     142             :          * when absorbing fsync requests.  That is done within a critical
     143             :          * section, which isn't usually allowed, but we make an exception. It
     144             :          * means that there's a theoretical possibility that you run out of
     145             :          * memory while absorbing fsync requests, which leads to a PANIC.
     146             :          * Fortunately the hash table is small so that's unlikely to happen in
     147             :          * practice.
     148             :          */
     149         842 :         pendingOpsCxt = AllocSetContextCreate(TopMemoryContext,
     150             :                                               "Pending ops context",
     151             :                                               ALLOCSET_DEFAULT_SIZES);
     152         842 :         MemoryContextAllowInCriticalSection(pendingOpsCxt, true);   /* the exception described above */
     153             : 
     154         842 :         hash_ctl.keysize = sizeof(FileTag);     /* entries are keyed by their FileTag */
     155         842 :         hash_ctl.entrysize = sizeof(PendingFsyncEntry);
     156         842 :         hash_ctl.hcxt = pendingOpsCxt;          /* table lives in our own context (see HASH_CONTEXT below) */
     157         842 :         pendingOps = hash_create("Pending Ops Table",
     158             :                                  100L,
     159             :                                  &hash_ctl,
     160             :                                  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
     161         842 :         pendingUnlinks = NIL;                   /* start with an empty unlink list */
     162             :     }
     163       26932 : }
     164             : 
     165             : /*
     166             :  * SyncPreCheckpoint() -- Do pre-checkpoint work
     167             :  *
     168             :  * To distinguish unlink requests that arrived before this checkpoint
     169             :  * started from those that arrived during the checkpoint, we use a cycle
     170             :  * counter similar to the one we use for fsync requests. That cycle
     171             :  * counter is incremented here.
     172             :  *
     173             :  * This must be called *before* the checkpoint REDO point is determined.
     174             :  * That ensures that we won't delete files too soon.  Since this calls
     175             :  * AbsorbSyncRequests(), which performs memory allocations, it cannot be
     176             :  * called within a critical section.
     177             :  *
     178             :  * Note that we can't do anything here that depends on the assumption
     179             :  * that the checkpoint will be completed.
     180             :  */
     181             : void
     182        1388 : SyncPreCheckpoint(void)
     183             : {
     184             :     /*
     185             :      * Operations such as DROP TABLESPACE assume that the next checkpoint will
     186             :      * process all recently forwarded unlink requests, but if they aren't
     187             :      * absorbed prior to advancing the cycle counter, they won't be processed
     188             :      * until a future checkpoint.  The following absorb ensures that any
     189             :      * unlink requests forwarded before the checkpoint began will be processed
     190             :      * in the current checkpoint.
     191             :      */
     192        1388 :     AbsorbSyncRequests();
     193             : 
     194             :     /*
     195             :      * Any unlink requests arriving after this point will be assigned the next
     196             :      * cycle counter, and won't be unlinked until the next checkpoint.
     197             :      */
     198        1388 :     checkpoint_cycle_ctr++;     /* may wrap around; SyncPostCheckpoint tolerates that */
     199        1388 : }
     200             : 
     201             : /*
     202             :  * SyncPostCheckpoint() -- Do post-checkpoint work
     203             :  *
     204             :  * Remove any lingering files that can now be safely removed: those whose unlink requests predate the last SyncPreCheckpoint().
     205             :  */
     206             : void
     207        1378 : SyncPostCheckpoint(void)
     208             : {
     209             :     int         absorb_counter;     /* unlinks left before the next AbsorbSyncRequests() */
     210             :     ListCell   *lc;                 /* NULL after the loop iff the whole list was scanned */
     211             : 
     212        1378 :     absorb_counter = UNLINKS_PER_ABSORB;
     213       56356 :     foreach(lc, pendingUnlinks)
     214             :     {
     215       55020 :         PendingUnlinkEntry *entry = (PendingUnlinkEntry *) lfirst(lc);
     216             :         char        path[MAXPGPATH];    /* handed to the unlink callback; printed in the warning below */
     217             : 
     218             :         /* Skip over any canceled entries */
     219       55020 :         if (entry->canceled)
     220           2 :             continue;
     221             : 
     222             :         /*
     223             :          * New entries are appended to the end, so if the entry is new we've
     224             :          * reached the end of old entries.
     225             :          *
     226             :          * Note: if just the right number of consecutive checkpoints fail, we
     227             :          * could be fooled here by cycle_ctr wraparound.  However, the only
     228             :          * consequence is that we'd delay unlinking for one more checkpoint,
     229             :          * which is perfectly tolerable.
     230             :          */
     231       55018 :         if (entry->cycle_ctr == checkpoint_cycle_ctr)
     232          42 :             break;
     233             : 
     234             :         /* Unlink the file */
     235       54976 :         if (syncsw[entry->tag.handler].sync_unlinkfiletag(&entry->tag,
     236             :                                                           path) < 0)
     237             :         {
     238             :             /*
     239             :              * There's a race condition, when the database is dropped at the
     240             :              * same time that we process the pending unlink requests. If the
     241             :              * DROP DATABASE deletes the file before we do, we will get ENOENT
     242             :              * here. rmtree() also has to ignore ENOENT errors, to deal with
     243             :              * the possibility that we delete the file first.
     244             :              */
     245           6 :             if (errno != ENOENT)
     246           0 :                 ereport(WARNING,
     247             :                         (errcode_for_file_access(),
     248             :                          errmsg("could not remove file \"%s\": %m", path)));
     249             :         }
     250             : 
     251             :         /* Mark the list entry as canceled, so it is skipped if the list is scanned again before being trimmed below */
     252       54976 :         entry->canceled = true;
     253             : 
     254             :         /*
     255             :          * As in ProcessSyncRequests, we don't want to stop absorbing fsync
     256             :          * requests for a long time when there are many deletions to be done.
     257             :          * We can safely call AbsorbSyncRequests() at this point in the loop.
     258             :          */
     259       54976 :         if (--absorb_counter <= 0)
     260             :         {
     261        5394 :             AbsorbSyncRequests();
     262        5394 :             absorb_counter = UNLINKS_PER_ABSORB;
     263             :         }
     264             :     }
     265             : 
     266             :     /*
     267             :      * If we reached the end of the list, we can just remove the whole list
     268             :      * (remembering to pfree all the PendingUnlinkEntry objects).  Otherwise,
     269             :      * we must keep the entries at or after "lc".
     270             :      */
     271        1378 :     if (lc == NULL)
     272             :     {
     273        1336 :         list_free_deep(pendingUnlinks);
     274        1336 :         pendingUnlinks = NIL;
     275             :     }
     276             :     else
     277             :     {
     278          42 :         int         ntodelete = list_cell_number(pendingUnlinks, lc);   /* count of entries before "lc" */
     279             : 
     280       31630 :         for (int i = 0; i < ntodelete; i++)
     281       31588 :             pfree(list_nth(pendingUnlinks, i));     /* free the entry objects first ... */
     282             : 
     283          42 :         pendingUnlinks = list_delete_first_n(pendingUnlinks, ntodelete);    /* ... then drop their list cells */
     284             :     }
     285        1378 : }
     286             : 
     287             : /*
     288             :  *  ProcessSyncRequests() -- Process queued fsync requests, and record sync timing statistics in CheckpointStats.
     289             :  */
     290             : void
     291        1436 : ProcessSyncRequests(void)
     292             : {
     293             :     static bool sync_in_progress = false;   /* stays true across calls if a previous call did not reach its end */
     294             : 
     295             :     HASH_SEQ_STATUS hstat;
     296             :     PendingFsyncEntry *entry;
     297             :     int         absorb_counter;             /* fsyncs left before the next AbsorbSyncRequests() */
     298             : 
     299             :     /* Statistics on sync times */
     300        1436 :     int         processed = 0;              /* number of files successfully fsync'd */
     301             :     instr_time  sync_start,
     302             :                 sync_end,
     303             :                 sync_diff;
     304             :     uint64      elapsed;                    /* duration of one fsync, in microseconds */
     305        1436 :     uint64      longest = 0;                /* longest single fsync, in microseconds */
     306        1436 :     uint64      total_elapsed = 0;          /* sum of all fsync durations, in microseconds */
     307             : 
     308             :     /*
     309             :      * This is only called during checkpoints, and checkpoints should only
     310             :      * occur in processes that have created a pendingOps.
     311             :      */
     312        1436 :     if (!pendingOps)
     313           0 :         elog(ERROR, "cannot sync without a pendingOps table");
     314             : 
     315             :     /*
     316             :      * If we are in the checkpointer, the sync had better include all fsync
     317             :      * requests that were queued by backends up to this point.  The tightest
     318             :      * race condition that could occur is that a buffer that must be written
     319             :      * and fsync'd for the checkpoint could have been dumped by a backend just
     320             :      * before it was visited by BufferSync().  We know the backend will have
     321             :      * queued an fsync request before clearing the buffer's dirtybit, so we
     322             :      * are safe as long as we do an Absorb after completing BufferSync().
     323             :      */
     324        1436 :     AbsorbSyncRequests();
     325             : 
     326             :     /*
     327             :      * To avoid excess fsync'ing (in the worst case, maybe a never-terminating
     328             :      * checkpoint), we want to ignore fsync requests that are entered into the
     329             :      * hashtable after this point --- they should be processed next time,
     330             :      * instead.  We use sync_cycle_ctr to tell old entries apart from new
     331             :      * ones: new ones will have cycle_ctr equal to the incremented value of
     332             :      * sync_cycle_ctr.
     333             :      *
     334             :      * In normal circumstances, all entries present in the table at this point
     335             :      * will have cycle_ctr exactly equal to the current (about to be old)
     336             :      * value of sync_cycle_ctr.  However, if we fail partway through the
     337             :      * fsync'ing loop, then older values of cycle_ctr might remain when we
     338             :      * come back here to try again.  Repeated checkpoint failures would
     339             :      * eventually wrap the counter around to the point where an old entry
     340             :      * might appear new, causing us to skip it, possibly allowing a checkpoint
     341             :      * to succeed that should not have.  To forestall wraparound, any time the
     342             :      * previous ProcessSyncRequests() failed to complete, run through the
     343             :      * table and forcibly set cycle_ctr = sync_cycle_ctr.
     344             :      *
     345             :      * Think not to merge this loop with the main loop, as the problem is
     346             :      * exactly that that loop may fail before having visited all the entries.
     347             :      * From a performance point of view it doesn't matter anyway, as this path
     348             :      * will never be taken in a system that's functioning normally.
     349             :      */
     350        1436 :     if (sync_in_progress)
     351             :     {
     352             :         /* prior try failed, so update any stale cycle_ctr values */
     353           0 :         hash_seq_init(&hstat, pendingOps);
     354           0 :         while ((entry = (PendingFsyncEntry *) hash_seq_search(&hstat)) != NULL)
     355             :         {
     356           0 :             entry->cycle_ctr = sync_cycle_ctr;
     357             :         }
     358             :     }
     359             : 
     360             :     /* Advance counter so that new hashtable entries are distinguishable */
     361        1436 :     sync_cycle_ctr++;
     362             : 
     363             :     /* Set flag to detect failure if we don't reach the end of the loop */
     364        1436 :     sync_in_progress = true;
     365             : 
     366             :     /* Now scan the hashtable for fsync requests to process */
     367        1436 :     absorb_counter = FSYNCS_PER_ABSORB;
     368        1436 :     hash_seq_init(&hstat, pendingOps);
     369      228876 :     while ((entry = (PendingFsyncEntry *) hash_seq_search(&hstat)) != NULL)
     370             :     {
     371             :         int         failures;       /* failed fsync attempts so far for this entry */
     372             : 
     373             :         /*
     374             :          * If the entry was added after we advanced sync_cycle_ctr above, don't
     375             :          * process it this time; leave it for the next call.  Note "continue"
     376             :          * bypasses the hash-remove call at the bottom of the loop.
     377             :          */
     378      227440 :         if (entry->cycle_ctr == sync_cycle_ctr)
     379           0 :             continue;
     380             : 
     381             :         /* Else assert we haven't missed it (the cast makes the comparison wraparound-safe) */
     382             :         Assert((CycleCtr) (entry->cycle_ctr + 1) == sync_cycle_ctr);
     383             : 
     384             :         /*
     385             :          * If fsync is off then we don't have to bother opening the file at
     386             :          * all.  (We delay checking until this point so that changing fsync on
     387             :          * the fly behaves sensibly.)
     388             :          */
     389      227440 :         if (enableFsync)
     390             :         {
     391             :             /*
     392             :              * If in checkpointer, we want to absorb pending requests every so
     393             :              * often to prevent overflow of the fsync request queue.  It is
     394             :              * unspecified whether newly-added entries will be visited by
     395             :              * hash_seq_search, but we don't care since we don't need to
     396             :              * process them anyway.
     397             :              */
     398           0 :             if (--absorb_counter <= 0)
     399             :             {
     400           0 :                 AbsorbSyncRequests();
     401           0 :                 absorb_counter = FSYNCS_PER_ABSORB;
     402             :             }
     403             : 
     404             :             /*
     405             :              * The fsync table could contain requests to fsync segments that
     406             :              * have been deleted (unlinked) by the time we get to them. Rather
     407             :              * than just hoping an ENOENT (or EACCES on Windows) error can be
     408             :              * ignored, what we do on error is absorb pending requests and
     409             :              * then retry. Since mdunlink() queues a "cancel" message before
     410             :              * actually unlinking, the fsync request is guaranteed to be
     411             :              * marked canceled after the absorb if it really was this case.
     412             :              * DROP DATABASE likewise has to tell us to forget fsync requests
     413             :              * before it starts deletions.
     414             :              */
     415           0 :             for (failures = 0; !entry->canceled; failures++)    /* exits on cancel, or via "break" on success */
     416             :             {
     417             :                 char        path[MAXPGPATH];    /* handed to the sync callback; printed in the messages below */
     418             : 
     419           0 :                 INSTR_TIME_SET_CURRENT(sync_start);
     420           0 :                 if (syncsw[entry->tag.handler].sync_syncfiletag(&entry->tag,
     421             :                                                                 path) == 0)
     422             :                 {
     423             :                     /* Success; update statistics about sync timing */
     424           0 :                     INSTR_TIME_SET_CURRENT(sync_end);
     425           0 :                     sync_diff = sync_end;
     426           0 :                     INSTR_TIME_SUBTRACT(sync_diff, sync_start);
     427           0 :                     elapsed = INSTR_TIME_GET_MICROSEC(sync_diff);
     428           0 :                     if (elapsed > longest)
     429           0 :                         longest = elapsed;
     430           0 :                     total_elapsed += elapsed;
     431           0 :                     processed++;
     432             : 
     433           0 :                     if (log_checkpoints)
     434           0 :                         elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f ms",
     435             :                              processed,
     436             :                              path,
     437             :                              (double) elapsed / 1000);  /* microseconds -> milliseconds */
     438             : 
     439           0 :                     break;      /* out of retry loop */
     440             :                 }
     441             : 
     442             :                 /*
     443             :                  * It is possible that the relation has been dropped or
     444             :                  * truncated since the fsync request was entered. Therefore,
     445             :                  * allow ENOENT, but only if we didn't fail already on this
     446             :                  * file.
     447             :                  */
     448           0 :                 if (!FILE_POSSIBLY_DELETED(errno) || failures > 0)
     449           0 :                     ereport(data_sync_elevel(ERROR),
     450             :                             (errcode_for_file_access(),
     451             :                              errmsg("could not fsync file \"%s\": %m",
     452             :                                     path)));
     453             :                 else
     454           0 :                     ereport(DEBUG1,
     455             :                             (errcode_for_file_access(),
     456             :                              errmsg_internal("could not fsync file \"%s\" but retrying: %m",
     457             :                                              path)));
     458             : 
     459             :                 /*
     460             :                  * Absorb incoming requests and check to see if a cancel
     461             :                  * arrived for this relation fork.
     462             :                  */
     463           0 :                 AbsorbSyncRequests();
     464           0 :                 absorb_counter = FSYNCS_PER_ABSORB; /* might as well... */
     465             :             }                   /* end retry loop */
     466             :         }
     467             : 
     468             :         /* We are done with this entry (synced, canceled, or fsync is disabled), remove it */
     469      227440 :         if (hash_search(pendingOps, &entry->tag, HASH_REMOVE, NULL) == NULL)
     470           0 :             elog(ERROR, "pendingOps corrupted");
     471             :     }                           /* end loop over hashtable entries */
     472             : 
     473             :     /* Return sync performance metrics for report at checkpoint end */
     474        1436 :     CheckpointStats.ckpt_sync_rels = processed;
     475        1436 :     CheckpointStats.ckpt_longest_sync = longest;
     476        1436 :     CheckpointStats.ckpt_agg_sync_time = total_elapsed;
     477             : 
     478             :     /* Flag successful completion of ProcessSyncRequests */
     479        1436 :     sync_in_progress = false;
     480        1436 : }
     481             : 
     482             : /*
     483             :  * RememberSyncRequest() -- callback from checkpointer side of sync request
     484             :  *
     485             :  * We stuff fsync requests into the local hash table for execution
     486             :  * during the checkpointer's next checkpoint.  UNLINK requests go into a
     487             :  * separate linked list, however, because they get processed separately.
     488             :  *
     489             :  * See sync.h for more information on the types of sync requests supported.
     490             :  */
     491             : void
     492     1530672 : RememberSyncRequest(const FileTag *ftag, SyncRequestType type)
     493             : {
     494             :     Assert(pendingOps);
     495             : 
     496     1530672 :     if (type == SYNC_FORGET_REQUEST)
     497             :     {
     498             :         PendingFsyncEntry *entry;
     499             : 
     500             :         /* Cancel previously entered request */
     501      212740 :         entry = (PendingFsyncEntry *) hash_search(pendingOps,
     502             :                                                   ftag,
     503             :                                                   HASH_FIND,
     504             :                                                   NULL);
     505      212740 :         if (entry != NULL)
     506       14336 :             entry->canceled = true;
     507             :     }
     508     1317932 :     else if (type == SYNC_FILTER_REQUEST)
     509             :     {
     510             :         HASH_SEQ_STATUS hstat;
     511             :         PendingFsyncEntry *pfe;
     512             :         ListCell   *cell;
     513             : 
     514             :         /* Cancel matching fsync requests */
     515          30 :         hash_seq_init(&hstat, pendingOps);
     516       11690 :         while ((pfe = (PendingFsyncEntry *) hash_seq_search(&hstat)) != NULL)
     517             :         {
     518       23312 :             if (pfe->tag.handler == ftag->handler &&
     519       11652 :                 syncsw[ftag->handler].sync_filetagmatches(ftag, &pfe->tag))
     520        6664 :                 pfe->canceled = true;
     521             :         }
     522             : 
     523             :         /* Cancel matching unlink requests */
     524          52 :         foreach(cell, pendingUnlinks)
     525             :         {
     526          22 :             PendingUnlinkEntry *pue = (PendingUnlinkEntry *) lfirst(cell);
     527             : 
     528          44 :             if (pue->tag.handler == ftag->handler &&
     529          22 :                 syncsw[ftag->handler].sync_filetagmatches(ftag, &pue->tag))
     530           2 :                 pue->canceled = true;
     531             :         }
     532             :     }
     533     1317902 :     else if (type == SYNC_UNLINK_REQUEST)
     534             :     {
     535             :         /* Unlink request: put it in the linked list */
     536       54978 :         MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
     537             :         PendingUnlinkEntry *entry;
     538             : 
     539       54978 :         entry = palloc(sizeof(PendingUnlinkEntry));
     540       54978 :         entry->tag = *ftag;
     541       54978 :         entry->cycle_ctr = checkpoint_cycle_ctr;
     542       54978 :         entry->canceled = false;
     543             : 
     544       54978 :         pendingUnlinks = lappend(pendingUnlinks, entry);
     545             : 
     546       54978 :         MemoryContextSwitchTo(oldcxt);
     547             :     }
     548             :     else
     549             :     {
     550             :         /* Normal case: enter a request to fsync this segment */
     551     1262924 :         MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
     552             :         PendingFsyncEntry *entry;
     553             :         bool        found;
     554             : 
     555             :         Assert(type == SYNC_REQUEST);
     556             : 
     557     1262924 :         entry = (PendingFsyncEntry *) hash_search(pendingOps,
     558             :                                                   ftag,
     559             :                                                   HASH_ENTER,
     560             :                                                   &found);
     561             :         /* if new entry, or was previously canceled, initialize it */
     562     1262924 :         if (!found || entry->canceled)
     563             :         {
     564      230136 :             entry->cycle_ctr = sync_cycle_ctr;
     565      230136 :             entry->canceled = false;
     566             :         }
     567             : 
     568             :         /*
     569             :          * NB: it's intentional that we don't change cycle_ctr if the entry
     570             :          * already exists.  The cycle_ctr must represent the oldest fsync
     571             :          * request that could be in the entry.
     572             :          */
     573             : 
     574     1262924 :         MemoryContextSwitchTo(oldcxt);
     575             :     }
     576     1530672 : }
     577             : 
     578             : /*
     579             :  * Register the sync request locally, or forward it to the checkpointer.
     580             :  *
     581             :  * If retryOnError is true, we'll keep trying if there is no space in the
     582             :  * queue.  Return true if we succeeded, or false if there wasn't space.
     583             :  */
     584             : bool
     585     1679212 : RegisterSyncRequest(const FileTag *ftag, SyncRequestType type,
     586             :                     bool retryOnError)
     587             : {
     588             :     bool        ret;
     589             : 
     590     1679212 :     if (pendingOps != NULL)
     591             :     {
     592             :         /* standalone backend or startup process: fsync state is local */
     593      450446 :         RememberSyncRequest(ftag, type);
     594      450446 :         return true;
     595             :     }
     596             : 
     597             :     for (;;)
     598             :     {
     599             :         /*
     600             :          * Notify the checkpointer about it.  If we fail to queue a message in
     601             :          * retryOnError mode, we have to sleep and try again ... ugly, but
     602             :          * hopefully won't happen often.
     603             :          *
     604             :          * XXX should we CHECK_FOR_INTERRUPTS in this loop?  Escaping with an
     605             :          * error in the case of SYNC_UNLINK_REQUEST would leave the
     606             :          * no-longer-used file still present on disk, which would be bad, so
     607             :          * I'm inclined to assume that the checkpointer will always empty the
     608             :          * queue soon.
     609             :          */
     610     1228770 :         ret = ForwardSyncRequest(ftag, type);
     611             : 
     612             :         /*
     613             :          * If we are successful in queueing the request, or we failed and were
     614             :          * instructed not to retry on error, break.
     615             :          */
     616     1228770 :         if (ret || (!ret && !retryOnError))
     617             :             break;
     618             : 
     619           4 :         WaitLatch(NULL, WL_EXIT_ON_PM_DEATH | WL_TIMEOUT, 10,
     620             :                   WAIT_EVENT_REGISTER_SYNC_REQUEST);
     621             :     }
     622             : 
     623     1228766 :     return ret;
     624             : }

Generated by: LCOV version 1.14