LCOV - code coverage report
Current view: top level - src/backend/access/common - syncscan.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 96.3 % 54 52
Test Date: 2026-03-01 18:15:11 Functions: 100.0 % 5 5
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * syncscan.c
       4              :  *    scan synchronization support
       5              :  *
       6              :  * When multiple backends run a sequential scan on the same table, we try
       7              :  * to keep them synchronized to reduce the overall I/O needed.  The goal is
       8              :  * to read each page into shared buffer cache only once, and let all backends
       9              :  * that take part in the shared scan process the page before it falls out of
      10              :  * the cache.
      11              :  *
      12              :  * Since the "leader" in a pack of backends doing a seqscan will have to wait
      13              :  * for I/O, while the "followers" don't, there is a strong self-synchronizing
      14              :  * effect once we can get the backends examining approximately the same part
      15              :  * of the table at the same time.  Hence all that is really needed is to get
      16              :  * a new backend beginning a seqscan to begin it close to where other backends
      17              :  * are reading.  We can scan the table circularly, from block X up to the
      18              :  * end and then from block 0 to X-1, to ensure we visit all rows while still
      19              :  * participating in the common scan.
      20              :  *
      21              :  * To accomplish that, we keep track of the scan position of each table, and
      22              :  * start new scans close to where the previous scan(s) are.  We don't try to
      23              :  * do any extra synchronization to keep the scans together afterwards; some
      24              :  * scans might progress much more slowly than others, for example if the
      25              :  * results need to be transferred to the client over a slow network, and we
      26              :  * don't want such queries to slow down others.
      27              :  *
      28              :  * There can realistically only be a few large sequential scans on different
      29              :  * tables in progress at any time.  Therefore we just keep the scan positions
      30              :  * in a small LRU list which we scan every time we need to look up or update a
      31              :  * scan position.  The whole mechanism is only applied for tables exceeding
      32              :  * a threshold size (but that is not the concern of this module).
      33              :  *
      34              :  * INTERFACE ROUTINES
      35              :  *      ss_get_location     - return current scan location of a relation
      36              :  *      ss_report_location  - update current scan location
      37              :  *
      38              :  *
      39              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      40              :  * Portions Copyright (c) 1994, Regents of the University of California
      41              :  *
      42              :  * IDENTIFICATION
      43              :  *    src/backend/access/common/syncscan.c
      44              :  *
      45              :  *-------------------------------------------------------------------------
      46              :  */
      47              : #include "postgres.h"
      48              : 
      49              : #include "access/syncscan.h"
      50              : #include "miscadmin.h"
      51              : #include "storage/lwlock.h"
      52              : #include "storage/shmem.h"
      53              : #include "utils/rel.h"
      54              : 
      55              : 
      56              : /* GUC variables */
      57              : #ifdef TRACE_SYNCSCAN
      58              : bool        trace_syncscan = false;
      59              : #endif
      60              : 
      61              : 
      62              : /*
      63              :  * Size of the LRU list.
      64              :  *
      65              :  * Note: the code assumes that SYNC_SCAN_NELEM > 1.
      66              :  *
      67              :  * XXX: What's a good value? It should be large enough to hold the
      68              :  * maximum number of large tables scanned simultaneously.  But a larger value
      69              :  * means more traversing of the LRU list when starting a new scan.
      70              :  */
      71              : #define SYNC_SCAN_NELEM 20
      72              : 
      73              : /*
      74              :  * Interval between reports of the location of the current scan, in pages.
      75              :  *
      76              :  * Note: This should be smaller than the ring size (see buffer/freelist.c)
      77              :  * we use for bulk reads.  Otherwise a scan joining other scans might start
      78              :  * from a page that's no longer in the buffer cache.  This is a bit fuzzy;
      79              :  * there's no guarantee that the new scan will read the page before it leaves
      80              :  * the buffer cache anyway, and on the other hand the page is most likely
      81              :  * still in the OS cache.
      82              :  */
      83              : #define SYNC_SCAN_REPORT_INTERVAL (128 * 1024 / BLCKSZ)
      84              : 
      85              : 
      86              : /*
      87              :  * The scan locations structure is essentially a doubly-linked LRU with head
      88              :  * and tail pointer, but designed to hold a fixed maximum number of elements in
      89              :  * fixed-size shared memory.
      90              :  */
      91              : typedef struct ss_scan_location_t
      92              : {
      93              :     RelFileLocator relfilelocator;  /* identity of a relation */
      94              :     BlockNumber location;       /* last-reported location in the relation */
      95              : } ss_scan_location_t;
      96              : 
      97              : typedef struct ss_lru_item_t
      98              : {
      99              :     struct ss_lru_item_t *prev;
     100              :     struct ss_lru_item_t *next;
     101              :     ss_scan_location_t location;
     102              : } ss_lru_item_t;
     103              : 
     104              : typedef struct ss_scan_locations_t
     105              : {
     106              :     ss_lru_item_t *head;
     107              :     ss_lru_item_t *tail;
     108              :     ss_lru_item_t items[FLEXIBLE_ARRAY_MEMBER]; /* SYNC_SCAN_NELEM items */
     109              : } ss_scan_locations_t;
     110              : 
     111              : #define SizeOfScanLocations(N) \
     112              :     (offsetof(ss_scan_locations_t, items) + (N) * sizeof(ss_lru_item_t))
     113              : 
     114              : /* Pointer to struct in shared memory */
     115              : static ss_scan_locations_t *scan_locations;
     116              : 
     117              : /* prototypes for internal functions */
     118              : static BlockNumber ss_search(RelFileLocator relfilelocator,
     119              :                              BlockNumber location, bool set);
     120              : 
     121              : 
     122              : /*
     123              :  * SyncScanShmemSize --- report amount of shared memory space needed
     124              :  */
     125              : Size
     126         2147 : SyncScanShmemSize(void)
     127              : {
     128         2147 :     return SizeOfScanLocations(SYNC_SCAN_NELEM);
     129              : }
     130              : 
     131              : /*
     132              :  * SyncScanShmemInit --- initialize this module's shared memory
     133              :  */
     134              : void
     135         1150 : SyncScanShmemInit(void)
     136              : {
     137              :     int         i;
     138              :     bool        found;
     139              : 
     140         1150 :     scan_locations = (ss_scan_locations_t *)
     141         1150 :         ShmemInitStruct("Sync Scan Locations List",
     142              :                         SizeOfScanLocations(SYNC_SCAN_NELEM),
     143              :                         &found);
     144              : 
     145         1150 :     if (!IsUnderPostmaster)
     146              :     {
     147              :         /* Initialize shared memory area */
     148              :         Assert(!found);
     149              : 
     150         1150 :         scan_locations->head = &scan_locations->items[0];
     151         1150 :         scan_locations->tail = &scan_locations->items[SYNC_SCAN_NELEM - 1];
     152              : 
     153        24150 :         for (i = 0; i < SYNC_SCAN_NELEM; i++)
     154              :         {
     155        23000 :             ss_lru_item_t *item = &scan_locations->items[i];
     156              : 
     157              :             /*
     158              :              * Initialize all slots with invalid values. As scans are started,
     159              :              * these invalid entries will fall off the LRU list and get
     160              :              * replaced with real entries.
     161              :              */
     162        23000 :             item->location.relfilelocator.spcOid = InvalidOid;
     163        23000 :             item->location.relfilelocator.dbOid = InvalidOid;
     164        23000 :             item->location.relfilelocator.relNumber = InvalidRelFileNumber;
     165        23000 :             item->location.location = InvalidBlockNumber;
     166              : 
     167        23000 :             item->prev = (i > 0) ?
     168        23000 :                 (&scan_locations->items[i - 1]) : NULL;
     169        23000 :             item->next = (i < SYNC_SCAN_NELEM - 1) ?
     170        23000 :                 (&scan_locations->items[i + 1]) : NULL;
     171              :         }
     172              :     }
     173              :     else
     174              :         Assert(found);
     175         1150 : }
     176              : 
     177              : /*
     178              :  * ss_search --- search the scan_locations structure for an entry with the
     179              :  *      given relfilelocator.
     180              :  *
     181              :  * If "set" is true, the location is updated to the given location.  If no
     182              :  * entry for the given relfilelocator is found, it will be created at the head
     183              :  * of the list with the given location, even if "set" is false.
     184              :  *
     185              :  * In any case, the location after possible update is returned.
     186              :  *
     187              :  * Caller is responsible for having acquired suitable lock on the shared
     188              :  * data structure.
     189              :  */
     190              : static BlockNumber
     191         1374 : ss_search(RelFileLocator relfilelocator, BlockNumber location, bool set)
     192              : {
     193              :     ss_lru_item_t *item;
     194              : 
     195         1374 :     item = scan_locations->head;
     196              :     for (;;)
     197          551 :     {
     198              :         bool        match;
     199              : 
     200         1925 :         match = RelFileLocatorEquals(item->location.relfilelocator,
     201              :                                      relfilelocator);
     202              : 
     203         1925 :         if (match || item->next == NULL)
     204              :         {
     205              :             /*
     206              :              * If we reached the end of list and no match was found, take over
     207              :              * the last entry
     208              :              */
     209         1374 :             if (!match)
     210              :             {
     211           29 :                 item->location.relfilelocator = relfilelocator;
     212           29 :                 item->location.location = location;
     213              :             }
     214         1345 :             else if (set)
     215         1303 :                 item->location.location = location;
     216              : 
     217              :             /* Move the entry to the front of the LRU list */
     218         1374 :             if (item != scan_locations->head)
     219              :             {
     220              :                 /* unlink */
     221           29 :                 if (item == scan_locations->tail)
     222           29 :                     scan_locations->tail = item->prev;
     223           29 :                 item->prev->next = item->next;
     224           29 :                 if (item->next)
     225            0 :                     item->next->prev = item->prev;
     226              : 
     227              :                 /* link */
     228           29 :                 item->prev = NULL;
     229           29 :                 item->next = scan_locations->head;
     230           29 :                 scan_locations->head->prev = item;
     231           29 :                 scan_locations->head = item;
     232              :             }
     233              : 
     234         1374 :             return item->location.location;
     235              :         }
     236              : 
     237          551 :         item = item->next;
     238              :     }
     239              : 
     240              :     /* not reached */
     241              : }
     242              : 
     243              : /*
     244              :  * ss_get_location --- get the optimal starting location for scan
     245              :  *
     246              :  * Returns the last-reported location of a sequential scan on the
     247              :  * relation, or 0 if no valid location is found.
     248              :  *
     249              :  * We expect the caller has just done RelationGetNumberOfBlocks(), and
     250              :  * so that number is passed in rather than computing it again.  The result
     251              :  * is guaranteed less than relnblocks (assuming that's > 0).
     252              :  */
     253              : BlockNumber
     254           71 : ss_get_location(Relation rel, BlockNumber relnblocks)
     255              : {
     256              :     BlockNumber startloc;
     257              : 
     258           71 :     LWLockAcquire(SyncScanLock, LW_EXCLUSIVE);
     259           71 :     startloc = ss_search(rel->rd_locator, 0, false);
     260           71 :     LWLockRelease(SyncScanLock);
     261              : 
     262              :     /*
     263              :      * If the location is not a valid block number for this scan, start at 0.
     264              :      *
     265              :      * This can happen if for instance a VACUUM truncated the table since the
     266              :      * location was saved.
     267              :      */
     268           71 :     if (startloc >= relnblocks)
     269            0 :         startloc = 0;
     270              : 
     271              : #ifdef TRACE_SYNCSCAN
     272              :     if (trace_syncscan)
     273              :         elog(LOG,
     274              :              "SYNC_SCAN: start \"%s\" (size %u) at %u",
     275              :              RelationGetRelationName(rel), relnblocks, startloc);
     276              : #endif
     277              : 
     278           71 :     return startloc;
     279              : }
     280              : 
     281              : /*
     282              :  * ss_report_location --- update the current scan location
     283              :  *
     284              :  * Writes an entry into the shared Sync Scan state of the form
     285              :  * (relfilelocator, blocknumber), overwriting any existing entry for the
     286              :  * same relfilelocator.
     287              :  */
     288              : void
     289        19950 : ss_report_location(Relation rel, BlockNumber location)
     290              : {
     291              : #ifdef TRACE_SYNCSCAN
     292              :     if (trace_syncscan)
     293              :     {
     294              :         if ((location % 1024) == 0)
     295              :             elog(LOG,
     296              :                  "SYNC_SCAN: scanning \"%s\" at %u",
     297              :                  RelationGetRelationName(rel), location);
     298              :     }
     299              : #endif
     300              : 
     301              :     /*
     302              :      * To reduce lock contention, only report scan progress every N pages. For
     303              :      * the same reason, don't block if the lock isn't immediately available.
     304              :      * Missing a few updates isn't critical, it just means that a new scan
     305              :      * that wants to join the pack will start a little bit behind the head of
     306              :      * the scan.  Hopefully the pages are still in OS cache and the scan
     307              :      * catches up quickly.
     308              :      */
     309        19950 :     if ((location % SYNC_SCAN_REPORT_INTERVAL) == 0)
     310              :     {
     311         1303 :         if (LWLockConditionalAcquire(SyncScanLock, LW_EXCLUSIVE))
     312              :         {
     313         1303 :             (void) ss_search(rel->rd_locator, location, true);
     314         1303 :             LWLockRelease(SyncScanLock);
     315              :         }
     316              : #ifdef TRACE_SYNCSCAN
     317              :         else if (trace_syncscan)
     318              :             elog(LOG,
     319              :                  "SYNC_SCAN: missed update for \"%s\" at %u",
     320              :                  RelationGetRelationName(rel), location);
     321              : #endif
     322              :     }
     323        19950 : }
        

Generated by: LCOV version 2.0-1