LCOV - code coverage report
Current view: top level - src/backend/access/common - syncscan.c (source / functions) Coverage Total Hit
Test: PostgreSQL 20devel Lines: 96.2 % 52 50
Test Date: 2026-07-03 19:57:34 Functions: 100.0 % 5 5
Legend: Lines:     hit not hit
Branches: + taken - not taken # not executed
Branches: 87.5 % 32 28

             Branch data     Line data    Source code
       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * syncscan.c
       4                 :             :  *    scan synchronization support
       5                 :             :  *
       6                 :             :  * When multiple backends run a sequential scan on the same table, we try
       7                 :             :  * to keep them synchronized to reduce the overall I/O needed.  The goal is
       8                 :             :  * to read each page into shared buffer cache only once, and let all backends
       9                 :             :  * that take part in the shared scan process the page before it falls out of
      10                 :             :  * the cache.
      11                 :             :  *
      12                 :             :  * Since the "leader" in a pack of backends doing a seqscan will have to wait
      13                 :             :  * for I/O, while the "followers" don't, there is a strong self-synchronizing
      14                 :             :  * effect once we can get the backends examining approximately the same part
      15                 :             :  * of the table at the same time.  Hence all that is really needed is to get
      16                 :             :  * a new backend beginning a seqscan to begin it close to where other backends
      17                 :             :  * are reading.  We can scan the table circularly, from block X up to the
      18                 :             :  * end and then from block 0 to X-1, to ensure we visit all rows while still
      19                 :             :  * participating in the common scan.
      20                 :             :  *
      21                 :             :  * To accomplish that, we keep track of the scan position of each table, and
      22                 :             :  * start new scans close to where the previous scan(s) are.  We don't try to
      23                 :             :  * do any extra synchronization to keep the scans together afterwards; some
      24                 :             :  * scans might progress much more slowly than others, for example if the
      25                 :             :  * results need to be transferred to the client over a slow network, and we
      26                 :             :  * don't want such queries to slow down others.
      27                 :             :  *
      28                 :             :  * There can realistically only be a few large sequential scans on different
      29                 :             :  * tables in progress at any time.  Therefore we just keep the scan positions
      30                 :             :  * in a small LRU list which we scan every time we need to look up or update a
      31                 :             :  * scan position.  The whole mechanism is only applied for tables exceeding
      32                 :             :  * a threshold size (but that is not the concern of this module).
      33                 :             :  *
      34                 :             :  * INTERFACE ROUTINES
      35                 :             :  *      ss_get_location     - return current scan location of a relation
      36                 :             :  *      ss_report_location  - update current scan location
      37                 :             :  *
      38                 :             :  *
      39                 :             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      40                 :             :  * Portions Copyright (c) 1994, Regents of the University of California
      41                 :             :  *
      42                 :             :  * IDENTIFICATION
      43                 :             :  *    src/backend/access/common/syncscan.c
      44                 :             :  *
      45                 :             :  *-------------------------------------------------------------------------
      46                 :             :  */
      47                 :             : #include "postgres.h"
      48                 :             : 
      49                 :             : #include "access/syncscan.h"
      50                 :             : #include "miscadmin.h"
      51                 :             : #include "storage/lwlock.h"
      52                 :             : #include "storage/shmem.h"
      53                 :             : #include "storage/subsystems.h"
      54                 :             : #include "utils/rel.h"
      55                 :             : 
      56                 :             : 
      57                 :             : /* GUC variables (only compiled in when TRACE_SYNCSCAN is defined) */
      58                 :             : #ifdef TRACE_SYNCSCAN
      59                 :             : bool        trace_syncscan = false; /* when true, scan start/progress/missed updates are logged via elog(LOG) */
      60                 :             : #endif
      61                 :             : 
      62                 :             : 
      63                 :             : /*
      64                 :             :  * Size of the LRU list, i.e. the number of ss_lru_item_t slots it holds.
      65                 :             :  *
      66                 :             :  * Note: the code assumes that SYNC_SCAN_NELEM > 1.
      67                 :             :  *
      68                 :             :  * XXX: What's a good value? It should be large enough to hold the
      69                 :             :  * maximum number of large tables scanned simultaneously.  But a larger value
      70                 :             :  * means more traversing of the LRU list on every lookup or update.
      71                 :             :  */
      72                 :             : #define SYNC_SCAN_NELEM 20
      73                 :             : 
      74                 :             : /*
      75                 :             :  * Interval between reports of the location of the current scan, in pages.
      76                 :             :  *
      77                 :             :  * Note: This should be smaller than the ring size (see buffer/freelist.c)
      78                 :             :  * we use for bulk reads.  Otherwise a scan joining other scans might start
      79                 :             :  * from a page that's no longer in the buffer cache.  This is a bit fuzzy;
      80                 :             :  * there's no guarantee that the new scan will read the page before it leaves
      81                 :             :  * the buffer cache anyway, and on the other hand the page is most likely
      82                 :             :  * still in the OS cache.
      83                 :             :  */
      84                 :             : #define SYNC_SCAN_REPORT_INTERVAL (128 * 1024 / BLCKSZ) /* 128 kB worth of pages, assuming BLCKSZ is the page size in bytes */
      85                 :             : 
      86                 :             : 
      87                 :             : /*
      88                 :             :  * The scan locations structure is essentially a doubly-linked LRU list with
      89                 :             :  * head and tail pointers, but designed to hold a fixed maximum number of
      90                 :             :  * elements in fixed-size shared memory.
      91                 :             :  */
      92                 :             : typedef struct ss_scan_location_t /* payload carried by one LRU entry */
      93                 :             : {
      94                 :             :     RelFileLocator relfilelocator;  /* identity of a relation */
      95                 :             :     BlockNumber location;       /* last-reported location in the relation */
      96                 :             : } ss_scan_location_t;
      97                 :             : 
      98                 :             : typedef struct ss_lru_item_t /* one node of the doubly-linked LRU list */
      99                 :             : {
     100                 :             :     struct ss_lru_item_t *prev; /* neighbor towards the head; NULL for the head itself */
     101                 :             :     struct ss_lru_item_t *next; /* neighbor towards the tail; NULL for the last entry */
     102                 :             :     ss_scan_location_t location;    /* relation and its last-reported block */
     103                 :             : } ss_lru_item_t;
     104                 :             : 
     105                 :             : typedef struct ss_scan_locations_t /* the whole shared structure: list ends plus node storage */
     106                 :             : {
     107                 :             :     ss_lru_item_t *head;    /* most recently used entry; searches start here */
     108                 :             :     ss_lru_item_t *tail;    /* last entry of the list; updated when that entry is moved to the head */
     109                 :             :     ss_lru_item_t items[FLEXIBLE_ARRAY_MEMBER]; /* SYNC_SCAN_NELEM items */
     110                 :             : } ss_scan_locations_t;
     111                 :             : 
     112                 :             : #define SizeOfScanLocations(N) \
     113                 :             :     (offsetof(ss_scan_locations_t, items) + (N) * sizeof(ss_lru_item_t)) /* bytes for the fixed header plus N list items */
     114                 :             : 
     115                 :             : static void SyncScanShmemRequest(void *arg); /* forward declarations for the callback table below */
     116                 :             : static void SyncScanShmemInit(void *arg);
     117                 :             : 
     118                 :             : const ShmemCallbacks SyncScanShmemCallbacks = { /* NOTE(review): presumably consumed by the shmem subsystem code; the caller is not visible in this file */
     119                 :             :     .request_fn = SyncScanShmemRequest, /* registers this module's shared memory */
     120                 :             :     .init_fn = SyncScanShmemInit, /* initializes this module's shared memory */
     121                 :             : };
     122                 :             : 
     123                 :             : /* Pointer to struct in shared memory (its address is handed to ShmemRequestStruct in SyncScanShmemRequest) */
     124                 :             : static ss_scan_locations_t *scan_locations;
     125                 :             : 
     126                 :             : /* prototypes for internal functions */
     127                 :             : static BlockNumber ss_search(RelFileLocator relfilelocator,
     128                 :             :                              BlockNumber location, bool set); /* look up, and optionally update, a relation's entry */
     129                 :             : 
     130                 :             : 
     131                 :             : /*
     132                 :             :  * SyncScanShmemRequest --- register this module's shared memory ("arg" is unused)
     133                 :             :  */
     134                 :             : static void
     135                 :        1249 : SyncScanShmemRequest(void *arg)
     136                 :             : {
     137                 :        1249 :     ShmemRequestStruct(.name = "Sync Scan Locations List",
     138                 :             :                        .size = SizeOfScanLocations(SYNC_SCAN_NELEM), /* header plus SYNC_SCAN_NELEM list items */
     139                 :             :                        .ptr = (void **) &scan_locations, /* presumably receives the struct's address -- confirm against storage/shmem.h */
     140                 :             :         );
     141                 :        1249 : }
     142                 :             : 
     143                 :             : /*
     144                 :             :  * SyncScanShmemInit --- initialize this module's shared memory ("arg" is unused)
     145                 :             :  */
     146                 :             : static void
     147                 :        1246 : SyncScanShmemInit(void *arg)
     148                 :             : {
     149                 :             :     int         i;
     150                 :             : 
     151                 :        1246 :     scan_locations->head = &scan_locations->items[0]; /* list initially runs in array order */
     152                 :        1246 :     scan_locations->tail = &scan_locations->items[SYNC_SCAN_NELEM - 1];
     153                 :             : 
     154         [ +  + ]:       26166 :     for (i = 0; i < SYNC_SCAN_NELEM; i++)
     155                 :             :     {
     156                 :       24920 :         ss_lru_item_t *item = &scan_locations->items[i];
     157                 :             : 
     158                 :             :         /*
     159                 :             :          * Initialize all slots with invalid values. As scans are started,
     160                 :             :          * these invalid entries will fall off the LRU list and get replaced
     161                 :             :          * with real entries.
     162                 :             :          */
     163                 :       24920 :         item->location.relfilelocator.spcOid = InvalidOid;
     164                 :       24920 :         item->location.relfilelocator.dbOid = InvalidOid;
     165                 :       24920 :         item->location.relfilelocator.relNumber = InvalidRelFileNumber;
     166                 :       24920 :         item->location.location = InvalidBlockNumber;
     167                 :             : 
     168                 :       24920 :         item->prev = (i > 0) ? /* the first slot is the head and has no predecessor */
     169         [ +  + ]:       24920 :             (&scan_locations->items[i - 1]) : NULL;
     170                 :       24920 :         item->next = (i < SYNC_SCAN_NELEM - 1) ? /* the last slot is the tail and has no successor */
     171         [ +  + ]:       24920 :             (&scan_locations->items[i + 1]) : NULL;
     172                 :             :     }
     173                 :        1246 : }
     174                 :             : 
     175                 :             : /*
     176                 :             :  * ss_search --- search the scan_locations structure for an entry with the
     177                 :             :  *      given relfilelocator.
     178                 :             :  *
     179                 :             :  * If "set" is true, the location is updated to the given location.  If no
     180                 :             :  * entry for the given relfilelocator is found, it will be created at the head
     181                 :             :  * of the list with the given location, even if "set" is false.
     182                 :             :  *
     183                 :             :  * In any case, the location after possible update is returned.
     184                 :             :  *
     185                 :             :  * Caller is responsible for having acquired suitable lock on the shared
     186                 :             :  * data structure.
     187                 :             :  */
     188                 :             : static BlockNumber
     189                 :        3977 : ss_search(RelFileLocator relfilelocator, BlockNumber location, bool set)
     190                 :             : {
     191                 :             :     ss_lru_item_t *item;
     192                 :             : 
     193                 :        3977 :     item = scan_locations->head; /* walk from the most recently used entry */
     194                 :             :     for (;;)
     195                 :         608 :     {
     196                 :             :         bool        match;
     197                 :             : 
     198   [ +  +  +  +  :        4585 :         match = RelFileLocatorEquals(item->location.relfilelocator,
                   +  + ]
     199                 :             :                                      relfilelocator);
     200                 :             : 
     201   [ +  +  +  + ]:        4585 :         if (match || item->next == NULL) /* found the relation, or reached the last entry */
     202                 :             :         {
     203                 :             :             /*
     204                 :             :              * If we reached the end of list and no match was found, take over
     205                 :             :              * the last entry
     206                 :             :              */
     207         [ +  + ]:        3977 :             if (!match)
     208                 :             :             {
     209                 :          32 :                 item->location.relfilelocator = relfilelocator;
     210                 :          32 :                 item->location.location = location; /* stored regardless of "set" */
     211                 :             :             }
     212         [ +  + ]:        3945 :             else if (set)
     213                 :        3881 :                 item->location.location = location;
     214                 :             : 
     215                 :             :             /* Move the entry to the front of the LRU list */
     216         [ +  + ]:        3977 :             if (item != scan_locations->head)
     217                 :             :             {
     218                 :             :                 /* unlink */
     219         [ +  - ]:          32 :                 if (item == scan_locations->tail)
     220                 :          32 :                     scan_locations->tail = item->prev; /* the predecessor becomes the new last entry */
     221                 :          32 :                 item->prev->next = item->next; /* item is not the head here, so it has a predecessor */
     222         [ -  + ]:          32 :                 if (item->next)
     223                 :           0 :                     item->next->prev = item->prev;
     224                 :             : 
     225                 :             :                 /* link */
     226                 :          32 :                 item->prev = NULL;
     227                 :          32 :                 item->next = scan_locations->head;
     228                 :          32 :                 scan_locations->head->prev = item;
     229                 :          32 :                 scan_locations->head = item;
     230                 :             :             }
     231                 :             : 
     232                 :        3977 :             return item->location.location;
     233                 :             :         }
     234                 :             : 
     235                 :         608 :         item = item->next; /* no match yet; continue towards the tail */
     236                 :             :     }
     237                 :             : 
     238                 :             :     /* not reached */
     239                 :             : }
     240                 :             : 
     241                 :             : /*
     242                 :             :  * ss_get_location --- get the optimal starting location for scan
     243                 :             :  *
     244                 :             :  * Returns the last-reported location of a sequential scan on the
     245                 :             :  * relation, or 0 if no valid location is found.
     246                 :             :  *
     247                 :             :  * We expect the caller has just done RelationGetNumberOfBlocks(), and
     248                 :             :  * so that number is passed in rather than computing it again.  The result
     249                 :             :  * is guaranteed less than relnblocks (assuming that's > 0).
     250                 :             :  */
     251                 :             : BlockNumber
     252                 :          96 : ss_get_location(Relation rel, BlockNumber relnblocks)
     253                 :             : {
     254                 :             :     BlockNumber startloc;
     255                 :             : 
     256                 :          96 :     LWLockAcquire(SyncScanLock, LW_EXCLUSIVE); /* exclusive because ss_search may relink or replace entries even on a lookup */
     257                 :          96 :     startloc = ss_search(rel->rd_locator, 0, false); /* set = false; if no entry exists, one is created with location 0 */
     258                 :          96 :     LWLockRelease(SyncScanLock);
     259                 :             : 
     260                 :             :     /*
     261                 :             :      * If the location is not a valid block number for this scan, start at 0.
     262                 :             :      *
     263                 :             :      * This can happen if for instance a VACUUM truncated the table since the
     264                 :             :      * location was saved.
     265                 :             :      */
     266         [ -  + ]:          96 :     if (startloc >= relnblocks)
     267                 :           0 :         startloc = 0;
     268                 :             : 
     269                 :             : #ifdef TRACE_SYNCSCAN
     270                 :             :     if (trace_syncscan)
     271                 :             :         elog(LOG,
     272                 :             :              "SYNC_SCAN: start \"%s\" (size %u) at %u",
     273                 :             :              RelationGetRelationName(rel), relnblocks, startloc);
     274                 :             : #endif
     275                 :             : 
     276                 :          96 :     return startloc;
     277                 :             : }
     278                 :             : 
     279                 :             : /*
     280                 :             :  * ss_report_location --- update the current scan location
     281                 :             :  *
     282                 :             :  * Writes an entry into the shared Sync Scan state of the form
     283                 :             :  * (relfilelocator, blocknumber), overwriting any existing entry for the
     284                 :             :  * same relfilelocator.
     285                 :             :  */
     286                 :             : void
     287                 :       61433 : ss_report_location(Relation rel, BlockNumber location)
     288                 :             : {
     289                 :             : #ifdef TRACE_SYNCSCAN
     290                 :             :     if (trace_syncscan)
     291                 :             :     {
     292                 :             :         if ((location % 1024) == 0) /* trace only every 1024th block, presumably to limit log volume */
     293                 :             :             elog(LOG,
     294                 :             :                  "SYNC_SCAN: scanning \"%s\" at %u",
     295                 :             :                  RelationGetRelationName(rel), location);
     296                 :             :     }
     297                 :             : #endif
     298                 :             : 
     299                 :             :     /*
     300                 :             :      * To reduce lock contention, only report scan progress every N pages. For
     301                 :             :      * the same reason, don't block if the lock isn't immediately available.
     302                 :             :      * Missing a few updates isn't critical, it just means that a new scan
     303                 :             :      * that wants to join the pack will start a little bit behind the head of
     304                 :             :      * the scan.  Hopefully the pages are still in OS cache and the scan
     305                 :             :      * catches up quickly.
     306                 :             :      */
     307         [ +  + ]:       61433 :     if ((location % SYNC_SCAN_REPORT_INTERVAL) == 0) /* N is SYNC_SCAN_REPORT_INTERVAL */
     308                 :             :     {
     309         [ +  - ]:        3881 :         if (LWLockConditionalAcquire(SyncScanLock, LW_EXCLUSIVE)) /* skip this report rather than wait for the lock */
     310                 :             :         {
     311                 :        3881 :             (void) ss_search(rel->rd_locator, location, true); /* set = true; the returned location is not needed */
     312                 :        3881 :             LWLockRelease(SyncScanLock);
     313                 :             :         }
     314                 :             : #ifdef TRACE_SYNCSCAN
     315                 :             :         else if (trace_syncscan)
     316                 :             :             elog(LOG,
     317                 :             :                  "SYNC_SCAN: missed update for \"%s\" at %u",
     318                 :             :                  RelationGetRelationName(rel), location);
     319                 :             : #endif
     320                 :             :     }
     321                 :       61433 : }
        

Generated by: LCOV version 2.0-1