LCOV - code coverage report
Current view: top level - src/backend/executor - nodeBitmapHeapscan.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 275 307 89.6 %
Date: 2025-02-22 07:14:56 Functions: 15 17 88.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * nodeBitmapHeapscan.c
       4             :  *    Routines to support bitmapped scans of relations
       5             :  *
       6             :  * NOTE: it is critical that this plan type only be used with MVCC-compliant
       7             :  * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
       8             :  * special snapshots).  The reason is that since index and heap scans are
       9             :  * decoupled, there can be no assurance that the index tuple prompting a
      10             :  * visit to a particular heap TID still exists when the visit is made.
      11             :  * Therefore the tuple might not exist anymore either (which is OK because
      12             :  * heap_fetch will cope) --- but worse, the tuple slot could have been
      13             :  * re-used for a newer tuple.  With an MVCC snapshot the newer tuple is
      14             :  * certain to fail the time qual and so it will not be mistakenly returned,
      15             :  * but with anything else we might return a tuple that doesn't meet the
      16             :  * required index qual conditions.
      17             :  *
      18             :  *
      19             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      20             :  * Portions Copyright (c) 1994, Regents of the University of California
      21             :  *
      22             :  *
      23             :  * IDENTIFICATION
      24             :  *    src/backend/executor/nodeBitmapHeapscan.c
      25             :  *
      26             :  *-------------------------------------------------------------------------
      27             :  */
      28             : /*
      29             :  * INTERFACE ROUTINES
      30             :  *      ExecBitmapHeapScan          scans a relation using bitmap info
      31             :  *      BitmapHeapNext              workhorse for above
      32             :  *      ExecInitBitmapHeapScan      creates and initializes state info.
      33             :  *      ExecReScanBitmapHeapScan    prepares to rescan the plan.
      34             :  *      ExecEndBitmapHeapScan       releases all storage.
      35             :  */
      36             : #include "postgres.h"
      37             : 
      38             : #include <math.h>
      39             : 
      40             : #include "access/relscan.h"
      41             : #include "access/tableam.h"
      42             : #include "access/visibilitymap.h"
      43             : #include "executor/executor.h"
      44             : #include "executor/nodeBitmapHeapscan.h"
      45             : #include "miscadmin.h"
      46             : #include "pgstat.h"
      47             : #include "storage/bufmgr.h"
      48             : #include "utils/rel.h"
      49             : #include "utils/spccache.h"
      50             : /* Forward declarations of static helper routines used in this file. */
      51             : static void BitmapTableScanSetup(BitmapHeapScanState *node);
      52             : static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
      53             : static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
      54             : static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node);
      55             : static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
      56             : static inline void BitmapPrefetch(BitmapHeapScanState *node,
      57             :                                   TableScanDesc scan);
      58             : static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
      59             : 
      60             : 
      61             : /*
      62             :  * Do the underlying index scan, build the bitmap, set up the parallel state
      63             :  * needed for parallel workers to iterate through the bitmap, and set up the
      64             :  * underlying table scan descriptor.
      65             :  *
      66             :  * For prefetching, we use *two* iterators, one for the pages we are actually
      67             :  * scanning and another that runs ahead of the first for prefetching.
      68             :  * node->prefetch_pages tracks exactly how many pages ahead the prefetch
      69             :  * iterator is.  Also, node->prefetch_target tracks the desired prefetch
      70             :  * distance, which starts small and increases up to the
      71             :  * node->prefetch_maximum.  This is to avoid doing a lot of prefetching in a
      72             :  * scan that stops after a few tuples because of a LIMIT.
      73             :  */
      74             : static void
      75       25576 : BitmapTableScanSetup(BitmapHeapScanState *node)
      76             : {
      77       25576 :     TBMIterator tbmiterator = {0};
      78       25576 :     ParallelBitmapHeapState *pstate = node->pstate;
      79       25576 :     dsa_area   *dsa = node->ss.ps.state->es_query_dsa;
      80             :     /* Serial scan: run the subplan ourselves to obtain the bitmap. */
      81       25576 :     if (!pstate)
      82             :     {
      83       25234 :         node->tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
      84             : 
      85       25234 :         if (!node->tbm || !IsA(node->tbm, TIDBitmap))
      86           0 :             elog(ERROR, "unrecognized result from subplan");
      87             :     }
      88         342 :     else if (BitmapShouldInitializeSharedState(pstate))
      89             :     {
      90             :         /*
      91             :          * The leader will immediately come out of the function, but others
      92             :          * will be blocked until leader populates the TBM and wakes them up.
      93             :          */
      94          72 :         node->tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
      95          72 :         if (!node->tbm || !IsA(node->tbm, TIDBitmap))
      96           0 :             elog(ERROR, "unrecognized result from subplan");
      97             : 
      98             :         /*
      99             :          * Prepare to iterate over the TBM. This will return the dsa_pointer
     100             :          * of the iterator state which will be used by multiple processes to
     101             :          * iterate jointly.
     102             :          */
     103          72 :         pstate->tbmiterator = tbm_prepare_shared_iterate(node->tbm);
     104             : 
     105             : #ifdef USE_PREFETCH
     106          72 :         if (node->prefetch_maximum > 0)
     107             :         {
     108          72 :             pstate->prefetch_iterator =
     109          72 :                 tbm_prepare_shared_iterate(node->tbm);
     110             :         }
     111             : #endif                          /* USE_PREFETCH */
     112             : 
     113             :         /* We have initialized the shared state so wake up others. */
     114          72 :         BitmapDoneInitializingSharedState(pstate);
     115             :     }
     116             :     /* Start the main iterator, passing the shared iterator handle if parallel. */
     117       25576 :     tbmiterator = tbm_begin_iterate(node->tbm, dsa,
     118             :                                     pstate ?
     119             :                                     pstate->tbmiterator :
     120             :                                     InvalidDsaPointer);
     121             : 
     122             : #ifdef USE_PREFETCH
     123       25576 :     if (node->prefetch_maximum > 0)
     124             :         node->prefetch_iterator =
     125       25576 :             tbm_begin_iterate(node->tbm, dsa,
     126             :                               pstate ?
     127             :                               pstate->prefetch_iterator :
     128             :                               InvalidDsaPointer);
     129             : #endif                          /* USE_PREFETCH */
     130             : 
     131             :     /*
     132             :      * If this is the first scan of the underlying table, create the table
     133             :      * scan descriptor and begin the scan.
     134             :      */
     135       25576 :     if (!node->ss.ss_currentScanDesc)
     136             :     {
     137       21322 :         bool        need_tuples = false;
     138             : 
     139             :         /*
     140             :          * We can potentially skip fetching heap pages if we do not need any
     141             :          * columns of the table, either for checking non-indexable quals or
     142             :          * for returning data.  This test is a bit simplistic, as it checks
     143             :          * the stronger condition that there's no qual or return tlist at all.
     144             :          * But in most cases it's probably not worth working harder than that.
     145             :          */
     146       40282 :         need_tuples = (node->ss.ps.plan->qual != NIL ||
     147       18960 :                        node->ss.ps.plan->targetlist != NIL);
     148             : 
     149       21322 :         node->ss.ss_currentScanDesc =
     150       21322 :             table_beginscan_bm(node->ss.ss_currentRelation,
     151       21322 :                                node->ss.ps.state->es_snapshot,
     152             :                                0,
     153             :                                NULL,
     154             :                                need_tuples);
     155             :     }
     156             :     /* Install the main iterator in the scan descriptor; setup is complete. */
     157       25576 :     node->ss.ss_currentScanDesc->st.rs_tbmiterator = tbmiterator;
     158       25576 :     node->initialized = true;
     159       25576 : }
     160             : 
     161             : 
     162             : /* ----------------------------------------------------------------
     163             :  *      BitmapHeapNext
     164             :  *      Retrieve next tuple from the BitmapHeapScan node's currentRelation.
     165             :  *      Returns the scan slot, cleared by ExecClearTuple at end of scan.
     166             :  * ----------------------------------------------------------------
     167             :  */
     168             : static TupleTableSlot *
     169     5871424 : BitmapHeapNext(BitmapHeapScanState *node)
     170             : {
     171             :     ExprContext *econtext;
     172             :     TableScanDesc scan;
     173             :     TupleTableSlot *slot;
     174             : 
     175             : #ifdef USE_PREFETCH
     176     5871424 :     ParallelBitmapHeapState *pstate = node->pstate;
     177             : #endif
     178             : 
     179             :     /*
     180             :      * extract necessary information from the bitmap heap scan node
     181             :      */
     182     5871424 :     econtext = node->ss.ps.ps_ExprContext;
     183     5871424 :     slot = node->ss.ss_ScanTupleSlot;
     184     5871424 :     scan = node->ss.ss_currentScanDesc;
     185             : 
     186             :     /*
     187             :      * If we haven't yet performed the underlying index scan, do it, and begin
     188             :      * the iteration over the bitmap.
     189             :      */
     190     5871424 :     if (!node->initialized)
     191             :     {
     192       25576 :         BitmapTableScanSetup(node);
     193       25576 :         scan = node->ss.ss_currentScanDesc;
     194       25576 :         goto new_page;
     195             :     }
     196             : 
     197             :     for (;;)
     198             :     {
     199     6826844 :         while (table_scan_bitmap_next_tuple(scan, slot))
     200             :         {
     201             :             /*
     202             :              * Continuing in previously obtained page.
     203             :              */
     204             : 
     205     6433736 :             CHECK_FOR_INTERRUPTS();
     206             : 
     207             : #ifdef USE_PREFETCH
     208             : 
     209             :             /*
     210             :              * Try to prefetch at least a few pages even before we get to the
     211             :              * second page if we don't stop reading after the first tuple.
     212             :              */
     213     6433736 :             if (!pstate)
     214             :             {
     215     5239736 :                 if (node->prefetch_target < node->prefetch_maximum)
     216       15074 :                     node->prefetch_target++;
     217             :             }
     218     1194000 :             else if (pstate->prefetch_target < node->prefetch_maximum)
     219             :             {
     220             :                 /* take spinlock while updating shared state */
     221        1926 :                 SpinLockAcquire(&pstate->mutex);
     222        1926 :                 if (pstate->prefetch_target < node->prefetch_maximum)
     223        1926 :                     pstate->prefetch_target++;
     224        1926 :                 SpinLockRelease(&pstate->mutex);
     225             :             }
     226             : #endif                          /* USE_PREFETCH */
     227             : 
     228             :             /*
     229             :              * We issue prefetch requests *after* fetching the current page to
     230             :              * try to avoid having prefetching interfere with the main I/O.
     231             :              * Also, this should happen only when we have determined there is
     232             :              * still something to do on the current page, else we may
     233             :              * uselessly prefetch the same page we are just about to request
     234             :              * for real.
     235             :              */
     236     6433736 :             BitmapPrefetch(node, scan);
     237             : 
     238             :             /*
     239             :              * If we are using lossy info, we have to recheck the qual
     240             :              * conditions at every tuple.
     241             :              */
     242     6433736 :             if (node->recheck)
     243             :             {
     244     3124944 :                 econtext->ecxt_scantuple = slot;
     245     3124944 :                 if (!ExecQualAndReset(node->bitmapqualorig, econtext))
     246             :                 {
     247             :                     /* Fails recheck, so drop it and loop back for another */
     248      587410 :                     InstrCountFiltered2(node, 1);
     249      587410 :                     ExecClearTuple(slot);
     250      587410 :                     continue;
     251             :                 }
     252             :             }
     253             : 
     254             :             /* OK to return this tuple */
     255     5846326 :             return slot;
     256             :         }
     257             : 
     258      393108 : new_page:
     259             : 
     260      418684 :         BitmapAdjustPrefetchIterator(node);
     261             : 
     262             :         /*
     263             :          * Returns false if the bitmap is exhausted and there are no further
     264             :          * blocks we need to scan.
     265             :          */
     266      418684 :         if (!table_scan_bitmap_next_block(scan, &node->blockno,
     267             :                                           &node->recheck,
     268             :                                           &node->stats.lossy_pages,
     269             :                                           &node->stats.exact_pages))
     270       25092 :             break;
     271             : 
     272             :         /*
     273             :          * If serial, we can error out if the prefetch block doesn't stay
     274             :          * ahead of the current block.  Block numbers are unsigned: use %u.
     275             :          */
     276      393586 :         if (node->pstate == NULL &&
     277      363478 :             !tbm_exhausted(&node->prefetch_iterator) &&
     278      363478 :             node->prefetch_blockno < node->blockno)
     279           0 :             elog(ERROR,
     280             :                  "prefetch and main iterators are out of sync. pfblockno: %u. blockno: %u",
     281             :                  node->prefetch_blockno, node->blockno);
     282             : 
     283             :         /* Adjust the prefetch target */
     284      393586 :         BitmapAdjustPrefetchTarget(node);
     285             :     }
     286             : 
     287             :     /*
     288             :      * if we get here it means we are at the end of the scan.
     289             :      */
     290       25092 :     return ExecClearTuple(slot);
     291             : }
     292             : 
     293             : /*
     294             :  *  BitmapDoneInitializingSharedState - Shared state is initialized
     295             :  *
     296             :  *  By this time the leader has already populated the TBM and initialized the
     297             :  *  shared state: flag it BM_FINISHED under the mutex and wake up the others.
     298             :  */
     299             : static inline void
     300          72 : BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
     301             : {
     302          72 :     SpinLockAcquire(&pstate->mutex);
     303          72 :     pstate->state = BM_FINISHED;
     304          72 :     SpinLockRelease(&pstate->mutex);
     305          72 :     ConditionVariableBroadcast(&pstate->cv);
     306          72 : }
     307             : 
     308             : /*
     309             :  *  BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
     310             :  *
     311             :  *  We keep track of how far the prefetch iterator is ahead of the main
     312             :  *  iterator in prefetch_pages. For each block the main iterator returns, we
     313             :  *  decrement prefetch_pages; at zero we advance the prefetch iterator instead.
     314             :  */
     315             : static inline void
     316      418684 : BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
     317             : {
     318             : #ifdef USE_PREFETCH
     319      418684 :     ParallelBitmapHeapState *pstate = node->pstate;
     320             :     TBMIterateResult *tbmpre;
     321             :     /* Serial scan: only node-local prefetch state is touched. */
     322      418684 :     if (pstate == NULL)
     323             :     {
     324      388234 :         TBMIterator *prefetch_iterator = &node->prefetch_iterator;
     325             : 
     326      388234 :         if (node->prefetch_pages > 0)
     327             :         {
     328             :             /* The main iterator has closed the distance by one page */
     329      341894 :             node->prefetch_pages--;
     330             :         }
     331       46340 :         else if (!tbm_exhausted(prefetch_iterator))
     332             :         {
     333       31298 :             tbmpre = tbm_iterate(prefetch_iterator);
     334       31298 :             node->prefetch_blockno = tbmpre ? tbmpre->blockno :
     335             :                 InvalidBlockNumber;
     336             :         }
     337      388234 :         return;
     338             :     }
     339             : 
     340             :     /*
     341             :      * XXX: There is a known issue with keeping the prefetch and current block
     342             :      * iterators in sync for parallel bitmap table scans. This can lead to
     343             :      * prefetching blocks that have already been read. See the discussion
     344             :      * here:
     345             :      * https://postgr.es/m/20240315211449.en2jcmdqxv5o6tlz%40alap3.anarazel.de
     346             :      * Note that moving the call site of BitmapAdjustPrefetchIterator()
     347             :      * exacerbates the effects of this bug.
     348             :      */
     349       30450 :     if (node->prefetch_maximum > 0)
     350             :     {
     351       30450 :         TBMIterator *prefetch_iterator = &node->prefetch_iterator;
     352             : 
     353       30450 :         SpinLockAcquire(&pstate->mutex);
     354       30450 :         if (pstate->prefetch_pages > 0)
     355             :         {
     356       30112 :             pstate->prefetch_pages--;
     357       30112 :             SpinLockRelease(&pstate->mutex);
     358             :         }
     359             :         else
     360             :         {
     361             :             /* Release the mutex before iterating */
     362         338 :             SpinLockRelease(&pstate->mutex);
     363             : 
     364             :             /*
     365             :              * In case of shared mode, we cannot ensure that the current
     366             :              * blockno of the main iterator and that of the prefetch iterator
     367             :              * are the same.  It's possible that whatever blockno we are
     368             :              * prefetching will be processed by another process.  Therefore,
     369             :              * we don't validate the blockno here as we do in non-parallel
     370             :              * case.
     371             :              */
     372         338 :             if (!tbm_exhausted(prefetch_iterator))
     373             :             {
     374         338 :                 tbmpre = tbm_iterate(prefetch_iterator);
     375         338 :                 node->prefetch_blockno = tbmpre ? tbmpre->blockno :
     376             :                     InvalidBlockNumber;
     377             :             }
     378             :         }
     379             :     }
     380             : #endif                          /* USE_PREFETCH */
     381             : }
     382             : 
     383             : /*
     384             :  * BitmapAdjustPrefetchTarget - Adjust the prefetch target
     385             :  *
     386             :  * Increase prefetch target if it's not yet at the max.  Note that
     387             :  * we will increase it to zero after fetching the very first
     388             :  * page/tuple, then to one after the second tuple is fetched, then
     389             :  * it doubles as later pages are fetched.
     390             :  */
     391             : static inline void
     392      393586 : BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
     393             : {
     394             : #ifdef USE_PREFETCH
     395      393586 :     ParallelBitmapHeapState *pstate = node->pstate;
     396             :     /* Serial scan: the target is node-local, so no locking is needed. */
     397      393586 :     if (pstate == NULL)
     398             :     {
     399      363478 :         if (node->prefetch_target >= node->prefetch_maximum)
     400             :              /* don't increase any further */ ;
     401       16182 :         else if (node->prefetch_target >= node->prefetch_maximum / 2)
     402         490 :             node->prefetch_target = node->prefetch_maximum;
     403       15692 :         else if (node->prefetch_target > 0)
     404           0 :             node->prefetch_target *= 2;
     405             :         else
     406       15692 :             node->prefetch_target++;
     407      363478 :         return;
     408             :     }
     409             : 
     410             :     /* Do an unlocked check first to save spinlock acquisitions. */
     411       30108 :     if (pstate->prefetch_target < node->prefetch_maximum)
     412             :     {
     413         132 :         SpinLockAcquire(&pstate->mutex);
     414         132 :         if (pstate->prefetch_target >= node->prefetch_maximum)
     415             :              /* don't increase any further */ ;
     416         132 :         else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
     417          60 :             pstate->prefetch_target = node->prefetch_maximum;
     418          72 :         else if (pstate->prefetch_target > 0)
     419           0 :             pstate->prefetch_target *= 2;
     420             :         else
     421          72 :             pstate->prefetch_target++;
     422         132 :         SpinLockRelease(&pstate->mutex);
     423             :     }
     424             : #endif                          /* USE_PREFETCH */
     425             : }
     426             : 
     427             : /*
     428             :  * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
     429             :  */
     430             : static inline void
     431     6433736 : BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
     432             : {
     433             : #ifdef USE_PREFETCH
     434     6433736 :     ParallelBitmapHeapState *pstate = node->pstate;
     435             :     /* Serial scan: top up prefetches using the node-local counters. */
     436     6433736 :     if (pstate == NULL)
     437             :     {
     438     5239736 :         TBMIterator *prefetch_iterator = &node->prefetch_iterator;
     439             : 
     440     5239736 :         if (!tbm_exhausted(prefetch_iterator))
     441             :         {
     442     5482428 :             while (node->prefetch_pages < node->prefetch_target)
     443             :             {
     444      357414 :                 TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
     445             :                 bool        skip_fetch;
     446             : 
     447      357414 :                 if (tbmpre == NULL)
     448             :                 {
     449             :                     /* No more pages to prefetch */
     450       15492 :                     tbm_end_iterate(prefetch_iterator);
     451       15492 :                     break;
     452             :                 }
     453      341922 :                 node->prefetch_pages++;
     454      341922 :                 node->prefetch_blockno = tbmpre->blockno;
     455             : 
     456             :                 /*
     457             :                  * If we expect not to have to actually read this heap page,
     458             :                  * skip this prefetch call, but continue to run the prefetch
     459             :                  * logic normally.  (Would it be better not to increment
     460             :                  * prefetch_pages?)
     461             :                  */
     462      749462 :                 skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
     463      372794 :                               !tbmpre->recheck &&
     464       30872 :                               VM_ALL_VISIBLE(node->ss.ss_currentRelation,
     465             :                                              tbmpre->blockno,
     466             :                                              &node->pvmbuffer));
     467             : 
     468      341922 :                 if (!skip_fetch)
     469      341700 :                     PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
     470             :             }
     471             :         }
     472             : 
     473     5239736 :         return;
     474             :     }
     475             :     /* Parallel: unlocked pre-check; the decision is redone under the mutex. */
     476     1194000 :     if (pstate->prefetch_pages < pstate->prefetch_target)
     477             :     {
     478      135448 :         TBMIterator *prefetch_iterator = &node->prefetch_iterator;
     479             : 
     480      135448 :         if (!tbm_exhausted(prefetch_iterator))
     481             :         {
     482             :             while (1)
     483       30036 :             {
     484             :                 TBMIterateResult *tbmpre;
     485       58746 :                 bool        do_prefetch = false;
     486             :                 bool        skip_fetch;
     487             : 
     488             :                 /*
     489             :                  * Recheck under the mutex. If some other process has already
     490             :                  * done enough prefetching then we need not do anything.
     491             :                  */
     492       58746 :                 SpinLockAcquire(&pstate->mutex);
     493       58746 :                 if (pstate->prefetch_pages < pstate->prefetch_target)
     494             :                 {
     495       30112 :                     pstate->prefetch_pages++;
     496       30112 :                     do_prefetch = true;
     497             :                 }
     498       58746 :                 SpinLockRelease(&pstate->mutex);
     499             : 
     500       58746 :                 if (!do_prefetch)
     501       28634 :                     return;
     502             : 
     503       30112 :                 tbmpre = tbm_iterate(prefetch_iterator);
     504       30112 :                 if (tbmpre == NULL)
     505             :                 {
     506             :                     /* No more pages to prefetch */
     507          76 :                     tbm_end_iterate(prefetch_iterator);
     508          76 :                     break;
     509             :                 }
     510             : 
     511       30036 :                 node->prefetch_blockno = tbmpre->blockno;
     512             : 
     513             :                 /* As above, skip prefetch if we expect not to need page */
     514       88116 :                 skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
     515       54984 :                               !tbmpre->recheck &&
     516       24948 :                               VM_ALL_VISIBLE(node->ss.ss_currentRelation,
     517             :                                              tbmpre->blockno,
     518             :                                              &node->pvmbuffer));
     519             : 
     520       30036 :                 if (!skip_fetch)
     521        9396 :                     PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
     522             :             }
     523             :         }
     524             :     }
     525             : #endif                          /* USE_PREFETCH */
     526             : }
     527             : 
     528             : /*
     529             :  * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
     530             :  */
     531             : static bool
     532           0 : BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
     533             : {
     534             :     ExprContext *econtext;
     535             : 
     536             :     /*
     537             :      * extract necessary information from the bitmap heap scan node
     538             :      */
     539           0 :     econtext = node->ss.ps.ps_ExprContext;
     540             : 
     541             :     /* Does the tuple meet the original qual conditions? */
     542           0 :     econtext->ecxt_scantuple = slot;
     543           0 :     return ExecQualAndReset(node->bitmapqualorig, econtext);
     544             : }
     545             : 
     546             : /* ----------------------------------------------------------------
     547             :  *      ExecBitmapHeapScan(node)
     548             :  * ----------------------------------------------------------------
     549             :  */
     550             : static TupleTableSlot *
     551     5597848 : ExecBitmapHeapScan(PlanState *pstate)
     552             : {
     553     5597848 :     BitmapHeapScanState *node = castNode(BitmapHeapScanState, pstate);
     554             :     /* Delegate to ExecScan, supplying our access and recheck callbacks. */
     555     5597848 :     return ExecScan(&node->ss,
     556             :                     (ExecScanAccessMtd) BitmapHeapNext,
     557             :                     (ExecScanRecheckMtd) BitmapHeapRecheck);
     558             : }
     559             : 
     560             : /* ----------------------------------------------------------------
     561             :  *      ExecReScanBitmapHeapScan(node)
                     :  *
                     :  *      Prepare the node to be scanned again from the start.  Ends any
                     :  *      iterators that are still live, rescans the existing table scan
                     :  *      descriptor (if one was created), frees the bitmap, drops the
                     :  *      pvmbuffer pin, and puts the per-scan fields back to the values
                     :  *      ExecInitBitmapHeapScan starts them with.  The child plan is
                     :  *      rescanned here only when its chgParam is NULL.
     562             :  * ----------------------------------------------------------------
     563             :  */
     564             : void
     565       10336 : ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
     566             : {
     567       10336 :     PlanState  *outerPlan = outerPlanState(node);
     568             : 
     569       10336 :     TableScanDesc scan = node->ss.ss_currentScanDesc;
     570             : 
     571       10336 :     if (scan)
     572             :     {
     573             :         /*
     574             :          * End iteration on iterators saved in scan descriptor if they have
     575             :          * not already been cleaned up.
     576             :          */
     577        4260 :         if (!tbm_exhausted(&scan->st.rs_tbmiterator))
     578        4254 :             tbm_end_iterate(&scan->st.rs_tbmiterator);
     579             : 
     580             :         /* rescan to release any page pin; 'scan' is the same descriptor */
     581        4260 :         table_rescan(node->ss.ss_currentScanDesc, NULL);
     582             :     }
     583             : 
     584             :     /* If we did not already clean up the prefetch iterator, do so now. */
     585       10336 :     if (!tbm_exhausted(&node->prefetch_iterator))
     586        1442 :         tbm_end_iterate(&node->prefetch_iterator);
     587             : 
     588             :     /*
                     :      * release bitmaps and buffers if any, then reset the per-scan fields
                     :      * to the same initial values ExecInitBitmapHeapScan assigns
                     :      */
     589       10336 :     if (node->tbm)
     590        4254 :         tbm_free(node->tbm);
     591       10336 :     if (node->pvmbuffer != InvalidBuffer)
     592          54 :         ReleaseBuffer(node->pvmbuffer);
     593       10336 :     node->tbm = NULL;
     594       10336 :     node->initialized = false;
     595       10336 :     node->pvmbuffer = InvalidBuffer;
     596       10336 :     node->recheck = true;
     597             :     /* Only used for serial BHS (bitmap heap scan) */
     598       10336 :     node->blockno = InvalidBlockNumber;
     599       10336 :     node->prefetch_blockno = InvalidBlockNumber;
     600       10336 :     node->prefetch_pages = 0;
     601       10336 :     node->prefetch_target = -1;
     602             : 
     603       10336 :     ExecScanReScan(&node->ss);
     604             : 
     605             :     /*
     606             :      * if chgParam of subnode is not null then plan will be re-scanned by
     607             :      * first ExecProcNode.
     608             :      */
     609       10336 :     if (outerPlan->chgParam == NULL)
     610         232 :         ExecReScan(outerPlan);
     611       10336 : }
     612             : 
     613             : /* ----------------------------------------------------------------
     614             :  *      ExecEndBitmapHeapScan
                     :  *
                     :  *      Shut the node down.  In a parallel worker with shared
                     :  *      instrumentation attached, first add this worker's page
                     :  *      counters into its slot in shared memory.  Then end the child
                     :  *      plan, end any iterators that are still live, close the table
                     :  *      scan (if one was started), free the bitmap and drop the
                     :  *      pvmbuffer pin.
     615             :  * ----------------------------------------------------------------
     616             :  */
     617             : void
     618       26358 : ExecEndBitmapHeapScan(BitmapHeapScanState *node)
     619             : {
     620             :     TableScanDesc scanDesc;
     621             : 
     622             :     /*
     623             :      * When ending a parallel worker, copy the statistics gathered by the
     624             :      * worker back into shared memory so that it can be picked up by the main
     625             :      * process to report in EXPLAIN ANALYZE.
     626             :      */
     627       26358 :     if (node->sinstrument != NULL && IsParallelWorker())
     628             :     {
     629             :         BitmapHeapScanInstrumentation *si;
     630             : 
                     :         /*
                     :          * ParallelWorkerNumber indexes sinstrument[], which holds exactly
                     :          * num_workers entries (see ExecBitmapHeapInitializeDSM), so the
                     :          * bound must be strict.
                     :          */
     631             :         Assert(ParallelWorkerNumber < node->sinstrument->num_workers);
     632           0 :         si = &node->sinstrument->sinstrument[ParallelWorkerNumber];
     633             : 
     634             :         /*
     635             :          * Here we accumulate the stats rather than performing memcpy on
     636             :          * node->stats into si.  When a Gather/GatherMerge node finishes it
     637             :          * will perform planner shutdown on the workers.  On rescan it will
     638             :          * spin up new workers which will have a new BitmapHeapScanState and
     639             :          * zeroed stats.
     640             :          */
     641           0 :         si->exact_pages += node->stats.exact_pages;
     642           0 :         si->lossy_pages += node->stats.lossy_pages;
     643             :     }
     644             : 
     645             :     /*
     646             :      * extract information from the node
     647             :      */
     648       26358 :     scanDesc = node->ss.ss_currentScanDesc;
     649             : 
     650             :     /*
     651             :      * close down subplans
     652             :      */
     653       26358 :     ExecEndNode(outerPlanState(node));
     654             : 
     655       26358 :     if (scanDesc)
     656             :     {
     657             :         /*
     658             :          * End iteration on iterators saved in scan descriptor if they have
     659             :          * not already been cleaned up.
     660             :          */
     661       21208 :         if (!tbm_exhausted(&scanDesc->st.rs_tbmiterator))
     662       21208 :             tbm_end_iterate(&scanDesc->st.rs_tbmiterator);
     663             : 
     664             :         /*
     665             :          * close table scan
     666             :          */
     667       21208 :         table_endscan(scanDesc);
     668             :     }
     669             : 
     670             :     /* If we did not already clean up the prefetch iterator, do so now. */
     671       26358 :     if (!tbm_exhausted(&node->prefetch_iterator))
     672        8560 :         tbm_end_iterate(&node->prefetch_iterator);
     673             : 
     674             :     /*
     675             :      * release bitmaps and buffers if any
     676             :      */
     677       26358 :     if (node->tbm)
     678       20938 :         tbm_free(node->tbm);
     679       26358 :     if (node->pvmbuffer != InvalidBuffer)
     680          48 :         ReleaseBuffer(node->pvmbuffer);
     681       26358 : }
     682             : 
     683             : /* ----------------------------------------------------------------
     684             :  *      ExecInitBitmapHeapScan
     685             :  *
     686             :  *      Initializes the scan's state information.
                     :  *
                     :  *      Builds and returns a BitmapHeapScanState for plan node 'node':
                     :  *      installs ExecBitmapHeapScan as the ExecProcNode callback, opens
                     :  *      the scan relation, initializes the child plan, the scan tuple
                     :  *      slot, the result type/projection and the two qual expressions,
                     :  *      and records the prefetch limit for the relation's tablespace.
                     :  *      No bitmap is created here: tbm starts out NULL and
                     :  *      'initialized' is false.
                     :  *
                     :  *      'eflags' must not contain EXEC_FLAG_BACKWARD or EXEC_FLAG_MARK,
                     :  *      and estate->es_snapshot must be an MVCC snapshot; both are
                     :  *      checked with Assert.
     687             :  * ----------------------------------------------------------------
     688             :  */
     689             : BitmapHeapScanState *
     690       26472 : ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
     691             : {
     692             :     BitmapHeapScanState *scanstate;
     693             :     Relation    currentRelation;
     694             : 
     695             :     /* check for unsupported flags */
     696             :     Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
     697             : 
     698             :     /*
     699             :      * Assert caller didn't ask for an unsafe snapshot --- see comments at
     700             :      * head of file.
     701             :      */
     702             :     Assert(IsMVCCSnapshot(estate->es_snapshot));
     703             : 
     704             :     /*
     705             :      * create state structure
     706             :      */
     707       26472 :     scanstate = makeNode(BitmapHeapScanState);
     708       26472 :     scanstate->ss.ps.plan = (Plan *) node;
     709       26472 :     scanstate->ss.ps.state = estate;
     710       26472 :     scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
     711             : 
     712       26472 :     scanstate->tbm = NULL;
     713       26472 :     scanstate->pvmbuffer = InvalidBuffer;
     714             : 
     715             :     /* Zero the statistics counters */
     716       26472 :     memset(&scanstate->stats, 0, sizeof(BitmapHeapScanInstrumentation));
     717             : 
     718       26472 :     scanstate->prefetch_pages = 0;
     719       26472 :     scanstate->prefetch_target = -1;
     720       26472 :     scanstate->initialized = false;
     721       26472 :     scanstate->pstate = NULL;
     722       26472 :     scanstate->recheck = true;
     723       26472 :     scanstate->blockno = InvalidBlockNumber;
     724       26472 :     scanstate->prefetch_blockno = InvalidBlockNumber;
     725             : 
     726             :     /*
     727             :      * Miscellaneous initialization
     728             :      *
     729             :      * create expression context for node
     730             :      */
     731       26472 :     ExecAssignExprContext(estate, &scanstate->ss.ps);
     732             : 
     733             :     /*
     734             :      * open the scan relation
     735             :      */
     736       26472 :     currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
     737             : 
     738             :     /*
     739             :      * initialize child nodes
     740             :      */
     741       26472 :     outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
     742             : 
     743             :     /*
     744             :      * get the scan type from the relation descriptor.
     745             :      */
     746       26472 :     ExecInitScanTupleSlot(estate, &scanstate->ss,
     747             :                           RelationGetDescr(currentRelation),
     748             :                           table_slot_callbacks(currentRelation));
     749             : 
     750             :     /*
     751             :      * Initialize result type and projection.
     752             :      */
     753       26472 :     ExecInitResultTypeTL(&scanstate->ss.ps);
     754       26472 :     ExecAssignScanProjectionInfo(&scanstate->ss);
     755             : 
     756             :     /*
     757             :      * initialize child expressions: the node's own qual, and the original
                     :      * bitmap qual (bitmapqualorig) that BitmapHeapRecheck evaluates
     758             :      */
     759       26472 :     scanstate->ss.ps.qual =
     760       26472 :         ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
     761       26472 :     scanstate->bitmapqualorig =
     762       26472 :         ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
     763             : 
     764             :     /*
     765             :      * Maximum number of prefetches for the tablespace if configured,
     766             :      * otherwise the current value of the effective_io_concurrency GUC.
     767             :      */
     768       26472 :     scanstate->prefetch_maximum =
     769       26472 :         get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
     770             : 
     771       26472 :     scanstate->ss.ss_currentRelation = currentRelation;
     772             : 
     773             :     /*
     774             :      * all done.
     775             :      */
     776       26472 :     return scanstate;
     777             : }
     778             : 
     779             : /*----------------
     780             :  *      BitmapShouldInitializeSharedState
     781             :  *
     782             :  *      The first process to come here and see the state set to BM_INITIAL
     783             :  *      will become the leader for the parallel bitmap scan and will be
     784             :  *      responsible for populating the TIDBitmap.  The other processes will
     785             :  *      be blocked by the condition variable until the leader wakes them up.
                     :  *
                     :  *      Returns true only to the caller that observed BM_INITIAL (and
                     :  *      changed it to BM_INPROGRESS while holding pstate->mutex).  Every
                     :  *      other caller returns false, after looping until the state it
                     :  *      reads is no longer BM_INPROGRESS.
     786             :  * ---------------
     787             :  */
     788             : static bool
     789         342 : BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
     790             : {
     791             :     SharedBitmapState state;
     792             : 
     793             :     while (1)
     794             :     {
     795         342 :         SpinLockAcquire(&pstate->mutex);
     796         342 :         state = pstate->state;
     797         342 :         if (pstate->state == BM_INITIAL)
     798          72 :             pstate->state = BM_INPROGRESS;
     799         342 :         SpinLockRelease(&pstate->mutex);
     800             : 
     801             :         /*
                     :          * Exit if bitmap is done, or if we're the leader.  'state' holds
                     :          * the value read before our own update above, so BM_INITIAL here
                     :          * means this process just claimed leadership.
                     :          */
     802         342 :         if (state != BM_INPROGRESS)
     803         342 :             break;
     804             : 
     805             :         /* Wait for the leader to wake us up. */
     806           0 :         ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
     807             :     }
     808             : 
     809         342 :     ConditionVariableCancelSleep();
     810             : 
     811         342 :     return (state == BM_INITIAL);
     812             : }
     813             : 
     814             : /* ----------------------------------------------------------------
     815             :  *      ExecBitmapHeapEstimate
     816             :  *
     817             :  *      Compute the amount of space we'll need in the parallel
     818             :  *      query DSM, and inform pcxt->estimator about our needs.
                     :  *
                     :  *      The request is one chunk and one key.  The chunk holds a
                     :  *      MAXALIGN'd ParallelBitmapHeapState; when the node is being
                     :  *      instrumented and pcxt->nworkers > 0 it is followed by a
                     :  *      SharedBitmapHeapInstrumentation header plus one
                     :  *      BitmapHeapScanInstrumentation per worker.  This is the same
                     :  *      size computation ExecBitmapHeapInitializeDSM uses when it
                     :  *      allocates the chunk, so the two must be kept in step.
     819             :  * ----------------------------------------------------------------
     820             :  */
     821             : void
     822          18 : ExecBitmapHeapEstimate(BitmapHeapScanState *node,
     823             :                        ParallelContext *pcxt)
     824             : {
     825             :     Size        size;
     826             : 
     827          18 :     size = MAXALIGN(sizeof(ParallelBitmapHeapState));
     828             : 
     829             :     /* account for instrumentation, if required */
     830          18 :     if (node->ss.ps.instrument && pcxt->nworkers > 0)
     831             :     {
     832           0 :         size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
     833           0 :         size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
     834             :     }
     835             : 
     836          18 :     shm_toc_estimate_chunk(&pcxt->estimator, size);
     837          18 :     shm_toc_estimate_keys(&pcxt->estimator, 1);
     838          18 : }
     839             : 
     840             : /* ----------------------------------------------------------------
     841             :  *      ExecBitmapHeapInitializeDSM
     842             :  *
     843             :  *      Set up a parallel bitmap heap scan descriptor.
                     :  *
                     :  *      Allocates one chunk from pcxt->toc laid out as a MAXALIGN'd
                     :  *      ParallelBitmapHeapState, optionally followed (when the node is
                     :  *      instrumented and pcxt->nworkers > 0) by a
                     :  *      SharedBitmapHeapInstrumentation with one zeroed slot per
                     :  *      worker.  The shared state starts in BM_INITIAL with both
                     :  *      iterator pointers invalid.  The chunk is registered in the TOC
                     :  *      under the plan node id, and node->pstate / node->sinstrument
                     :  *      are pointed at it.
                     :  *
                     :  *      Does nothing when the EState has no query DSA.
     844             :  * ----------------------------------------------------------------
     845             :  */
     846             : void
     847          18 : ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
     848             :                             ParallelContext *pcxt)
     849             : {
     850             :     ParallelBitmapHeapState *pstate;
     851          18 :     SharedBitmapHeapInstrumentation *sinstrument = NULL;
     852          18 :     dsa_area   *dsa = node->ss.ps.state->es_query_dsa;
     853             :     char       *ptr;
     854             :     Size        size;
     855             : 
     856             :     /* If there's no DSA, there are no workers; initialize nothing. */
     857          18 :     if (dsa == NULL)
     858           0 :         return;
     859             : 
                     :     /* same size computation as ExecBitmapHeapEstimate */
     860          18 :     size = MAXALIGN(sizeof(ParallelBitmapHeapState));
     861          18 :     if (node->ss.ps.instrument && pcxt->nworkers > 0)
     862             :     {
     863           0 :         size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
     864           0 :         size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
     865             :     }
     866             : 
     867          18 :     ptr = shm_toc_allocate(pcxt->toc, size);
     868          18 :     pstate = (ParallelBitmapHeapState *) ptr;
     869          18 :     ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
     870          18 :     if (node->ss.ps.instrument && pcxt->nworkers > 0)
     871           0 :         sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
     872             : 
                     :     /*
                     :      * No shared iterators exist yet.  Use InvalidDsaPointer, matching
                     :      * ExecBitmapHeapReInitializeDSM, which tests these fields with
                     :      * DsaPointerIsValid().
                     :      */
     873          18 :     pstate->tbmiterator = InvalidDsaPointer;
     874          18 :     pstate->prefetch_iterator = InvalidDsaPointer;
     875             : 
     876             :     /* Initialize the mutex */
     877          18 :     SpinLockInit(&pstate->mutex);
     878          18 :     pstate->prefetch_pages = 0;
     879          18 :     pstate->prefetch_target = -1;
     880          18 :     pstate->state = BM_INITIAL;
     881             : 
     882          18 :     ConditionVariableInit(&pstate->cv);
     883             : 
     884          18 :     if (sinstrument)
     885             :     {
     886           0 :         sinstrument->num_workers = pcxt->nworkers;
     887             : 
     888             :         /* ensure any unfilled slots will contain zeroes */
     889           0 :         memset(sinstrument->sinstrument, 0,
     890           0 :                pcxt->nworkers * sizeof(BitmapHeapScanInstrumentation));
     891             :     }
     892             : 
     893          18 :     shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
     894          18 :     node->pstate = pstate;
     895          18 :     node->sinstrument = sinstrument;
     896             : }
     897             : 
     898             : /* ----------------------------------------------------------------
     899             :  *      ExecBitmapHeapReInitializeDSM
     900             :  *
     901             :  *      Reset shared state before beginning a fresh scan.
                     :  *
                     :  *      Puts the shared state back to BM_INITIAL, resets the shared
                     :  *      prefetch counters, and frees the shared iterator areas (if
                     :  *      any) before marking both iterator pointers invalid.  'pcxt' is
                     :  *      not used by this function.
     902             :  * ----------------------------------------------------------------
     903             :  */
     904             : void
     905          54 : ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
     906             :                               ParallelContext *pcxt)
     907             : {
     908          54 :     ParallelBitmapHeapState *pstate = node->pstate;
     909          54 :     dsa_area   *dsa = node->ss.ps.state->es_query_dsa;
     910             : 
     911             :     /*
                     :      * If there's no DSA, there are no workers; do nothing.  In that case
                     :      * ExecBitmapHeapInitializeDSM also returned early without setting
                     :      * node->pstate, so pstate must not be dereferenced before this check.
                     :      */
     912          54 :     if (dsa == NULL)
     913           0 :         return;
     914             : 
     915          54 :     pstate->state = BM_INITIAL;
     916          54 :     pstate->prefetch_pages = 0;
     917          54 :     pstate->prefetch_target = -1;
     918             : 
     919          54 :     if (DsaPointerIsValid(pstate->tbmiterator))
     920          54 :         tbm_free_shared_area(dsa, pstate->tbmiterator);
     921             : 
     922          54 :     if (DsaPointerIsValid(pstate->prefetch_iterator))
     923          54 :         tbm_free_shared_area(dsa, pstate->prefetch_iterator);
     924             : 
     925          54 :     pstate->tbmiterator = InvalidDsaPointer;
     926          54 :     pstate->prefetch_iterator = InvalidDsaPointer;
     927             : }
     928             : 
     929             : /* ----------------------------------------------------------------
     930             :  *      ExecBitmapHeapInitializeWorker
     931             :  *
     932             :  *      Copy relevant information from TOC into planstate.
                     :  *
                     :  *      Looks up the chunk registered under this plan node's id and
                     :  *      points node->pstate at its start.  When the node is being
                     :  *      instrumented, node->sinstrument is pointed just past the
                     :  *      MAXALIGN'd ParallelBitmapHeapState, which is where
                     :  *      ExecBitmapHeapInitializeDSM places the shared instrumentation.
                     :  *      NOTE(review): InitializeDSM only reserves that area when
                     :  *      pcxt->nworkers > 0; presumably that always holds when a worker
                     :  *      runs this -- confirm.
                     :  *
                     :  *      Asserts that the EState has a query DSA.
     933             :  * ----------------------------------------------------------------
     934             :  */
     935             : void
     936         270 : ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
     937             :                                ParallelWorkerContext *pwcxt)
     938             : {
     939             :     char       *ptr;
     940             : 
     941             :     Assert(node->ss.ps.state->es_query_dsa != NULL);
     942             : 
     943         270 :     ptr = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
     944             : 
     945         270 :     node->pstate = (ParallelBitmapHeapState *) ptr;
     946         270 :     ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
     947             : 
     948         270 :     if (node->ss.ps.instrument)
     949           0 :         node->sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
     950         270 : }
     951             : 
     952             : /* ----------------------------------------------------------------
     953             :  *      ExecBitmapHeapRetrieveInstrumentation
     954             :  *
     955             :  *      Transfer bitmap heap scan statistics from DSM to private memory.
                     :  *
                     :  *      Does nothing when node->sinstrument is NULL.  Otherwise it
                     :  *      palloc's a buffer large enough for the
                     :  *      SharedBitmapHeapInstrumentation header plus num_workers
                     :  *      entries, copies the shared contents into it, and leaves
                     :  *      node->sinstrument pointing at the copy.
     956             :  * ----------------------------------------------------------------
     957             :  */
     958             : void
     959           0 : ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node)
     960             : {
     961           0 :     SharedBitmapHeapInstrumentation *sinstrument = node->sinstrument;
     962             :     Size        size;
     963             : 
     964           0 :     if (sinstrument == NULL)
     965           0 :         return;
     966             : 
     967           0 :     size = offsetof(SharedBitmapHeapInstrumentation, sinstrument)
     968           0 :         + sinstrument->num_workers * sizeof(BitmapHeapScanInstrumentation);
     969             : 
     970           0 :     node->sinstrument = palloc(size);
     971           0 :     memcpy(node->sinstrument, sinstrument, size);
     972             : }

Generated by: LCOV version 1.14