Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeBitmapHeapscan.c
4 : * Routines to support bitmapped scans of relations
5 : *
6 : * NOTE: it is critical that this plan type only be used with MVCC-compliant
7 : * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8 : * special snapshots). The reason is that since index and heap scans are
9 : * decoupled, there can be no assurance that the index tuple prompting a
10 : * visit to a particular heap TID still exists when the visit is made.
11 : * Therefore the tuple might not exist anymore either (which is OK because
12 : * heap_fetch will cope) --- but worse, the tuple slot could have been
13 : * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14 : * certain to fail the time qual and so it will not be mistakenly returned,
15 : * but with anything else we might return a tuple that doesn't meet the
16 : * required index qual conditions.
17 : *
18 : *
19 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
20 : * Portions Copyright (c) 1994, Regents of the University of California
21 : *
22 : *
23 : * IDENTIFICATION
24 : * src/backend/executor/nodeBitmapHeapscan.c
25 : *
26 : *-------------------------------------------------------------------------
27 : */
28 : /*
29 : * INTERFACE ROUTINES
30 : * ExecBitmapHeapScan scans a relation using bitmap info
31 : * ExecBitmapHeapNext workhorse for above
32 : * ExecInitBitmapHeapScan creates and initializes state info.
33 : * ExecReScanBitmapHeapScan prepares to rescan the plan.
34 : * ExecEndBitmapHeapScan releases all storage.
35 : */
36 : #include "postgres.h"
37 :
38 : #include "access/relscan.h"
39 : #include "access/tableam.h"
40 : #include "access/visibilitymap.h"
41 : #include "executor/executor.h"
42 : #include "executor/nodeBitmapHeapscan.h"
43 : #include "miscadmin.h"
44 : #include "pgstat.h"
45 : #include "storage/bufmgr.h"
46 : #include "utils/rel.h"
47 : #include "utils/spccache.h"
48 :
49 : static void BitmapTableScanSetup(BitmapHeapScanState *node);
50 : static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
51 : static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
52 : static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
53 :
54 :
55 : /*
56 : * Do the underlying index scan, build the bitmap, set up the parallel state
57 : * needed for parallel workers to iterate through the bitmap, and set up the
58 : * underlying table scan descriptor.
59 : */
60 : static void
61 26206 : BitmapTableScanSetup(BitmapHeapScanState *node)
62 : {
63 26206 : TBMIterator tbmiterator = {0};
64 26206 : ParallelBitmapHeapState *pstate = node->pstate;
65 26206 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
66 :
67 26206 : if (!pstate)
68 : {
69 25864 : node->tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
70 :
71 25864 : if (!node->tbm || !IsA(node->tbm, TIDBitmap))
72 0 : elog(ERROR, "unrecognized result from subplan");
73 : }
74 342 : else if (BitmapShouldInitializeSharedState(pstate))
75 : {
76 : /*
77 : * The leader will immediately come out of the function, but others
78 : * will be blocked until leader populates the TBM and wakes them up.
79 : */
80 72 : node->tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
81 72 : if (!node->tbm || !IsA(node->tbm, TIDBitmap))
82 0 : elog(ERROR, "unrecognized result from subplan");
83 :
84 : /*
85 : * Prepare to iterate over the TBM. This will return the dsa_pointer
86 : * of the iterator state which will be used by multiple processes to
87 : * iterate jointly.
88 : */
89 72 : pstate->tbmiterator = tbm_prepare_shared_iterate(node->tbm);
90 :
91 : /* We have initialized the shared state so wake up others. */
92 72 : BitmapDoneInitializingSharedState(pstate);
93 : }
94 :
95 26206 : tbmiterator = tbm_begin_iterate(node->tbm, dsa,
96 : pstate ?
97 : pstate->tbmiterator :
98 : InvalidDsaPointer);
99 :
100 : /*
101 : * If this is the first scan of the underlying table, create the table
102 : * scan descriptor and begin the scan.
103 : */
104 26206 : if (!node->ss.ss_currentScanDesc)
105 : {
106 21922 : node->ss.ss_currentScanDesc =
107 21922 : table_beginscan_bm(node->ss.ss_currentRelation,
108 21922 : node->ss.ps.state->es_snapshot,
109 : 0,
110 : NULL);
111 : }
112 :
113 26206 : node->ss.ss_currentScanDesc->st.rs_tbmiterator = tbmiterator;
114 26206 : node->initialized = true;
115 26206 : }
116 :
117 : /* ----------------------------------------------------------------
118 : * BitmapHeapNext
119 : *
120 : * Retrieve next tuple from the BitmapHeapScan node's currentRelation
121 : * ----------------------------------------------------------------
122 : */
123 : static TupleTableSlot *
124 6083152 : BitmapHeapNext(BitmapHeapScanState *node)
125 : {
126 6083152 : ExprContext *econtext = node->ss.ps.ps_ExprContext;
127 6083152 : TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
128 :
129 : /*
130 : * If we haven't yet performed the underlying index scan, do it, and begin
131 : * the iteration over the bitmap.
132 : */
133 6083152 : if (!node->initialized)
134 26206 : BitmapTableScanSetup(node);
135 :
136 6841352 : while (table_scan_bitmap_next_tuple(node->ss.ss_currentScanDesc,
137 : slot, &node->recheck,
138 : &node->stats.lossy_pages,
139 : &node->stats.exact_pages))
140 : {
141 : /*
142 : * Continuing in previously obtained page.
143 : */
144 6815534 : CHECK_FOR_INTERRUPTS();
145 :
146 : /*
147 : * If we are using lossy info, we have to recheck the qual conditions
148 : * at every tuple.
149 : */
150 6815534 : if (node->recheck)
151 : {
152 3295560 : econtext->ecxt_scantuple = slot;
153 3295560 : if (!ExecQualAndReset(node->bitmapqualorig, econtext))
154 : {
155 : /* Fails recheck, so drop it and loop back for another */
156 758200 : InstrCountFiltered2(node, 1);
157 758200 : ExecClearTuple(slot);
158 758200 : continue;
159 : }
160 : }
161 :
162 : /* OK to return this tuple */
163 6057334 : return slot;
164 : }
165 :
166 : /*
167 : * if we get here it means we are at the end of the scan..
168 : */
169 25812 : return ExecClearTuple(slot);
170 : }
171 :
172 : /*
173 : * BitmapDoneInitializingSharedState - Shared state is initialized
174 : *
175 : * By this time the leader has already populated the TBM and initialized the
176 : * shared state so wake up other processes.
177 : */
178 : static inline void
179 72 : BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
180 : {
181 72 : SpinLockAcquire(&pstate->mutex);
182 72 : pstate->state = BM_FINISHED;
183 72 : SpinLockRelease(&pstate->mutex);
184 72 : ConditionVariableBroadcast(&pstate->cv);
185 72 : }
186 :
187 : /*
188 : * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
189 : */
190 : static bool
191 0 : BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
192 : {
193 : ExprContext *econtext;
194 :
195 : /*
196 : * extract necessary information from index scan node
197 : */
198 0 : econtext = node->ss.ps.ps_ExprContext;
199 :
200 : /* Does the tuple meet the original qual conditions? */
201 0 : econtext->ecxt_scantuple = slot;
202 0 : return ExecQualAndReset(node->bitmapqualorig, econtext);
203 : }
204 :
205 : /* ----------------------------------------------------------------
206 : * ExecBitmapHeapScan(node)
207 : * ----------------------------------------------------------------
208 : */
209 : static TupleTableSlot *
210 5756196 : ExecBitmapHeapScan(PlanState *pstate)
211 : {
212 5756196 : BitmapHeapScanState *node = castNode(BitmapHeapScanState, pstate);
213 :
214 5756196 : return ExecScan(&node->ss,
215 : (ExecScanAccessMtd) BitmapHeapNext,
216 : (ExecScanRecheckMtd) BitmapHeapRecheck);
217 : }
218 :
219 : /* ----------------------------------------------------------------
220 : * ExecReScanBitmapHeapScan(node)
221 : * ----------------------------------------------------------------
222 : */
223 : void
224 5264 : ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
225 : {
226 5264 : PlanState *outerPlan = outerPlanState(node);
227 :
228 5264 : TableScanDesc scan = node->ss.ss_currentScanDesc;
229 :
230 5264 : if (scan)
231 : {
232 : /*
233 : * End iteration on iterators saved in scan descriptor if they have
234 : * not already been cleaned up.
235 : */
236 4290 : if (!tbm_exhausted(&scan->st.rs_tbmiterator))
237 4284 : tbm_end_iterate(&scan->st.rs_tbmiterator);
238 :
239 : /* rescan to release any page pin */
240 4290 : table_rescan(node->ss.ss_currentScanDesc, NULL);
241 : }
242 :
243 : /* release bitmaps and buffers if any */
244 5264 : if (node->tbm)
245 4284 : tbm_free(node->tbm);
246 5264 : node->tbm = NULL;
247 5264 : node->initialized = false;
248 5264 : node->recheck = true;
249 :
250 5264 : ExecScanReScan(&node->ss);
251 :
252 : /*
253 : * if chgParam of subnode is not null then plan will be re-scanned by
254 : * first ExecProcNode.
255 : */
256 5264 : if (outerPlan->chgParam == NULL)
257 328 : ExecReScan(outerPlan);
258 5264 : }
259 :
260 : /* ----------------------------------------------------------------
261 : * ExecEndBitmapHeapScan
262 : * ----------------------------------------------------------------
263 : */
264 : void
265 26694 : ExecEndBitmapHeapScan(BitmapHeapScanState *node)
266 : {
267 : TableScanDesc scanDesc;
268 :
269 : /*
270 : * When ending a parallel worker, copy the statistics gathered by the
271 : * worker back into shared memory so that it can be picked up by the main
272 : * process to report in EXPLAIN ANALYZE.
273 : */
274 26694 : if (node->sinstrument != NULL && IsParallelWorker())
275 : {
276 : BitmapHeapScanInstrumentation *si;
277 :
278 : Assert(ParallelWorkerNumber <= node->sinstrument->num_workers);
279 0 : si = &node->sinstrument->sinstrument[ParallelWorkerNumber];
280 :
281 : /*
282 : * Here we accumulate the stats rather than performing memcpy on
283 : * node->stats into si. When a Gather/GatherMerge node finishes it
284 : * will perform planner shutdown on the workers. On rescan it will
285 : * spin up new workers which will have a new BitmapHeapScanState and
286 : * zeroed stats.
287 : */
288 0 : si->exact_pages += node->stats.exact_pages;
289 0 : si->lossy_pages += node->stats.lossy_pages;
290 : }
291 :
292 : /*
293 : * extract information from the node
294 : */
295 26694 : scanDesc = node->ss.ss_currentScanDesc;
296 :
297 : /*
298 : * close down subplans
299 : */
300 26694 : ExecEndNode(outerPlanState(node));
301 :
302 26694 : if (scanDesc)
303 : {
304 : /*
305 : * End iteration on iterators saved in scan descriptor if they have
306 : * not already been cleaned up.
307 : */
308 21808 : if (!tbm_exhausted(&scanDesc->st.rs_tbmiterator))
309 21808 : tbm_end_iterate(&scanDesc->st.rs_tbmiterator);
310 :
311 : /*
312 : * close table scan
313 : */
314 21808 : table_endscan(scanDesc);
315 : }
316 :
317 : /*
318 : * release bitmaps and buffers if any
319 : */
320 26694 : if (node->tbm)
321 21538 : tbm_free(node->tbm);
322 26694 : }
323 :
324 : /* ----------------------------------------------------------------
325 : * ExecInitBitmapHeapScan
326 : *
327 : * Initializes the scan's state information.
328 : * ----------------------------------------------------------------
329 : */
330 : BitmapHeapScanState *
331 26808 : ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
332 : {
333 : BitmapHeapScanState *scanstate;
334 : Relation currentRelation;
335 :
336 : /* check for unsupported flags */
337 : Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
338 :
339 : /*
340 : * Assert caller didn't ask for an unsafe snapshot --- see comments at
341 : * head of file.
342 : */
343 : Assert(IsMVCCSnapshot(estate->es_snapshot));
344 :
345 : /*
346 : * create state structure
347 : */
348 26808 : scanstate = makeNode(BitmapHeapScanState);
349 26808 : scanstate->ss.ps.plan = (Plan *) node;
350 26808 : scanstate->ss.ps.state = estate;
351 26808 : scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
352 :
353 26808 : scanstate->tbm = NULL;
354 :
355 : /* Zero the statistics counters */
356 26808 : memset(&scanstate->stats, 0, sizeof(BitmapHeapScanInstrumentation));
357 :
358 26808 : scanstate->initialized = false;
359 26808 : scanstate->pstate = NULL;
360 26808 : scanstate->recheck = true;
361 :
362 : /*
363 : * Miscellaneous initialization
364 : *
365 : * create expression context for node
366 : */
367 26808 : ExecAssignExprContext(estate, &scanstate->ss.ps);
368 :
369 : /*
370 : * open the scan relation
371 : */
372 26808 : currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
373 :
374 : /*
375 : * initialize child nodes
376 : */
377 26808 : outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
378 :
379 : /*
380 : * get the scan type from the relation descriptor.
381 : */
382 26808 : ExecInitScanTupleSlot(estate, &scanstate->ss,
383 : RelationGetDescr(currentRelation),
384 : table_slot_callbacks(currentRelation));
385 :
386 : /*
387 : * Initialize result type and projection.
388 : */
389 26808 : ExecInitResultTypeTL(&scanstate->ss.ps);
390 26808 : ExecAssignScanProjectionInfo(&scanstate->ss);
391 :
392 : /*
393 : * initialize child expressions
394 : */
395 26808 : scanstate->ss.ps.qual =
396 26808 : ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
397 26808 : scanstate->bitmapqualorig =
398 26808 : ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
399 :
400 26808 : scanstate->ss.ss_currentRelation = currentRelation;
401 :
402 : /*
403 : * all done.
404 : */
405 26808 : return scanstate;
406 : }
407 :
408 : /*----------------
409 : * BitmapShouldInitializeSharedState
410 : *
411 : * The first process to come here and see the state to the BM_INITIAL
412 : * will become the leader for the parallel bitmap scan and will be
413 : * responsible for populating the TIDBitmap. The other processes will
414 : * be blocked by the condition variable until the leader wakes them up.
415 : * ---------------
416 : */
417 : static bool
418 342 : BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
419 : {
420 : SharedBitmapState state;
421 :
422 : while (1)
423 : {
424 342 : SpinLockAcquire(&pstate->mutex);
425 342 : state = pstate->state;
426 342 : if (pstate->state == BM_INITIAL)
427 72 : pstate->state = BM_INPROGRESS;
428 342 : SpinLockRelease(&pstate->mutex);
429 :
430 : /* Exit if bitmap is done, or if we're the leader. */
431 342 : if (state != BM_INPROGRESS)
432 342 : break;
433 :
434 : /* Wait for the leader to wake us up. */
435 0 : ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
436 : }
437 :
438 342 : ConditionVariableCancelSleep();
439 :
440 342 : return (state == BM_INITIAL);
441 : }
442 :
443 : /* ----------------------------------------------------------------
444 : * ExecBitmapHeapEstimate
445 : *
446 : * Compute the amount of space we'll need in the parallel
447 : * query DSM, and inform pcxt->estimator about our needs.
448 : * ----------------------------------------------------------------
449 : */
450 : void
451 18 : ExecBitmapHeapEstimate(BitmapHeapScanState *node,
452 : ParallelContext *pcxt)
453 : {
454 : Size size;
455 :
456 18 : size = MAXALIGN(sizeof(ParallelBitmapHeapState));
457 :
458 : /* account for instrumentation, if required */
459 18 : if (node->ss.ps.instrument && pcxt->nworkers > 0)
460 : {
461 0 : size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
462 0 : size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
463 : }
464 :
465 18 : shm_toc_estimate_chunk(&pcxt->estimator, size);
466 18 : shm_toc_estimate_keys(&pcxt->estimator, 1);
467 18 : }
468 :
469 : /* ----------------------------------------------------------------
470 : * ExecBitmapHeapInitializeDSM
471 : *
472 : * Set up a parallel bitmap heap scan descriptor.
473 : * ----------------------------------------------------------------
474 : */
475 : void
476 18 : ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
477 : ParallelContext *pcxt)
478 : {
479 : ParallelBitmapHeapState *pstate;
480 18 : SharedBitmapHeapInstrumentation *sinstrument = NULL;
481 18 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
482 : char *ptr;
483 : Size size;
484 :
485 : /* If there's no DSA, there are no workers; initialize nothing. */
486 18 : if (dsa == NULL)
487 0 : return;
488 :
489 18 : size = MAXALIGN(sizeof(ParallelBitmapHeapState));
490 18 : if (node->ss.ps.instrument && pcxt->nworkers > 0)
491 : {
492 0 : size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
493 0 : size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
494 : }
495 :
496 18 : ptr = shm_toc_allocate(pcxt->toc, size);
497 18 : pstate = (ParallelBitmapHeapState *) ptr;
498 18 : ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
499 18 : if (node->ss.ps.instrument && pcxt->nworkers > 0)
500 0 : sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
501 :
502 18 : pstate->tbmiterator = 0;
503 :
504 : /* Initialize the mutex */
505 18 : SpinLockInit(&pstate->mutex);
506 18 : pstate->state = BM_INITIAL;
507 :
508 18 : ConditionVariableInit(&pstate->cv);
509 :
510 18 : if (sinstrument)
511 : {
512 0 : sinstrument->num_workers = pcxt->nworkers;
513 :
514 : /* ensure any unfilled slots will contain zeroes */
515 0 : memset(sinstrument->sinstrument, 0,
516 0 : pcxt->nworkers * sizeof(BitmapHeapScanInstrumentation));
517 : }
518 :
519 18 : shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
520 18 : node->pstate = pstate;
521 18 : node->sinstrument = sinstrument;
522 : }
523 :
524 : /* ----------------------------------------------------------------
525 : * ExecBitmapHeapReInitializeDSM
526 : *
527 : * Reset shared state before beginning a fresh scan.
528 : * ----------------------------------------------------------------
529 : */
530 : void
531 54 : ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
532 : ParallelContext *pcxt)
533 : {
534 54 : ParallelBitmapHeapState *pstate = node->pstate;
535 54 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
536 :
537 : /* If there's no DSA, there are no workers; do nothing. */
538 54 : if (dsa == NULL)
539 0 : return;
540 :
541 54 : pstate->state = BM_INITIAL;
542 :
543 54 : if (DsaPointerIsValid(pstate->tbmiterator))
544 54 : tbm_free_shared_area(dsa, pstate->tbmiterator);
545 :
546 54 : pstate->tbmiterator = InvalidDsaPointer;
547 : }
548 :
549 : /* ----------------------------------------------------------------
550 : * ExecBitmapHeapInitializeWorker
551 : *
552 : * Copy relevant information from TOC into planstate.
553 : * ----------------------------------------------------------------
554 : */
555 : void
556 270 : ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
557 : ParallelWorkerContext *pwcxt)
558 : {
559 : char *ptr;
560 :
561 : Assert(node->ss.ps.state->es_query_dsa != NULL);
562 :
563 270 : ptr = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
564 :
565 270 : node->pstate = (ParallelBitmapHeapState *) ptr;
566 270 : ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
567 :
568 270 : if (node->ss.ps.instrument)
569 0 : node->sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
570 270 : }
571 :
572 : /* ----------------------------------------------------------------
573 : * ExecBitmapHeapRetrieveInstrumentation
574 : *
575 : * Transfer bitmap heap scan statistics from DSM to private memory.
576 : * ----------------------------------------------------------------
577 : */
578 : void
579 0 : ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node)
580 : {
581 0 : SharedBitmapHeapInstrumentation *sinstrument = node->sinstrument;
582 : Size size;
583 :
584 0 : if (sinstrument == NULL)
585 0 : return;
586 :
587 0 : size = offsetof(SharedBitmapHeapInstrumentation, sinstrument)
588 0 : + sinstrument->num_workers * sizeof(BitmapHeapScanInstrumentation);
589 :
590 0 : node->sinstrument = palloc(size);
591 0 : memcpy(node->sinstrument, sinstrument, size);
592 : }
|