Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeBitmapHeapscan.c
4 : * Routines to support bitmapped scans of relations
5 : *
6 : * NOTE: it is critical that this plan type only be used with MVCC-compliant
7 : * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8 : * special snapshots). The reason is that since index and heap scans are
9 : * decoupled, there can be no assurance that the index tuple prompting a
10 : * visit to a particular heap TID still exists when the visit is made.
11 : * Therefore the tuple might not exist anymore either (which is OK because
12 : * heap_fetch will cope) --- but worse, the tuple slot could have been
13 : * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14 : * certain to fail the time qual and so it will not be mistakenly returned,
15 : * but with anything else we might return a tuple that doesn't meet the
16 : * required index qual conditions.
17 : *
18 : *
19 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
20 : * Portions Copyright (c) 1994, Regents of the University of California
21 : *
22 : *
23 : * IDENTIFICATION
24 : * src/backend/executor/nodeBitmapHeapscan.c
25 : *
26 : *-------------------------------------------------------------------------
27 : */
28 : /*
29 : * INTERFACE ROUTINES
30 : * ExecBitmapHeapScan scans a relation using bitmap info
31 : * ExecBitmapHeapNext workhorse for above
32 : * ExecInitBitmapHeapScan creates and initializes state info.
33 : * ExecReScanBitmapHeapScan prepares to rescan the plan.
34 : * ExecEndBitmapHeapScan releases all storage.
35 : */
36 : #include "postgres.h"
37 :
38 : #include <math.h>
39 :
40 : #include "access/relscan.h"
41 : #include "access/tableam.h"
42 : #include "access/visibilitymap.h"
43 : #include "executor/executor.h"
44 : #include "executor/nodeBitmapHeapscan.h"
45 : #include "miscadmin.h"
46 : #include "pgstat.h"
47 : #include "storage/bufmgr.h"
48 : #include "utils/rel.h"
49 : #include "utils/spccache.h"
50 :
51 : static void BitmapTableScanSetup(BitmapHeapScanState *node);
52 : static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
53 : static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
54 : static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
55 :
56 :
57 : /*
58 : * Do the underlying index scan, build the bitmap, set up the parallel state
59 : * needed for parallel workers to iterate through the bitmap, and set up the
60 : * underlying table scan descriptor.
61 : */
62 : static void
63 20794 : BitmapTableScanSetup(BitmapHeapScanState *node)
64 : {
65 20794 : TBMIterator tbmiterator = {0};
66 20794 : ParallelBitmapHeapState *pstate = node->pstate;
67 20794 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
68 :
69 20794 : if (!pstate)
70 : {
71 20452 : node->tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
72 :
73 20452 : if (!node->tbm || !IsA(node->tbm, TIDBitmap))
74 0 : elog(ERROR, "unrecognized result from subplan");
75 : }
76 342 : else if (BitmapShouldInitializeSharedState(pstate))
77 : {
78 : /*
79 : * The leader will immediately come out of the function, but others
80 : * will be blocked until leader populates the TBM and wakes them up.
81 : */
82 72 : node->tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
83 72 : if (!node->tbm || !IsA(node->tbm, TIDBitmap))
84 0 : elog(ERROR, "unrecognized result from subplan");
85 :
86 : /*
87 : * Prepare to iterate over the TBM. This will return the dsa_pointer
88 : * of the iterator state which will be used by multiple processes to
89 : * iterate jointly.
90 : */
91 72 : pstate->tbmiterator = tbm_prepare_shared_iterate(node->tbm);
92 :
93 : /* We have initialized the shared state so wake up others. */
94 72 : BitmapDoneInitializingSharedState(pstate);
95 : }
96 :
97 20794 : tbmiterator = tbm_begin_iterate(node->tbm, dsa,
98 : pstate ?
99 : pstate->tbmiterator :
100 : InvalidDsaPointer);
101 :
102 : /*
103 : * If this is the first scan of the underlying table, create the table
104 : * scan descriptor and begin the scan.
105 : */
106 20794 : if (!node->ss.ss_currentScanDesc)
107 : {
108 16528 : node->ss.ss_currentScanDesc =
109 16528 : table_beginscan_bm(node->ss.ss_currentRelation,
110 16528 : node->ss.ps.state->es_snapshot,
111 : 0,
112 : NULL);
113 : }
114 :
115 20794 : node->ss.ss_currentScanDesc->st.rs_tbmiterator = tbmiterator;
116 20794 : node->initialized = true;
117 20794 : }
118 :
119 : /* ----------------------------------------------------------------
120 : * BitmapHeapNext
121 : *
122 : * Retrieve next tuple from the BitmapHeapScan node's currentRelation
123 : * ----------------------------------------------------------------
124 : */
125 : static TupleTableSlot *
126 5990668 : BitmapHeapNext(BitmapHeapScanState *node)
127 : {
128 5990668 : ExprContext *econtext = node->ss.ps.ps_ExprContext;
129 5990668 : TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
130 :
131 : /*
132 : * If we haven't yet performed the underlying index scan, do it, and begin
133 : * the iteration over the bitmap.
134 : */
135 5990668 : if (!node->initialized)
136 20794 : BitmapTableScanSetup(node);
137 :
138 6748688 : while (table_scan_bitmap_next_tuple(node->ss.ss_currentScanDesc,
139 : slot, &node->recheck,
140 : &node->stats.lossy_pages,
141 : &node->stats.exact_pages))
142 : {
143 : /*
144 : * Continuing in previously obtained page.
145 : */
146 6728280 : CHECK_FOR_INTERRUPTS();
147 :
148 : /*
149 : * If we are using lossy info, we have to recheck the qual conditions
150 : * at every tuple.
151 : */
152 6728280 : if (node->recheck)
153 : {
154 3295380 : econtext->ecxt_scantuple = slot;
155 3295380 : if (!ExecQualAndReset(node->bitmapqualorig, econtext))
156 : {
157 : /* Fails recheck, so drop it and loop back for another */
158 758020 : InstrCountFiltered2(node, 1);
159 758020 : ExecClearTuple(slot);
160 758020 : continue;
161 : }
162 : }
163 :
164 : /* OK to return this tuple */
165 5970260 : return slot;
166 : }
167 :
168 : /*
169 : * if we get here it means we are at the end of the scan..
170 : */
171 20402 : return ExecClearTuple(slot);
172 : }
173 :
174 : /*
175 : * BitmapDoneInitializingSharedState - Shared state is initialized
176 : *
177 : * By this time the leader has already populated the TBM and initialized the
178 : * shared state so wake up other processes.
179 : */
180 : static inline void
181 72 : BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
182 : {
183 72 : SpinLockAcquire(&pstate->mutex);
184 72 : pstate->state = BM_FINISHED;
185 72 : SpinLockRelease(&pstate->mutex);
186 72 : ConditionVariableBroadcast(&pstate->cv);
187 72 : }
188 :
189 : /*
190 : * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
191 : */
192 : static bool
193 0 : BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
194 : {
195 : ExprContext *econtext;
196 :
197 : /*
198 : * extract necessary information from index scan node
199 : */
200 0 : econtext = node->ss.ps.ps_ExprContext;
201 :
202 : /* Does the tuple meet the original qual conditions? */
203 0 : econtext->ecxt_scantuple = slot;
204 0 : return ExecQualAndReset(node->bitmapqualorig, econtext);
205 : }
206 :
207 : /* ----------------------------------------------------------------
208 : * ExecBitmapHeapScan(node)
209 : * ----------------------------------------------------------------
210 : */
211 : static TupleTableSlot *
212 5613902 : ExecBitmapHeapScan(PlanState *pstate)
213 : {
214 5613902 : BitmapHeapScanState *node = castNode(BitmapHeapScanState, pstate);
215 :
216 5613902 : return ExecScan(&node->ss,
217 : (ExecScanAccessMtd) BitmapHeapNext,
218 : (ExecScanRecheckMtd) BitmapHeapRecheck);
219 : }
220 :
221 : /* ----------------------------------------------------------------
222 : * ExecReScanBitmapHeapScan(node)
223 : * ----------------------------------------------------------------
224 : */
225 : void
226 5190 : ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
227 : {
228 5190 : PlanState *outerPlan = outerPlanState(node);
229 :
230 5190 : TableScanDesc scan = node->ss.ss_currentScanDesc;
231 :
232 5190 : if (scan)
233 : {
234 : /*
235 : * End iteration on iterators saved in scan descriptor if they have
236 : * not already been cleaned up.
237 : */
238 4272 : if (!tbm_exhausted(&scan->st.rs_tbmiterator))
239 4266 : tbm_end_iterate(&scan->st.rs_tbmiterator);
240 :
241 : /* rescan to release any page pin */
242 4272 : table_rescan(node->ss.ss_currentScanDesc, NULL);
243 : }
244 :
245 : /* release bitmaps and buffers if any */
246 5190 : if (node->tbm)
247 4266 : tbm_free(node->tbm);
248 5190 : node->tbm = NULL;
249 5190 : node->initialized = false;
250 5190 : node->recheck = true;
251 :
252 5190 : ExecScanReScan(&node->ss);
253 :
254 : /*
255 : * if chgParam of subnode is not null then plan will be re-scanned by
256 : * first ExecProcNode.
257 : */
258 5190 : if (outerPlan->chgParam == NULL)
259 290 : ExecReScan(outerPlan);
260 5190 : }
261 :
262 : /* ----------------------------------------------------------------
263 : * ExecEndBitmapHeapScan
264 : * ----------------------------------------------------------------
265 : */
266 : void
267 21200 : ExecEndBitmapHeapScan(BitmapHeapScanState *node)
268 : {
269 : TableScanDesc scanDesc;
270 :
271 : /*
272 : * When ending a parallel worker, copy the statistics gathered by the
273 : * worker back into shared memory so that it can be picked up by the main
274 : * process to report in EXPLAIN ANALYZE.
275 : */
276 21200 : if (node->sinstrument != NULL && IsParallelWorker())
277 : {
278 : BitmapHeapScanInstrumentation *si;
279 :
280 : Assert(ParallelWorkerNumber <= node->sinstrument->num_workers);
281 0 : si = &node->sinstrument->sinstrument[ParallelWorkerNumber];
282 :
283 : /*
284 : * Here we accumulate the stats rather than performing memcpy on
285 : * node->stats into si. When a Gather/GatherMerge node finishes it
286 : * will perform planner shutdown on the workers. On rescan it will
287 : * spin up new workers which will have a new BitmapHeapScanState and
288 : * zeroed stats.
289 : */
290 0 : si->exact_pages += node->stats.exact_pages;
291 0 : si->lossy_pages += node->stats.lossy_pages;
292 : }
293 :
294 : /*
295 : * extract information from the node
296 : */
297 21200 : scanDesc = node->ss.ss_currentScanDesc;
298 :
299 : /*
300 : * close down subplans
301 : */
302 21200 : ExecEndNode(outerPlanState(node));
303 :
304 21200 : if (scanDesc)
305 : {
306 : /*
307 : * End iteration on iterators saved in scan descriptor if they have
308 : * not already been cleaned up.
309 : */
310 16428 : if (!tbm_exhausted(&scanDesc->st.rs_tbmiterator))
311 16428 : tbm_end_iterate(&scanDesc->st.rs_tbmiterator);
312 :
313 : /*
314 : * close table scan
315 : */
316 16428 : table_endscan(scanDesc);
317 : }
318 :
319 : /*
320 : * release bitmaps and buffers if any
321 : */
322 21200 : if (node->tbm)
323 16158 : tbm_free(node->tbm);
324 21200 : }
325 :
326 : /* ----------------------------------------------------------------
327 : * ExecInitBitmapHeapScan
328 : *
329 : * Initializes the scan's state information.
330 : * ----------------------------------------------------------------
331 : */
332 : BitmapHeapScanState *
333 21300 : ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
334 : {
335 : BitmapHeapScanState *scanstate;
336 : Relation currentRelation;
337 :
338 : /* check for unsupported flags */
339 : Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
340 :
341 : /*
342 : * Assert caller didn't ask for an unsafe snapshot --- see comments at
343 : * head of file.
344 : */
345 : Assert(IsMVCCSnapshot(estate->es_snapshot));
346 :
347 : /*
348 : * create state structure
349 : */
350 21300 : scanstate = makeNode(BitmapHeapScanState);
351 21300 : scanstate->ss.ps.plan = (Plan *) node;
352 21300 : scanstate->ss.ps.state = estate;
353 21300 : scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
354 :
355 21300 : scanstate->tbm = NULL;
356 :
357 : /* Zero the statistics counters */
358 21300 : memset(&scanstate->stats, 0, sizeof(BitmapHeapScanInstrumentation));
359 :
360 21300 : scanstate->initialized = false;
361 21300 : scanstate->pstate = NULL;
362 21300 : scanstate->recheck = true;
363 :
364 : /*
365 : * Miscellaneous initialization
366 : *
367 : * create expression context for node
368 : */
369 21300 : ExecAssignExprContext(estate, &scanstate->ss.ps);
370 :
371 : /*
372 : * open the scan relation
373 : */
374 21300 : currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
375 :
376 : /*
377 : * initialize child nodes
378 : */
379 21300 : outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
380 :
381 : /*
382 : * get the scan type from the relation descriptor.
383 : */
384 21300 : ExecInitScanTupleSlot(estate, &scanstate->ss,
385 : RelationGetDescr(currentRelation),
386 : table_slot_callbacks(currentRelation));
387 :
388 : /*
389 : * Initialize result type and projection.
390 : */
391 21300 : ExecInitResultTypeTL(&scanstate->ss.ps);
392 21300 : ExecAssignScanProjectionInfo(&scanstate->ss);
393 :
394 : /*
395 : * initialize child expressions
396 : */
397 21300 : scanstate->ss.ps.qual =
398 21300 : ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
399 21300 : scanstate->bitmapqualorig =
400 21300 : ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
401 :
402 21300 : scanstate->ss.ss_currentRelation = currentRelation;
403 :
404 : /*
405 : * all done.
406 : */
407 21300 : return scanstate;
408 : }
409 :
410 : /*----------------
411 : * BitmapShouldInitializeSharedState
412 : *
413 : * The first process to come here and see the state to the BM_INITIAL
414 : * will become the leader for the parallel bitmap scan and will be
415 : * responsible for populating the TIDBitmap. The other processes will
416 : * be blocked by the condition variable until the leader wakes them up.
417 : * ---------------
418 : */
419 : static bool
420 342 : BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
421 : {
422 : SharedBitmapState state;
423 :
424 : while (1)
425 : {
426 342 : SpinLockAcquire(&pstate->mutex);
427 342 : state = pstate->state;
428 342 : if (pstate->state == BM_INITIAL)
429 72 : pstate->state = BM_INPROGRESS;
430 342 : SpinLockRelease(&pstate->mutex);
431 :
432 : /* Exit if bitmap is done, or if we're the leader. */
433 342 : if (state != BM_INPROGRESS)
434 342 : break;
435 :
436 : /* Wait for the leader to wake us up. */
437 0 : ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
438 : }
439 :
440 342 : ConditionVariableCancelSleep();
441 :
442 342 : return (state == BM_INITIAL);
443 : }
444 :
445 : /* ----------------------------------------------------------------
446 : * ExecBitmapHeapEstimate
447 : *
448 : * Compute the amount of space we'll need in the parallel
449 : * query DSM, and inform pcxt->estimator about our needs.
450 : * ----------------------------------------------------------------
451 : */
452 : void
453 18 : ExecBitmapHeapEstimate(BitmapHeapScanState *node,
454 : ParallelContext *pcxt)
455 : {
456 : Size size;
457 :
458 18 : size = MAXALIGN(sizeof(ParallelBitmapHeapState));
459 :
460 : /* account for instrumentation, if required */
461 18 : if (node->ss.ps.instrument && pcxt->nworkers > 0)
462 : {
463 0 : size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
464 0 : size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
465 : }
466 :
467 18 : shm_toc_estimate_chunk(&pcxt->estimator, size);
468 18 : shm_toc_estimate_keys(&pcxt->estimator, 1);
469 18 : }
470 :
471 : /* ----------------------------------------------------------------
472 : * ExecBitmapHeapInitializeDSM
473 : *
474 : * Set up a parallel bitmap heap scan descriptor.
475 : * ----------------------------------------------------------------
476 : */
477 : void
478 18 : ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
479 : ParallelContext *pcxt)
480 : {
481 : ParallelBitmapHeapState *pstate;
482 18 : SharedBitmapHeapInstrumentation *sinstrument = NULL;
483 18 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
484 : char *ptr;
485 : Size size;
486 :
487 : /* If there's no DSA, there are no workers; initialize nothing. */
488 18 : if (dsa == NULL)
489 0 : return;
490 :
491 18 : size = MAXALIGN(sizeof(ParallelBitmapHeapState));
492 18 : if (node->ss.ps.instrument && pcxt->nworkers > 0)
493 : {
494 0 : size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
495 0 : size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
496 : }
497 :
498 18 : ptr = shm_toc_allocate(pcxt->toc, size);
499 18 : pstate = (ParallelBitmapHeapState *) ptr;
500 18 : ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
501 18 : if (node->ss.ps.instrument && pcxt->nworkers > 0)
502 0 : sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
503 :
504 18 : pstate->tbmiterator = 0;
505 :
506 : /* Initialize the mutex */
507 18 : SpinLockInit(&pstate->mutex);
508 18 : pstate->state = BM_INITIAL;
509 :
510 18 : ConditionVariableInit(&pstate->cv);
511 :
512 18 : if (sinstrument)
513 : {
514 0 : sinstrument->num_workers = pcxt->nworkers;
515 :
516 : /* ensure any unfilled slots will contain zeroes */
517 0 : memset(sinstrument->sinstrument, 0,
518 0 : pcxt->nworkers * sizeof(BitmapHeapScanInstrumentation));
519 : }
520 :
521 18 : shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
522 18 : node->pstate = pstate;
523 18 : node->sinstrument = sinstrument;
524 : }
525 :
526 : /* ----------------------------------------------------------------
527 : * ExecBitmapHeapReInitializeDSM
528 : *
529 : * Reset shared state before beginning a fresh scan.
530 : * ----------------------------------------------------------------
531 : */
532 : void
533 54 : ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
534 : ParallelContext *pcxt)
535 : {
536 54 : ParallelBitmapHeapState *pstate = node->pstate;
537 54 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
538 :
539 : /* If there's no DSA, there are no workers; do nothing. */
540 54 : if (dsa == NULL)
541 0 : return;
542 :
543 54 : pstate->state = BM_INITIAL;
544 :
545 54 : if (DsaPointerIsValid(pstate->tbmiterator))
546 54 : tbm_free_shared_area(dsa, pstate->tbmiterator);
547 :
548 54 : pstate->tbmiterator = InvalidDsaPointer;
549 : }
550 :
551 : /* ----------------------------------------------------------------
552 : * ExecBitmapHeapInitializeWorker
553 : *
554 : * Copy relevant information from TOC into planstate.
555 : * ----------------------------------------------------------------
556 : */
557 : void
558 270 : ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
559 : ParallelWorkerContext *pwcxt)
560 : {
561 : char *ptr;
562 :
563 : Assert(node->ss.ps.state->es_query_dsa != NULL);
564 :
565 270 : ptr = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
566 :
567 270 : node->pstate = (ParallelBitmapHeapState *) ptr;
568 270 : ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
569 :
570 270 : if (node->ss.ps.instrument)
571 0 : node->sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
572 270 : }
573 :
574 : /* ----------------------------------------------------------------
575 : * ExecBitmapHeapRetrieveInstrumentation
576 : *
577 : * Transfer bitmap heap scan statistics from DSM to private memory.
578 : * ----------------------------------------------------------------
579 : */
580 : void
581 0 : ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node)
582 : {
583 0 : SharedBitmapHeapInstrumentation *sinstrument = node->sinstrument;
584 : Size size;
585 :
586 0 : if (sinstrument == NULL)
587 0 : return;
588 :
589 0 : size = offsetof(SharedBitmapHeapInstrumentation, sinstrument)
590 0 : + sinstrument->num_workers * sizeof(BitmapHeapScanInstrumentation);
591 :
592 0 : node->sinstrument = palloc(size);
593 0 : memcpy(node->sinstrument, sinstrument, size);
594 : }
|