Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeBitmapHeapscan.c
4 : * Routines to support bitmapped scans of relations
5 : *
6 : * NOTE: it is critical that this plan type only be used with MVCC-compliant
7 : * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8 : * special snapshots). The reason is that since index and heap scans are
9 : * decoupled, there can be no assurance that the index tuple prompting a
10 : * visit to a particular heap TID still exists when the visit is made.
11 : * Therefore the tuple might not exist anymore either (which is OK because
12 : * heap_fetch will cope) --- but worse, the tuple slot could have been
13 : * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14 : * certain to fail the time qual and so it will not be mistakenly returned,
15 : * but with anything else we might return a tuple that doesn't meet the
16 : * required index qual conditions.
17 : *
18 : *
19 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
20 : * Portions Copyright (c) 1994, Regents of the University of California
21 : *
22 : *
23 : * IDENTIFICATION
24 : * src/backend/executor/nodeBitmapHeapscan.c
25 : *
26 : *-------------------------------------------------------------------------
27 : */
28 : /*
29 : * INTERFACE ROUTINES
30 : * ExecBitmapHeapScan scans a relation using bitmap info
31 : * ExecBitmapHeapNext workhorse for above
32 : * ExecInitBitmapHeapScan creates and initializes state info.
33 : * ExecReScanBitmapHeapScan prepares to rescan the plan.
34 : * ExecEndBitmapHeapScan releases all storage.
35 : */
36 : #include "postgres.h"
37 :
38 : #include <math.h>
39 :
40 : #include "access/relscan.h"
41 : #include "access/tableam.h"
42 : #include "access/visibilitymap.h"
43 : #include "executor/executor.h"
44 : #include "executor/nodeBitmapHeapscan.h"
45 : #include "miscadmin.h"
46 : #include "pgstat.h"
47 : #include "storage/bufmgr.h"
48 : #include "utils/rel.h"
49 : #include "utils/spccache.h"
50 :
51 : static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
52 : static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
53 : static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node);
54 : static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
55 : static inline void BitmapPrefetch(BitmapHeapScanState *node,
56 : TableScanDesc scan);
57 : static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
58 :
59 :
60 : /* ----------------------------------------------------------------
61 : * BitmapHeapNext
62 : *
63 : * Retrieve next tuple from the BitmapHeapScan node's currentRelation
64 : * ----------------------------------------------------------------
65 : */
66 : static TupleTableSlot *
67 5838180 : BitmapHeapNext(BitmapHeapScanState *node)
68 : {
69 : ExprContext *econtext;
70 : TableScanDesc scan;
71 : TIDBitmap *tbm;
72 : TupleTableSlot *slot;
73 5838180 : ParallelBitmapHeapState *pstate = node->pstate;
74 5838180 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
75 :
76 : /*
77 : * extract necessary information from the bitmap heap scan node
78 : */
79 5838180 : econtext = node->ss.ps.ps_ExprContext;
80 5838180 : slot = node->ss.ss_ScanTupleSlot;
81 5838180 : scan = node->ss.ss_currentScanDesc;
82 5838180 : tbm = node->tbm;
83 :
84 : /*
85 : * If we haven't yet performed the underlying index scan, do it, and begin
86 : * the iteration over the bitmap.
87 : *
88 : * For prefetching, we use *two* iterators, one for the pages we are
89 : * actually scanning and another that runs ahead of the first for
90 : * prefetching. node->prefetch_pages tracks exactly how many pages ahead
91 : * the prefetch iterator is. Also, node->prefetch_target tracks the
92 : * desired prefetch distance, which starts small and increases up to the
93 : * node->prefetch_maximum. This is to avoid doing a lot of prefetching in
94 : * a scan that stops after a few tuples because of a LIMIT.
95 : */
96 5838180 : if (!node->initialized)
97 : {
98 : TBMIterator tbmiterator;
99 :
100 19558 : if (!pstate)
101 : {
102 19218 : tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
103 :
104 19218 : if (!tbm || !IsA(tbm, TIDBitmap))
105 0 : elog(ERROR, "unrecognized result from subplan");
106 :
107 19218 : node->tbm = tbm;
108 : }
109 340 : else if (BitmapShouldInitializeSharedState(pstate))
110 : {
111 : /*
112 : * The leader will immediately come out of the function, but
113 : * the others will be blocked until the leader populates the TBM
114 : * and wakes them up.
115 : */
116 72 : tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
117 72 : if (!tbm || !IsA(tbm, TIDBitmap))
118 0 : elog(ERROR, "unrecognized result from subplan");
119 :
120 72 : node->tbm = tbm;
121 :
122 : /*
123 : * Prepare to iterate over the TBM. This will return the
124 : * dsa_pointer of the iterator state which will be used by
125 : * multiple processes to iterate jointly.
126 : */
127 72 : pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
128 :
129 : #ifdef USE_PREFETCH
130 72 : if (node->prefetch_maximum > 0)
131 : {
132 72 : pstate->prefetch_iterator =
133 72 : tbm_prepare_shared_iterate(tbm);
134 : }
135 : #endif /* USE_PREFETCH */
136 :
137 : /* We have initialized the shared state so wake up others. */
138 72 : BitmapDoneInitializingSharedState(pstate);
139 : }
140 :
141 19558 : tbmiterator = tbm_begin_iterate(tbm, dsa,
142 : pstate ?
143 : pstate->tbmiterator :
144 : InvalidDsaPointer);
145 :
146 : #ifdef USE_PREFETCH
147 19558 : if (node->prefetch_maximum > 0)
148 : node->prefetch_iterator =
149 19558 : tbm_begin_iterate(tbm, dsa,
150 : pstate ?
151 : pstate->prefetch_iterator :
152 : InvalidDsaPointer);
153 : #endif /* USE_PREFETCH */
154 :
155 : /*
156 : * If this is the first scan of the underlying table, create the table
157 : * scan descriptor and begin the scan.
158 : */
159 19558 : if (!scan)
160 : {
161 15470 : bool need_tuples = false;
162 :
163 : /*
164 : * We can potentially skip fetching heap pages if we do not need
165 : * any columns of the table, either for checking non-indexable
166 : * quals or for returning data. This test is a bit simplistic, as
167 : * it checks the stronger condition that there's no qual or return
168 : * tlist at all. But in most cases it's probably not worth working
169 : * harder than that.
170 : */
171 28828 : need_tuples = (node->ss.ps.plan->qual != NIL ||
172 13358 : node->ss.ps.plan->targetlist != NIL);
173 :
174 15470 : scan = table_beginscan_bm(node->ss.ss_currentRelation,
175 15470 : node->ss.ps.state->es_snapshot,
176 : 0,
177 : NULL,
178 : need_tuples);
179 :
180 15470 : node->ss.ss_currentScanDesc = scan;
181 : }
182 :
183 19558 : scan->st.rs_tbmiterator = tbmiterator;
184 19558 : node->initialized = true;
185 :
186 19558 : goto new_page;
187 : }
188 :
189 : for (;;)
190 : {
191 6797414 : while (table_scan_bitmap_next_tuple(scan, slot))
192 : {
193 : /*
194 : * Continuing in previously obtained page.
195 : */
196 :
197 6406486 : CHECK_FOR_INTERRUPTS();
198 :
199 : #ifdef USE_PREFETCH
200 :
201 : /*
202 : * Try to prefetch at least a few pages even before we get to the
203 : * second page if we don't stop reading after the first tuple.
204 : */
205 6406486 : if (!pstate)
206 : {
207 5212486 : if (node->prefetch_target < node->prefetch_maximum)
208 14476 : node->prefetch_target++;
209 : }
210 1194000 : else if (pstate->prefetch_target < node->prefetch_maximum)
211 : {
212 : /* take spinlock while updating shared state */
213 1926 : SpinLockAcquire(&pstate->mutex);
214 1926 : if (pstate->prefetch_target < node->prefetch_maximum)
215 1926 : pstate->prefetch_target++;
216 1926 : SpinLockRelease(&pstate->mutex);
217 : }
218 : #endif /* USE_PREFETCH */
219 :
220 : /*
221 : * We issue prefetch requests *after* fetching the current page to
222 : * try to avoid having prefetching interfere with the main I/O.
223 : * Also, this should happen only when we have determined there is
224 : * still something to do on the current page, else we may
225 : * uselessly prefetch the same page we are just about to request
226 : * for real.
227 : */
228 6406486 : BitmapPrefetch(node, scan);
229 :
230 : /*
231 : * If we are using lossy info, we have to recheck the qual
232 : * conditions at every tuple.
233 : */
234 6406486 : if (node->recheck)
235 : {
236 3124818 : econtext->ecxt_scantuple = slot;
237 3124818 : if (!ExecQualAndReset(node->bitmapqualorig, econtext))
238 : {
239 : /* Fails recheck, so drop it and loop back for another */
240 587410 : InstrCountFiltered2(node, 1);
241 587410 : ExecClearTuple(slot);
242 587410 : continue;
243 : }
244 : }
245 :
246 : /* OK to return this tuple */
247 5819076 : return slot;
248 : }
249 :
250 390928 : new_page:
251 :
252 410486 : BitmapAdjustPrefetchIterator(node);
253 :
254 : /*
255 : * Returns false if the bitmap is exhausted and there are no further
256 : * blocks we need to scan.
257 : */
258 410486 : if (!table_scan_bitmap_next_block(scan, &node->blockno,
259 : &node->recheck,
260 : &node->stats.lossy_pages,
261 : &node->stats.exact_pages))
262 19098 : break;
263 :
264 : /*
265 : * If serial, we can error out if the prefetch block doesn't stay
266 : * ahead of the current block.
267 : */
268 391382 : if (node->pstate == NULL &&
269 361274 : !tbm_exhausted(&node->prefetch_iterator) &&
270 361274 : node->prefetch_blockno < node->blockno)
271 0 : elog(ERROR,
272 : "prefetch and main iterators are out of sync. pfblockno: %d. blockno: %d",
273 : node->prefetch_blockno, node->blockno);
274 :
275 : /* Adjust the prefetch target */
276 391382 : BitmapAdjustPrefetchTarget(node);
277 : }
278 :
279 : /*
280 : * If we get here it means we are at the end of the scan.
281 : */
282 19098 : return ExecClearTuple(slot);
283 : }
284 :
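
The serial prefetch machinery in this function amounts to two cursors over the same bitmap: the main iterator reads pages, the prefetch iterator runs ahead of it, and prefetch_pages records the distance between them, capped by prefetch_target. The following standalone sketch (editor's illustration, not PostgreSQL code: the block array, variable names, and fixed target are invented, and the per-tuple ramp-up of the target is omitted) shows just that bookkeeping:

/*
 * Standalone sketch (illustrative only): serial two-iterator prefetch
 * bookkeeping.  "blocks" stands in for the bitmap's page list; prefetch_pages
 * is how far the prefetch cursor runs ahead of the main cursor.
 */
#include <stdio.h>

#define NBLOCKS 10

int
main(void)
{
	int		blocks[NBLOCKS] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29};
	int		main_pos = 0;		/* next block the scan will read */
	int		prefetch_pos = 0;	/* next block to issue a prefetch for */
	int		prefetch_pages = 0; /* distance between the two cursors */
	int		prefetch_target = 3;	/* stand-in for node->prefetch_target */

	while (main_pos < NBLOCKS)
	{
		/* main iterator advances to its next block */
		printf("read block %d\n", blocks[main_pos++]);

		/* close the gap by one page, or keep the prefetch cursor level */
		if (prefetch_pages > 0)
			prefetch_pages--;
		else if (prefetch_pos < NBLOCKS)
			prefetch_pos++;

		/* top the distance back up to the target */
		while (prefetch_pages < prefetch_target && prefetch_pos < NBLOCKS)
		{
			printf("  prefetch block %d\n", blocks[prefetch_pos++]);
			prefetch_pages++;
		}
	}
	return 0;
}

Each "read" shrinks the distance by one page and the inner loop tops it back up to the target, which is roughly what BitmapAdjustPrefetchIterator() and BitmapPrefetch() below do against the real TBM iterators.
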
285 : /*
286 : * BitmapDoneInitializingSharedState - Shared state is initialized
287 : *
288 : * By this time the leader has already populated the TBM and initialized the
289 : * shared state so wake up other processes.
290 : */
291 : static inline void
292 72 : BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
293 : {
294 72 : SpinLockAcquire(&pstate->mutex);
295 72 : pstate->state = BM_FINISHED;
296 72 : SpinLockRelease(&pstate->mutex);
297 72 : ConditionVariableBroadcast(&pstate->cv);
298 72 : }
299 :
300 : /*
301 : * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
302 : *
303 : * We keep track of how far the prefetch iterator is ahead of the main
304 : * iterator in prefetch_pages. For each block the main iterator returns, we
305 : * decrement prefetch_pages.
306 : */
307 : static inline void
308 410486 : BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
309 : {
310 : #ifdef USE_PREFETCH
311 410486 : ParallelBitmapHeapState *pstate = node->pstate;
312 : TBMIterateResult *tbmpre;
313 :
314 410486 : if (pstate == NULL)
315 : {
316 380038 : TBMIterator *prefetch_iterator = &node->prefetch_iterator;
317 :
318 380038 : if (node->prefetch_pages > 0)
319 : {
320 : /* The main iterator has closed the distance by one page */
321 340790 : node->prefetch_pages--;
322 : }
323 39248 : else if (!tbm_exhausted(prefetch_iterator))
324 : {
325 24820 : tbmpre = tbm_iterate(prefetch_iterator);
326 24820 : node->prefetch_blockno = tbmpre ? tbmpre->blockno :
327 : InvalidBlockNumber;
328 : }
329 380038 : return;
330 : }
331 :
332 : /*
333 : * XXX: There is a known issue with keeping the prefetch and current block
334 : * iterators in sync for parallel bitmap table scans. This can lead to
335 : * prefetching blocks that have already been read. See the discussion
336 : * here:
337 : * https://postgr.es/m/20240315211449.en2jcmdqxv5o6tlz%40alap3.anarazel.de
338 : * Note that moving the call site of BitmapAdjustPrefetchIterator()
339 : * exacerbates the effects of this bug.
340 : */
341 30448 : if (node->prefetch_maximum > 0)
342 : {
343 30448 : TBMIterator *prefetch_iterator = &node->prefetch_iterator;
344 :
345 30448 : SpinLockAcquire(&pstate->mutex);
346 30448 : if (pstate->prefetch_pages > 0)
347 : {
348 30114 : pstate->prefetch_pages--;
349 30114 : SpinLockRelease(&pstate->mutex);
350 : }
351 : else
352 : {
353 : /* Release the mutex before iterating */
354 334 : SpinLockRelease(&pstate->mutex);
355 :
356 : /*
357 : * In shared mode, we cannot ensure that the current blockno of
358 : * the main iterator and that of the prefetch iterator are the
359 : * same. It's possible that whatever blockno we are prefetching
360 : * will be processed by another process. Therefore, we don't
361 : * validate the blockno here as we do in the non-parallel
362 : * case.
363 : */
364 334 : if (!tbm_exhausted(prefetch_iterator))
365 : {
366 334 : tbmpre = tbm_iterate(prefetch_iterator);
367 334 : node->prefetch_blockno = tbmpre ? tbmpre->blockno :
368 : InvalidBlockNumber;
369 : }
370 : }
371 : }
372 : #endif /* USE_PREFETCH */
373 : }
374 :
375 : /*
376 : * BitmapAdjustPrefetchTarget - Adjust the prefetch target
377 : *
378 : * Increase the prefetch target if it's not yet at the max. The
379 : * target starts at -1, is increased to zero after fetching the very
380 : * first page/tuple, then to one after the second tuple is fetched,
381 : * and then doubles as later pages are fetched.
382 : */
383 : static inline void
384 391382 : BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
385 : {
386 : #ifdef USE_PREFETCH
387 391382 : ParallelBitmapHeapState *pstate = node->pstate;
388 :
389 391382 : if (pstate == NULL)
390 : {
391 361274 : if (node->prefetch_target >= node->prefetch_maximum)
392 : /* don't increase any further */ ;
393 15486 : else if (node->prefetch_target >= node->prefetch_maximum / 2)
394 442 : node->prefetch_target = node->prefetch_maximum;
395 15044 : else if (node->prefetch_target > 0)
396 0 : node->prefetch_target *= 2;
397 : else
398 15044 : node->prefetch_target++;
399 361274 : return;
400 : }
401 :
402 : /* Do an unlocked check first to save spinlock acquisitions. */
403 30108 : if (pstate->prefetch_target < node->prefetch_maximum)
404 : {
405 132 : SpinLockAcquire(&pstate->mutex);
406 132 : if (pstate->prefetch_target >= node->prefetch_maximum)
407 : /* don't increase any further */ ;
408 132 : else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
409 60 : pstate->prefetch_target = node->prefetch_maximum;
410 72 : else if (pstate->prefetch_target > 0)
411 0 : pstate->prefetch_target *= 2;
412 : else
413 72 : pstate->prefetch_target++;
414 132 : SpinLockRelease(&pstate->mutex);
415 : }
416 : #endif /* USE_PREFETCH */
417 : }
418 :
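
To make that ramp-up concrete, here is a standalone sketch of the serial branch of this function (editor's illustration, not PostgreSQL code; a prefetch_maximum of 256 is an arbitrary stand-in for the tablespace's effective_io_concurrency, and the -1 starting value matches ExecInitBitmapHeapScan() below):

/* Standalone sketch (illustrative only): growth of prefetch_target. */
#include <stdio.h>

static int
adjust_target(int target, int maximum)
{
	if (target >= maximum)
		return target;			/* don't increase any further */
	else if (target >= maximum / 2)
		return maximum;
	else if (target > 0)
		return target * 2;
	else
		return target + 1;
}

int
main(void)
{
	int		prefetch_maximum = 256; /* illustrative maximum */
	int		prefetch_target = -1;	/* initial value, as in ExecInitBitmapHeapScan */

	for (int page = 1; page <= 12; page++)
	{
		prefetch_target = adjust_target(prefetch_target, prefetch_maximum);
		printf("after page %2d: prefetch_target = %d\n", page, prefetch_target);
	}
	return 0;
}

The printed targets go 0, 1, 2, 4, ..., 128 and then jump straight to the maximum of 256, after which the first branch keeps them there.
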
419 : /*
420 : * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
421 : */
422 : static inline void
423 6406486 : BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
424 : {
425 : #ifdef USE_PREFETCH
426 6406486 : ParallelBitmapHeapState *pstate = node->pstate;
427 :
428 6406486 : if (pstate == NULL)
429 : {
430 5212486 : TBMIterator *prefetch_iterator = &node->prefetch_iterator;
431 :
432 5212486 : if (!tbm_exhausted(prefetch_iterator))
433 : {
434 5459206 : while (node->prefetch_pages < node->prefetch_target)
435 : {
436 355672 : TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
437 : bool skip_fetch;
438 :
439 355672 : if (tbmpre == NULL)
440 : {
441 : /* No more pages to prefetch */
442 14854 : tbm_end_iterate(prefetch_iterator);
443 14854 : break;
444 : }
445 340818 : node->prefetch_pages++;
446 340818 : node->prefetch_blockno = tbmpre->blockno;
447 :
448 : /*
449 : * If we expect not to have to actually read this heap page,
450 : * skip this prefetch call, but continue to run the prefetch
451 : * logic normally. (Would it be better not to increment
452 : * prefetch_pages?)
453 : */
454 747248 : skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
455 371684 : !tbmpre->recheck &&
456 30866 : VM_ALL_VISIBLE(node->ss.ss_currentRelation,
457 : tbmpre->blockno,
458 : &node->pvmbuffer));
459 :
460 340818 : if (!skip_fetch)
461 340596 : PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
462 : }
463 : }
464 :
465 5212486 : return;
466 : }
467 :
468 1194000 : if (pstate->prefetch_pages < pstate->prefetch_target)
469 : {
470 135490 : TBMIterator *prefetch_iterator = &node->prefetch_iterator;
471 :
472 135490 : if (!tbm_exhausted(prefetch_iterator))
473 : {
474 : while (1)
475 30036 : {
476 : TBMIterateResult *tbmpre;
477 58762 : bool do_prefetch = false;
478 : bool skip_fetch;
479 :
480 : /*
481 : * Recheck under the mutex. If some other process has already
482 : * done enough prefetching then we need not do anything.
483 : */
484 58762 : SpinLockAcquire(&pstate->mutex);
485 58762 : if (pstate->prefetch_pages < pstate->prefetch_target)
486 : {
487 30114 : pstate->prefetch_pages++;
488 30114 : do_prefetch = true;
489 : }
490 58762 : SpinLockRelease(&pstate->mutex);
491 :
492 58762 : if (!do_prefetch)
493 28648 : return;
494 :
495 30114 : tbmpre = tbm_iterate(prefetch_iterator);
496 30114 : if (tbmpre == NULL)
497 : {
498 : /* No more pages to prefetch */
499 78 : tbm_end_iterate(prefetch_iterator);
500 78 : break;
501 : }
502 :
503 30036 : node->prefetch_blockno = tbmpre->blockno;
504 :
505 : /* As above, skip prefetch if we expect not to need page */
506 88116 : skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
507 54984 : !tbmpre->recheck &&
508 24948 : VM_ALL_VISIBLE(node->ss.ss_currentRelation,
509 : tbmpre->blockno,
510 : &node->pvmbuffer));
511 :
512 30036 : if (!skip_fetch)
513 9396 : PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
514 : }
515 : }
516 : }
517 : #endif /* USE_PREFETCH */
518 : }
519 :
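
The skip_fetch test in both branches above asks whether the heap page may never need to be read at all: no tuple data is required (SO_NEED_TUPLES not set), the bitmap entry is exact (no recheck), and the visibility map reports the page as all-visible. A minimal standalone sketch of that predicate (editor's illustration; the plain booleans stand in for the real scan-descriptor flag, TBMIterateResult.recheck, and the VM_ALL_VISIBLE() call):

/*
 * Standalone sketch (illustrative only): the "can we skip touching this heap
 * page?" test used when deciding whether to prefetch it.
 */
#include <stdbool.h>
#include <stdio.h>

static bool
can_skip_fetch(bool need_tuples, bool lossy_recheck, bool page_all_visible)
{
	return !need_tuples && !lossy_recheck && page_all_visible;
}

int
main(void)
{
	/* e.g. a count over an exact bitmap page that is all-visible */
	printf("skip: %d\n", can_skip_fetch(false, false, true));	/* 1 */
	/* a query returning columns always has to read the page */
	printf("skip: %d\n", can_skip_fetch(true, false, true));	/* 0 */
	return 0;
}
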
520 : /*
521 : * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
522 : */
523 : static bool
524 0 : BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
525 : {
526 : ExprContext *econtext;
527 :
528 : /*
529 : * extract necessary information from the bitmap heap scan node
530 : */
531 0 : econtext = node->ss.ps.ps_ExprContext;
532 :
533 : /* Does the tuple meet the original qual conditions? */
534 0 : econtext->ecxt_scantuple = slot;
535 0 : return ExecQualAndReset(node->bitmapqualorig, econtext);
536 : }
537 :
538 : /* ----------------------------------------------------------------
539 : * ExecBitmapHeapScan(node)
540 : * ----------------------------------------------------------------
541 : */
542 : static TupleTableSlot *
543 5584122 : ExecBitmapHeapScan(PlanState *pstate)
544 : {
545 5584122 : BitmapHeapScanState *node = castNode(BitmapHeapScanState, pstate);
546 :
547 5584122 : return ExecScan(&node->ss,
548 : (ExecScanAccessMtd) BitmapHeapNext,
549 : (ExecScanRecheckMtd) BitmapHeapRecheck);
550 : }
551 :
552 : /* ----------------------------------------------------------------
553 : * ExecReScanBitmapHeapScan(node)
554 : * ----------------------------------------------------------------
555 : */
556 : void
557 4886 : ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
558 : {
559 4886 : PlanState *outerPlan = outerPlanState(node);
560 :
561 4886 : TableScanDesc scan = node->ss.ss_currentScanDesc;
562 :
563 4886 : if (scan)
564 : {
565 : /*
566 : * End iteration on iterators saved in scan descriptor if they have
567 : * not already been cleaned up.
568 : */
569 4094 : if (!tbm_exhausted(&scan->st.rs_tbmiterator))
570 4088 : tbm_end_iterate(&scan->st.rs_tbmiterator);
571 :
572 : /* rescan to release any page pin */
573 4094 : table_rescan(node->ss.ss_currentScanDesc, NULL);
574 : }
575 :
576 : /* If we did not already clean up the prefetch iterator, do so now. */
577 4886 : if (!tbm_exhausted(&node->prefetch_iterator))
578 1328 : tbm_end_iterate(&node->prefetch_iterator);
579 :
580 : /* release bitmaps and buffers if any */
581 4886 : if (node->tbm)
582 4088 : tbm_free(node->tbm);
583 4886 : if (node->pvmbuffer != InvalidBuffer)
584 54 : ReleaseBuffer(node->pvmbuffer);
585 4886 : node->tbm = NULL;
586 4886 : node->initialized = false;
587 4886 : node->pvmbuffer = InvalidBuffer;
588 4886 : node->recheck = true;
589 : /* Only used for serial BHS */
590 4886 : node->blockno = InvalidBlockNumber;
591 4886 : node->prefetch_blockno = InvalidBlockNumber;
592 4886 : node->prefetch_pages = 0;
593 4886 : node->prefetch_target = -1;
594 :
595 4886 : ExecScanReScan(&node->ss);
596 :
597 : /*
598 : * if chgParam of subnode is not null then plan will be re-scanned by
599 : * first ExecProcNode.
600 : */
601 4886 : if (outerPlan->chgParam == NULL)
602 222 : ExecReScan(outerPlan);
603 4886 : }
604 :
605 : /* ----------------------------------------------------------------
606 : * ExecEndBitmapHeapScan
607 : * ----------------------------------------------------------------
608 : */
609 : void
610 20058 : ExecEndBitmapHeapScan(BitmapHeapScanState *node)
611 : {
612 : TableScanDesc scanDesc;
613 :
614 : /*
615 : * When ending a parallel worker, copy the statistics gathered by the
616 : * worker back into shared memory so that it can be picked up by the main
617 : * process to report in EXPLAIN ANALYZE.
618 : */
619 20058 : if (node->sinstrument != NULL && IsParallelWorker())
620 : {
621 : BitmapHeapScanInstrumentation *si;
622 :
623 : Assert(ParallelWorkerNumber <= node->sinstrument->num_workers);
624 0 : si = &node->sinstrument->sinstrument[ParallelWorkerNumber];
625 :
626 : /*
627 : * Here we accumulate the stats rather than memcpy'ing node->stats
628 : * into si. When a Gather/GatherMerge node finishes, it shuts
629 : * down its workers. On rescan it will spin up new workers,
630 : * which will have a new BitmapHeapScanState and zeroed
631 : * stats.
632 : */
633 0 : si->exact_pages += node->stats.exact_pages;
634 0 : si->lossy_pages += node->stats.lossy_pages;
635 : }
636 :
637 : /*
638 : * extract information from the node
639 : */
640 20058 : scanDesc = node->ss.ss_currentScanDesc;
641 :
642 : /*
643 : * close down subplans
644 : */
645 20058 : ExecEndNode(outerPlanState(node));
646 :
647 20058 : if (scanDesc)
648 : {
649 : /*
650 : * End iteration on iterators saved in scan descriptor if they have
651 : * not already been cleaned up.
652 : */
653 15356 : if (!tbm_exhausted(&scanDesc->st.rs_tbmiterator))
654 15356 : tbm_end_iterate(&scanDesc->st.rs_tbmiterator);
655 :
656 : /*
657 : * close table scan
658 : */
659 15356 : table_endscan(scanDesc);
660 : }
661 :
662 : /* If we did not already clean up the prefetch iterator, do so now. */
663 20058 : if (!tbm_exhausted(&node->prefetch_iterator))
664 3292 : tbm_end_iterate(&node->prefetch_iterator);
665 :
666 : /*
667 : * release bitmaps and buffers if any
668 : */
669 20058 : if (node->tbm)
670 15088 : tbm_free(node->tbm);
671 20058 : if (node->pvmbuffer != InvalidBuffer)
672 48 : ReleaseBuffer(node->pvmbuffer);
673 20058 : }
674 :
675 : /* ----------------------------------------------------------------
676 : * ExecInitBitmapHeapScan
677 : *
678 : * Initializes the scan's state information.
679 : * ----------------------------------------------------------------
680 : */
681 : BitmapHeapScanState *
682 20172 : ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
683 : {
684 : BitmapHeapScanState *scanstate;
685 : Relation currentRelation;
686 :
687 : /* check for unsupported flags */
688 : Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
689 :
690 : /*
691 : * Assert caller didn't ask for an unsafe snapshot --- see comments at
692 : * head of file.
693 : */
694 : Assert(IsMVCCSnapshot(estate->es_snapshot));
695 :
696 : /*
697 : * create state structure
698 : */
699 20172 : scanstate = makeNode(BitmapHeapScanState);
700 20172 : scanstate->ss.ps.plan = (Plan *) node;
701 20172 : scanstate->ss.ps.state = estate;
702 20172 : scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
703 :
704 20172 : scanstate->tbm = NULL;
705 20172 : scanstate->pvmbuffer = InvalidBuffer;
706 :
707 : /* Zero the statistics counters */
708 20172 : memset(&scanstate->stats, 0, sizeof(BitmapHeapScanInstrumentation));
709 :
710 20172 : scanstate->prefetch_pages = 0;
711 20172 : scanstate->prefetch_target = -1;
712 20172 : scanstate->initialized = false;
713 20172 : scanstate->pstate = NULL;
714 20172 : scanstate->recheck = true;
715 20172 : scanstate->blockno = InvalidBlockNumber;
716 20172 : scanstate->prefetch_blockno = InvalidBlockNumber;
717 :
718 : /*
719 : * Miscellaneous initialization
720 : *
721 : * create expression context for node
722 : */
723 20172 : ExecAssignExprContext(estate, &scanstate->ss.ps);
724 :
725 : /*
726 : * open the scan relation
727 : */
728 20172 : currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
729 :
730 : /*
731 : * initialize child nodes
732 : */
733 20172 : outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
734 :
735 : /*
736 : * get the scan type from the relation descriptor.
737 : */
738 20172 : ExecInitScanTupleSlot(estate, &scanstate->ss,
739 : RelationGetDescr(currentRelation),
740 : table_slot_callbacks(currentRelation));
741 :
742 : /*
743 : * Initialize result type and projection.
744 : */
745 20172 : ExecInitResultTypeTL(&scanstate->ss.ps);
746 20172 : ExecAssignScanProjectionInfo(&scanstate->ss);
747 :
748 : /*
749 : * initialize child expressions
750 : */
751 20172 : scanstate->ss.ps.qual =
752 20172 : ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
753 20172 : scanstate->bitmapqualorig =
754 20172 : ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
755 :
756 : /*
757 : * Maximum number of prefetches for the tablespace if configured,
758 : * otherwise the current value of the effective_io_concurrency GUC.
759 : */
760 20172 : scanstate->prefetch_maximum =
761 20172 : get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
762 :
763 20172 : scanstate->ss.ss_currentRelation = currentRelation;
764 :
765 : /*
766 : * all done.
767 : */
768 20172 : return scanstate;
769 : }
770 :
771 : /*----------------
772 : * BitmapShouldInitializeSharedState
773 : *
774 : * The first process to come here and see the state as BM_INITIAL
775 : * will become the leader for the parallel bitmap scan and will be
776 : * responsible for populating the TIDBitmap. The other processes will
777 : * be blocked by the condition variable until the leader wakes them up.
778 : * ---------------
779 : */
780 : static bool
781 340 : BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
782 : {
783 : SharedBitmapState state;
784 :
785 : while (1)
786 : {
787 340 : SpinLockAcquire(&pstate->mutex);
788 340 : state = pstate->state;
789 340 : if (pstate->state == BM_INITIAL)
790 72 : pstate->state = BM_INPROGRESS;
791 340 : SpinLockRelease(&pstate->mutex);
792 :
793 : /* Exit if bitmap is done, or if we're the leader. */
794 340 : if (state != BM_INPROGRESS)
795 340 : break;
796 :
797 : /* Wait for the leader to wake us up. */
798 0 : ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
799 : }
800 :
801 340 : ConditionVariableCancelSleep();
802 :
803 340 : return (state == BM_INITIAL);
804 : }
805 :
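
The election protocol is: the first backend to see BM_INITIAL flips the state to BM_INPROGRESS and becomes the leader; everyone else sleeps on the condition variable until BitmapDoneInitializingSharedState() sets BM_FINISHED and broadcasts. Below is a minimal standalone sketch of the same handshake using plain pthreads instead of PostgreSQL's spinlock and ConditionVariable primitives (editor's illustration; the thread model and names are invented, and the real code achieves the same re-check with a loop around ConditionVariableSleep()):

/*
 * Standalone pthread sketch (illustrative only) of the BM_INITIAL /
 * BM_INPROGRESS / BM_FINISHED handshake; compile with -pthread.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum bm_state
{
	BM_INITIAL, BM_INPROGRESS, BM_FINISHED
};

static enum bm_state state = BM_INITIAL;
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;

/* Returns true for exactly one caller: the elected leader. */
static bool
should_initialize(void)
{
	bool		leader = false;

	pthread_mutex_lock(&mutex);
	if (state == BM_INITIAL)
	{
		state = BM_INPROGRESS;	/* we won the race; we build the bitmap */
		leader = true;
	}
	else
	{
		while (state != BM_FINISHED)	/* wait for the leader's broadcast */
			pthread_cond_wait(&cv, &mutex);
	}
	pthread_mutex_unlock(&mutex);
	return leader;
}

/* The leader calls this once the shared state is ready. */
static void
done_initializing(void)
{
	pthread_mutex_lock(&mutex);
	state = BM_FINISHED;
	pthread_mutex_unlock(&mutex);
	pthread_cond_broadcast(&cv);
}

static void *
worker(void *arg)
{
	long		id = (long) (intptr_t) arg;

	if (should_initialize())
	{
		printf("worker %ld: leader, building the bitmap\n", id);
		done_initializing();
	}
	else
		printf("worker %ld: follower, bitmap is ready\n", id);
	return NULL;
}

int
main(void)
{
	pthread_t	threads[4];

	for (long i = 0; i < 4; i++)
		pthread_create(&threads[i], NULL, worker, (void *) (intptr_t) i);
	for (int i = 0; i < 4; i++)
		pthread_join(threads[i], NULL);
	return 0;
}
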
806 : /* ----------------------------------------------------------------
807 : * ExecBitmapHeapEstimate
808 : *
809 : * Compute the amount of space we'll need in the parallel
810 : * query DSM, and inform pcxt->estimator about our needs.
811 : * ----------------------------------------------------------------
812 : */
813 : void
814 18 : ExecBitmapHeapEstimate(BitmapHeapScanState *node,
815 : ParallelContext *pcxt)
816 : {
817 : Size size;
818 :
819 18 : size = MAXALIGN(sizeof(ParallelBitmapHeapState));
820 :
821 : /* account for instrumentation, if required */
822 18 : if (node->ss.ps.instrument && pcxt->nworkers > 0)
823 : {
824 0 : size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
825 0 : size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
826 : }
827 :
828 18 : shm_toc_estimate_chunk(&pcxt->estimator, size);
829 18 : shm_toc_estimate_keys(&pcxt->estimator, 1);
830 18 : }
831 :
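
The estimate is one MAXALIGN'd ParallelBitmapHeapState plus, when instrumentation is requested, a SharedBitmapHeapInstrumentation header followed by one BitmapHeapScanInstrumentation slot per worker. A back-of-the-envelope sketch of that "header plus flexible per-worker array" sizing (editor's illustration; the struct layouts, the 8-byte alignment macro, and the sizes are invented stand-ins, and PostgreSQL uses the overflow-checked add_size()/mul_size() rather than plain arithmetic):

/* Standalone sketch (illustrative only) of the shared-chunk sizing. */
#include <stddef.h>
#include <stdio.h>

#define MY_MAXALIGN(len) (((len) + 7) & ~(size_t) 7)	/* assume 8-byte alignment */

typedef struct
{
	long		exact_pages;
	long		lossy_pages;
} ScanInstrumentation;			/* stand-in for BitmapHeapScanInstrumentation */

typedef struct
{
	int			num_workers;
	ScanInstrumentation sinstrument[];	/* one slot per worker */
} SharedInstrumentation;		/* stand-in for SharedBitmapHeapInstrumentation */

int
main(void)
{
	int			nworkers = 4;
	size_t		shared_state_size = 96; /* pretend sizeof(ParallelBitmapHeapState) */
	size_t		size;

	size = MY_MAXALIGN(shared_state_size);
	size += offsetof(SharedInstrumentation, sinstrument);
	size += nworkers * sizeof(ScanInstrumentation);

	printf("chunk size to request: %zu bytes\n", size);
	return 0;
}
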
832 : /* ----------------------------------------------------------------
833 : * ExecBitmapHeapInitializeDSM
834 : *
835 : * Set up a parallel bitmap heap scan descriptor.
836 : * ----------------------------------------------------------------
837 : */
838 : void
839 18 : ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
840 : ParallelContext *pcxt)
841 : {
842 : ParallelBitmapHeapState *pstate;
843 18 : SharedBitmapHeapInstrumentation *sinstrument = NULL;
844 18 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
845 : char *ptr;
846 : Size size;
847 :
848 : /* If there's no DSA, there are no workers; initialize nothing. */
849 18 : if (dsa == NULL)
850 0 : return;
851 :
852 18 : size = MAXALIGN(sizeof(ParallelBitmapHeapState));
853 18 : if (node->ss.ps.instrument && pcxt->nworkers > 0)
854 : {
855 0 : size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
856 0 : size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
857 : }
858 :
859 18 : ptr = shm_toc_allocate(pcxt->toc, size);
860 18 : pstate = (ParallelBitmapHeapState *) ptr;
861 18 : ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
862 18 : if (node->ss.ps.instrument && pcxt->nworkers > 0)
863 0 : sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
864 :
865 18 : pstate->tbmiterator = 0;
866 18 : pstate->prefetch_iterator = 0;
867 :
868 : /* Initialize the mutex */
869 18 : SpinLockInit(&pstate->mutex);
870 18 : pstate->prefetch_pages = 0;
871 18 : pstate->prefetch_target = -1;
872 18 : pstate->state = BM_INITIAL;
873 :
874 18 : ConditionVariableInit(&pstate->cv);
875 :
876 18 : if (sinstrument)
877 : {
878 0 : sinstrument->num_workers = pcxt->nworkers;
879 :
880 : /* ensure any unfilled slots will contain zeroes */
881 0 : memset(sinstrument->sinstrument, 0,
882 0 : pcxt->nworkers * sizeof(BitmapHeapScanInstrumentation));
883 : }
884 :
885 18 : shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
886 18 : node->pstate = pstate;
887 18 : node->sinstrument = sinstrument;
888 : }
889 :
890 : /* ----------------------------------------------------------------
891 : * ExecBitmapHeapReInitializeDSM
892 : *
893 : * Reset shared state before beginning a fresh scan.
894 : * ----------------------------------------------------------------
895 : */
896 : void
897 54 : ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
898 : ParallelContext *pcxt)
899 : {
900 54 : ParallelBitmapHeapState *pstate = node->pstate;
901 54 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
902 :
903 : /* If there's no DSA, there are no workers; do nothing. */
904 54 : if (dsa == NULL)
905 0 : return;
906 :
907 54 : pstate->state = BM_INITIAL;
908 54 : pstate->prefetch_pages = 0;
909 54 : pstate->prefetch_target = -1;
910 :
911 54 : if (DsaPointerIsValid(pstate->tbmiterator))
912 54 : tbm_free_shared_area(dsa, pstate->tbmiterator);
913 :
914 54 : if (DsaPointerIsValid(pstate->prefetch_iterator))
915 54 : tbm_free_shared_area(dsa, pstate->prefetch_iterator);
916 :
917 54 : pstate->tbmiterator = InvalidDsaPointer;
918 54 : pstate->prefetch_iterator = InvalidDsaPointer;
919 : }
920 :
921 : /* ----------------------------------------------------------------
922 : * ExecBitmapHeapInitializeWorker
923 : *
924 : * Copy relevant information from TOC into planstate.
925 : * ----------------------------------------------------------------
926 : */
927 : void
928 268 : ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
929 : ParallelWorkerContext *pwcxt)
930 : {
931 : char *ptr;
932 :
933 : Assert(node->ss.ps.state->es_query_dsa != NULL);
934 :
935 268 : ptr = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
936 :
937 268 : node->pstate = (ParallelBitmapHeapState *) ptr;
938 268 : ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
939 :
940 268 : if (node->ss.ps.instrument)
941 0 : node->sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
942 268 : }
943 :
944 : /* ----------------------------------------------------------------
945 : * ExecBitmapHeapRetrieveInstrumentation
946 : *
947 : * Transfer bitmap heap scan statistics from DSM to private memory.
948 : * ----------------------------------------------------------------
949 : */
950 : void
951 0 : ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node)
952 : {
953 0 : SharedBitmapHeapInstrumentation *sinstrument = node->sinstrument;
954 : Size size;
955 :
956 0 : if (sinstrument == NULL)
957 0 : return;
958 :
959 0 : size = offsetof(SharedBitmapHeapInstrumentation, sinstrument)
960 0 : + sinstrument->num_workers * sizeof(BitmapHeapScanInstrumentation);
961 :
962 0 : node->sinstrument = palloc(size);
963 0 : memcpy(node->sinstrument, sinstrument, size);
964 : }