Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeBitmapHeapscan.c
4 : * Routines to support bitmapped scans of relations
5 : *
6 : * NOTE: it is critical that this plan type only be used with MVCC-compliant
7 : * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8 : * special snapshots). The reason is that since index and heap scans are
9 : * decoupled, there can be no assurance that the index tuple prompting a
10 : * visit to a particular heap TID still exists when the visit is made.
11 : * Therefore the tuple might not exist anymore either (which is OK because
12 : * heap_fetch will cope) --- but worse, the tuple slot could have been
13 : * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14 : * certain to fail the time qual and so it will not be mistakenly returned,
15 : * but with anything else we might return a tuple that doesn't meet the
16 : * required index qual conditions.
17 : *
18 : *
19 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
20 : * Portions Copyright (c) 1994, Regents of the University of California
21 : *
22 : *
23 : * IDENTIFICATION
24 : * src/backend/executor/nodeBitmapHeapscan.c
25 : *
26 : *-------------------------------------------------------------------------
27 : */
28 : /*
29 : * INTERFACE ROUTINES
30 : * ExecBitmapHeapScan scans a relation using bitmap info
31 : * BitmapHeapNext workhorse for above
32 : * ExecInitBitmapHeapScan creates and initializes state info.
33 : * ExecReScanBitmapHeapScan prepares to rescan the plan.
34 : * ExecEndBitmapHeapScan releases all storage.
35 : */
36 : #include "postgres.h"
37 :
38 : #include <math.h>
39 :
40 : #include "access/relscan.h"
41 : #include "access/tableam.h"
42 : #include "access/visibilitymap.h"
43 : #include "executor/executor.h"
44 : #include "executor/nodeBitmapHeapscan.h"
45 : #include "miscadmin.h"
46 : #include "pgstat.h"
47 : #include "storage/bufmgr.h"
48 : #include "utils/rel.h"
49 : #include "utils/spccache.h"
50 :
51 : static void BitmapTableScanSetup(BitmapHeapScanState *node);
52 : static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
53 : static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
54 : static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node);
55 : static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
56 : static inline void BitmapPrefetch(BitmapHeapScanState *node,
57 : TableScanDesc scan);
58 : static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
59 :
60 :
61 : /*
62 : * Do the underlying index scan, build the bitmap, set up the parallel state
63 : * needed for parallel workers to iterate through the bitmap, and set up the
64 : * underlying table scan descriptor.
65 : *
66 : * For prefetching, we use *two* iterators, one for the pages we are actually
67 : * scanning and another that runs ahead of the first for prefetching.
68 : * node->prefetch_pages tracks exactly how many pages ahead the prefetch
69 : * iterator is. Also, node->prefetch_target tracks the desired prefetch
70 : * distance, which starts small and increases up to the
71 : * node->prefetch_maximum. This is to avoid doing a lot of prefetching in a
72 : * scan that stops after a few tuples because of a LIMIT.
73 : */
74 : static void
75 25576 : BitmapTableScanSetup(BitmapHeapScanState *node)
76 : {
77 25576 : TBMIterator tbmiterator = {0};
78 25576 : ParallelBitmapHeapState *pstate = node->pstate;
79 25576 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
80 :
81 25576 : if (!pstate)
82 : {
83 25234 : node->tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
84 :
85 25234 : if (!node->tbm || !IsA(node->tbm, TIDBitmap))
86 0 : elog(ERROR, "unrecognized result from subplan");
87 : }
88 342 : else if (BitmapShouldInitializeSharedState(pstate))
89 : {
90 : /*
91 : * The leader will immediately come out of the function, but others
92 : * will be blocked until leader populates the TBM and wakes them up.
93 : */
94 72 : node->tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
95 72 : if (!node->tbm || !IsA(node->tbm, TIDBitmap))
96 0 : elog(ERROR, "unrecognized result from subplan");
97 :
98 : /*
99 : * Prepare to iterate over the TBM. This will return the dsa_pointer
100 : * of the iterator state which will be used by multiple processes to
101 : * iterate jointly.
102 : */
103 72 : pstate->tbmiterator = tbm_prepare_shared_iterate(node->tbm);
104 :
105 : #ifdef USE_PREFETCH
106 72 : if (node->prefetch_maximum > 0)
107 : {
108 72 : pstate->prefetch_iterator =
109 72 : tbm_prepare_shared_iterate(node->tbm);
110 : }
111 : #endif /* USE_PREFETCH */
112 :
113 : /* We have initialized the shared state so wake up others. */
114 72 : BitmapDoneInitializingSharedState(pstate);
115 : }
116 :
117 25576 : tbmiterator = tbm_begin_iterate(node->tbm, dsa,
118 : pstate ?
119 : pstate->tbmiterator :
120 : InvalidDsaPointer);
121 :
122 : #ifdef USE_PREFETCH
123 25576 : if (node->prefetch_maximum > 0)
124 : node->prefetch_iterator =
125 25576 : tbm_begin_iterate(node->tbm, dsa,
126 : pstate ?
127 : pstate->prefetch_iterator :
128 : InvalidDsaPointer);
129 : #endif /* USE_PREFETCH */
130 :
131 : /*
132 : * If this is the first scan of the underlying table, create the table
133 : * scan descriptor and begin the scan.
134 : */
135 25576 : if (!node->ss.ss_currentScanDesc)
136 : {
137 21322 : bool need_tuples = false;
138 :
139 : /*
140 : * We can potentially skip fetching heap pages if we do not need any
141 : * columns of the table, either for checking non-indexable quals or
142 : * for returning data. This test is a bit simplistic, as it checks
143 : * the stronger condition that there's no qual or return tlist at all.
144 : * But in most cases it's probably not worth working harder than that.
145 : */
146 40282 : need_tuples = (node->ss.ps.plan->qual != NIL ||
147 18960 : node->ss.ps.plan->targetlist != NIL);
148 :
149 21322 : node->ss.ss_currentScanDesc =
150 21322 : table_beginscan_bm(node->ss.ss_currentRelation,
151 21322 : node->ss.ps.state->es_snapshot,
152 : 0,
153 : NULL,
154 : need_tuples);
155 : }
156 :
157 25576 : node->ss.ss_currentScanDesc->st.rs_tbmiterator = tbmiterator;
158 25576 : node->initialized = true;
159 25576 : }
160 :
161 :
162 : /* ----------------------------------------------------------------
163 : * BitmapHeapNext
164 : *
165 : * Retrieve next tuple from the BitmapHeapScan node's currentRelation
166 : * ----------------------------------------------------------------
167 : */
168 : static TupleTableSlot *
169 5871424 : BitmapHeapNext(BitmapHeapScanState *node)
170 : {
171 : ExprContext *econtext;
172 : TableScanDesc scan;
173 : TupleTableSlot *slot;
174 :
175 : #ifdef USE_PREFETCH
176 5871424 : ParallelBitmapHeapState *pstate = node->pstate;
177 : #endif
178 :
179 : /*
180 : * extract necessary information from index scan node
181 : */
182 5871424 : econtext = node->ss.ps.ps_ExprContext;
183 5871424 : slot = node->ss.ss_ScanTupleSlot;
184 5871424 : scan = node->ss.ss_currentScanDesc;
185 :
186 : /*
187 : * If we haven't yet performed the underlying index scan, do it, and begin
188 : * the iteration over the bitmap.
189 : */
190 5871424 : if (!node->initialized)
191 : {
192 25576 : BitmapTableScanSetup(node);
193 25576 : scan = node->ss.ss_currentScanDesc;
194 25576 : goto new_page;
195 : }
196 :
197 : for (;;)
198 : {
199 6826844 : while (table_scan_bitmap_next_tuple(scan, slot))
200 : {
201 : /*
202 : * Continuing in previously obtained page.
203 : */
204 :
205 6433736 : CHECK_FOR_INTERRUPTS();
206 :
207 : #ifdef USE_PREFETCH
208 :
209 : /*
210 : * Try to prefetch at least a few pages even before we get to the
211 : * second page if we don't stop reading after the first tuple.
212 : */
213 6433736 : if (!pstate)
214 : {
215 5239736 : if (node->prefetch_target < node->prefetch_maximum)
216 15074 : node->prefetch_target++;
217 : }
218 1194000 : else if (pstate->prefetch_target < node->prefetch_maximum)
219 : {
220 : /* take spinlock while updating shared state */
221 1926 : SpinLockAcquire(&pstate->mutex);
222 1926 : if (pstate->prefetch_target < node->prefetch_maximum)
223 1926 : pstate->prefetch_target++;
224 1926 : SpinLockRelease(&pstate->mutex);
225 : }
226 : #endif /* USE_PREFETCH */
227 :
228 : /*
229 : * We issue prefetch requests *after* fetching the current page to
230 : * try to avoid having prefetching interfere with the main I/O.
231 : * Also, this should happen only when we have determined there is
232 : * still something to do on the current page, else we may
233 : * uselessly prefetch the same page we are just about to request
234 : * for real.
235 : */
236 6433736 : BitmapPrefetch(node, scan);
237 :
238 : /*
239 : * If we are using lossy info, we have to recheck the qual
240 : * conditions at every tuple.
241 : */
242 6433736 : if (node->recheck)
243 : {
244 3124944 : econtext->ecxt_scantuple = slot;
245 3124944 : if (!ExecQualAndReset(node->bitmapqualorig, econtext))
246 : {
247 : /* Fails recheck, so drop it and loop back for another */
248 587410 : InstrCountFiltered2(node, 1);
249 587410 : ExecClearTuple(slot);
250 587410 : continue;
251 : }
252 : }
253 :
254 : /* OK to return this tuple */
255 5846326 : return slot;
256 : }
257 :
258 393108 : new_page:
259 :
260 418684 : BitmapAdjustPrefetchIterator(node);
261 :
262 : /*
263 : * Returns false if the bitmap is exhausted and there are no further
264 : * blocks we need to scan.
265 : */
266 418684 : if (!table_scan_bitmap_next_block(scan, &node->blockno,
267 : &node->recheck,
268 : &node->stats.lossy_pages,
269 : &node->stats.exact_pages))
270 25092 : break;
271 :
272 : /*
273 : * If serial, we can error out if the prefetch block doesn't stay
274 : * ahead of the current block.
275 : */
276 393586 : if (node->pstate == NULL &&
277 363478 : !tbm_exhausted(&node->prefetch_iterator) &&
278 363478 : node->prefetch_blockno < node->blockno)
279 0 : elog(ERROR,
280 : "prefetch and main iterators are out of sync. pfblockno: %d. blockno: %d",
281 : node->prefetch_blockno, node->blockno);
282 :
283 : /* Adjust the prefetch target */
284 393586 : BitmapAdjustPrefetchTarget(node);
285 : }
286 :
287 : /*
288 : * if we get here it means we are at the end of the scan..
289 : */
290 25092 : return ExecClearTuple(slot);
291 : }
292 :
293 : /*
294 : * BitmapDoneInitializingSharedState - Shared state is initialized
295 : *
296 : * By this time the leader has already populated the TBM and initialized the
297 : * shared state so wake up other processes.
298 : */
299 : static inline void
300 72 : BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
301 : {
302 72 : SpinLockAcquire(&pstate->mutex);
303 72 : pstate->state = BM_FINISHED;
304 72 : SpinLockRelease(&pstate->mutex);
305 72 : ConditionVariableBroadcast(&pstate->cv);
306 72 : }
307 :
308 : /*
309 : * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
310 : *
311 : * We keep track of how far the prefetch iterator is ahead of the main
312 : * iterator in prefetch_pages. For each block the main iterator returns, we
313 : * decrement prefetch_pages.
314 : */
315 : static inline void
316 418684 : BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
317 : {
318 : #ifdef USE_PREFETCH
319 418684 : ParallelBitmapHeapState *pstate = node->pstate;
320 : TBMIterateResult *tbmpre;
321 :
322 418684 : if (pstate == NULL)
323 : {
324 388234 : TBMIterator *prefetch_iterator = &node->prefetch_iterator;
325 :
326 388234 : if (node->prefetch_pages > 0)
327 : {
328 : /* The main iterator has closed the distance by one page */
329 341894 : node->prefetch_pages--;
330 : }
331 46340 : else if (!tbm_exhausted(prefetch_iterator))
332 : {
333 31298 : tbmpre = tbm_iterate(prefetch_iterator);
334 31298 : node->prefetch_blockno = tbmpre ? tbmpre->blockno :
335 : InvalidBlockNumber;
336 : }
337 388234 : return;
338 : }
339 :
340 : /*
341 : * XXX: There is a known issue with keeping the prefetch and current block
342 : * iterators in sync for parallel bitmap table scans. This can lead to
343 : * prefetching blocks that have already been read. See the discussion
344 : * here:
345 : * https://postgr.es/m/20240315211449.en2jcmdqxv5o6tlz%40alap3.anarazel.de
346 : * Note that moving the call site of BitmapAdjustPrefetchIterator()
347 : * exacerbates the effects of this bug.
348 : */
349 30450 : if (node->prefetch_maximum > 0)
350 : {
351 30450 : TBMIterator *prefetch_iterator = &node->prefetch_iterator;
352 :
353 30450 : SpinLockAcquire(&pstate->mutex);
354 30450 : if (pstate->prefetch_pages > 0)
355 : {
356 30112 : pstate->prefetch_pages--;
357 30112 : SpinLockRelease(&pstate->mutex);
358 : }
359 : else
360 : {
361 : /* Release the mutex before iterating */
362 338 : SpinLockRelease(&pstate->mutex);
363 :
364 : /*
365 : * In case of shared mode, we can not ensure that the current
366 : * blockno of the main iterator and that of the prefetch iterator
367 : * are same. It's possible that whatever blockno we are
368 : * prefetching will be processed by another process. Therefore,
369 : * we don't validate the blockno here as we do in non-parallel
370 : * case.
371 : */
372 338 : if (!tbm_exhausted(prefetch_iterator))
373 : {
374 338 : tbmpre = tbm_iterate(prefetch_iterator);
375 338 : node->prefetch_blockno = tbmpre ? tbmpre->blockno :
376 : InvalidBlockNumber;
377 : }
378 : }
379 : }
380 : #endif /* USE_PREFETCH */
381 : }
382 :
383 : /*
384 : * BitmapAdjustPrefetchTarget - Adjust the prefetch target
385 : *
386 : * Increase prefetch target if it's not yet at the max. Note that
387 : * we will increase it to zero after fetching the very first
388 : * page/tuple, then to one after the second tuple is fetched, then
389 : * it doubles as later pages are fetched.
390 : */
391 : static inline void
392 393586 : BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
393 : {
394 : #ifdef USE_PREFETCH
395 393586 : ParallelBitmapHeapState *pstate = node->pstate;
396 :
397 393586 : if (pstate == NULL)
398 : {
399 363478 : if (node->prefetch_target >= node->prefetch_maximum)
400 : /* don't increase any further */ ;
401 16182 : else if (node->prefetch_target >= node->prefetch_maximum / 2)
402 490 : node->prefetch_target = node->prefetch_maximum;
403 15692 : else if (node->prefetch_target > 0)
404 0 : node->prefetch_target *= 2;
405 : else
406 15692 : node->prefetch_target++;
407 363478 : return;
408 : }
409 :
410 : /* Do an unlocked check first to save spinlock acquisitions. */
411 30108 : if (pstate->prefetch_target < node->prefetch_maximum)
412 : {
413 132 : SpinLockAcquire(&pstate->mutex);
414 132 : if (pstate->prefetch_target >= node->prefetch_maximum)
415 : /* don't increase any further */ ;
416 132 : else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
417 60 : pstate->prefetch_target = node->prefetch_maximum;
418 72 : else if (pstate->prefetch_target > 0)
419 0 : pstate->prefetch_target *= 2;
420 : else
421 72 : pstate->prefetch_target++;
422 132 : SpinLockRelease(&pstate->mutex);
423 : }
424 : #endif /* USE_PREFETCH */
425 : }
426 :
427 : /*
428 : * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
429 : */
430 : static inline void
431 6433736 : BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
432 : {
433 : #ifdef USE_PREFETCH
434 6433736 : ParallelBitmapHeapState *pstate = node->pstate;
435 :
436 6433736 : if (pstate == NULL)
437 : {
438 5239736 : TBMIterator *prefetch_iterator = &node->prefetch_iterator;
439 :
440 5239736 : if (!tbm_exhausted(prefetch_iterator))
441 : {
442 5482428 : while (node->prefetch_pages < node->prefetch_target)
443 : {
444 357414 : TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
445 : bool skip_fetch;
446 :
447 357414 : if (tbmpre == NULL)
448 : {
449 : /* No more pages to prefetch */
450 15492 : tbm_end_iterate(prefetch_iterator);
451 15492 : break;
452 : }
453 341922 : node->prefetch_pages++;
454 341922 : node->prefetch_blockno = tbmpre->blockno;
455 :
456 : /*
457 : * If we expect not to have to actually read this heap page,
458 : * skip this prefetch call, but continue to run the prefetch
459 : * logic normally. (Would it be better not to increment
460 : * prefetch_pages?)
461 : */
462 749462 : skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
463 372794 : !tbmpre->recheck &&
464 30872 : VM_ALL_VISIBLE(node->ss.ss_currentRelation,
465 : tbmpre->blockno,
466 : &node->pvmbuffer));
467 :
468 341922 : if (!skip_fetch)
469 341700 : PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
470 : }
471 : }
472 :
473 5239736 : return;
474 : }
475 :
476 1194000 : if (pstate->prefetch_pages < pstate->prefetch_target)
477 : {
478 135448 : TBMIterator *prefetch_iterator = &node->prefetch_iterator;
479 :
480 135448 : if (!tbm_exhausted(prefetch_iterator))
481 : {
482 : while (1)
483 30036 : {
484 : TBMIterateResult *tbmpre;
485 58746 : bool do_prefetch = false;
486 : bool skip_fetch;
487 :
488 : /*
489 : * Recheck under the mutex. If some other process has already
490 : * done enough prefetching then we need not to do anything.
491 : */
492 58746 : SpinLockAcquire(&pstate->mutex);
493 58746 : if (pstate->prefetch_pages < pstate->prefetch_target)
494 : {
495 30112 : pstate->prefetch_pages++;
496 30112 : do_prefetch = true;
497 : }
498 58746 : SpinLockRelease(&pstate->mutex);
499 :
500 58746 : if (!do_prefetch)
501 28634 : return;
502 :
503 30112 : tbmpre = tbm_iterate(prefetch_iterator);
504 30112 : if (tbmpre == NULL)
505 : {
506 : /* No more pages to prefetch */
507 76 : tbm_end_iterate(prefetch_iterator);
508 76 : break;
509 : }
510 :
511 30036 : node->prefetch_blockno = tbmpre->blockno;
512 :
513 : /* As above, skip prefetch if we expect not to need page */
514 88116 : skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
515 54984 : !tbmpre->recheck &&
516 24948 : VM_ALL_VISIBLE(node->ss.ss_currentRelation,
517 : tbmpre->blockno,
518 : &node->pvmbuffer));
519 :
520 30036 : if (!skip_fetch)
521 9396 : PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
522 : }
523 : }
524 : }
525 : #endif /* USE_PREFETCH */
526 : }
527 :
528 : /*
529 : * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
530 : */
531 : static bool
532 0 : BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
533 : {
534 : ExprContext *econtext;
535 :
536 : /*
537 : * extract necessary information from index scan node
538 : */
539 0 : econtext = node->ss.ps.ps_ExprContext;
540 :
541 : /* Does the tuple meet the original qual conditions? */
542 0 : econtext->ecxt_scantuple = slot;
543 0 : return ExecQualAndReset(node->bitmapqualorig, econtext);
544 : }
545 :
546 : /* ----------------------------------------------------------------
547 : * ExecBitmapHeapScan(node)
548 : * ----------------------------------------------------------------
549 : */
550 : static TupleTableSlot *
551 5597848 : ExecBitmapHeapScan(PlanState *pstate)
552 : {
553 5597848 : BitmapHeapScanState *node = castNode(BitmapHeapScanState, pstate);
554 :
555 5597848 : return ExecScan(&node->ss,
556 : (ExecScanAccessMtd) BitmapHeapNext,
557 : (ExecScanRecheckMtd) BitmapHeapRecheck);
558 : }
559 :
560 : /* ----------------------------------------------------------------
561 : * ExecReScanBitmapHeapScan(node)
562 : * ----------------------------------------------------------------
563 : */
564 : void
565 10336 : ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
566 : {
567 10336 : PlanState *outerPlan = outerPlanState(node);
568 :
569 10336 : TableScanDesc scan = node->ss.ss_currentScanDesc;
570 :
571 10336 : if (scan)
572 : {
573 : /*
574 : * End iteration on iterators saved in scan descriptor if they have
575 : * not already been cleaned up.
576 : */
577 4260 : if (!tbm_exhausted(&scan->st.rs_tbmiterator))
578 4254 : tbm_end_iterate(&scan->st.rs_tbmiterator);
579 :
580 : /* rescan to release any page pin */
581 4260 : table_rescan(node->ss.ss_currentScanDesc, NULL);
582 : }
583 :
584 : /* If we did not already clean up the prefetch iterator, do so now. */
585 10336 : if (!tbm_exhausted(&node->prefetch_iterator))
586 1442 : tbm_end_iterate(&node->prefetch_iterator);
587 :
588 : /* release bitmaps and buffers if any */
589 10336 : if (node->tbm)
590 4254 : tbm_free(node->tbm);
591 10336 : if (node->pvmbuffer != InvalidBuffer)
592 54 : ReleaseBuffer(node->pvmbuffer);
593 10336 : node->tbm = NULL;
594 10336 : node->initialized = false;
595 10336 : node->pvmbuffer = InvalidBuffer;
596 10336 : node->recheck = true;
597 : /* Only used for serial BHS */
598 10336 : node->blockno = InvalidBlockNumber;
599 10336 : node->prefetch_blockno = InvalidBlockNumber;
600 10336 : node->prefetch_pages = 0;
601 10336 : node->prefetch_target = -1;
602 :
603 10336 : ExecScanReScan(&node->ss);
604 :
605 : /*
606 : * if chgParam of subnode is not null then plan will be re-scanned by
607 : * first ExecProcNode.
608 : */
609 10336 : if (outerPlan->chgParam == NULL)
610 232 : ExecReScan(outerPlan);
611 10336 : }
612 :
613 : /* ----------------------------------------------------------------
614 : * ExecEndBitmapHeapScan
615 : * ----------------------------------------------------------------
616 : */
617 : void
618 26358 : ExecEndBitmapHeapScan(BitmapHeapScanState *node)
619 : {
620 : TableScanDesc scanDesc;
621 :
622 : /*
623 : * When ending a parallel worker, copy the statistics gathered by the
624 : * worker back into shared memory so that it can be picked up by the main
625 : * process to report in EXPLAIN ANALYZE.
626 : */
627 26358 : if (node->sinstrument != NULL && IsParallelWorker())
628 : {
629 : BitmapHeapScanInstrumentation *si;
630 :
631 : Assert(ParallelWorkerNumber <= node->sinstrument->num_workers);
632 0 : si = &node->sinstrument->sinstrument[ParallelWorkerNumber];
633 :
634 : /*
635 : * Here we accumulate the stats rather than performing memcpy on
636 : * node->stats into si. When a Gather/GatherMerge node finishes it
637 : * will perform planner shutdown on the workers. On rescan it will
638 : * spin up new workers which will have a new BitmapHeapScanState and
639 : * zeroed stats.
640 : */
641 0 : si->exact_pages += node->stats.exact_pages;
642 0 : si->lossy_pages += node->stats.lossy_pages;
643 : }
644 :
645 : /*
646 : * extract information from the node
647 : */
648 26358 : scanDesc = node->ss.ss_currentScanDesc;
649 :
650 : /*
651 : * close down subplans
652 : */
653 26358 : ExecEndNode(outerPlanState(node));
654 :
655 26358 : if (scanDesc)
656 : {
657 : /*
658 : * End iteration on iterators saved in scan descriptor if they have
659 : * not already been cleaned up.
660 : */
661 21208 : if (!tbm_exhausted(&scanDesc->st.rs_tbmiterator))
662 21208 : tbm_end_iterate(&scanDesc->st.rs_tbmiterator);
663 :
664 : /*
665 : * close table scan
666 : */
667 21208 : table_endscan(scanDesc);
668 : }
669 :
670 : /* If we did not already clean up the prefetch iterator, do so now. */
671 26358 : if (!tbm_exhausted(&node->prefetch_iterator))
672 8560 : tbm_end_iterate(&node->prefetch_iterator);
673 :
674 : /*
675 : * release bitmaps and buffers if any
676 : */
677 26358 : if (node->tbm)
678 20938 : tbm_free(node->tbm);
679 26358 : if (node->pvmbuffer != InvalidBuffer)
680 48 : ReleaseBuffer(node->pvmbuffer);
681 26358 : }
682 :
683 : /* ----------------------------------------------------------------
684 : * ExecInitBitmapHeapScan
685 : *
686 : * Initializes the scan's state information.
687 : * ----------------------------------------------------------------
688 : */
689 : BitmapHeapScanState *
690 26472 : ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
691 : {
692 : BitmapHeapScanState *scanstate;
693 : Relation currentRelation;
694 :
695 : /* check for unsupported flags */
696 : Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
697 :
698 : /*
699 : * Assert caller didn't ask for an unsafe snapshot --- see comments at
700 : * head of file.
701 : */
702 : Assert(IsMVCCSnapshot(estate->es_snapshot));
703 :
704 : /*
705 : * create state structure
706 : */
707 26472 : scanstate = makeNode(BitmapHeapScanState);
708 26472 : scanstate->ss.ps.plan = (Plan *) node;
709 26472 : scanstate->ss.ps.state = estate;
710 26472 : scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
711 :
712 26472 : scanstate->tbm = NULL;
713 26472 : scanstate->pvmbuffer = InvalidBuffer;
714 :
715 : /* Zero the statistics counters */
716 26472 : memset(&scanstate->stats, 0, sizeof(BitmapHeapScanInstrumentation));
717 :
718 26472 : scanstate->prefetch_pages = 0;
719 26472 : scanstate->prefetch_target = -1;
720 26472 : scanstate->initialized = false;
721 26472 : scanstate->pstate = NULL;
722 26472 : scanstate->recheck = true;
723 26472 : scanstate->blockno = InvalidBlockNumber;
724 26472 : scanstate->prefetch_blockno = InvalidBlockNumber;
725 :
726 : /*
727 : * Miscellaneous initialization
728 : *
729 : * create expression context for node
730 : */
731 26472 : ExecAssignExprContext(estate, &scanstate->ss.ps);
732 :
733 : /*
734 : * open the scan relation
735 : */
736 26472 : currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
737 :
738 : /*
739 : * initialize child nodes
740 : */
741 26472 : outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
742 :
743 : /*
744 : * get the scan type from the relation descriptor.
745 : */
746 26472 : ExecInitScanTupleSlot(estate, &scanstate->ss,
747 : RelationGetDescr(currentRelation),
748 : table_slot_callbacks(currentRelation));
749 :
750 : /*
751 : * Initialize result type and projection.
752 : */
753 26472 : ExecInitResultTypeTL(&scanstate->ss.ps);
754 26472 : ExecAssignScanProjectionInfo(&scanstate->ss);
755 :
756 : /*
757 : * initialize child expressions
758 : */
759 26472 : scanstate->ss.ps.qual =
760 26472 : ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
761 26472 : scanstate->bitmapqualorig =
762 26472 : ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
763 :
764 : /*
765 : * Maximum number of prefetches for the tablespace if configured,
766 : * otherwise the current value of the effective_io_concurrency GUC.
767 : */
768 26472 : scanstate->prefetch_maximum =
769 26472 : get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
770 :
771 26472 : scanstate->ss.ss_currentRelation = currentRelation;
772 :
773 : /*
774 : * all done.
775 : */
776 26472 : return scanstate;
777 : }
778 :
779 : /*----------------
780 : * BitmapShouldInitializeSharedState
781 : *
782 : * The first process to come here and see the state to the BM_INITIAL
783 : * will become the leader for the parallel bitmap scan and will be
784 : * responsible for populating the TIDBitmap. The other processes will
785 : * be blocked by the condition variable until the leader wakes them up.
786 : * ---------------
787 : */
788 : static bool
789 342 : BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
790 : {
791 : SharedBitmapState state;
792 :
793 : while (1)
794 : {
795 342 : SpinLockAcquire(&pstate->mutex);
796 342 : state = pstate->state;
797 342 : if (pstate->state == BM_INITIAL)
798 72 : pstate->state = BM_INPROGRESS;
799 342 : SpinLockRelease(&pstate->mutex);
800 :
801 : /* Exit if bitmap is done, or if we're the leader. */
802 342 : if (state != BM_INPROGRESS)
803 342 : break;
804 :
805 : /* Wait for the leader to wake us up. */
806 0 : ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
807 : }
808 :
809 342 : ConditionVariableCancelSleep();
810 :
811 342 : return (state == BM_INITIAL);
812 : }
813 :
814 : /* ----------------------------------------------------------------
815 : * ExecBitmapHeapEstimate
816 : *
817 : * Compute the amount of space we'll need in the parallel
818 : * query DSM, and inform pcxt->estimator about our needs.
819 : * ----------------------------------------------------------------
820 : */
821 : void
822 18 : ExecBitmapHeapEstimate(BitmapHeapScanState *node,
823 : ParallelContext *pcxt)
824 : {
825 : Size size;
826 :
827 18 : size = MAXALIGN(sizeof(ParallelBitmapHeapState));
828 :
829 : /* account for instrumentation, if required */
830 18 : if (node->ss.ps.instrument && pcxt->nworkers > 0)
831 : {
832 0 : size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
833 0 : size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
834 : }
835 :
836 18 : shm_toc_estimate_chunk(&pcxt->estimator, size);
837 18 : shm_toc_estimate_keys(&pcxt->estimator, 1);
838 18 : }
839 :
840 : /* ----------------------------------------------------------------
841 : * ExecBitmapHeapInitializeDSM
842 : *
843 : * Set up a parallel bitmap heap scan descriptor.
844 : * ----------------------------------------------------------------
845 : */
846 : void
847 18 : ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
848 : ParallelContext *pcxt)
849 : {
850 : ParallelBitmapHeapState *pstate;
851 18 : SharedBitmapHeapInstrumentation *sinstrument = NULL;
852 18 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
853 : char *ptr;
854 : Size size;
855 :
856 : /* If there's no DSA, there are no workers; initialize nothing. */
857 18 : if (dsa == NULL)
858 0 : return;
859 :
860 18 : size = MAXALIGN(sizeof(ParallelBitmapHeapState));
861 18 : if (node->ss.ps.instrument && pcxt->nworkers > 0)
862 : {
863 0 : size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
864 0 : size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
865 : }
866 :
867 18 : ptr = shm_toc_allocate(pcxt->toc, size);
868 18 : pstate = (ParallelBitmapHeapState *) ptr;
869 18 : ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
870 18 : if (node->ss.ps.instrument && pcxt->nworkers > 0)
871 0 : sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
872 :
873 18 : pstate->tbmiterator = 0;
874 18 : pstate->prefetch_iterator = 0;
875 :
876 : /* Initialize the mutex */
877 18 : SpinLockInit(&pstate->mutex);
878 18 : pstate->prefetch_pages = 0;
879 18 : pstate->prefetch_target = -1;
880 18 : pstate->state = BM_INITIAL;
881 :
882 18 : ConditionVariableInit(&pstate->cv);
883 :
884 18 : if (sinstrument)
885 : {
886 0 : sinstrument->num_workers = pcxt->nworkers;
887 :
888 : /* ensure any unfilled slots will contain zeroes */
889 0 : memset(sinstrument->sinstrument, 0,
890 0 : pcxt->nworkers * sizeof(BitmapHeapScanInstrumentation));
891 : }
892 :
893 18 : shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
894 18 : node->pstate = pstate;
895 18 : node->sinstrument = sinstrument;
896 : }
897 :
898 : /* ----------------------------------------------------------------
899 : * ExecBitmapHeapReInitializeDSM
900 : *
901 : * Reset shared state before beginning a fresh scan.
902 : * ----------------------------------------------------------------
903 : */
904 : void
905 54 : ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
906 : ParallelContext *pcxt)
907 : {
908 54 : ParallelBitmapHeapState *pstate = node->pstate;
909 54 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
910 :
911 : /* If there's no DSA, there are no workers; do nothing. */
912 54 : if (dsa == NULL)
913 0 : return;
914 :
915 54 : pstate->state = BM_INITIAL;
916 54 : pstate->prefetch_pages = 0;
917 54 : pstate->prefetch_target = -1;
918 :
919 54 : if (DsaPointerIsValid(pstate->tbmiterator))
920 54 : tbm_free_shared_area(dsa, pstate->tbmiterator);
921 :
922 54 : if (DsaPointerIsValid(pstate->prefetch_iterator))
923 54 : tbm_free_shared_area(dsa, pstate->prefetch_iterator);
924 :
925 54 : pstate->tbmiterator = InvalidDsaPointer;
926 54 : pstate->prefetch_iterator = InvalidDsaPointer;
927 : }
928 :
929 : /* ----------------------------------------------------------------
930 : * ExecBitmapHeapInitializeWorker
931 : *
932 : * Copy relevant information from TOC into planstate.
933 : * ----------------------------------------------------------------
934 : */
935 : void
936 270 : ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
937 : ParallelWorkerContext *pwcxt)
938 : {
939 : char *ptr;
940 :
941 : Assert(node->ss.ps.state->es_query_dsa != NULL);
942 :
943 270 : ptr = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
944 :
945 270 : node->pstate = (ParallelBitmapHeapState *) ptr;
946 270 : ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
947 :
948 270 : if (node->ss.ps.instrument)
949 0 : node->sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
950 270 : }
951 :
952 : /* ----------------------------------------------------------------
953 : * ExecBitmapHeapRetrieveInstrumentation
954 : *
955 : * Transfer bitmap heap scan statistics from DSM to private memory.
956 : * ----------------------------------------------------------------
957 : */
958 : void
959 0 : ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node)
960 : {
961 0 : SharedBitmapHeapInstrumentation *sinstrument = node->sinstrument;
962 : Size size;
963 :
964 0 : if (sinstrument == NULL)
965 0 : return;
966 :
967 0 : size = offsetof(SharedBitmapHeapInstrumentation, sinstrument)
968 0 : + sinstrument->num_workers * sizeof(BitmapHeapScanInstrumentation);
969 :
970 0 : node->sinstrument = palloc(size);
971 0 : memcpy(node->sinstrument, sinstrument, size);
972 : }
|