Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeBitmapHeapscan.c
4 : * Routines to support bitmapped scans of relations
5 : *
6 : * NOTE: it is critical that this plan type only be used with MVCC-compliant
7 : * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8 : * special snapshots). The reason is that since index and heap scans are
9 : * decoupled, there can be no assurance that the index tuple prompting a
10 : * visit to a particular heap TID still exists when the visit is made.
11 : * Therefore the tuple might not exist anymore either (which is OK because
12 : * heap_fetch will cope) --- but worse, the tuple slot could have been
13 : * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14 : * certain to fail the time qual and so it will not be mistakenly returned,
15 : * but with anything else we might return a tuple that doesn't meet the
16 : * required index qual conditions.
17 : *
18 : *
19 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
20 : * Portions Copyright (c) 1994, Regents of the University of California
21 : *
22 : *
23 : * IDENTIFICATION
24 : * src/backend/executor/nodeBitmapHeapscan.c
25 : *
26 : *-------------------------------------------------------------------------
27 : */
28 : /*
29 : * INTERFACE ROUTINES
30 : * ExecBitmapHeapScan scans a relation using bitmap info
31 : * ExecBitmapHeapNext workhorse for above
32 : * ExecInitBitmapHeapScan creates and initializes state info.
33 : * ExecReScanBitmapHeapScan prepares to rescan the plan.
34 : * ExecEndBitmapHeapScan releases all storage.
35 : */
36 : #include "postgres.h"
37 :
38 : #include <math.h>
39 :
40 : #include "access/relscan.h"
41 : #include "access/tableam.h"
42 : #include "access/visibilitymap.h"
43 : #include "executor/executor.h"
44 : #include "executor/nodeBitmapHeapscan.h"
45 : #include "miscadmin.h"
46 : #include "pgstat.h"
47 : #include "storage/bufmgr.h"
48 : #include "utils/rel.h"
49 : #include "utils/spccache.h"
50 :
51 : static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
52 : static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
53 : static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node);
54 : static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
55 : static inline void BitmapPrefetch(BitmapHeapScanState *node,
56 : TableScanDesc scan);
57 : static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
58 :
59 :
60 : /* ----------------------------------------------------------------
61 : * BitmapHeapNext
62 : *
63 : * Retrieve next tuple from the BitmapHeapScan node's currentRelation
64 : * ----------------------------------------------------------------
65 : */
66 : static TupleTableSlot *
67 5843510 : BitmapHeapNext(BitmapHeapScanState *node)
68 : {
69 : ExprContext *econtext;
70 : TableScanDesc scan;
71 : TIDBitmap *tbm;
72 : TupleTableSlot *slot;
73 5843510 : ParallelBitmapHeapState *pstate = node->pstate;
74 5843510 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
75 :
76 : /*
77 : * extract necessary information from index scan node
78 : */
79 5843510 : econtext = node->ss.ps.ps_ExprContext;
80 5843510 : slot = node->ss.ss_ScanTupleSlot;
81 5843510 : scan = node->ss.ss_currentScanDesc;
82 5843510 : tbm = node->tbm;
83 :
84 : /*
85 : * If we haven't yet performed the underlying index scan, do it, and begin
86 : * the iteration over the bitmap.
87 : *
88 : * For prefetching, we use *two* iterators, one for the pages we are
89 : * actually scanning and another that runs ahead of the first for
90 : * prefetching. node->prefetch_pages tracks exactly how many pages ahead
91 : * the prefetch iterator is. Also, node->prefetch_target tracks the
92 : * desired prefetch distance, which starts small and increases up to the
93 : * node->prefetch_maximum. This is to avoid doing a lot of prefetching in
94 : * a scan that stops after a few tuples because of a LIMIT.
95 : */
96 5843510 : if (!node->initialized)
97 : {
98 19578 : TBMIterator *tbmiterator = NULL;
99 19578 : TBMSharedIterator *shared_tbmiterator = NULL;
100 :
101 19578 : if (!pstate)
102 : {
103 19236 : tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
104 :
105 19236 : if (!tbm || !IsA(tbm, TIDBitmap))
106 0 : elog(ERROR, "unrecognized result from subplan");
107 :
108 19236 : node->tbm = tbm;
109 19236 : tbmiterator = tbm_begin_iterate(tbm);
110 :
111 : #ifdef USE_PREFETCH
112 19236 : if (node->prefetch_maximum > 0)
113 : {
114 19236 : node->prefetch_iterator = tbm_begin_iterate(tbm);
115 19236 : node->prefetch_pages = 0;
116 19236 : node->prefetch_target = -1;
117 : }
118 : #endif /* USE_PREFETCH */
119 : }
120 : else
121 : {
122 : /*
123 : * The leader will immediately come out of the function, but
124 : * others will be blocked until leader populates the TBM and wakes
125 : * them up.
126 : */
127 342 : if (BitmapShouldInitializeSharedState(pstate))
128 : {
129 72 : tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
130 72 : if (!tbm || !IsA(tbm, TIDBitmap))
131 0 : elog(ERROR, "unrecognized result from subplan");
132 :
133 72 : node->tbm = tbm;
134 :
135 : /*
136 : * Prepare to iterate over the TBM. This will return the
137 : * dsa_pointer of the iterator state which will be used by
138 : * multiple processes to iterate jointly.
139 : */
140 72 : pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
141 : #ifdef USE_PREFETCH
142 72 : if (node->prefetch_maximum > 0)
143 : {
144 72 : pstate->prefetch_iterator =
145 72 : tbm_prepare_shared_iterate(tbm);
146 :
147 : /*
148 : * We don't need the mutex here as we haven't yet woke up
149 : * others.
150 : */
151 72 : pstate->prefetch_pages = 0;
152 72 : pstate->prefetch_target = -1;
153 : }
154 : #endif
155 :
156 : /* We have initialized the shared state so wake up others. */
157 72 : BitmapDoneInitializingSharedState(pstate);
158 : }
159 :
160 : /* Allocate a private iterator and attach the shared state to it */
161 : shared_tbmiterator =
162 342 : tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
163 :
164 : #ifdef USE_PREFETCH
165 342 : if (node->prefetch_maximum > 0)
166 : {
167 342 : node->shared_prefetch_iterator =
168 342 : tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator);
169 : }
170 : #endif /* USE_PREFETCH */
171 : }
172 :
173 : /*
174 : * If this is the first scan of the underlying table, create the table
175 : * scan descriptor and begin the scan.
176 : */
177 19578 : if (!scan)
178 : {
179 15432 : bool need_tuples = false;
180 :
181 : /*
182 : * We can potentially skip fetching heap pages if we do not need
183 : * any columns of the table, either for checking non-indexable
184 : * quals or for returning data. This test is a bit simplistic, as
185 : * it checks the stronger condition that there's no qual or return
186 : * tlist at all. But in most cases it's probably not worth working
187 : * harder than that.
188 : */
189 28702 : need_tuples = (node->ss.ps.plan->qual != NIL ||
190 13270 : node->ss.ps.plan->targetlist != NIL);
191 :
192 15432 : scan = table_beginscan_bm(node->ss.ss_currentRelation,
193 15432 : node->ss.ps.state->es_snapshot,
194 : 0,
195 : NULL,
196 : need_tuples);
197 :
198 15432 : node->ss.ss_currentScanDesc = scan;
199 : }
200 :
201 19578 : scan->st.bitmap.rs_iterator = tbmiterator;
202 19578 : scan->st.bitmap.rs_shared_iterator = shared_tbmiterator;
203 19578 : node->initialized = true;
204 :
205 19578 : goto new_page;
206 : }
207 :
208 : for (;;)
209 : {
210 6803706 : while (table_scan_bitmap_next_tuple(scan, slot))
211 : {
212 : /*
213 : * Continuing in previously obtained page.
214 : */
215 :
216 6411880 : CHECK_FOR_INTERRUPTS();
217 :
218 : #ifdef USE_PREFETCH
219 :
220 : /*
221 : * Try to prefetch at least a few pages even before we get to the
222 : * second page if we don't stop reading after the first tuple.
223 : */
224 6411880 : if (!pstate)
225 : {
226 5217880 : if (node->prefetch_target < node->prefetch_maximum)
227 14478 : node->prefetch_target++;
228 : }
229 1194000 : else if (pstate->prefetch_target < node->prefetch_maximum)
230 : {
231 : /* take spinlock while updating shared state */
232 1926 : SpinLockAcquire(&pstate->mutex);
233 1926 : if (pstate->prefetch_target < node->prefetch_maximum)
234 1926 : pstate->prefetch_target++;
235 1926 : SpinLockRelease(&pstate->mutex);
236 : }
237 : #endif /* USE_PREFETCH */
238 :
239 : /*
240 : * We issue prefetch requests *after* fetching the current page to
241 : * try to avoid having prefetching interfere with the main I/O.
242 : * Also, this should happen only when we have determined there is
243 : * still something to do on the current page, else we may
244 : * uselessly prefetch the same page we are just about to request
245 : * for real.
246 : */
247 6411880 : BitmapPrefetch(node, scan);
248 :
249 : /*
250 : * If we are using lossy info, we have to recheck the qual
251 : * conditions at every tuple.
252 : */
253 6411880 : if (node->recheck)
254 : {
255 3124668 : econtext->ecxt_scantuple = slot;
256 3124668 : if (!ExecQualAndReset(node->bitmapqualorig, econtext))
257 : {
258 : /* Fails recheck, so drop it and loop back for another */
259 587506 : InstrCountFiltered2(node, 1);
260 587506 : ExecClearTuple(slot);
261 587506 : continue;
262 : }
263 : }
264 :
265 : /* OK to return this tuple */
266 5824374 : return slot;
267 : }
268 :
269 391826 : new_page:
270 :
271 411404 : BitmapAdjustPrefetchIterator(node);
272 :
273 : /*
274 : * Returns false if the bitmap is exhausted and there are no further
275 : * blocks we need to scan.
276 : */
277 411404 : if (!table_scan_bitmap_next_block(scan, &node->blockno,
278 : &node->recheck,
279 : &node->stats.lossy_pages,
280 : &node->stats.exact_pages))
281 19130 : break;
282 :
283 : /*
284 : * If serial, we can error out if the prefetch block doesn't stay
285 : * ahead of the current block.
286 : */
287 392268 : if (node->pstate == NULL &&
288 362160 : node->prefetch_iterator &&
289 362160 : node->prefetch_blockno < node->blockno)
290 0 : elog(ERROR,
291 : "prefetch and main iterators are out of sync. pfblockno: %d. blockno: %d",
292 : node->prefetch_blockno, node->blockno);
293 :
294 : /* Adjust the prefetch target */
295 392268 : BitmapAdjustPrefetchTarget(node);
296 : }
297 :
298 : /*
299 : * if we get here it means we are at the end of the scan..
300 : */
301 19130 : return ExecClearTuple(slot);
302 : }
303 :
304 : /*
305 : * BitmapDoneInitializingSharedState - Shared state is initialized
306 : *
307 : * By this time the leader has already populated the TBM and initialized the
308 : * shared state so wake up other processes.
309 : */
310 : static inline void
311 72 : BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
312 : {
313 72 : SpinLockAcquire(&pstate->mutex);
314 72 : pstate->state = BM_FINISHED;
315 72 : SpinLockRelease(&pstate->mutex);
316 72 : ConditionVariableBroadcast(&pstate->cv);
317 72 : }
318 :
319 : /*
320 : * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
321 : *
322 : * We keep track of how far the prefetch iterator is ahead of the main
323 : * iterator in prefetch_pages. For each block the main iterator returns, we
324 : * decrement prefetch_pages.
325 : */
326 : static inline void
327 411404 : BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
328 : {
329 : #ifdef USE_PREFETCH
330 411404 : ParallelBitmapHeapState *pstate = node->pstate;
331 : TBMIterateResult *tbmpre;
332 :
333 411404 : if (pstate == NULL)
334 : {
335 380954 : TBMIterator *prefetch_iterator = node->prefetch_iterator;
336 :
337 380954 : if (node->prefetch_pages > 0)
338 : {
339 : /* The main iterator has closed the distance by one page */
340 340966 : node->prefetch_pages--;
341 : }
342 39988 : else if (prefetch_iterator)
343 : {
344 25556 : tbmpre = tbm_iterate(prefetch_iterator);
345 25556 : node->prefetch_blockno = tbmpre ? tbmpre->blockno :
346 : InvalidBlockNumber;
347 : }
348 380954 : return;
349 : }
350 :
351 : /*
352 : * XXX: There is a known issue with keeping the prefetch and current block
353 : * iterators in sync for parallel bitmap table scans. This can lead to
354 : * prefetching blocks that have already been read. See the discussion
355 : * here:
356 : * https://postgr.es/m/20240315211449.en2jcmdqxv5o6tlz%40alap3.anarazel.de
357 : * Note that moving the call site of BitmapAdjustPrefetchIterator()
358 : * exacerbates the effects of this bug.
359 : */
360 30450 : if (node->prefetch_maximum > 0)
361 : {
362 30450 : TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
363 :
364 30450 : SpinLockAcquire(&pstate->mutex);
365 30450 : if (pstate->prefetch_pages > 0)
366 : {
367 30114 : pstate->prefetch_pages--;
368 30114 : SpinLockRelease(&pstate->mutex);
369 : }
370 : else
371 : {
372 : /* Release the mutex before iterating */
373 336 : SpinLockRelease(&pstate->mutex);
374 :
375 : /*
376 : * In case of shared mode, we can not ensure that the current
377 : * blockno of the main iterator and that of the prefetch iterator
378 : * are same. It's possible that whatever blockno we are
379 : * prefetching will be processed by another process. Therefore,
380 : * we don't validate the blockno here as we do in non-parallel
381 : * case.
382 : */
383 336 : if (prefetch_iterator)
384 : {
385 336 : tbmpre = tbm_shared_iterate(prefetch_iterator);
386 336 : node->prefetch_blockno = tbmpre ? tbmpre->blockno :
387 : InvalidBlockNumber;
388 : }
389 : }
390 : }
391 : #endif /* USE_PREFETCH */
392 : }
393 :
394 : /*
395 : * BitmapAdjustPrefetchTarget - Adjust the prefetch target
396 : *
397 : * Increase prefetch target if it's not yet at the max. Note that
398 : * we will increase it to zero after fetching the very first
399 : * page/tuple, then to one after the second tuple is fetched, then
400 : * it doubles as later pages are fetched.
401 : */
402 : static inline void
403 392268 : BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
404 : {
405 : #ifdef USE_PREFETCH
406 392268 : ParallelBitmapHeapState *pstate = node->pstate;
407 :
408 392268 : if (pstate == NULL)
409 : {
410 362160 : if (node->prefetch_target >= node->prefetch_maximum)
411 : /* don't increase any further */ ;
412 15500 : else if (node->prefetch_target >= node->prefetch_maximum / 2)
413 448 : node->prefetch_target = node->prefetch_maximum;
414 15052 : else if (node->prefetch_target > 0)
415 0 : node->prefetch_target *= 2;
416 : else
417 15052 : node->prefetch_target++;
418 362160 : return;
419 : }
420 :
421 : /* Do an unlocked check first to save spinlock acquisitions. */
422 30108 : if (pstate->prefetch_target < node->prefetch_maximum)
423 : {
424 132 : SpinLockAcquire(&pstate->mutex);
425 132 : if (pstate->prefetch_target >= node->prefetch_maximum)
426 : /* don't increase any further */ ;
427 132 : else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
428 60 : pstate->prefetch_target = node->prefetch_maximum;
429 72 : else if (pstate->prefetch_target > 0)
430 0 : pstate->prefetch_target *= 2;
431 : else
432 72 : pstate->prefetch_target++;
433 132 : SpinLockRelease(&pstate->mutex);
434 : }
435 : #endif /* USE_PREFETCH */
436 : }
437 :
438 : /*
439 : * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
440 : */
441 : static inline void
442 6411880 : BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
443 : {
444 : #ifdef USE_PREFETCH
445 6411880 : ParallelBitmapHeapState *pstate = node->pstate;
446 :
447 6411880 : if (pstate == NULL)
448 : {
449 5217880 : TBMIterator *prefetch_iterator = node->prefetch_iterator;
450 :
451 5217880 : if (prefetch_iterator)
452 : {
453 5466728 : while (node->prefetch_pages < node->prefetch_target)
454 : {
455 355840 : TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
456 : bool skip_fetch;
457 :
458 355840 : if (tbmpre == NULL)
459 : {
460 : /* No more pages to prefetch */
461 14846 : tbm_end_iterate(prefetch_iterator);
462 14846 : node->prefetch_iterator = NULL;
463 14846 : break;
464 : }
465 340994 : node->prefetch_pages++;
466 340994 : node->prefetch_blockno = tbmpre->blockno;
467 :
468 : /*
469 : * If we expect not to have to actually read this heap page,
470 : * skip this prefetch call, but continue to run the prefetch
471 : * logic normally. (Would it be better not to increment
472 : * prefetch_pages?)
473 : */
474 747456 : skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
475 371712 : !tbmpre->recheck &&
476 30718 : VM_ALL_VISIBLE(node->ss.ss_currentRelation,
477 : tbmpre->blockno,
478 : &node->pvmbuffer));
479 :
480 340994 : if (!skip_fetch)
481 340934 : PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
482 : }
483 : }
484 :
485 5217880 : return;
486 : }
487 :
488 1194000 : if (pstate->prefetch_pages < pstate->prefetch_target)
489 : {
490 135762 : TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
491 :
492 135762 : if (prefetch_iterator)
493 : {
494 : while (1)
495 30036 : {
496 : TBMIterateResult *tbmpre;
497 58754 : bool do_prefetch = false;
498 : bool skip_fetch;
499 :
500 : /*
501 : * Recheck under the mutex. If some other process has already
502 : * done enough prefetching then we need not to do anything.
503 : */
504 58754 : SpinLockAcquire(&pstate->mutex);
505 58754 : if (pstate->prefetch_pages < pstate->prefetch_target)
506 : {
507 30114 : pstate->prefetch_pages++;
508 30114 : do_prefetch = true;
509 : }
510 58754 : SpinLockRelease(&pstate->mutex);
511 :
512 58754 : if (!do_prefetch)
513 28640 : return;
514 :
515 30114 : tbmpre = tbm_shared_iterate(prefetch_iterator);
516 30114 : if (tbmpre == NULL)
517 : {
518 : /* No more pages to prefetch */
519 78 : tbm_end_shared_iterate(prefetch_iterator);
520 78 : node->shared_prefetch_iterator = NULL;
521 78 : break;
522 : }
523 :
524 30036 : node->prefetch_blockno = tbmpre->blockno;
525 :
526 : /* As above, skip prefetch if we expect not to need page */
527 88116 : skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
528 54984 : !tbmpre->recheck &&
529 24948 : VM_ALL_VISIBLE(node->ss.ss_currentRelation,
530 : tbmpre->blockno,
531 : &node->pvmbuffer));
532 :
533 30036 : if (!skip_fetch)
534 9396 : PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
535 : }
536 : }
537 : }
538 : #endif /* USE_PREFETCH */
539 : }
540 :
541 : /*
542 : * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
543 : */
544 : static bool
545 0 : BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
546 : {
547 : ExprContext *econtext;
548 :
549 : /*
550 : * extract necessary information from index scan node
551 : */
552 0 : econtext = node->ss.ps.ps_ExprContext;
553 :
554 : /* Does the tuple meet the original qual conditions? */
555 0 : econtext->ecxt_scantuple = slot;
556 0 : return ExecQualAndReset(node->bitmapqualorig, econtext);
557 : }
558 :
559 : /* ----------------------------------------------------------------
560 : * ExecBitmapHeapScan(node)
561 : * ----------------------------------------------------------------
562 : */
563 : static TupleTableSlot *
564 5586026 : ExecBitmapHeapScan(PlanState *pstate)
565 : {
566 5586026 : BitmapHeapScanState *node = castNode(BitmapHeapScanState, pstate);
567 :
568 5586026 : return ExecScan(&node->ss,
569 : (ExecScanAccessMtd) BitmapHeapNext,
570 : (ExecScanRecheckMtd) BitmapHeapRecheck);
571 : }
572 :
573 : /* ----------------------------------------------------------------
574 : * ExecReScanBitmapHeapScan(node)
575 : * ----------------------------------------------------------------
576 : */
577 : void
578 4958 : ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
579 : {
580 4958 : PlanState *outerPlan = outerPlanState(node);
581 :
582 4958 : TableScanDesc scan = node->ss.ss_currentScanDesc;
583 :
584 4958 : if (scan)
585 : {
586 : /*
587 : * End iteration on iterators saved in scan descriptor.
588 : */
589 4146 : if (scan->st.bitmap.rs_shared_iterator)
590 : {
591 54 : tbm_end_shared_iterate(scan->st.bitmap.rs_shared_iterator);
592 54 : scan->st.bitmap.rs_shared_iterator = NULL;
593 : }
594 :
595 4146 : if (scan->st.bitmap.rs_iterator)
596 : {
597 4092 : tbm_end_iterate(scan->st.bitmap.rs_iterator);
598 4092 : scan->st.bitmap.rs_iterator = NULL;
599 : }
600 :
601 : /* rescan to release any page pin */
602 4146 : table_rescan(node->ss.ss_currentScanDesc, NULL);
603 : }
604 :
605 : /* release bitmaps and buffers if any */
606 4958 : if (node->prefetch_iterator)
607 1374 : tbm_end_iterate(node->prefetch_iterator);
608 4958 : if (node->shared_prefetch_iterator)
609 0 : tbm_end_shared_iterate(node->shared_prefetch_iterator);
610 4958 : if (node->tbm)
611 4146 : tbm_free(node->tbm);
612 4958 : if (node->pvmbuffer != InvalidBuffer)
613 54 : ReleaseBuffer(node->pvmbuffer);
614 4958 : node->tbm = NULL;
615 4958 : node->prefetch_iterator = NULL;
616 4958 : node->initialized = false;
617 4958 : node->shared_prefetch_iterator = NULL;
618 4958 : node->pvmbuffer = InvalidBuffer;
619 4958 : node->recheck = true;
620 4958 : node->blockno = InvalidBlockNumber;
621 4958 : node->prefetch_blockno = InvalidBlockNumber;
622 :
623 4958 : ExecScanReScan(&node->ss);
624 :
625 : /*
626 : * if chgParam of subnode is not null then plan will be re-scanned by
627 : * first ExecProcNode.
628 : */
629 4958 : if (outerPlan->chgParam == NULL)
630 188 : ExecReScan(outerPlan);
631 4958 : }
632 :
633 : /* ----------------------------------------------------------------
634 : * ExecEndBitmapHeapScan
635 : * ----------------------------------------------------------------
636 : */
637 : void
638 20292 : ExecEndBitmapHeapScan(BitmapHeapScanState *node)
639 : {
640 : TableScanDesc scanDesc;
641 :
642 : /*
643 : * When ending a parallel worker, copy the statistics gathered by the
644 : * worker back into shared memory so that it can be picked up by the main
645 : * process to report in EXPLAIN ANALYZE.
646 : */
647 20292 : if (node->sinstrument != NULL && IsParallelWorker())
648 : {
649 : BitmapHeapScanInstrumentation *si;
650 :
651 : Assert(ParallelWorkerNumber <= node->sinstrument->num_workers);
652 0 : si = &node->sinstrument->sinstrument[ParallelWorkerNumber];
653 :
654 : /*
655 : * Here we accumulate the stats rather than performing memcpy on
656 : * node->stats into si. When a Gather/GatherMerge node finishes it
657 : * will perform planner shutdown on the workers. On rescan it will
658 : * spin up new workers which will have a new BitmapHeapScanState and
659 : * zeroed stats.
660 : */
661 0 : si->exact_pages += node->stats.exact_pages;
662 0 : si->lossy_pages += node->stats.lossy_pages;
663 : }
664 :
665 : /*
666 : * extract information from the node
667 : */
668 20292 : scanDesc = node->ss.ss_currentScanDesc;
669 :
670 : /*
671 : * close down subplans
672 : */
673 20292 : ExecEndNode(outerPlanState(node));
674 :
675 20292 : if (scanDesc)
676 : {
677 : /*
678 : * End iteration on iterators saved in scan descriptor.
679 : */
680 15318 : if (scanDesc->st.bitmap.rs_shared_iterator)
681 : {
682 288 : tbm_end_shared_iterate(scanDesc->st.bitmap.rs_shared_iterator);
683 288 : scanDesc->st.bitmap.rs_shared_iterator = NULL;
684 : }
685 :
686 15318 : if (scanDesc->st.bitmap.rs_iterator)
687 : {
688 15030 : tbm_end_iterate(scanDesc->st.bitmap.rs_iterator);
689 15030 : scanDesc->st.bitmap.rs_iterator = NULL;
690 : }
691 :
692 : /*
693 : * close table scan
694 : */
695 15318 : table_endscan(scanDesc);
696 : }
697 :
698 : /*
699 : * release bitmaps and buffers if any
700 : */
701 20292 : if (node->prefetch_iterator)
702 3010 : tbm_end_iterate(node->prefetch_iterator);
703 20292 : if (node->tbm)
704 15048 : tbm_free(node->tbm);
705 20292 : if (node->shared_prefetch_iterator)
706 264 : tbm_end_shared_iterate(node->shared_prefetch_iterator);
707 20292 : if (node->pvmbuffer != InvalidBuffer)
708 30 : ReleaseBuffer(node->pvmbuffer);
709 20292 : }
710 :
711 : /* ----------------------------------------------------------------
712 : * ExecInitBitmapHeapScan
713 : *
714 : * Initializes the scan's state information.
715 : * ----------------------------------------------------------------
716 : */
717 : BitmapHeapScanState *
718 20406 : ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
719 : {
720 : BitmapHeapScanState *scanstate;
721 : Relation currentRelation;
722 :
723 : /* check for unsupported flags */
724 : Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
725 :
726 : /*
727 : * Assert caller didn't ask for an unsafe snapshot --- see comments at
728 : * head of file.
729 : */
730 : Assert(IsMVCCSnapshot(estate->es_snapshot));
731 :
732 : /*
733 : * create state structure
734 : */
735 20406 : scanstate = makeNode(BitmapHeapScanState);
736 20406 : scanstate->ss.ps.plan = (Plan *) node;
737 20406 : scanstate->ss.ps.state = estate;
738 20406 : scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
739 :
740 20406 : scanstate->tbm = NULL;
741 20406 : scanstate->pvmbuffer = InvalidBuffer;
742 :
743 : /* Zero the statistics counters */
744 20406 : memset(&scanstate->stats, 0, sizeof(BitmapHeapScanInstrumentation));
745 :
746 20406 : scanstate->prefetch_iterator = NULL;
747 20406 : scanstate->prefetch_pages = 0;
748 20406 : scanstate->prefetch_target = 0;
749 20406 : scanstate->initialized = false;
750 20406 : scanstate->shared_prefetch_iterator = NULL;
751 20406 : scanstate->pstate = NULL;
752 20406 : scanstate->recheck = true;
753 20406 : scanstate->blockno = InvalidBlockNumber;
754 20406 : scanstate->prefetch_blockno = InvalidBlockNumber;
755 :
756 : /*
757 : * Miscellaneous initialization
758 : *
759 : * create expression context for node
760 : */
761 20406 : ExecAssignExprContext(estate, &scanstate->ss.ps);
762 :
763 : /*
764 : * open the scan relation
765 : */
766 20406 : currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
767 :
768 : /*
769 : * initialize child nodes
770 : */
771 20406 : outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
772 :
773 : /*
774 : * get the scan type from the relation descriptor.
775 : */
776 20406 : ExecInitScanTupleSlot(estate, &scanstate->ss,
777 : RelationGetDescr(currentRelation),
778 : table_slot_callbacks(currentRelation));
779 :
780 : /*
781 : * Initialize result type and projection.
782 : */
783 20406 : ExecInitResultTypeTL(&scanstate->ss.ps);
784 20406 : ExecAssignScanProjectionInfo(&scanstate->ss);
785 :
786 : /*
787 : * initialize child expressions
788 : */
789 20406 : scanstate->ss.ps.qual =
790 20406 : ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
791 20406 : scanstate->bitmapqualorig =
792 20406 : ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
793 :
794 : /*
795 : * Maximum number of prefetches for the tablespace if configured,
796 : * otherwise the current value of the effective_io_concurrency GUC.
797 : */
798 20406 : scanstate->prefetch_maximum =
799 20406 : get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
800 :
801 20406 : scanstate->ss.ss_currentRelation = currentRelation;
802 :
803 : /*
804 : * all done.
805 : */
806 20406 : return scanstate;
807 : }
808 :
809 : /*----------------
810 : * BitmapShouldInitializeSharedState
811 : *
812 : * The first process to come here and see the state to the BM_INITIAL
813 : * will become the leader for the parallel bitmap scan and will be
814 : * responsible for populating the TIDBitmap. The other processes will
815 : * be blocked by the condition variable until the leader wakes them up.
816 : * ---------------
817 : */
818 : static bool
819 342 : BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
820 : {
821 : SharedBitmapState state;
822 :
823 : while (1)
824 : {
825 342 : SpinLockAcquire(&pstate->mutex);
826 342 : state = pstate->state;
827 342 : if (pstate->state == BM_INITIAL)
828 72 : pstate->state = BM_INPROGRESS;
829 342 : SpinLockRelease(&pstate->mutex);
830 :
831 : /* Exit if bitmap is done, or if we're the leader. */
832 342 : if (state != BM_INPROGRESS)
833 342 : break;
834 :
835 : /* Wait for the leader to wake us up. */
836 0 : ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
837 : }
838 :
839 342 : ConditionVariableCancelSleep();
840 :
841 342 : return (state == BM_INITIAL);
842 : }
843 :
844 : /* ----------------------------------------------------------------
845 : * ExecBitmapHeapEstimate
846 : *
847 : * Compute the amount of space we'll need in the parallel
848 : * query DSM, and inform pcxt->estimator about our needs.
849 : * ----------------------------------------------------------------
850 : */
851 : void
852 18 : ExecBitmapHeapEstimate(BitmapHeapScanState *node,
853 : ParallelContext *pcxt)
854 : {
855 : Size size;
856 :
857 18 : size = MAXALIGN(sizeof(ParallelBitmapHeapState));
858 :
859 : /* account for instrumentation, if required */
860 18 : if (node->ss.ps.instrument && pcxt->nworkers > 0)
861 : {
862 0 : size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
863 0 : size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
864 : }
865 :
866 18 : shm_toc_estimate_chunk(&pcxt->estimator, size);
867 18 : shm_toc_estimate_keys(&pcxt->estimator, 1);
868 18 : }
869 :
870 : /* ----------------------------------------------------------------
871 : * ExecBitmapHeapInitializeDSM
872 : *
873 : * Set up a parallel bitmap heap scan descriptor.
874 : * ----------------------------------------------------------------
875 : */
876 : void
877 18 : ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
878 : ParallelContext *pcxt)
879 : {
880 : ParallelBitmapHeapState *pstate;
881 18 : SharedBitmapHeapInstrumentation *sinstrument = NULL;
882 18 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
883 : char *ptr;
884 : Size size;
885 :
886 : /* If there's no DSA, there are no workers; initialize nothing. */
887 18 : if (dsa == NULL)
888 0 : return;
889 :
890 18 : size = MAXALIGN(sizeof(ParallelBitmapHeapState));
891 18 : if (node->ss.ps.instrument && pcxt->nworkers > 0)
892 : {
893 0 : size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
894 0 : size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
895 : }
896 :
897 18 : ptr = shm_toc_allocate(pcxt->toc, size);
898 18 : pstate = (ParallelBitmapHeapState *) ptr;
899 18 : ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
900 18 : if (node->ss.ps.instrument && pcxt->nworkers > 0)
901 0 : sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
902 :
903 18 : pstate->tbmiterator = 0;
904 18 : pstate->prefetch_iterator = 0;
905 :
906 : /* Initialize the mutex */
907 18 : SpinLockInit(&pstate->mutex);
908 18 : pstate->prefetch_pages = 0;
909 18 : pstate->prefetch_target = 0;
910 18 : pstate->state = BM_INITIAL;
911 :
912 18 : ConditionVariableInit(&pstate->cv);
913 :
914 18 : if (sinstrument)
915 : {
916 0 : sinstrument->num_workers = pcxt->nworkers;
917 :
918 : /* ensure any unfilled slots will contain zeroes */
919 0 : memset(sinstrument->sinstrument, 0,
920 0 : pcxt->nworkers * sizeof(BitmapHeapScanInstrumentation));
921 : }
922 :
923 18 : shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
924 18 : node->pstate = pstate;
925 18 : node->sinstrument = sinstrument;
926 : }
927 :
928 : /* ----------------------------------------------------------------
929 : * ExecBitmapHeapReInitializeDSM
930 : *
931 : * Reset shared state before beginning a fresh scan.
932 : * ----------------------------------------------------------------
933 : */
934 : void
935 54 : ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
936 : ParallelContext *pcxt)
937 : {
938 54 : ParallelBitmapHeapState *pstate = node->pstate;
939 54 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
940 :
941 : /* If there's no DSA, there are no workers; do nothing. */
942 54 : if (dsa == NULL)
943 0 : return;
944 :
945 54 : pstate->state = BM_INITIAL;
946 :
947 54 : if (DsaPointerIsValid(pstate->tbmiterator))
948 54 : tbm_free_shared_area(dsa, pstate->tbmiterator);
949 :
950 54 : if (DsaPointerIsValid(pstate->prefetch_iterator))
951 54 : tbm_free_shared_area(dsa, pstate->prefetch_iterator);
952 :
953 54 : pstate->tbmiterator = InvalidDsaPointer;
954 54 : pstate->prefetch_iterator = InvalidDsaPointer;
955 : }
956 :
957 : /* ----------------------------------------------------------------
958 : * ExecBitmapHeapInitializeWorker
959 : *
960 : * Copy relevant information from TOC into planstate.
961 : * ----------------------------------------------------------------
962 : */
963 : void
964 270 : ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
965 : ParallelWorkerContext *pwcxt)
966 : {
967 : char *ptr;
968 :
969 : Assert(node->ss.ps.state->es_query_dsa != NULL);
970 :
971 270 : ptr = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
972 :
973 270 : node->pstate = (ParallelBitmapHeapState *) ptr;
974 270 : ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
975 :
976 270 : if (node->ss.ps.instrument)
977 0 : node->sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
978 270 : }
979 :
980 : /* ----------------------------------------------------------------
981 : * ExecBitmapHeapRetrieveInstrumentation
982 : *
983 : * Transfer bitmap heap scan statistics from DSM to private memory.
984 : * ----------------------------------------------------------------
985 : */
986 : void
987 0 : ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node)
988 : {
989 0 : SharedBitmapHeapInstrumentation *sinstrument = node->sinstrument;
990 : Size size;
991 :
992 0 : if (sinstrument == NULL)
993 0 : return;
994 :
995 0 : size = offsetof(SharedBitmapHeapInstrumentation, sinstrument)
996 0 : + sinstrument->num_workers * sizeof(BitmapHeapScanInstrumentation);
997 :
998 0 : node->sinstrument = palloc(size);
999 0 : memcpy(node->sinstrument, sinstrument, size);
1000 : }
|