Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeBitmapHeapscan.c
4 : * Routines to support bitmapped scans of relations
5 : *
6 : * NOTE: it is critical that this plan type only be used with MVCC-compliant
7 : * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8 : * special snapshots). The reason is that since index and heap scans are
9 : * decoupled, there can be no assurance that the index tuple prompting a
10 : * visit to a particular heap TID still exists when the visit is made.
11 : * Therefore the tuple might not exist anymore either (which is OK because
12 : * heap_fetch will cope) --- but worse, the tuple slot could have been
13 : * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14 : * certain to fail the time qual and so it will not be mistakenly returned,
15 : * but with anything else we might return a tuple that doesn't meet the
16 : * required index qual conditions.
17 : *
18 : *
19 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
20 : * Portions Copyright (c) 1994, Regents of the University of California
21 : *
22 : *
23 : * IDENTIFICATION
24 : * src/backend/executor/nodeBitmapHeapscan.c
25 : *
26 : *-------------------------------------------------------------------------
27 : */
28 : /*
29 : * INTERFACE ROUTINES
30 : * ExecBitmapHeapScan scans a relation using bitmap info
31 : * BitmapHeapNext workhorse for above
32 : * ExecInitBitmapHeapScan creates and initializes state info.
33 : * ExecReScanBitmapHeapScan prepares to rescan the plan.
34 : * ExecEndBitmapHeapScan releases all storage.
35 : */
36 : #include "postgres.h"
37 :
38 : #include <math.h>
39 :
40 : #include "access/relscan.h"
41 : #include "access/tableam.h"
42 : #include "access/transam.h"
43 : #include "access/visibilitymap.h"
44 : #include "executor/execdebug.h"
45 : #include "executor/nodeBitmapHeapscan.h"
46 : #include "miscadmin.h"
47 : #include "pgstat.h"
48 : #include "storage/bufmgr.h"
49 : #include "storage/predicate.h"
50 : #include "utils/memutils.h"
51 : #include "utils/rel.h"
52 : #include "utils/snapmgr.h"
53 : #include "utils/spccache.h"
54 :
55 : static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
56 : static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
57 : static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
58 : TBMIterateResult *tbmres);
59 : static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
60 : static inline void BitmapPrefetch(BitmapHeapScanState *node,
61 : TableScanDesc scan);
62 : static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
63 :
64 :
65 : /* ----------------------------------------------------------------
66 : * BitmapHeapNext
67 : *
68 : * Retrieve next tuple from the BitmapHeapScan node's currentRelation
   : *
   : * On the first call after (re)initialization this runs the outer plan
   : * with MultiExecProcNode to obtain the TIDBitmap and begins iterating
   : * over it. When node->pstate is set (parallel scan), only the process
   : * selected by BitmapShouldInitializeSharedState builds the bitmap;
   : * every participant then attaches to the shared iterator(s).
   : *
   : * Returns the scan tuple slot holding the next tuple (an all-NULL
   : * tuple when the heap fetch is skipped), or the cleared slot once the
   : * bitmap iterator is exhausted.
69 : * ----------------------------------------------------------------
70 : */
71 : static TupleTableSlot *
72 5744412 : BitmapHeapNext(BitmapHeapScanState *node)
73 : {
74 : ExprContext *econtext;
75 : TableScanDesc scan;
76 : TIDBitmap *tbm;
77 5744412 : TBMIterator *tbmiterator = NULL;
78 5744412 : TBMSharedIterator *shared_tbmiterator = NULL;
79 : TBMIterateResult *tbmres;
80 : TupleTableSlot *slot;
81 5744412 : ParallelBitmapHeapState *pstate = node->pstate;
82 5744412 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
83 :
84 : /*
85 : * extract necessary information from the bitmap heap scan node; exactly
   : * one of tbmiterator / shared_tbmiterator is used, depending on whether
   : * a parallel state is attached
86 : */
87 5744412 : econtext = node->ss.ps.ps_ExprContext;
88 5744412 : slot = node->ss.ss_ScanTupleSlot;
89 5744412 : scan = node->ss.ss_currentScanDesc;
90 5744412 : tbm = node->tbm;
91 5744412 : if (pstate == NULL)
92 4550070 : tbmiterator = node->tbmiterator;
93 : else
94 1194342 : shared_tbmiterator = node->shared_tbmiterator;
95 5744412 : tbmres = node->tbmres;
96 :
97 : /*
98 : * If we haven't yet performed the underlying index scan, do it, and begin
99 : * the iteration over the bitmap.
100 : *
101 : * For prefetching, we use *two* iterators, one for the pages we are
102 : * actually scanning and another that runs ahead of the first for
103 : * prefetching. node->prefetch_pages tracks exactly how many pages ahead
104 : * the prefetch iterator is. Also, node->prefetch_target tracks the
105 : * desired prefetch distance, which starts small and increases up to the
106 : * node->prefetch_maximum. This is to avoid doing a lot of prefetching in
107 : * a scan that stops after a few tuples because of a LIMIT.
   : *
   : * prefetch_target is started at -1 so that the first call to
   : * BitmapAdjustPrefetchTarget brings it to zero.
108 : */
109 5744412 : if (!node->initialized)
110 : {
111 16936 : if (!pstate)
112 : {
113 16588 : tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
114 :
115 16588 : if (!tbm || !IsA(tbm, TIDBitmap))
116 0 : elog(ERROR, "unrecognized result from subplan");
117 :
118 16588 : node->tbm = tbm;
119 16588 : node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
120 16588 : node->tbmres = tbmres = NULL;
121 :
122 : #ifdef USE_PREFETCH
123 16588 : if (node->prefetch_maximum > 0)
124 : {
125 16588 : node->prefetch_iterator = tbm_begin_iterate(tbm);
126 16588 : node->prefetch_pages = 0;
127 16588 : node->prefetch_target = -1;
128 : }
129 : #endif /* USE_PREFETCH */
130 : }
131 : else
132 : {
133 : /*
134 : * The leader will immediately come out of the function, but
135 : * others will be blocked until leader populates the TBM and wakes
136 : * them up.
137 : */
138 348 : if (BitmapShouldInitializeSharedState(pstate))
139 : {
140 72 : tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
141 72 : if (!tbm || !IsA(tbm, TIDBitmap))
142 0 : elog(ERROR, "unrecognized result from subplan");
143 :
144 72 : node->tbm = tbm;
145 :
146 : /*
147 : * Prepare to iterate over the TBM. This will return the
148 : * dsa_pointer of the iterator state which will be used by
149 : * multiple processes to iterate jointly.
150 : */
151 72 : pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
152 : #ifdef USE_PREFETCH
153 72 : if (node->prefetch_maximum > 0)
154 : {
155 72 : pstate->prefetch_iterator =
156 72 : tbm_prepare_shared_iterate(tbm);
157 :
158 : /*
159 : * We don't need the mutex here as we haven't yet woken up
160 : * the other processes.
161 : */
162 72 : pstate->prefetch_pages = 0;
163 72 : pstate->prefetch_target = -1;
164 : }
165 : #endif
166 :
167 : /* We have initialized the shared state so wake up others. */
168 72 : BitmapDoneInitializingSharedState(pstate);
169 : }
170 :
171 : /* Allocate a private iterator and attach the shared state to it */
172 348 : node->shared_tbmiterator = shared_tbmiterator =
173 348 : tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
174 348 : node->tbmres = tbmres = NULL;
175 :
176 : #ifdef USE_PREFETCH
177 348 : if (node->prefetch_maximum > 0)
178 : {
179 348 : node->shared_prefetch_iterator =
180 348 : tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator);
181 : }
182 : #endif /* USE_PREFETCH */
183 : }
184 16936 : node->initialized = true;
185 : }
186 :
187 : for (;;)
188 956064 : {
189 : bool skip_fetch;
190 :
191 6700476 : CHECK_FOR_INTERRUPTS();
192 :
193 : /*
194 : * Get next page of results if needed (tbmres == NULL means the
    : * previous page, if any, has been fully consumed)
195 : */
196 6700476 : if (tbmres == NULL)
197 : {
198 401840 : if (!pstate)
199 371384 : node->tbmres = tbmres = tbm_iterate(tbmiterator);
200 : else
201 30456 : node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
202 401840 : if (tbmres == NULL)
203 : {
204 : /* no more entries in the bitmap */
205 16554 : break;
206 : }
207 :
208 385286 : BitmapAdjustPrefetchIterator(node, tbmres);
209 :
210 : /*
211 : * We can skip fetching the heap page if we don't need any fields
212 : * from the heap, and the bitmap entries don't need rechecking,
213 : * and all tuples on the page are visible to our transaction.
214 : *
215 : * XXX: It's a layering violation that we do these checks above
216 : * tableam, they should probably be moved below it at some point.
217 : */
218 867740 : skip_fetch = (node->can_skip_fetch &&
219 443140 : !tbmres->recheck &&
220 57854 : VM_ALL_VISIBLE(node->ss.ss_currentRelation,
221 : tbmres->blockno,
222 : &node->vmbuffer));
223 :
224 385286 : if (skip_fetch)
225 : {
226 : /* can't be lossy in the skip_fetch case */
227 : Assert(tbmres->ntuples >= 0);
228 :
229 : /*
230 : * The number of tuples on this page is put into
231 : * node->return_empty_tuples; that many all-NULL tuples are
    : * then handed out further down in this loop.
232 : */
233 20766 : node->return_empty_tuples = tbmres->ntuples;
234 : }
235 364520 : else if (!table_scan_bitmap_next_block(scan, tbmres))
236 : {
237 : /* AM doesn't think this block is valid, skip */
238 4600 : continue;
239 : }
240 :
    : /* ntuples >= 0 is counted as an exact page, otherwise as a lossy one */
241 380680 : if (tbmres->ntuples >= 0)
242 223398 : node->exact_pages++;
243 : else
244 157282 : node->lossy_pages++;
245 :
246 : /* Adjust the prefetch target */
247 380680 : BitmapAdjustPrefetchTarget(node);
248 : }
249 : else
250 : {
251 : /*
252 : * Continuing in previously obtained page.
253 : */
254 :
255 : #ifdef USE_PREFETCH
256 :
257 : /*
258 : * Try to prefetch at least a few pages even before we get to the
259 : * second page if we don't stop reading after the first tuple.
260 : */
261 6298636 : if (!pstate)
262 : {
263 5125336 : if (node->prefetch_target < node->prefetch_maximum)
264 13266 : node->prefetch_target++;
265 : }
266 1173300 : else if (pstate->prefetch_target < node->prefetch_maximum)
267 : {
268 : /* take spinlock while updating shared state */
269 1866 : SpinLockAcquire(&pstate->mutex);
270 1866 : if (pstate->prefetch_target < node->prefetch_maximum)
271 1866 : pstate->prefetch_target++;
272 1866 : SpinLockRelease(&pstate->mutex);
273 : }
274 : #endif /* USE_PREFETCH */
275 : }
276 :
277 : /*
278 : * We issue prefetch requests *after* fetching the current page to try
279 : * to avoid having prefetching interfere with the main I/O. Also, this
280 : * should happen only when we have determined there is still something
281 : * to do on the current page, else we may uselessly prefetch the same
282 : * page we are just about to request for real.
283 : *
284 : * XXX: It's a layering violation that we do these checks above
285 : * tableam, they should probably be moved below it at some point.
286 : */
287 6679316 : BitmapPrefetch(node, scan);
288 :
289 6679316 : if (node->return_empty_tuples > 0)
290 : {
291 : /*
292 : * If we don't have to fetch the tuple, just return nulls.
293 : */
294 588096 : ExecStoreAllNullTuple(slot);
295 :
296 588096 : if (--node->return_empty_tuples == 0)
297 : {
298 : /* no more tuples to return in the next round */
299 20766 : node->tbmres = tbmres = NULL;
300 : }
301 : }
302 : else
303 : {
304 : /*
305 : * Attempt to fetch tuple from AM.
306 : */
307 6091220 : if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
308 : {
309 : /* nothing more to look at on this page */
310 364138 : node->tbmres = tbmres = NULL;
311 364138 : continue;
312 : }
313 :
314 : /*
315 : * If we are using lossy info, we have to recheck the qual
316 : * conditions at every tuple.
317 : */
318 5727082 : if (tbmres->recheck)
319 : {
320 3124378 : econtext->ecxt_scantuple = slot;
321 3124378 : if (!ExecQualAndReset(node->bitmapqualorig, econtext))
322 : {
323 : /* Fails recheck, so drop it and loop back for another */
324 587326 : InstrCountFiltered2(node, 1);
325 587326 : ExecClearTuple(slot);
326 587326 : continue;
327 : }
328 : }
329 : }
330 :
331 : /* OK to return this tuple */
332 5727852 : return slot;
333 : }
334 :
335 : /*
336 : * if we get here it means we are at the end of the scan..
337 : */
338 16554 : return ExecClearTuple(slot);
339 : }
340 :
341 : /*
342 : * BitmapDoneInitializingSharedState - Shared state is initialized
343 : *
344 : * By this time the leader has already populated the TBM and initialized the
345 : * shared state so wake up other processes.
346 : */
347 : static inline void
348 72 : BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
349 : {
350 72 : SpinLockAcquire(&pstate->mutex);
351 72 : pstate->state = BM_FINISHED;
352 72 : SpinLockRelease(&pstate->mutex);
353 72 : ConditionVariableBroadcast(&pstate->cv);
354 72 : }
355 :
356 : /*
357 : * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
358 : */
359 : static inline void
360 385286 : BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
361 : TBMIterateResult *tbmres)
362 : {
363 : #ifdef USE_PREFETCH
364 385286 : ParallelBitmapHeapState *pstate = node->pstate;
365 :
366 385286 : if (pstate == NULL)
367 : {
368 355178 : TBMIterator *prefetch_iterator = node->prefetch_iterator;
369 :
370 355178 : if (node->prefetch_pages > 0)
371 : {
372 : /* The main iterator has closed the distance by one page */
373 341354 : node->prefetch_pages--;
374 : }
375 13824 : else if (prefetch_iterator)
376 : {
377 : /* Do not let the prefetch iterator get behind the main one */
378 13824 : TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
379 :
380 13824 : if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
381 0 : elog(ERROR, "prefetch and main iterators are out of sync");
382 : }
383 355178 : return;
384 : }
385 :
386 30108 : if (node->prefetch_maximum > 0)
387 : {
388 30108 : TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
389 :
390 30108 : SpinLockAcquire(&pstate->mutex);
391 30108 : if (pstate->prefetch_pages > 0)
392 : {
393 30036 : pstate->prefetch_pages--;
394 30036 : SpinLockRelease(&pstate->mutex);
395 : }
396 : else
397 : {
398 : /* Release the mutex before iterating */
399 72 : SpinLockRelease(&pstate->mutex);
400 :
401 : /*
402 : * In case of shared mode, we can not ensure that the current
403 : * blockno of the main iterator and that of the prefetch iterator
404 : * are same. It's possible that whatever blockno we are
405 : * prefetching will be processed by another process. Therefore,
406 : * we don't validate the blockno here as we do in non-parallel
407 : * case.
408 : */
409 72 : if (prefetch_iterator)
410 72 : tbm_shared_iterate(prefetch_iterator);
411 : }
412 : }
413 : #endif /* USE_PREFETCH */
414 : }
415 :
416 : /*
417 : * BitmapAdjustPrefetchTarget - Adjust the prefetch target
418 : *
419 : * Increase prefetch target if it's not yet at the max. Note that
420 : * we will increase it to zero after fetching the very first
421 : * page/tuple, then to one after the second tuple is fetched, then
422 : * it doubles as later pages are fetched.
423 : */
424 : static inline void
425 380680 : BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
426 : {
427 : #ifdef USE_PREFETCH
428 380680 : ParallelBitmapHeapState *pstate = node->pstate;
429 :
430 380680 : if (pstate == NULL)
431 : {
432 350572 : if (node->prefetch_target >= node->prefetch_maximum)
433 : /* don't increase any further */ ;
434 13494 : else if (node->prefetch_target >= node->prefetch_maximum / 2)
435 176 : node->prefetch_target = node->prefetch_maximum;
436 13318 : else if (node->prefetch_target > 0)
437 0 : node->prefetch_target *= 2;
438 : else
439 13318 : node->prefetch_target++;
440 350572 : return;
441 : }
442 :
443 : /* Do an unlocked check first to save spinlock acquisitions. */
444 30108 : if (pstate->prefetch_target < node->prefetch_maximum)
445 : {
446 132 : SpinLockAcquire(&pstate->mutex);
447 132 : if (pstate->prefetch_target >= node->prefetch_maximum)
448 : /* don't increase any further */ ;
449 132 : else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
450 60 : pstate->prefetch_target = node->prefetch_maximum;
451 72 : else if (pstate->prefetch_target > 0)
452 0 : pstate->prefetch_target *= 2;
453 : else
454 72 : pstate->prefetch_target++;
455 132 : SpinLockRelease(&pstate->mutex);
456 : }
457 : #endif /* USE_PREFETCH */
458 : }
459 :
460 : /*
461 : * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
462 : */
463 : static inline void
464 6679316 : BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
465 : {
466 : #ifdef USE_PREFETCH
467 6679316 : ParallelBitmapHeapState *pstate = node->pstate;
468 :
469 6679316 : if (pstate == NULL)
470 : {
471 5475908 : TBMIterator *prefetch_iterator = node->prefetch_iterator;
472 :
473 5475908 : if (prefetch_iterator)
474 : {
475 5720056 : while (node->prefetch_pages < node->prefetch_target)
476 : {
477 354502 : TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
478 : bool skip_fetch;
479 :
480 354502 : if (tbmpre == NULL)
481 : {
482 : /* No more pages to prefetch */
483 13148 : tbm_end_iterate(prefetch_iterator);
484 13148 : node->prefetch_iterator = NULL;
485 13148 : break;
486 : }
487 341354 : node->prefetch_pages++;
488 :
489 : /*
490 : * If we expect not to have to actually read this heap page,
491 : * skip this prefetch call, but continue to run the prefetch
492 : * logic normally. (Would it be better not to increment
493 : * prefetch_pages?)
494 : *
495 : * This depends on the assumption that the index AM will
496 : * report the same recheck flag for this future heap page as
497 : * it did for the current heap page; which is not a certainty
498 : * but is true in many cases.
499 : */
500 815808 : skip_fetch = (node->can_skip_fetch &&
501 373164 : (node->tbmres ? !node->tbmres->recheck : false) &&
502 31810 : VM_ALL_VISIBLE(node->ss.ss_currentRelation,
503 : tbmpre->blockno,
504 : &node->pvmbuffer));
505 :
506 341354 : if (!skip_fetch)
507 341306 : PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
508 : }
509 : }
510 :
511 5475908 : return;
512 : }
513 :
514 1203408 : if (pstate->prefetch_pages < pstate->prefetch_target)
515 : {
516 135842 : TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
517 :
518 135842 : if (prefetch_iterator)
519 : {
520 : while (1)
521 30036 : {
522 : TBMIterateResult *tbmpre;
523 58700 : bool do_prefetch = false;
524 : bool skip_fetch;
525 :
526 : /*
527 : * Recheck under the mutex. If some other process has already
528 : * done enough prefetching then we need not to do anything.
529 : */
530 58700 : SpinLockAcquire(&pstate->mutex);
531 58700 : if (pstate->prefetch_pages < pstate->prefetch_target)
532 : {
533 30110 : pstate->prefetch_pages++;
534 30110 : do_prefetch = true;
535 : }
536 58700 : SpinLockRelease(&pstate->mutex);
537 :
538 58700 : if (!do_prefetch)
539 28590 : return;
540 :
541 30110 : tbmpre = tbm_shared_iterate(prefetch_iterator);
542 30110 : if (tbmpre == NULL)
543 : {
544 : /* No more pages to prefetch */
545 74 : tbm_end_shared_iterate(prefetch_iterator);
546 74 : node->shared_prefetch_iterator = NULL;
547 74 : break;
548 : }
549 :
550 : /* As above, skip prefetch if we expect not to need page */
551 116160 : skip_fetch = (node->can_skip_fetch &&
552 54684 : (node->tbmres ? !node->tbmres->recheck : false) &&
553 24648 : VM_ALL_VISIBLE(node->ss.ss_currentRelation,
554 : tbmpre->blockno,
555 : &node->pvmbuffer));
556 :
557 30036 : if (!skip_fetch)
558 9396 : PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
559 : }
560 : }
561 : }
562 : #endif /* USE_PREFETCH */
563 : }
564 :
565 : /*
566 : * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
567 : */
568 : static bool
569 0 : BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
570 : {
571 : ExprContext *econtext;
572 :
573 : /*
574 : * extract necessary information from index scan node
575 : */
576 0 : econtext = node->ss.ps.ps_ExprContext;
577 :
578 : /* Does the tuple meet the original qual conditions? */
579 0 : econtext->ecxt_scantuple = slot;
580 0 : return ExecQualAndReset(node->bitmapqualorig, econtext);
581 : }
582 :
583 : /* ----------------------------------------------------------------
584 : * ExecBitmapHeapScan(node)
585 : * ----------------------------------------------------------------
586 : */
587 : static TupleTableSlot *
588 5509166 : ExecBitmapHeapScan(PlanState *pstate)
589 : {
590 5509166 : BitmapHeapScanState *node = castNode(BitmapHeapScanState, pstate);
591 :
592 5509166 : return ExecScan(&node->ss,
593 : (ExecScanAccessMtd) BitmapHeapNext,
594 : (ExecScanRecheckMtd) BitmapHeapRecheck);
595 : }
596 :
597 : /* ----------------------------------------------------------------
598 : * ExecReScanBitmapHeapScan(node)
599 : * ----------------------------------------------------------------
600 : */
601 : void
602 4388 : ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
603 : {
604 4388 : PlanState *outerPlan = outerPlanState(node);
605 :
606 : /* rescan to release any page pin */
607 4388 : table_rescan(node->ss.ss_currentScanDesc, NULL);
608 :
609 : /* release bitmaps and buffers if any */
610 4388 : if (node->tbmiterator)
611 3810 : tbm_end_iterate(node->tbmiterator);
612 4388 : if (node->prefetch_iterator)
613 1270 : tbm_end_iterate(node->prefetch_iterator);
614 4388 : if (node->shared_tbmiterator)
615 54 : tbm_end_shared_iterate(node->shared_tbmiterator);
616 4388 : if (node->shared_prefetch_iterator)
617 0 : tbm_end_shared_iterate(node->shared_prefetch_iterator);
618 4388 : if (node->tbm)
619 3864 : tbm_free(node->tbm);
620 4388 : if (node->vmbuffer != InvalidBuffer)
621 54 : ReleaseBuffer(node->vmbuffer);
622 4388 : if (node->pvmbuffer != InvalidBuffer)
623 54 : ReleaseBuffer(node->pvmbuffer);
624 4388 : node->tbm = NULL;
625 4388 : node->tbmiterator = NULL;
626 4388 : node->tbmres = NULL;
627 4388 : node->prefetch_iterator = NULL;
628 4388 : node->initialized = false;
629 4388 : node->shared_tbmiterator = NULL;
630 4388 : node->shared_prefetch_iterator = NULL;
631 4388 : node->vmbuffer = InvalidBuffer;
632 4388 : node->pvmbuffer = InvalidBuffer;
633 :
634 4388 : ExecScanReScan(&node->ss);
635 :
636 : /*
637 : * if chgParam of subnode is not null then plan will be re-scanned by
638 : * first ExecProcNode.
639 : */
640 4388 : if (outerPlan->chgParam == NULL)
641 170 : ExecReScan(outerPlan);
642 4388 : }
643 :
644 : /* ----------------------------------------------------------------
645 : * ExecEndBitmapHeapScan
646 : * ----------------------------------------------------------------
647 : */
648 : void
649 17110 : ExecEndBitmapHeapScan(BitmapHeapScanState *node)
650 : {
651 : TableScanDesc scanDesc;
652 :
653 : /*
654 : * extract information from the node
655 : */
656 17110 : scanDesc = node->ss.ss_currentScanDesc;
657 :
658 : /*
659 : * close down subplans
660 : */
661 17110 : ExecEndNode(outerPlanState(node));
662 :
663 : /*
664 : * release bitmaps and buffers if any
665 : */
666 17110 : if (node->tbmiterator)
667 12712 : tbm_end_iterate(node->tbmiterator);
668 17110 : if (node->prefetch_iterator)
669 2158 : tbm_end_iterate(node->prefetch_iterator);
670 17110 : if (node->tbm)
671 12730 : tbm_free(node->tbm);
672 17110 : if (node->shared_tbmiterator)
673 294 : tbm_end_shared_iterate(node->shared_tbmiterator);
674 17110 : if (node->shared_prefetch_iterator)
675 274 : tbm_end_shared_iterate(node->shared_prefetch_iterator);
676 17110 : if (node->vmbuffer != InvalidBuffer)
677 30 : ReleaseBuffer(node->vmbuffer);
678 17110 : if (node->pvmbuffer != InvalidBuffer)
679 24 : ReleaseBuffer(node->pvmbuffer);
680 :
681 : /*
682 : * close heap scan
683 : */
684 17110 : table_endscan(scanDesc);
685 17110 : }
686 :
687 : /* ----------------------------------------------------------------
688 : * ExecInitBitmapHeapScan
689 : *
690 : * Initializes the scan's state information.
691 : * ----------------------------------------------------------------
692 : */
693 : BitmapHeapScanState *
694 17176 : ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
695 : {
696 : BitmapHeapScanState *scanstate;
697 : Relation currentRelation;
698 :
699 : /* check for unsupported flags */
700 : Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
701 :
702 : /*
703 : * Assert caller didn't ask for an unsafe snapshot --- see comments at
704 : * head of file.
705 : */
706 : Assert(IsMVCCSnapshot(estate->es_snapshot));
707 :
708 : /*
709 : * create state structure
710 : */
711 17176 : scanstate = makeNode(BitmapHeapScanState);
712 17176 : scanstate->ss.ps.plan = (Plan *) node;
713 17176 : scanstate->ss.ps.state = estate;
714 17176 : scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
715 :
716 17176 : scanstate->tbm = NULL;
717 17176 : scanstate->tbmiterator = NULL;
718 17176 : scanstate->tbmres = NULL;
719 17176 : scanstate->return_empty_tuples = 0;
720 17176 : scanstate->vmbuffer = InvalidBuffer;
721 17176 : scanstate->pvmbuffer = InvalidBuffer;
722 17176 : scanstate->exact_pages = 0;
723 17176 : scanstate->lossy_pages = 0;
724 17176 : scanstate->prefetch_iterator = NULL;
725 17176 : scanstate->prefetch_pages = 0;
726 17176 : scanstate->prefetch_target = 0;
727 17176 : scanstate->pscan_len = 0;
728 17176 : scanstate->initialized = false;
729 17176 : scanstate->shared_tbmiterator = NULL;
730 17176 : scanstate->shared_prefetch_iterator = NULL;
731 17176 : scanstate->pstate = NULL;
732 :
733 : /*
734 : * We can potentially skip fetching heap pages if we do not need any
735 : * columns of the table, either for checking non-indexable quals or for
736 : * returning data. This test is a bit simplistic, as it checks the
737 : * stronger condition that there's no qual or return tlist at all. But in
738 : * most cases it's probably not worth working harder than that.
739 : */
740 32306 : scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
741 15130 : node->scan.plan.targetlist == NIL);
742 :
743 : /*
744 : * Miscellaneous initialization
745 : *
746 : * create expression context for node
747 : */
748 17176 : ExecAssignExprContext(estate, &scanstate->ss.ps);
749 :
750 : /*
751 : * open the scan relation
752 : */
753 17176 : currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
754 :
755 : /*
756 : * initialize child nodes
757 : */
758 17176 : outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
759 :
760 : /*
761 : * get the scan type from the relation descriptor.
762 : */
763 17176 : ExecInitScanTupleSlot(estate, &scanstate->ss,
764 : RelationGetDescr(currentRelation),
765 : table_slot_callbacks(currentRelation));
766 :
767 : /*
768 : * Initialize result type and projection.
769 : */
770 17176 : ExecInitResultTypeTL(&scanstate->ss.ps);
771 17176 : ExecAssignScanProjectionInfo(&scanstate->ss);
772 :
773 : /*
774 : * initialize child expressions
775 : */
776 17176 : scanstate->ss.ps.qual =
777 17176 : ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
778 17176 : scanstate->bitmapqualorig =
779 17176 : ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
780 :
781 : /*
782 : * Maximum number of prefetches for the tablespace if configured,
783 : * otherwise the current value of the effective_io_concurrency GUC.
784 : */
785 17176 : scanstate->prefetch_maximum =
786 17176 : get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
787 :
788 17176 : scanstate->ss.ss_currentRelation = currentRelation;
789 :
790 17176 : scanstate->ss.ss_currentScanDesc = table_beginscan_bm(currentRelation,
791 : estate->es_snapshot,
792 : 0,
793 : NULL);
794 :
795 : /*
796 : * all done.
797 : */
798 17176 : return scanstate;
799 : }
800 :
801 : /*----------------
802 : * BitmapShouldInitializeSharedState
803 : *
804 : * The first process to come here and see the state to the BM_INITIAL
805 : * will become the leader for the parallel bitmap scan and will be
806 : * responsible for populating the TIDBitmap. The other processes will
807 : * be blocked by the condition variable until the leader wakes them up.
808 : * ---------------
809 : */
810 : static bool
811 348 : BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
812 : {
813 : SharedBitmapState state;
814 :
815 : while (1)
816 : {
817 348 : SpinLockAcquire(&pstate->mutex);
818 348 : state = pstate->state;
819 348 : if (pstate->state == BM_INITIAL)
820 72 : pstate->state = BM_INPROGRESS;
821 348 : SpinLockRelease(&pstate->mutex);
822 :
823 : /* Exit if bitmap is done, or if we're the leader. */
824 348 : if (state != BM_INPROGRESS)
825 348 : break;
826 :
827 : /* Wait for the leader to wake us up. */
828 0 : ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
829 : }
830 :
831 348 : ConditionVariableCancelSleep();
832 :
833 348 : return (state == BM_INITIAL);
834 : }
835 :
836 : /* ----------------------------------------------------------------
837 : * ExecBitmapHeapEstimate
838 : *
839 : * Compute the amount of space we'll need in the parallel
840 : * query DSM, and inform pcxt->estimator about our needs.
841 : * ----------------------------------------------------------------
842 : */
843 : void
844 18 : ExecBitmapHeapEstimate(BitmapHeapScanState *node,
845 : ParallelContext *pcxt)
846 : {
847 18 : EState *estate = node->ss.ps.state;
848 :
849 18 : node->pscan_len = add_size(offsetof(ParallelBitmapHeapState,
850 : phs_snapshot_data),
851 : EstimateSnapshotSpace(estate->es_snapshot));
852 :
853 18 : shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len);
854 18 : shm_toc_estimate_keys(&pcxt->estimator, 1);
855 18 : }
856 :
857 : /* ----------------------------------------------------------------
858 : * ExecBitmapHeapInitializeDSM
859 : *
860 : * Set up a parallel bitmap heap scan descriptor.
861 : * ----------------------------------------------------------------
862 : */
863 : void
864 18 : ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
865 : ParallelContext *pcxt)
866 : {
867 : ParallelBitmapHeapState *pstate;
868 18 : EState *estate = node->ss.ps.state;
869 18 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
870 :
871 : /* If there's no DSA, there are no workers; initialize nothing. */
872 18 : if (dsa == NULL)
873 0 : return;
874 :
875 18 : pstate = shm_toc_allocate(pcxt->toc, node->pscan_len);
876 :
877 18 : pstate->tbmiterator = 0;
878 18 : pstate->prefetch_iterator = 0;
879 :
880 : /* Initialize the mutex */
881 18 : SpinLockInit(&pstate->mutex);
882 18 : pstate->prefetch_pages = 0;
883 18 : pstate->prefetch_target = 0;
884 18 : pstate->state = BM_INITIAL;
885 :
886 18 : ConditionVariableInit(&pstate->cv);
887 18 : SerializeSnapshot(estate->es_snapshot, pstate->phs_snapshot_data);
888 :
889 18 : shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
890 18 : node->pstate = pstate;
891 : }
892 :
893 : /* ----------------------------------------------------------------
894 : * ExecBitmapHeapReInitializeDSM
895 : *
896 : * Reset shared state before beginning a fresh scan.
897 : * ----------------------------------------------------------------
898 : */
899 : void
900 54 : ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
901 : ParallelContext *pcxt)
902 : {
903 54 : ParallelBitmapHeapState *pstate = node->pstate;
904 54 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
905 :
906 : /* If there's no DSA, there are no workers; do nothing. */
907 54 : if (dsa == NULL)
908 0 : return;
909 :
910 54 : pstate->state = BM_INITIAL;
911 :
912 54 : if (DsaPointerIsValid(pstate->tbmiterator))
913 54 : tbm_free_shared_area(dsa, pstate->tbmiterator);
914 :
915 54 : if (DsaPointerIsValid(pstate->prefetch_iterator))
916 54 : tbm_free_shared_area(dsa, pstate->prefetch_iterator);
917 :
918 54 : pstate->tbmiterator = InvalidDsaPointer;
919 54 : pstate->prefetch_iterator = InvalidDsaPointer;
920 : }
921 :
922 : /* ----------------------------------------------------------------
923 : * ExecBitmapHeapInitializeWorker
924 : *
925 : * Copy relevant information from TOC into planstate.
926 : * ----------------------------------------------------------------
927 : */
928 : void
929 276 : ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
930 : ParallelWorkerContext *pwcxt)
931 : {
932 : ParallelBitmapHeapState *pstate;
933 : Snapshot snapshot;
934 :
935 : Assert(node->ss.ps.state->es_query_dsa != NULL);
936 :
937 276 : pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
938 276 : node->pstate = pstate;
939 :
940 276 : snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
941 276 : table_scan_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
942 276 : }
|