Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeBitmapHeapscan.c
4 : * Routines to support bitmapped scans of relations
5 : *
6 : * NOTE: it is critical that this plan type only be used with MVCC-compliant
7 : * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8 : * special snapshots). The reason is that since index and heap scans are
9 : * decoupled, there can be no assurance that the index tuple prompting a
10 : * visit to a particular heap TID still exists when the visit is made.
11 : * Therefore the tuple might not exist anymore either (which is OK because
12 : * heap_fetch will cope) --- but worse, the tuple slot could have been
13 : * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14 : * certain to fail the time qual and so it will not be mistakenly returned,
15 : * but with anything else we might return a tuple that doesn't meet the
16 : * required index qual conditions.
17 : *
18 : *
19 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
20 : * Portions Copyright (c) 1994, Regents of the University of California
21 : *
22 : *
23 : * IDENTIFICATION
24 : * src/backend/executor/nodeBitmapHeapscan.c
25 : *
26 : *-------------------------------------------------------------------------
27 : */
28 : /*
29 : * INTERFACE ROUTINES
30 : * ExecBitmapHeapScan scans a relation using bitmap info
31 : * ExecBitmapHeapNext workhorse for above
32 : * ExecInitBitmapHeapScan creates and initializes state info.
33 : * ExecReScanBitmapHeapScan prepares to rescan the plan.
34 : * ExecEndBitmapHeapScan releases all storage.
35 : */
36 : #include "postgres.h"
37 :
38 : #include <math.h>
39 :
40 : #include "access/relscan.h"
41 : #include "access/tableam.h"
42 : #include "access/visibilitymap.h"
43 : #include "executor/executor.h"
44 : #include "executor/nodeBitmapHeapscan.h"
45 : #include "miscadmin.h"
46 : #include "pgstat.h"
47 : #include "storage/bufmgr.h"
48 : #include "utils/rel.h"
49 : #include "utils/snapmgr.h"
50 : #include "utils/spccache.h"
51 :
52 : static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
53 : static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
54 : static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
55 : BlockNumber blockno);
56 : static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
57 : static inline void BitmapPrefetch(BitmapHeapScanState *node,
58 : TableScanDesc scan);
59 : static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
60 :
61 :
62 : /* ----------------------------------------------------------------
63 : * BitmapHeapNext
64 : *
65 : * Retrieve next tuple from the BitmapHeapScan node's currentRelation
66 : * ----------------------------------------------------------------
67 : */
68 : static TupleTableSlot *
69 5767564 : BitmapHeapNext(BitmapHeapScanState *node)
70 : {
71 : ExprContext *econtext;
72 : TableScanDesc scan;
73 : TIDBitmap *tbm;
74 5767564 : TBMIterator *tbmiterator = NULL;
75 5767564 : TBMSharedIterator *shared_tbmiterator = NULL;
76 : TBMIterateResult *tbmres;
77 : TupleTableSlot *slot;
78 5767564 : ParallelBitmapHeapState *pstate = node->pstate;
79 5767564 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
80 :
81 : /*
82 : * extract necessary information from index scan node
83 : */
84 5767564 : econtext = node->ss.ps.ps_ExprContext;
85 5767564 : slot = node->ss.ss_ScanTupleSlot;
86 5767564 : scan = node->ss.ss_currentScanDesc;
87 5767564 : tbm = node->tbm;
88 5767564 : if (pstate == NULL)
89 4573228 : tbmiterator = node->tbmiterator;
90 : else
91 1194336 : shared_tbmiterator = node->shared_tbmiterator;
92 5767564 : tbmres = node->tbmres;
93 :
94 : /*
95 : * If we haven't yet performed the underlying index scan, do it, and begin
96 : * the iteration over the bitmap.
97 : *
98 : * For prefetching, we use *two* iterators, one for the pages we are
99 : * actually scanning and another that runs ahead of the first for
100 : * prefetching. node->prefetch_pages tracks exactly how many pages ahead
101 : * the prefetch iterator is. Also, node->prefetch_target tracks the
102 : * desired prefetch distance, which starts small and increases up to the
103 : * node->prefetch_maximum. This is to avoid doing a lot of prefetching in
104 : * a scan that stops after a few tuples because of a LIMIT.
105 : */
106 5767564 : if (!node->initialized)
107 : {
108 17616 : if (!pstate)
109 : {
110 17274 : tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
111 :
112 17274 : if (!tbm || !IsA(tbm, TIDBitmap))
113 0 : elog(ERROR, "unrecognized result from subplan");
114 :
115 17274 : node->tbm = tbm;
116 17274 : node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
117 17274 : node->tbmres = tbmres = NULL;
118 :
119 : #ifdef USE_PREFETCH
120 17274 : if (node->prefetch_maximum > 0)
121 : {
122 17274 : node->prefetch_iterator = tbm_begin_iterate(tbm);
123 17274 : node->prefetch_pages = 0;
124 17274 : node->prefetch_target = -1;
125 : }
126 : #endif /* USE_PREFETCH */
127 : }
128 : else
129 : {
130 : /*
131 : * The leader will immediately come out of the function, but
132 : * others will be blocked until leader populates the TBM and wakes
133 : * them up.
134 : */
135 342 : if (BitmapShouldInitializeSharedState(pstate))
136 : {
137 72 : tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
138 72 : if (!tbm || !IsA(tbm, TIDBitmap))
139 0 : elog(ERROR, "unrecognized result from subplan");
140 :
141 72 : node->tbm = tbm;
142 :
143 : /*
144 : * Prepare to iterate over the TBM. This will return the
145 : * dsa_pointer of the iterator state which will be used by
146 : * multiple processes to iterate jointly.
147 : */
148 72 : pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
149 : #ifdef USE_PREFETCH
150 72 : if (node->prefetch_maximum > 0)
151 : {
152 72 : pstate->prefetch_iterator =
153 72 : tbm_prepare_shared_iterate(tbm);
154 :
155 : /*
156 : * We don't need the mutex here as we haven't yet woke up
157 : * others.
158 : */
159 72 : pstate->prefetch_pages = 0;
160 72 : pstate->prefetch_target = -1;
161 : }
162 : #endif
163 :
164 : /* We have initialized the shared state so wake up others. */
165 72 : BitmapDoneInitializingSharedState(pstate);
166 : }
167 :
168 : /* Allocate a private iterator and attach the shared state to it */
169 342 : node->shared_tbmiterator = shared_tbmiterator =
170 342 : tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
171 342 : node->tbmres = tbmres = NULL;
172 :
173 : #ifdef USE_PREFETCH
174 342 : if (node->prefetch_maximum > 0)
175 : {
176 342 : node->shared_prefetch_iterator =
177 342 : tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator);
178 : }
179 : #endif /* USE_PREFETCH */
180 : }
181 :
182 : /*
183 : * If this is the first scan of the underlying table, create the table
184 : * scan descriptor and begin the scan.
185 : */
186 17616 : if (!scan)
187 : {
188 13598 : bool need_tuples = false;
189 :
190 : /*
191 : * We can potentially skip fetching heap pages if we do not need
192 : * any columns of the table, either for checking non-indexable
193 : * quals or for returning data. This test is a bit simplistic, as
194 : * it checks the stronger condition that there's no qual or return
195 : * tlist at all. But in most cases it's probably not worth working
196 : * harder than that.
197 : */
198 25512 : need_tuples = (node->ss.ps.plan->qual != NIL ||
199 11914 : node->ss.ps.plan->targetlist != NIL);
200 :
201 13598 : scan = table_beginscan_bm(node->ss.ss_currentRelation,
202 13598 : node->ss.ps.state->es_snapshot,
203 : 0,
204 : NULL,
205 : need_tuples);
206 :
207 13598 : node->ss.ss_currentScanDesc = scan;
208 : }
209 :
210 17616 : node->initialized = true;
211 : }
212 :
213 : for (;;)
214 981362 : {
215 : bool valid_block;
216 :
217 6748926 : CHECK_FOR_INTERRUPTS();
218 :
219 : /*
220 : * Get next page of results if needed
221 : */
222 6748926 : if (tbmres == NULL)
223 : {
224 405900 : if (!pstate)
225 375450 : node->tbmres = tbmres = tbm_iterate(tbmiterator);
226 : else
227 30450 : node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
228 405900 : if (tbmres == NULL)
229 : {
230 : /* no more entries in the bitmap */
231 17348 : break;
232 : }
233 :
234 388552 : BitmapAdjustPrefetchIterator(node, tbmres->blockno);
235 :
236 388552 : valid_block = table_scan_bitmap_next_block(scan, tbmres);
237 :
238 388546 : if (tbmres->ntuples >= 0)
239 231264 : node->stats.exact_pages++;
240 : else
241 157282 : node->stats.lossy_pages++;
242 :
243 388546 : if (!valid_block)
244 : {
245 : /* AM doesn't think this block is valid, skip */
246 5752 : continue;
247 : }
248 :
249 : /* Adjust the prefetch target */
250 382794 : BitmapAdjustPrefetchTarget(node);
251 : }
252 : else
253 : {
254 : /*
255 : * Continuing in previously obtained page.
256 : */
257 :
258 : #ifdef USE_PREFETCH
259 :
260 : /*
261 : * Try to prefetch at least a few pages even before we get to the
262 : * second page if we don't stop reading after the first tuple.
263 : */
264 6343026 : if (!pstate)
265 : {
266 5149026 : if (node->prefetch_target < node->prefetch_maximum)
267 13672 : node->prefetch_target++;
268 : }
269 1194000 : else if (pstate->prefetch_target < node->prefetch_maximum)
270 : {
271 : /* take spinlock while updating shared state */
272 1926 : SpinLockAcquire(&pstate->mutex);
273 1926 : if (pstate->prefetch_target < node->prefetch_maximum)
274 1926 : pstate->prefetch_target++;
275 1926 : SpinLockRelease(&pstate->mutex);
276 : }
277 : #endif /* USE_PREFETCH */
278 : }
279 :
280 : /*
281 : * We issue prefetch requests *after* fetching the current page to try
282 : * to avoid having prefetching interfere with the main I/O. Also, this
283 : * should happen only when we have determined there is still something
284 : * to do on the current page, else we may uselessly prefetch the same
285 : * page we are just about to request for real.
286 : */
287 6725820 : BitmapPrefetch(node, scan);
288 :
289 : /*
290 : * Attempt to fetch tuple from AM.
291 : */
292 6725820 : if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
293 : {
294 : /* nothing more to look at on this page */
295 388284 : node->tbmres = tbmres = NULL;
296 388284 : continue;
297 : }
298 :
299 : /*
300 : * If we are using lossy info, we have to recheck the qual conditions
301 : * at every tuple.
302 : */
303 6337536 : if (tbmres->recheck)
304 : {
305 3124386 : econtext->ecxt_scantuple = slot;
306 3124386 : if (!ExecQualAndReset(node->bitmapqualorig, econtext))
307 : {
308 : /* Fails recheck, so drop it and loop back for another */
309 587326 : InstrCountFiltered2(node, 1);
310 587326 : ExecClearTuple(slot);
311 587326 : continue;
312 : }
313 : }
314 :
315 : /* OK to return this tuple */
316 5750210 : return slot;
317 : }
318 :
319 : /*
320 : * if we get here it means we are at the end of the scan..
321 : */
322 17348 : return ExecClearTuple(slot);
323 : }
324 :
325 : /*
326 : * BitmapDoneInitializingSharedState - Shared state is initialized
327 : *
328 : * By this time the leader has already populated the TBM and initialized the
329 : * shared state so wake up other processes.
330 : */
331 : static inline void
332 72 : BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
333 : {
334 72 : SpinLockAcquire(&pstate->mutex);
335 72 : pstate->state = BM_FINISHED;
336 72 : SpinLockRelease(&pstate->mutex);
337 72 : ConditionVariableBroadcast(&pstate->cv);
338 72 : }
339 :
340 : /*
341 : * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
342 : */
343 : static inline void
344 388552 : BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
345 : BlockNumber blockno)
346 : {
347 : #ifdef USE_PREFETCH
348 388552 : ParallelBitmapHeapState *pstate = node->pstate;
349 :
350 388552 : if (pstate == NULL)
351 : {
352 358444 : TBMIterator *prefetch_iterator = node->prefetch_iterator;
353 :
354 358444 : if (node->prefetch_pages > 0)
355 : {
356 : /* The main iterator has closed the distance by one page */
357 344290 : node->prefetch_pages--;
358 : }
359 14154 : else if (prefetch_iterator)
360 : {
361 : /* Do not let the prefetch iterator get behind the main one */
362 14154 : TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
363 :
364 14154 : if (tbmpre == NULL || tbmpre->blockno != blockno)
365 0 : elog(ERROR, "prefetch and main iterators are out of sync");
366 : }
367 358444 : return;
368 : }
369 :
370 30108 : if (node->prefetch_maximum > 0)
371 : {
372 30108 : TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
373 :
374 30108 : SpinLockAcquire(&pstate->mutex);
375 30108 : if (pstate->prefetch_pages > 0)
376 : {
377 30036 : pstate->prefetch_pages--;
378 30036 : SpinLockRelease(&pstate->mutex);
379 : }
380 : else
381 : {
382 : /* Release the mutex before iterating */
383 72 : SpinLockRelease(&pstate->mutex);
384 :
385 : /*
386 : * In case of shared mode, we can not ensure that the current
387 : * blockno of the main iterator and that of the prefetch iterator
388 : * are same. It's possible that whatever blockno we are
389 : * prefetching will be processed by another process. Therefore,
390 : * we don't validate the blockno here as we do in non-parallel
391 : * case.
392 : */
393 72 : if (prefetch_iterator)
394 72 : tbm_shared_iterate(prefetch_iterator);
395 : }
396 : }
397 : #endif /* USE_PREFETCH */
398 : }
399 :
400 : /*
401 : * BitmapAdjustPrefetchTarget - Adjust the prefetch target
402 : *
403 : * Increase prefetch target if it's not yet at the max. Note that
404 : * we will increase it to zero after fetching the very first
405 : * page/tuple, then to one after the second tuple is fetched, then
406 : * it doubles as later pages are fetched.
407 : */
408 : static inline void
409 382794 : BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
410 : {
411 : #ifdef USE_PREFETCH
412 382794 : ParallelBitmapHeapState *pstate = node->pstate;
413 :
414 382794 : if (pstate == NULL)
415 : {
416 352686 : if (node->prefetch_target >= node->prefetch_maximum)
417 : /* don't increase any further */ ;
418 13772 : else if (node->prefetch_target >= node->prefetch_maximum / 2)
419 220 : node->prefetch_target = node->prefetch_maximum;
420 13552 : else if (node->prefetch_target > 0)
421 0 : node->prefetch_target *= 2;
422 : else
423 13552 : node->prefetch_target++;
424 352686 : return;
425 : }
426 :
427 : /* Do an unlocked check first to save spinlock acquisitions. */
428 30108 : if (pstate->prefetch_target < node->prefetch_maximum)
429 : {
430 132 : SpinLockAcquire(&pstate->mutex);
431 132 : if (pstate->prefetch_target >= node->prefetch_maximum)
432 : /* don't increase any further */ ;
433 132 : else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
434 60 : pstate->prefetch_target = node->prefetch_maximum;
435 72 : else if (pstate->prefetch_target > 0)
436 0 : pstate->prefetch_target *= 2;
437 : else
438 72 : pstate->prefetch_target++;
439 132 : SpinLockRelease(&pstate->mutex);
440 : }
441 : #endif /* USE_PREFETCH */
442 : }
443 :
444 : /*
445 : * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
446 : */
447 : static inline void
448 6725820 : BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
449 : {
450 : #ifdef USE_PREFETCH
451 6725820 : ParallelBitmapHeapState *pstate = node->pstate;
452 :
453 6725820 : if (pstate == NULL)
454 : {
455 5501712 : TBMIterator *prefetch_iterator = node->prefetch_iterator;
456 :
457 5501712 : if (prefetch_iterator)
458 : {
459 5748712 : while (node->prefetch_pages < node->prefetch_target)
460 : {
461 357842 : TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
462 : bool skip_fetch;
463 :
464 357842 : if (tbmpre == NULL)
465 : {
466 : /* No more pages to prefetch */
467 13552 : tbm_end_iterate(prefetch_iterator);
468 13552 : node->prefetch_iterator = NULL;
469 13552 : break;
470 : }
471 344290 : node->prefetch_pages++;
472 :
473 : /*
474 : * If we expect not to have to actually read this heap page,
475 : * skip this prefetch call, but continue to run the prefetch
476 : * logic normally. (Would it be better not to increment
477 : * prefetch_pages?)
478 : */
479 755042 : skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
480 376012 : !tbmpre->recheck &&
481 31722 : VM_ALL_VISIBLE(node->ss.ss_currentRelation,
482 : tbmpre->blockno,
483 : &node->pvmbuffer));
484 :
485 344290 : if (!skip_fetch)
486 344236 : PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
487 : }
488 : }
489 :
490 5501712 : return;
491 : }
492 :
493 1224108 : if (pstate->prefetch_pages < pstate->prefetch_target)
494 : {
495 138756 : TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
496 :
497 138756 : if (prefetch_iterator)
498 : {
499 : while (1)
500 30036 : {
501 : TBMIterateResult *tbmpre;
502 58760 : bool do_prefetch = false;
503 : bool skip_fetch;
504 :
505 : /*
506 : * Recheck under the mutex. If some other process has already
507 : * done enough prefetching then we need not to do anything.
508 : */
509 58760 : SpinLockAcquire(&pstate->mutex);
510 58760 : if (pstate->prefetch_pages < pstate->prefetch_target)
511 : {
512 30112 : pstate->prefetch_pages++;
513 30112 : do_prefetch = true;
514 : }
515 58760 : SpinLockRelease(&pstate->mutex);
516 :
517 58760 : if (!do_prefetch)
518 28648 : return;
519 :
520 30112 : tbmpre = tbm_shared_iterate(prefetch_iterator);
521 30112 : if (tbmpre == NULL)
522 : {
523 : /* No more pages to prefetch */
524 76 : tbm_end_shared_iterate(prefetch_iterator);
525 76 : node->shared_prefetch_iterator = NULL;
526 76 : break;
527 : }
528 :
529 : /* As above, skip prefetch if we expect not to need page */
530 88116 : skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
531 54984 : !tbmpre->recheck &&
532 24948 : VM_ALL_VISIBLE(node->ss.ss_currentRelation,
533 : tbmpre->blockno,
534 : &node->pvmbuffer));
535 :
536 30036 : if (!skip_fetch)
537 9396 : PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
538 : }
539 : }
540 : }
541 : #endif /* USE_PREFETCH */
542 : }
543 :
544 : /*
545 : * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
546 : */
547 : static bool
548 0 : BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
549 : {
550 : ExprContext *econtext;
551 :
552 : /*
553 : * extract necessary information from index scan node
554 : */
555 0 : econtext = node->ss.ps.ps_ExprContext;
556 :
557 : /* Does the tuple meet the original qual conditions? */
558 0 : econtext->ecxt_scantuple = slot;
559 0 : return ExecQualAndReset(node->bitmapqualorig, econtext);
560 : }
561 :
562 : /* ----------------------------------------------------------------
563 : * ExecBitmapHeapScan(node)
564 : * ----------------------------------------------------------------
565 : */
566 : static TupleTableSlot *
567 5513940 : ExecBitmapHeapScan(PlanState *pstate)
568 : {
569 5513940 : BitmapHeapScanState *node = castNode(BitmapHeapScanState, pstate);
570 :
571 5513940 : return ExecScan(&node->ss,
572 : (ExecScanAccessMtd) BitmapHeapNext,
573 : (ExecScanRecheckMtd) BitmapHeapRecheck);
574 : }
575 :
576 : /* ----------------------------------------------------------------
577 : * ExecReScanBitmapHeapScan(node)
578 : * ----------------------------------------------------------------
579 : */
580 : void
581 4630 : ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
582 : {
583 4630 : PlanState *outerPlan = outerPlanState(node);
584 :
585 : /* rescan to release any page pin */
586 4630 : if (node->ss.ss_currentScanDesc)
587 4018 : table_rescan(node->ss.ss_currentScanDesc, NULL);
588 :
589 : /* release bitmaps and buffers if any */
590 4630 : if (node->tbmiterator)
591 3964 : tbm_end_iterate(node->tbmiterator);
592 4630 : if (node->prefetch_iterator)
593 1344 : tbm_end_iterate(node->prefetch_iterator);
594 4630 : if (node->shared_tbmiterator)
595 54 : tbm_end_shared_iterate(node->shared_tbmiterator);
596 4630 : if (node->shared_prefetch_iterator)
597 0 : tbm_end_shared_iterate(node->shared_prefetch_iterator);
598 4630 : if (node->tbm)
599 4018 : tbm_free(node->tbm);
600 4630 : if (node->pvmbuffer != InvalidBuffer)
601 54 : ReleaseBuffer(node->pvmbuffer);
602 4630 : node->tbm = NULL;
603 4630 : node->tbmiterator = NULL;
604 4630 : node->tbmres = NULL;
605 4630 : node->prefetch_iterator = NULL;
606 4630 : node->initialized = false;
607 4630 : node->shared_tbmiterator = NULL;
608 4630 : node->shared_prefetch_iterator = NULL;
609 4630 : node->pvmbuffer = InvalidBuffer;
610 :
611 4630 : ExecScanReScan(&node->ss);
612 :
613 : /*
614 : * if chgParam of subnode is not null then plan will be re-scanned by
615 : * first ExecProcNode.
616 : */
617 4630 : if (outerPlan->chgParam == NULL)
618 188 : ExecReScan(outerPlan);
619 4630 : }
620 :
621 : /* ----------------------------------------------------------------
622 : * ExecEndBitmapHeapScan
623 : * ----------------------------------------------------------------
624 : */
625 : void
626 18152 : ExecEndBitmapHeapScan(BitmapHeapScanState *node)
627 : {
628 : TableScanDesc scanDesc;
629 :
630 : /*
631 : * When ending a parallel worker, copy the statistics gathered by the
632 : * worker back into shared memory so that it can be picked up by the main
633 : * process to report in EXPLAIN ANALYZE.
634 : */
635 18152 : if (node->sinstrument != NULL && IsParallelWorker())
636 : {
637 : BitmapHeapScanInstrumentation *si;
638 :
639 : Assert(ParallelWorkerNumber <= node->sinstrument->num_workers);
640 0 : si = &node->sinstrument->sinstrument[ParallelWorkerNumber];
641 :
642 : /*
643 : * Here we accumulate the stats rather than performing memcpy on
644 : * node->stats into si. When a Gather/GatherMerge node finishes it
645 : * will perform planner shutdown on the workers. On rescan it will
646 : * spin up new workers which will have a new BitmapHeapScanState and
647 : * zeroed stats.
648 : */
649 0 : si->exact_pages += node->stats.exact_pages;
650 0 : si->lossy_pages += node->stats.lossy_pages;
651 : }
652 :
653 : /*
654 : * extract information from the node
655 : */
656 18152 : scanDesc = node->ss.ss_currentScanDesc;
657 :
658 : /*
659 : * close down subplans
660 : */
661 18152 : ExecEndNode(outerPlanState(node));
662 :
663 : /*
664 : * release bitmaps and buffers if any
665 : */
666 18152 : if (node->tbmiterator)
667 13244 : tbm_end_iterate(node->tbmiterator);
668 18152 : if (node->prefetch_iterator)
669 2366 : tbm_end_iterate(node->prefetch_iterator);
670 18152 : if (node->tbm)
671 13262 : tbm_free(node->tbm);
672 18152 : if (node->shared_tbmiterator)
673 288 : tbm_end_shared_iterate(node->shared_tbmiterator);
674 18152 : if (node->shared_prefetch_iterator)
675 266 : tbm_end_shared_iterate(node->shared_prefetch_iterator);
676 18152 : if (node->pvmbuffer != InvalidBuffer)
677 24 : ReleaseBuffer(node->pvmbuffer);
678 :
679 : /*
680 : * close heap scan
681 : */
682 18152 : if (scanDesc)
683 13532 : table_endscan(scanDesc);
684 :
685 18152 : }
686 :
687 : /* ----------------------------------------------------------------
688 : * ExecInitBitmapHeapScan
689 : *
690 : * Initializes the scan's state information.
691 : * ----------------------------------------------------------------
692 : */
693 : BitmapHeapScanState *
694 18218 : ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
695 : {
696 : BitmapHeapScanState *scanstate;
697 : Relation currentRelation;
698 :
699 : /* check for unsupported flags */
700 : Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
701 :
702 : /*
703 : * Assert caller didn't ask for an unsafe snapshot --- see comments at
704 : * head of file.
705 : */
706 : Assert(IsMVCCSnapshot(estate->es_snapshot));
707 :
708 : /*
709 : * create state structure
710 : */
711 18218 : scanstate = makeNode(BitmapHeapScanState);
712 18218 : scanstate->ss.ps.plan = (Plan *) node;
713 18218 : scanstate->ss.ps.state = estate;
714 18218 : scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
715 :
716 18218 : scanstate->tbm = NULL;
717 18218 : scanstate->tbmiterator = NULL;
718 18218 : scanstate->tbmres = NULL;
719 18218 : scanstate->pvmbuffer = InvalidBuffer;
720 :
721 : /* Zero the statistics counters */
722 18218 : memset(&scanstate->stats, 0, sizeof(BitmapHeapScanInstrumentation));
723 :
724 18218 : scanstate->prefetch_iterator = NULL;
725 18218 : scanstate->prefetch_pages = 0;
726 18218 : scanstate->prefetch_target = 0;
727 18218 : scanstate->initialized = false;
728 18218 : scanstate->shared_tbmiterator = NULL;
729 18218 : scanstate->shared_prefetch_iterator = NULL;
730 18218 : scanstate->pstate = NULL;
731 :
732 : /*
733 : * Miscellaneous initialization
734 : *
735 : * create expression context for node
736 : */
737 18218 : ExecAssignExprContext(estate, &scanstate->ss.ps);
738 :
739 : /*
740 : * open the scan relation
741 : */
742 18218 : currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
743 :
744 : /*
745 : * initialize child nodes
746 : */
747 18218 : outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
748 :
749 : /*
750 : * get the scan type from the relation descriptor.
751 : */
752 18218 : ExecInitScanTupleSlot(estate, &scanstate->ss,
753 : RelationGetDescr(currentRelation),
754 : table_slot_callbacks(currentRelation));
755 :
756 : /*
757 : * Initialize result type and projection.
758 : */
759 18218 : ExecInitResultTypeTL(&scanstate->ss.ps);
760 18218 : ExecAssignScanProjectionInfo(&scanstate->ss);
761 :
762 : /*
763 : * initialize child expressions
764 : */
765 18218 : scanstate->ss.ps.qual =
766 18218 : ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
767 18218 : scanstate->bitmapqualorig =
768 18218 : ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
769 :
770 : /*
771 : * Maximum number of prefetches for the tablespace if configured,
772 : * otherwise the current value of the effective_io_concurrency GUC.
773 : */
774 18218 : scanstate->prefetch_maximum =
775 18218 : get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
776 :
777 18218 : scanstate->ss.ss_currentRelation = currentRelation;
778 :
779 : /*
780 : * all done.
781 : */
782 18218 : return scanstate;
783 : }
784 :
785 : /*----------------
786 : * BitmapShouldInitializeSharedState
787 : *
788 : * The first process to come here and see the state to the BM_INITIAL
789 : * will become the leader for the parallel bitmap scan and will be
790 : * responsible for populating the TIDBitmap. The other processes will
791 : * be blocked by the condition variable until the leader wakes them up.
792 : * ---------------
793 : */
794 : static bool
795 342 : BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
796 : {
797 : SharedBitmapState state;
798 :
799 : while (1)
800 : {
801 342 : SpinLockAcquire(&pstate->mutex);
802 342 : state = pstate->state;
803 342 : if (pstate->state == BM_INITIAL)
804 72 : pstate->state = BM_INPROGRESS;
805 342 : SpinLockRelease(&pstate->mutex);
806 :
807 : /* Exit if bitmap is done, or if we're the leader. */
808 342 : if (state != BM_INPROGRESS)
809 342 : break;
810 :
811 : /* Wait for the leader to wake us up. */
812 0 : ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
813 : }
814 :
815 342 : ConditionVariableCancelSleep();
816 :
817 342 : return (state == BM_INITIAL);
818 : }
819 :
820 : /* ----------------------------------------------------------------
821 : * ExecBitmapHeapEstimate
822 : *
823 : * Compute the amount of space we'll need in the parallel
824 : * query DSM, and inform pcxt->estimator about our needs.
825 : * ----------------------------------------------------------------
826 : */
827 : void
828 18 : ExecBitmapHeapEstimate(BitmapHeapScanState *node,
829 : ParallelContext *pcxt)
830 : {
831 : Size size;
832 :
833 18 : size = MAXALIGN(sizeof(ParallelBitmapHeapState));
834 :
835 : /* account for instrumentation, if required */
836 18 : if (node->ss.ps.instrument && pcxt->nworkers > 0)
837 : {
838 0 : size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
839 0 : size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
840 : }
841 :
842 18 : shm_toc_estimate_chunk(&pcxt->estimator, size);
843 18 : shm_toc_estimate_keys(&pcxt->estimator, 1);
844 18 : }
845 :
846 : /* ----------------------------------------------------------------
847 : * ExecBitmapHeapInitializeDSM
848 : *
849 : * Set up a parallel bitmap heap scan descriptor.
850 : * ----------------------------------------------------------------
851 : */
852 : void
853 18 : ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
854 : ParallelContext *pcxt)
855 : {
856 : ParallelBitmapHeapState *pstate;
857 18 : SharedBitmapHeapInstrumentation *sinstrument = NULL;
858 18 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
859 : char *ptr;
860 : Size size;
861 :
862 : /* If there's no DSA, there are no workers; initialize nothing. */
863 18 : if (dsa == NULL)
864 0 : return;
865 :
866 18 : size = MAXALIGN(sizeof(ParallelBitmapHeapState));
867 18 : if (node->ss.ps.instrument && pcxt->nworkers > 0)
868 : {
869 0 : size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
870 0 : size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
871 : }
872 :
873 18 : ptr = shm_toc_allocate(pcxt->toc, size);
874 18 : pstate = (ParallelBitmapHeapState *) ptr;
875 18 : ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
876 18 : if (node->ss.ps.instrument && pcxt->nworkers > 0)
877 0 : sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
878 :
879 18 : pstate->tbmiterator = 0;
880 18 : pstate->prefetch_iterator = 0;
881 :
882 : /* Initialize the mutex */
883 18 : SpinLockInit(&pstate->mutex);
884 18 : pstate->prefetch_pages = 0;
885 18 : pstate->prefetch_target = 0;
886 18 : pstate->state = BM_INITIAL;
887 :
888 18 : ConditionVariableInit(&pstate->cv);
889 :
890 18 : if (sinstrument)
891 : {
892 0 : sinstrument->num_workers = pcxt->nworkers;
893 :
894 : /* ensure any unfilled slots will contain zeroes */
895 0 : memset(sinstrument->sinstrument, 0,
896 0 : pcxt->nworkers * sizeof(BitmapHeapScanInstrumentation));
897 : }
898 :
899 18 : shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
900 18 : node->pstate = pstate;
901 18 : node->sinstrument = sinstrument;
902 : }
903 :
904 : /* ----------------------------------------------------------------
905 : * ExecBitmapHeapReInitializeDSM
906 : *
907 : * Reset shared state before beginning a fresh scan.
908 : * ----------------------------------------------------------------
909 : */
910 : void
911 54 : ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
912 : ParallelContext *pcxt)
913 : {
914 54 : ParallelBitmapHeapState *pstate = node->pstate;
915 54 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
916 :
917 : /* If there's no DSA, there are no workers; do nothing. */
918 54 : if (dsa == NULL)
919 0 : return;
920 :
921 54 : pstate->state = BM_INITIAL;
922 :
923 54 : if (DsaPointerIsValid(pstate->tbmiterator))
924 54 : tbm_free_shared_area(dsa, pstate->tbmiterator);
925 :
926 54 : if (DsaPointerIsValid(pstate->prefetch_iterator))
927 54 : tbm_free_shared_area(dsa, pstate->prefetch_iterator);
928 :
929 54 : pstate->tbmiterator = InvalidDsaPointer;
930 54 : pstate->prefetch_iterator = InvalidDsaPointer;
931 : }
932 :
933 : /* ----------------------------------------------------------------
934 : * ExecBitmapHeapInitializeWorker
935 : *
936 : * Copy relevant information from TOC into planstate.
937 : * ----------------------------------------------------------------
938 : */
939 : void
940 270 : ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
941 : ParallelWorkerContext *pwcxt)
942 : {
943 : char *ptr;
944 :
945 : Assert(node->ss.ps.state->es_query_dsa != NULL);
946 :
947 270 : ptr = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
948 :
949 270 : node->pstate = (ParallelBitmapHeapState *) ptr;
950 270 : ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
951 :
952 270 : if (node->ss.ps.instrument)
953 0 : node->sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
954 270 : }
955 :
956 : /* ----------------------------------------------------------------
957 : * ExecBitmapHeapRetrieveInstrumentation
958 : *
959 : * Transfer bitmap heap scan statistics from DSM to private memory.
960 : * ----------------------------------------------------------------
961 : */
962 : void
963 0 : ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node)
964 : {
965 0 : SharedBitmapHeapInstrumentation *sinstrument = node->sinstrument;
966 : Size size;
967 :
968 0 : if (sinstrument == NULL)
969 0 : return;
970 :
971 0 : size = offsetof(SharedBitmapHeapInstrumentation, sinstrument)
972 0 : + sinstrument->num_workers * sizeof(BitmapHeapScanInstrumentation);
973 :
974 0 : node->sinstrument = palloc(size);
975 0 : memcpy(node->sinstrument, sinstrument, size);
976 : }
|