/*-------------------------------------------------------------------------
 *
 * nodeIndexonlyscan.c
 *	  Routines to support index-only scans
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeIndexonlyscan.c
 *
 *-------------------------------------------------------------------------
 */
/*
 * INTERFACE ROUTINES
 *		ExecIndexOnlyScan			scans an index
 *		IndexOnlyNext				retrieve next tuple
 *		ExecInitIndexOnlyScan		creates and initializes state info.
 *		ExecReScanIndexOnlyScan		rescans the indexed relation.
 *		ExecEndIndexOnlyScan		releases all storage.
 *		ExecIndexOnlyMarkPos		marks scan position.
 *		ExecIndexOnlyRestrPos		restores scan position.
 *		ExecIndexOnlyScanEstimate	estimates DSM space needed for
 *									parallel index-only scan
 *		ExecIndexOnlyScanInitializeDSM	initialize DSM for parallel
 *										index-only scan
 *		ExecIndexOnlyScanReInitializeDSM	reinitialize DSM for fresh scan
 *		ExecIndexOnlyScanInitializeWorker	attach to DSM info in parallel worker
 *		ExecIndexOnlyScanRetrieveInstrumentation	transfer instrumentation
 *													from DSM to private memory
 */
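/*
 * Illustrative example (not part of this file's code): a query such as
 *
 *		SELECT count(*) FROM some_table WHERE indexed_col < 42;
 *
 * can be executed as an Index Only Scan when every column the query needs
 * is available from the index itself.  The routines below drive that scan,
 * consulting the visibility map to skip most heap fetches.
 */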
#include "postgres.h"

#include "access/genam.h"
#include "access/relscan.h"
#include "access/tableam.h"
#include "access/tupdesc.h"
#include "access/visibilitymap.h"
#include "catalog/pg_type.h"
#include "executor/executor.h"
#include "executor/nodeIndexonlyscan.h"
#include "executor/nodeIndexscan.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/predicate.h"
#include "utils/builtins.h"
#include "utils/rel.h"


static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node);
static void StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
							IndexTuple itup, TupleDesc itupdesc);


/* ----------------------------------------------------------------
 *		IndexOnlyNext
 *
 *		Retrieve a tuple from the IndexOnlyScan node's index.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
IndexOnlyNext(IndexOnlyScanState *node)
{
	EState	   *estate;
	ExprContext *econtext;
	ScanDirection direction;
	IndexScanDesc scandesc;
	TupleTableSlot *slot;
	ItemPointer tid;

	/*
	 * extract necessary information from index scan node
	 */
	estate = node->ss.ps.state;

	/*
	 * Determine which direction to scan the index in based on the plan's
	 * scan direction and the current direction of execution.
	 */
	direction = ScanDirectionCombine(estate->es_direction,
									 ((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir);
	scandesc = node->ioss_ScanDesc;
	econtext = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;

	if (scandesc == NULL)
	{
		/*
		 * We reach here if the index-only scan is not parallel, or if we're
		 * serially executing an index-only scan that was planned to be
		 * parallel.
		 */
		scandesc = index_beginscan(node->ss.ss_currentRelation,
								   node->ioss_RelationDesc,
								   estate->es_snapshot,
								   &node->ioss_Instrument,
								   node->ioss_NumScanKeys,
								   node->ioss_NumOrderByKeys);

		node->ioss_ScanDesc = scandesc;

		/* Set it up for index-only scan */
		node->ioss_ScanDesc->xs_want_itup = true;
		node->ioss_VMBuffer = InvalidBuffer;

		/*
		 * If no run-time keys to calculate or they are ready, go ahead and
		 * pass the scankeys to the index AM.
		 */
		if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
			index_rescan(scandesc,
						 node->ioss_ScanKeys,
						 node->ioss_NumScanKeys,
						 node->ioss_OrderByKeys,
						 node->ioss_NumOrderByKeys);
	}
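
	/*
	 * (In the parallel case, the scan descriptor was instead set up earlier,
	 * by ExecIndexOnlyScanInitializeDSM or ExecIndexOnlyScanInitializeWorker
	 * below.)
	 */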

	/*
	 * OK, now that we have what we need, fetch the next tuple.
	 */
	while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
	{
		bool		tuple_from_heap = false;

		CHECK_FOR_INTERRUPTS();

		/*
		 * We can skip the heap fetch if the TID references a heap page on
		 * which all tuples are known visible to everybody.  In any case,
		 * we'll use the index tuple not the heap tuple as the data source.
		 *
		 * Note on Memory Ordering Effects: visibilitymap_get_status does not
		 * lock the visibility map buffer, and therefore the result we read
		 * here could be slightly stale.  However, it can't be stale enough
		 * to matter.
		 *
		 * We need to detect clearing a VM bit due to an insert right away,
		 * because the tuple is present in the index page but not visible.
		 * The reading of the TID by this scan (using a shared lock on the
		 * index buffer) is serialized with the insert of the TID into the
		 * index (using an exclusive lock on the index buffer).  Because the
		 * VM bit is cleared before updating the index, and locking/unlocking
		 * of the index page acts as a full memory barrier, we are sure to
		 * see the cleared bit if we see a recently-inserted TID.
		 *
		 * Deletes do not update the index page (only VACUUM will clear out
		 * the TID), so the clearing of the VM bit by a delete is not
		 * serialized with this test below, and we may see a value that is
		 * significantly stale.  However, we don't care about the delete
		 * right away, because the tuple is still visible until the deleting
		 * transaction commits or the statement ends (if it's our
		 * transaction).  In either case, the lock on the VM buffer will have
		 * been released (acting as a write barrier) after clearing the bit.
		 * And for us to have a snapshot that includes the deleting
		 * transaction (making the tuple invisible), we must have acquired
		 * ProcArrayLock after that time, acting as a read barrier.
		 *
		 * It's worth going through this complexity to avoid needing to lock
		 * the VM buffer, which could cause significant contention.
		 */
		if (!VM_ALL_VISIBLE(scandesc->heapRelation,
							ItemPointerGetBlockNumber(tid),
							&node->ioss_VMBuffer))
		{
			/*
			 * Rats, we have to visit the heap to check visibility.
			 */
			InstrCountTuples2(node, 1);
			if (!index_fetch_heap(scandesc, node->ioss_TableSlot))
				continue;		/* no visible tuple, try next index entry */

			ExecClearTuple(node->ioss_TableSlot);

			/*
			 * Only MVCC snapshots are supported here, so there should be no
			 * need to keep following the HOT chain once a visible entry has
			 * been found.  If we did want to allow that, we'd need to keep
			 * more state to remember not to call index_getnext_tid next
			 * time.
			 */
			if (scandesc->xs_heap_continue)
				elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");

			/*
			 * Note: at this point we are holding a pin on the heap page, as
			 * recorded in scandesc->xs_cbuf.  We could release that pin now,
			 * but it's not clear whether it's a win to do so.  The next
			 * index entry might require a visit to the same heap page.
			 */

			tuple_from_heap = true;
		}

		/*
		 * Fill the scan tuple slot with data from the index.  This might be
		 * provided in either HeapTuple or IndexTuple format.  Conceivably an
		 * index AM might fill both fields, in which case we prefer the heap
		 * format, since it's probably a bit cheaper to fill a slot from.
		 */
		if (scandesc->xs_hitup)
		{
			/*
			 * We don't take the trouble to verify that the provided tuple
			 * has exactly the slot's format, but it seems worth doing a
			 * quick check on the number of fields.
			 */
			Assert(slot->tts_tupleDescriptor->natts ==
				   scandesc->xs_hitupdesc->natts);
			ExecForceStoreHeapTuple(scandesc->xs_hitup, slot, false);
		}
		else if (scandesc->xs_itup)
			StoreIndexTuple(node, slot, scandesc->xs_itup, scandesc->xs_itupdesc);
		else
			elog(ERROR, "no data returned for index-only scan");

		/*
		 * If the index was lossy, we have to recheck the index quals.
		 */
		if (scandesc->xs_recheck)
		{
			econtext->ecxt_scantuple = slot;
			if (!ExecQualAndReset(node->recheckqual, econtext))
			{
				/* Fails recheck, so drop it and loop back for another */
				InstrCountFiltered2(node, 1);
				continue;
			}
		}

		/*
		 * We don't currently support rechecking ORDER BY distances.  (In
		 * principle, if the index can support retrieval of the originally
		 * indexed value, it should be able to produce an exact distance
		 * calculation too.  So it's not clear that adding code here for
		 * recheck/re-sort would be worth the trouble.  But we should at
		 * least throw an error if someone tries it.)
		 */
		if (scandesc->numberOfOrderBys > 0 && scandesc->xs_recheckorderby)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("lossy distance functions are not supported in index-only scans")));

		/*
		 * If we didn't access the heap, then we'll need to take a predicate
		 * lock explicitly, as if we had.  For now we do that at page level.
		 */
		if (!tuple_from_heap)
			PredicateLockPage(scandesc->heapRelation,
							  ItemPointerGetBlockNumber(tid),
							  estate->es_snapshot);

		return slot;
	}

	/*
	 * If we get here, it means the index scan failed, so we are at the end
	 * of the scan.
	 */
	return ExecClearTuple(slot);
}

/*
 * StoreIndexTuple
 *		Fill the slot with data from the index tuple.
 *
 * At some point this might be generally-useful functionality, but
 * right now we don't need it elsewhere.
 */
static void
StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
				IndexTuple itup, TupleDesc itupdesc)
{
	/*
	 * Note: we must use the tupdesc supplied by the AM in index_deform_tuple,
	 * not the slot's tupdesc, in case the latter has different datatypes
	 * (this happens for btree name_ops in particular).  They'd better have
	 * the same number of columns though, as well as being datatype-compatible
	 * which is something we can't so easily check.
	 */
	Assert(slot->tts_tupleDescriptor->natts == itupdesc->natts);

	ExecClearTuple(slot);
	index_deform_tuple(itup, itupdesc, slot->tts_values, slot->tts_isnull);

	/*
	 * Copy all name columns stored as cstrings back into a NAMEDATALEN-byte
	 * allocation.  We mark this branch as unlikely since "name" is generally
	 * used only for the system catalogs, so this would have to be a user
	 * query running on those or on some other user table with an index on a
	 * name column.
	 */
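	/*
	 * (Illustrative: this path is taken, e.g., for an index-only scan of
	 * pg_class via an index on its "relname" column, whose name_ops keys are
	 * stored as cstrings.)
	 */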
	if (unlikely(node->ioss_NameCStringAttNums != NULL))
	{
		int			attcount = node->ioss_NameCStringCount;

		for (int idx = 0; idx < attcount; idx++)
		{
			int			attnum = node->ioss_NameCStringAttNums[idx];
			Name		name;

			/* skip null Datums */
			if (slot->tts_isnull[attnum])
				continue;

			/* allocate NAMEDATALEN bytes and copy the datum into that memory */
			name = (Name) MemoryContextAlloc(node->ss.ps.ps_ExprContext->ecxt_per_tuple_memory,
											 NAMEDATALEN);

			/* use namestrcpy to zero-pad all trailing bytes */
			namestrcpy(name, DatumGetCString(slot->tts_values[attnum]));
			slot->tts_values[attnum] = NameGetDatum(name);
		}
	}

	ExecStoreVirtualTuple(slot);
}

/*
 * IndexOnlyRecheck -- access method routine to recheck a tuple in EvalPlanQual
 *
 * This can't really happen, since an index can't supply CTID which would
 * be necessary data for any potential EvalPlanQual target relation.  If it
 * did happen, the EPQ code would pass us the wrong data, namely a heap
 * tuple not an index tuple.  So throw an error.
 */
static bool
IndexOnlyRecheck(IndexOnlyScanState *node, TupleTableSlot *slot)
{
	elog(ERROR, "EvalPlanQual recheck is not supported in index-only scans");
	return false;				/* keep compiler quiet */
}

/* ----------------------------------------------------------------
 *		ExecIndexOnlyScan(node)
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecIndexOnlyScan(PlanState *pstate)
{
	IndexOnlyScanState *node = castNode(IndexOnlyScanState, pstate);

	/*
	 * If we have runtime keys and they've not already been set up, do it now.
	 */
	if (node->ioss_NumRuntimeKeys != 0 && !node->ioss_RuntimeKeysReady)
		ExecReScan((PlanState *) node);

	return ExecScan(&node->ss,
					(ExecScanAccessMtd) IndexOnlyNext,
					(ExecScanRecheckMtd) IndexOnlyRecheck);
}

/* ----------------------------------------------------------------
 *		ExecReScanIndexOnlyScan(node)
 *
 *		Recalculates the values of any scan keys whose value depends on
 *		information known at runtime, then rescans the indexed relation.
 *
 *		Updating the scan key was formerly done separately in
 *		ExecUpdateIndexScanKeys.  Integrating it into ReScan makes
 *		rescans of indices and relations/general streams more uniform.
 * ----------------------------------------------------------------
 */
void
ExecReScanIndexOnlyScan(IndexOnlyScanState *node)
{
	/*
	 * If we are doing runtime key calculations (ie, any of the index key
	 * values weren't simple Consts), compute the new key values.  But first,
	 * reset the context so we don't leak memory as each outer tuple is
	 * scanned.  Note this assumes that we will recalculate *all* runtime
	 * keys on each call.
	 */
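	/*
	 * (A typical case needing runtime keys is an index-only scan on the
	 * inner side of a nestloop join, e.g. a hypothetical qual such as
	 * "inner_tab.indexed_col = outer_tab.col": the comparison value changes
	 * with every outer tuple, so the scan key must be recomputed here.)
	 */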
	if (node->ioss_NumRuntimeKeys != 0)
	{
		ExprContext *econtext = node->ioss_RuntimeContext;

		ResetExprContext(econtext);
		ExecIndexEvalRuntimeKeys(econtext,
								 node->ioss_RuntimeKeys,
								 node->ioss_NumRuntimeKeys);
	}
	node->ioss_RuntimeKeysReady = true;

	/* reset index scan */
	if (node->ioss_ScanDesc)
		index_rescan(node->ioss_ScanDesc,
					 node->ioss_ScanKeys, node->ioss_NumScanKeys,
					 node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);

	ExecScanReScan(&node->ss);
}


/* ----------------------------------------------------------------
 *		ExecEndIndexOnlyScan
 * ----------------------------------------------------------------
 */
void
ExecEndIndexOnlyScan(IndexOnlyScanState *node)
{
	Relation	indexRelationDesc;
	IndexScanDesc indexScanDesc;

	/*
	 * extract information from the node
	 */
	indexRelationDesc = node->ioss_RelationDesc;
	indexScanDesc = node->ioss_ScanDesc;

	/* Release VM buffer pin, if any. */
	if (node->ioss_VMBuffer != InvalidBuffer)
	{
		ReleaseBuffer(node->ioss_VMBuffer);
		node->ioss_VMBuffer = InvalidBuffer;
	}

	/*
	 * When ending a parallel worker, copy the statistics gathered by the
	 * worker back into shared memory so that they can be picked up by the
	 * main process to report in EXPLAIN ANALYZE.
	 */
	if (node->ioss_SharedInfo != NULL && IsParallelWorker())
	{
		IndexScanInstrumentation *winstrument;

		Assert(ParallelWorkerNumber <= node->ioss_SharedInfo->num_workers);
		winstrument = &node->ioss_SharedInfo->winstrument[ParallelWorkerNumber];

		/*
		 * We have to accumulate the stats rather than performing a memcpy.
		 * When a Gather/GatherMerge node finishes, it shuts down the plan in
		 * the workers.  On rescan it will spin up new workers, which will
		 * have a new IndexOnlyScanState and zeroed stats.
		 */
		winstrument->nsearches += node->ioss_Instrument.nsearches;
	}

	/*
	 * close the index relation (no-op if we didn't open it)
	 */
	if (indexScanDesc)
		index_endscan(indexScanDesc);
	if (indexRelationDesc)
		index_close(indexRelationDesc, NoLock);
}

/* ----------------------------------------------------------------
 *		ExecIndexOnlyMarkPos
 *
 * Note: we assume that no caller attempts to set a mark before having read
 * at least one tuple.  Otherwise, ioss_ScanDesc might still be NULL.
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyMarkPos(IndexOnlyScanState *node)
{
	EState	   *estate = node->ss.ps.state;
	EPQState   *epqstate = estate->es_epq_active;

	if (epqstate != NULL)
	{
		/*
		 * We are inside an EvalPlanQual recheck.  If a test tuple exists for
		 * this relation, then we shouldn't access the index at all.  We
		 * would instead need to save, and later restore, the state of the
		 * relsubs_done flag, so that re-fetching the test tuple is possible.
		 * However, given the assumption that no caller sets a mark at the
		 * start of the scan, we can only get here with relsubs_done[i]
		 * already set, and so no state need be saved.
		 */
		Index		scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;

		Assert(scanrelid > 0);
		if (epqstate->relsubs_slot[scanrelid - 1] != NULL ||
			epqstate->relsubs_rowmark[scanrelid - 1] != NULL)
		{
			/* Verify the claim above */
			if (!epqstate->relsubs_done[scanrelid - 1])
				elog(ERROR, "unexpected ExecIndexOnlyMarkPos call in EPQ recheck");
			return;
		}
	}

	index_markpos(node->ioss_ScanDesc);
}

/* ----------------------------------------------------------------
 *		ExecIndexOnlyRestrPos
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyRestrPos(IndexOnlyScanState *node)
{
	EState	   *estate = node->ss.ps.state;
	EPQState   *epqstate = estate->es_epq_active;

	if (epqstate != NULL)
	{
		/* See comments in ExecIndexOnlyMarkPos */
		Index		scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;

		Assert(scanrelid > 0);
		if (epqstate->relsubs_slot[scanrelid - 1] != NULL ||
			epqstate->relsubs_rowmark[scanrelid - 1] != NULL)
		{
			/* Verify the claim above */
			if (!epqstate->relsubs_done[scanrelid - 1])
				elog(ERROR, "unexpected ExecIndexOnlyRestrPos call in EPQ recheck");
			return;
		}
	}

	index_restrpos(node->ioss_ScanDesc);
}

/* ----------------------------------------------------------------
 *		ExecInitIndexOnlyScan
 *
 *		Initializes the index scan's state information, creates
 *		scan keys, and opens the base and index relations.
 *
 *		Note: index scans have 2 sets of state information because
 *			  we have to keep track of the base relation and the
 *			  index relation.
 * ----------------------------------------------------------------
 */
IndexOnlyScanState *
ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
{
	IndexOnlyScanState *indexstate;
	Relation	currentRelation;
	Relation	indexRelation;
	LOCKMODE	lockmode;
	TupleDesc	tupDesc;
	int			indnkeyatts;
	int			namecount;

	/*
	 * create state structure
	 */
	indexstate = makeNode(IndexOnlyScanState);
	indexstate->ss.ps.plan = (Plan *) node;
	indexstate->ss.ps.state = estate;
	indexstate->ss.ps.ExecProcNode = ExecIndexOnlyScan;

	/*
	 * Miscellaneous initialization
	 *
	 * create expression context for node
	 */
	ExecAssignExprContext(estate, &indexstate->ss.ps);

	/*
	 * open the scan relation
	 */
	currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);

	indexstate->ss.ss_currentRelation = currentRelation;
	indexstate->ss.ss_currentScanDesc = NULL;	/* no heap scan here */

	/*
	 * Build the scan tuple type using the indextlist generated by the
	 * planner.  We use this, rather than the index's physical tuple
	 * descriptor, because the latter contains storage column types not the
	 * types of the original datums.  (It's the AM's responsibility to return
	 * suitable data anyway.)
	 */
	tupDesc = ExecTypeFromTL(node->indextlist);
	ExecInitScanTupleSlot(estate, &indexstate->ss, tupDesc,
						  &TTSOpsVirtual);
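
	/*
	 * (For example, for a hypothetical index created with
	 * "CREATE INDEX ON t (a, lower(b))", the indextlist describes the
	 * original types of "a" and "lower(b)", which is what the scan tuple
	 * slot must match, not the on-disk key representation.)
	 */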

	/*
	 * We need another slot, in a format that's suitable for the table AM,
	 * for when we need to fetch a tuple from the table for rechecking
	 * visibility.
	 */
	indexstate->ioss_TableSlot =
		ExecAllocTableSlot(&estate->es_tupleTable,
						   RelationGetDescr(currentRelation),
						   table_slot_callbacks(currentRelation));

	/*
	 * Initialize result type and projection info.  The node's targetlist
	 * will contain Vars with varno = INDEX_VAR, referencing the scan tuple.
	 */
	ExecInitResultTypeTL(&indexstate->ss.ps);
	ExecAssignScanProjectionInfoWithVarno(&indexstate->ss, INDEX_VAR);

	/*
	 * initialize child expressions
	 *
	 * Note: we don't initialize all of the indexorderby expression, only the
	 * sub-parts corresponding to runtime keys (see below).
	 */
	indexstate->ss.ps.qual =
		ExecInitQual(node->scan.plan.qual, (PlanState *) indexstate);
	indexstate->recheckqual =
		ExecInitQual(node->recheckqual, (PlanState *) indexstate);

	/*
	 * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
	 * here.  This allows an index-advisor plugin to EXPLAIN a plan containing
	 * references to nonexistent indexes.
	 */
	if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
		return indexstate;

	/* Open the index relation. */
	lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
	indexRelation = index_open(node->indexid, lockmode);
	indexstate->ioss_RelationDesc = indexRelation;

	/*
	 * Initialize index-specific scan state
	 */
	indexstate->ioss_RuntimeKeysReady = false;
	indexstate->ioss_RuntimeKeys = NULL;
	indexstate->ioss_NumRuntimeKeys = 0;

	/*
	 * build the index scan keys from the index qualification
	 */
	ExecIndexBuildScanKeys((PlanState *) indexstate,
						   indexRelation,
						   node->indexqual,
						   false,
						   &indexstate->ioss_ScanKeys,
						   &indexstate->ioss_NumScanKeys,
						   &indexstate->ioss_RuntimeKeys,
						   &indexstate->ioss_NumRuntimeKeys,
						   NULL,	/* no ArrayKeys */
						   NULL);

	/*
	 * any ORDER BY exprs have to be turned into scankeys in the same way
	 */
	ExecIndexBuildScanKeys((PlanState *) indexstate,
						   indexRelation,
						   node->indexorderby,
						   true,
						   &indexstate->ioss_OrderByKeys,
						   &indexstate->ioss_NumOrderByKeys,
						   &indexstate->ioss_RuntimeKeys,
						   &indexstate->ioss_NumRuntimeKeys,
						   NULL,	/* no ArrayKeys */
						   NULL);

	/*
	 * If we have runtime keys, we need an ExprContext to evaluate them.  The
	 * node's standard context won't do because we want to reset that context
	 * for every tuple.  So, build another context just like the other one...
	 * -tgl 7/11/00
	 */
	if (indexstate->ioss_NumRuntimeKeys != 0)
	{
		ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;

		ExecAssignExprContext(estate, &indexstate->ss.ps);
		indexstate->ioss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
		indexstate->ss.ps.ps_ExprContext = stdecontext;
	}
	else
	{
		indexstate->ioss_RuntimeContext = NULL;
	}

	indexstate->ioss_NameCStringAttNums = NULL;
	indnkeyatts = indexRelation->rd_index->indnkeyatts;
	namecount = 0;

	/*
	 * The "name" type for btree uses text_ops, which results in storing
	 * cstrings in the indexed keys rather than names.  Here we detect that
	 * in a generic way, in case other index AMs want to do the same
	 * optimization.  Check for opclasses with an opcintype of NAMEOID and an
	 * index tuple descriptor with CSTRINGOID.  If any of these are found,
	 * create an array marking the index attribute number of each of them.
	 * StoreIndexTuple() handles copying the name Datums into a
	 * NAMEDATALEN-byte allocation.
	 */

	/* First, count the number of such index keys */
	for (int attnum = 0; attnum < indnkeyatts; attnum++)
	{
		if (TupleDescAttr(indexRelation->rd_att, attnum)->atttypid == CSTRINGOID &&
			indexRelation->rd_opcintype[attnum] == NAMEOID)
			namecount++;
	}

	if (namecount > 0)
	{
		int			idx = 0;

		/*
		 * Now create an array to mark the attribute numbers of the keys that
		 * need to be converted from cstring to name.
		 */
		indexstate->ioss_NameCStringAttNums = (AttrNumber *)
			palloc(sizeof(AttrNumber) * namecount);

		for (int attnum = 0; attnum < indnkeyatts; attnum++)
		{
			if (TupleDescAttr(indexRelation->rd_att, attnum)->atttypid == CSTRINGOID &&
				indexRelation->rd_opcintype[attnum] == NAMEOID)
				indexstate->ioss_NameCStringAttNums[idx++] = (AttrNumber) attnum;
		}
	}

	indexstate->ioss_NameCStringCount = namecount;

	/*
	 * all done.
	 */
	return indexstate;
}

/* ----------------------------------------------------------------
 *						Parallel Index-only Scan Support
 * ----------------------------------------------------------------
 */

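/*
 * A sketch of the expected call sequence (assuming a typical Gather plan):
 *
 *		leader:  ExecIndexOnlyScanEstimate()			size the DSM request
 *		leader:  ExecIndexOnlyScanInitializeDSM()		fill shared scan state
 *		workers: ExecIndexOnlyScanInitializeWorker()	attach to shared state
 *		leader:  ExecIndexOnlyScanReInitializeDSM()		only before a rescan
 */
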
/* ----------------------------------------------------------------
 *		ExecIndexOnlyScanEstimate
 *
 *		Compute the amount of space we'll need in the parallel
 *		query DSM, and inform pcxt->estimator about our needs.
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyScanEstimate(IndexOnlyScanState *node,
						  ParallelContext *pcxt)
{
	EState	   *estate = node->ss.ps.state;
	bool		instrument = (node->ss.ps.instrument != NULL);
	bool		parallel_aware = node->ss.ps.plan->parallel_aware;

	if (!instrument && !parallel_aware)
	{
		/* No DSM required by the scan */
		return;
	}

	node->ioss_PscanLen = index_parallelscan_estimate(node->ioss_RelationDesc,
													  node->ioss_NumScanKeys,
													  node->ioss_NumOrderByKeys,
													  estate->es_snapshot,
													  instrument, parallel_aware,
													  pcxt->nworkers);
	shm_toc_estimate_chunk(&pcxt->estimator, node->ioss_PscanLen);
	shm_toc_estimate_keys(&pcxt->estimator, 1);
}

/* ----------------------------------------------------------------
 *		ExecIndexOnlyScanInitializeDSM
 *
 *		Set up a parallel index-only scan descriptor.
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node,
							   ParallelContext *pcxt)
{
	EState	   *estate = node->ss.ps.state;
	ParallelIndexScanDesc piscan;
	bool		instrument = node->ss.ps.instrument != NULL;
	bool		parallel_aware = node->ss.ps.plan->parallel_aware;

	if (!instrument && !parallel_aware)
	{
		/* No DSM required by the scan */
		return;
	}

	piscan = shm_toc_allocate(pcxt->toc, node->ioss_PscanLen);
	index_parallelscan_initialize(node->ss.ss_currentRelation,
								  node->ioss_RelationDesc,
								  estate->es_snapshot,
								  instrument, parallel_aware, pcxt->nworkers,
								  &node->ioss_SharedInfo, piscan);
	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan);

	if (!parallel_aware)
	{
		/* Only here to initialize SharedInfo in DSM */
		return;
	}

	node->ioss_ScanDesc =
		index_beginscan_parallel(node->ss.ss_currentRelation,
								 node->ioss_RelationDesc,
								 &node->ioss_Instrument,
								 node->ioss_NumScanKeys,
								 node->ioss_NumOrderByKeys,
								 piscan);
	node->ioss_ScanDesc->xs_want_itup = true;
	node->ioss_VMBuffer = InvalidBuffer;

	/*
	 * If no run-time keys to calculate or they are ready, go ahead and pass
	 * the scankeys to the index AM.
	 */
	if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
		index_rescan(node->ioss_ScanDesc,
					 node->ioss_ScanKeys, node->ioss_NumScanKeys,
					 node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
}

/* ----------------------------------------------------------------
 *		ExecIndexOnlyScanReInitializeDSM
 *
 *		Reset shared state before beginning a fresh scan.
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyScanReInitializeDSM(IndexOnlyScanState *node,
								 ParallelContext *pcxt)
{
	Assert(node->ss.ps.plan->parallel_aware);
	index_parallelrescan(node->ioss_ScanDesc);
}

/* ----------------------------------------------------------------
 *		ExecIndexOnlyScanInitializeWorker
 *
 *		Copy relevant information from TOC into planstate.
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node,
								  ParallelWorkerContext *pwcxt)
{
	ParallelIndexScanDesc piscan;
	bool		instrument = node->ss.ps.instrument != NULL;
	bool		parallel_aware = node->ss.ps.plan->parallel_aware;

	if (!instrument && !parallel_aware)
	{
		/* No DSM required by the scan */
		return;
	}

	piscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);

	if (instrument)
		node->ioss_SharedInfo = (SharedIndexScanInstrumentation *)
			OffsetToPointer(piscan, piscan->ps_offset_ins);

	if (!parallel_aware)
	{
		/* Only here to set up worker node's SharedInfo */
		return;
	}

	node->ioss_ScanDesc =
		index_beginscan_parallel(node->ss.ss_currentRelation,
								 node->ioss_RelationDesc,
								 &node->ioss_Instrument,
								 node->ioss_NumScanKeys,
								 node->ioss_NumOrderByKeys,
								 piscan);
	node->ioss_ScanDesc->xs_want_itup = true;

	/*
	 * If no run-time keys to calculate or they are ready, go ahead and pass
	 * the scankeys to the index AM.
	 */
	if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
		index_rescan(node->ioss_ScanDesc,
					 node->ioss_ScanKeys, node->ioss_NumScanKeys,
					 node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
}

/* ----------------------------------------------------------------
 *		ExecIndexOnlyScanRetrieveInstrumentation
 *
 *		Transfer index-only scan statistics from DSM to private memory.
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyScanRetrieveInstrumentation(IndexOnlyScanState *node)
{
	SharedIndexScanInstrumentation *SharedInfo = node->ioss_SharedInfo;
	size_t		size;

	if (SharedInfo == NULL)
		return;

	/* Create a copy of SharedInfo in backend-local memory */
	size = offsetof(SharedIndexScanInstrumentation, winstrument) +
		SharedInfo->num_workers * sizeof(IndexScanInstrumentation);
	node->ioss_SharedInfo = palloc(size);
	memcpy(node->ioss_SharedInfo, SharedInfo, size);
}