Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeIndexonlyscan.c
4 : * Routines to support index-only scans
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/executor/nodeIndexonlyscan.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : /*
16 : * INTERFACE ROUTINES
17 : * ExecIndexOnlyScan scans an index
18 : * IndexOnlyNext retrieve next tuple
19 : * ExecInitIndexOnlyScan creates and initializes state info.
20 : * ExecReScanIndexOnlyScan rescans the indexed relation.
21 : * ExecEndIndexOnlyScan releases all storage.
22 : * ExecIndexOnlyMarkPos marks scan position.
23 : * ExecIndexOnlyRestrPos restores scan position.
24 : * ExecIndexOnlyScanEstimate estimates DSM space needed for
25 : * parallel index-only scan
26 : * ExecIndexOnlyScanInitializeDSM initialize DSM for parallel
27 : * index-only scan
28 : * ExecIndexOnlyScanReInitializeDSM reinitialize DSM for fresh scan
29 : * ExecIndexOnlyScanInitializeWorker attach to DSM info in parallel worker
30 : */
31 : #include "postgres.h"
32 :
33 : #include "access/genam.h"
34 : #include "access/relscan.h"
35 : #include "access/tableam.h"
36 : #include "access/tupdesc.h"
37 : #include "access/visibilitymap.h"
38 : #include "catalog/pg_type.h"
39 : #include "executor/executor.h"
40 : #include "executor/instrument.h"
41 : #include "executor/nodeIndexonlyscan.h"
42 : #include "executor/nodeIndexscan.h"
43 : #include "miscadmin.h"
44 : #include "storage/bufmgr.h"
45 : #include "storage/predicate.h"
46 : #include "utils/builtins.h"
47 : #include "utils/rel.h"
48 :
49 :
50 : static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node);
51 : static void StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
52 : IndexTuple itup, TupleDesc itupdesc);
53 :
54 :
55 : /* ----------------------------------------------------------------
56 : * IndexOnlyNext
57 : *
58 : * Retrieve a tuple from the IndexOnlyScan node's index.
59 : * ----------------------------------------------------------------
60 : */
61 : static TupleTableSlot *
62 3705236 : IndexOnlyNext(IndexOnlyScanState *node)
63 : {
64 : EState *estate;
65 : ExprContext *econtext;
66 : ScanDirection direction;
67 : IndexScanDesc scandesc;
68 : TupleTableSlot *slot;
69 : ItemPointer tid;
70 :
71 : /*
72 : * extract necessary information from index scan node
73 : */
74 3705236 : estate = node->ss.ps.state;
75 :
76 : /*
77 : * Determine which direction to scan the index in based on the plan's scan
78 : * direction and the current direction of execution.
79 : */
80 3705236 : direction = ScanDirectionCombine(estate->es_direction,
81 : ((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir);
82 3705236 : scandesc = node->ioss_ScanDesc;
83 3705236 : econtext = node->ss.ps.ps_ExprContext;
84 3705236 : slot = node->ss.ss_ScanTupleSlot;
85 :
86 3705236 : if (scandesc == NULL)
87 : {
88 : /*
89 : * We reach here if the index only scan is not parallel, or if we're
90 : * serially executing an index only scan that was planned to be
91 : * parallel.
92 : */
93 6437 : scandesc = index_beginscan(node->ss.ss_currentRelation,
94 : node->ioss_RelationDesc,
95 : estate->es_snapshot,
96 : &node->ioss_Instrument,
97 : node->ioss_NumScanKeys,
98 : node->ioss_NumOrderByKeys);
99 :
100 6437 : node->ioss_ScanDesc = scandesc;
101 :
102 :
103 : /* Set it up for index-only scan */
104 6437 : node->ioss_ScanDesc->xs_want_itup = true;
105 6437 : node->ioss_VMBuffer = InvalidBuffer;
106 :
107 : /*
108 : * If no run-time keys to calculate or they are ready, go ahead and
109 : * pass the scankeys to the index AM.
110 : */
111 6437 : if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
112 6437 : index_rescan(scandesc,
113 : node->ioss_ScanKeys,
114 : node->ioss_NumScanKeys,
115 : node->ioss_OrderByKeys,
116 : node->ioss_NumOrderByKeys);
117 : }
118 :
119 : /*
120 : * OK, now that we have what we need, fetch the next tuple.
121 : */
122 3794124 : while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
123 : {
124 3659246 : bool tuple_from_heap = false;
125 :
 : /* stay responsive to query cancel/termination while walking the index */
126 3659246 : CHECK_FOR_INTERRUPTS();
127 :
128 : /*
129 : * We can skip the heap fetch if the TID references a heap page on
130 : * which all tuples are known visible to everybody. In any case,
131 : * we'll use the index tuple not the heap tuple as the data source.
132 : *
133 : * Note on Memory Ordering Effects: visibilitymap_get_status does not
134 : * lock the visibility map buffer, and therefore the result we read
135 : * here could be slightly stale. However, it can't be stale enough to
136 : * matter.
137 : *
138 : * We need to detect clearing a VM bit due to an insert right away,
139 : * because the tuple is present in the index page but not visible. The
140 : * reading of the TID by this scan (using a shared lock on the index
141 : * buffer) is serialized with the insert of the TID into the index
142 : * (using an exclusive lock on the index buffer). Because the VM bit
143 : * is cleared before updating the index, and locking/unlocking of the
144 : * index page acts as a full memory barrier, we are sure to see the
145 : * cleared bit if we see a recently-inserted TID.
146 : *
147 : * Deletes do not update the index page (only VACUUM will clear out
148 : * the TID), so the clearing of the VM bit by a delete is not
149 : * serialized with this test below, and we may see a value that is
150 : * significantly stale. However, we don't care about the delete right
151 : * away, because the tuple is still visible until the deleting
152 : * transaction commits or the statement ends (if it's our
153 : * transaction). In either case, the lock on the VM buffer will have
154 : * been released (acting as a write barrier) after clearing the bit.
155 : * And for us to have a snapshot that includes the deleting
156 : * transaction (making the tuple invisible), we must have acquired
157 : * ProcArrayLock after that time, acting as a read barrier.
158 : *
159 : * It's worth going through this complexity to avoid needing to lock
160 : * the VM buffer, which could cause significant contention.
161 : */
162 3659246 : if (!VM_ALL_VISIBLE(scandesc->heapRelation,
163 : ItemPointerGetBlockNumber(tid),
164 : &node->ioss_VMBuffer))
165 : {
166 : /*
167 : * Rats, we have to visit the heap to check visibility.
168 : */
 : /* tally this heap fetch in the node's secondary tuple counter */
169 1256160 : InstrCountTuples2(node, 1);
170 1256160 : if (!index_fetch_heap(scandesc, node->ioss_TableSlot))
171 88884 : continue; /* no visible tuple, try next index entry */
172 :
173 1167276 : ExecClearTuple(node->ioss_TableSlot);
174 :
175 : /*
176 : * Only MVCC snapshots are supported here, so there should be no
177 : * need to keep following the HOT chain once a visible entry has
178 : * been found. If we did want to allow that, we'd need to keep
179 : * more state to remember not to call index_getnext_tid next time.
180 : */
181 1167276 : if (scandesc->xs_heap_continue)
182 0 : elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");
183 :
184 : /*
185 : * Note: at this point we are holding a pin on the heap page, as
186 : * recorded in scandesc->xs_cbuf. We could release that pin now,
187 : * but it's not clear whether it's a win to do so. The next index
188 : * entry might require a visit to the same heap page.
189 : */
190 :
191 1167276 : tuple_from_heap = true;
192 : }
193 :
194 : /*
195 : * Fill the scan tuple slot with data from the index. This might be
196 : * provided in either HeapTuple or IndexTuple format. Conceivably an
197 : * index AM might fill both fields, in which case we prefer the heap
198 : * format, since it's probably a bit cheaper to fill a slot from.
199 : */
200 3570362 : if (scandesc->xs_hitup)
201 : {
202 : /*
203 : * We don't take the trouble to verify that the provided tuple has
204 : * exactly the slot's format, but it seems worth doing a quick
205 : * check on the number of fields.
206 : */
207 : Assert(slot->tts_tupleDescriptor->natts ==
208 : scandesc->xs_hitupdesc->natts);
209 951676 : ExecForceStoreHeapTuple(scandesc->xs_hitup, slot, false);
210 : }
211 2618686 : else if (scandesc->xs_itup)
212 2618686 : StoreIndexTuple(node, slot, scandesc->xs_itup, scandesc->xs_itupdesc);
213 : else
214 0 : elog(ERROR, "no data returned for index-only scan");
215 :
216 : /*
217 : * If the index was lossy, we have to recheck the index quals.
218 : */
219 3570362 : if (scandesc->xs_recheck)
220 : {
221 9 : econtext->ecxt_scantuple = slot;
222 9 : if (!ExecQualAndReset(node->recheckqual, econtext))
223 : {
224 : /* Fails recheck, so drop it and loop back for another */
225 4 : InstrCountFiltered2(node, 1);
226 4 : continue;
227 : }
228 : }
229 :
230 : /*
231 : * We don't currently support rechecking ORDER BY distances. (In
232 : * principle, if the index can support retrieval of the originally
233 : * indexed value, it should be able to produce an exact distance
234 : * calculation too. So it's not clear that adding code here for
235 : * recheck/re-sort would be worth the trouble. But we should at least
236 : * throw an error if someone tries it.)
237 : */
238 3570358 : if (scandesc->numberOfOrderBys > 0 && scandesc->xs_recheckorderby)
239 4 : ereport(ERROR,
240 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
241 : errmsg("lossy distance functions are not supported in index-only scans")));
242 :
243 : /*
244 : * If we didn't access the heap, then we'll need to take a predicate
245 : * lock explicitly, as if we had. For now we do that at page level.
246 : */
247 3570354 : if (!tuple_from_heap)
248 2403086 : PredicateLockPage(scandesc->heapRelation,
249 : ItemPointerGetBlockNumber(tid),
250 : estate->es_snapshot);
251 :
 : /* found a visible tuple that passed all rechecks; hand it up */
252 3570354 : return slot;
253 : }
254 :
255 : /*
256 : * if we get here it means the index scan failed so we are at the end of
257 : * the scan..
258 : */
259 134878 : return ExecClearTuple(slot);
260 : }
261 :
262 : /*
263 : * StoreIndexTuple
264 : * Fill the slot with data from the index tuple.
265 : *
266 : * At some point this might be generally-useful functionality, but
267 : * right now we don't need it elsewhere.
268 : */
269 : static void
270 2618686 : StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
271 : IndexTuple itup, TupleDesc itupdesc)
272 : {
273 : /*
274 : * Note: we must use the tupdesc supplied by the AM in index_deform_tuple,
275 : * not the slot's tupdesc, in case the latter has different datatypes
276 : * (this happens for btree name_ops in particular). They'd better have
277 : * the same number of columns though, as well as being datatype-compatible
278 : * which is something we can't so easily check.
279 : */
280 : Assert(slot->tts_tupleDescriptor->natts == itupdesc->natts);
281 :
282 2618686 : ExecClearTuple(slot);
283 2618686 : index_deform_tuple(itup, itupdesc, slot->tts_values, slot->tts_isnull);
284 :
285 : /*
286 : * Copy all name columns stored as cstrings back into a NAMEDATALEN byte
287 : * sized allocation. We mark this branch as unlikely as generally "name"
288 : * is used only for the system catalogs and this would have to be a user
289 : * query running on those or some other user table with an index on a name
290 : * column.
291 : */
292 2618686 : if (unlikely(node->ioss_NameCStringAttNums != NULL))
293 : {
294 1999 : int attcount = node->ioss_NameCStringCount;
295 :
296 3998 : for (int idx = 0; idx < attcount; idx++)
297 : {
298 1999 : int attnum = node->ioss_NameCStringAttNums[idx];
299 : Name name;
300 :
301 : /* skip null Datums */
302 1999 : if (slot->tts_isnull[attnum])
303 0 : continue;
304 :
305 : /* allocate the NAMEDATALEN and copy the datum into that memory */
 : /* per-tuple memory, so the allocation is reclaimed automatically */
306 1999 : name = (Name) MemoryContextAlloc(node->ss.ps.ps_ExprContext->ecxt_per_tuple_memory,
307 : NAMEDATALEN);
308 :
309 : /* use namestrcpy to zero-pad all trailing bytes */
310 1999 : namestrcpy(name, DatumGetCString(slot->tts_values[attnum]));
311 1999 : slot->tts_values[attnum] = NameGetDatum(name);
312 : }
313 : }
314 :
 : /* mark the slot as now holding a valid virtual tuple */
315 2618686 : ExecStoreVirtualTuple(slot);
316 2618686 : }
317 :
318 : /*
319 : * IndexOnlyRecheck -- access method routine to recheck a tuple in EvalPlanQual
320 : *
321 : * This can't really happen, since an index can't supply CTID which would
322 : * be necessary data for any potential EvalPlanQual target relation. If it
323 : * did happen, the EPQ code would pass us the wrong data, namely a heap
324 : * tuple not an index tuple. So throw an error.
325 : */
326 : static bool
327 0 : IndexOnlyRecheck(IndexOnlyScanState *node, TupleTableSlot *slot)
328 : {
 : /* both parameters are intentionally unused; this always errors out */
329 0 : elog(ERROR, "EvalPlanQual recheck is not supported in index-only scans");
330 : return false; /* keep compiler quiet */
331 : }
332 :
333 : /* ----------------------------------------------------------------
334 : * ExecIndexOnlyScan(node)
335 : * ----------------------------------------------------------------
336 : */
337 : static TupleTableSlot *
338 3503142 : ExecIndexOnlyScan(PlanState *pstate)
339 : {
340 3503142 : IndexOnlyScanState *node = castNode(IndexOnlyScanState, pstate);
341 :
342 : /*
343 : * If we have runtime keys and they've not already been set up, do it now.
344 : */
345 3503142 : if (node->ioss_NumRuntimeKeys != 0 && !node->ioss_RuntimeKeysReady)
346 371 : ExecReScan((PlanState *) node);
347 :
 : /* delegate tuple fetching, qual checking and projection to ExecScan */
348 3503142 : return ExecScan(&node->ss,
349 : (ExecScanAccessMtd) IndexOnlyNext,
350 : (ExecScanRecheckMtd) IndexOnlyRecheck);
351 : }
352 :
353 : /* ----------------------------------------------------------------
354 : * ExecReScanIndexOnlyScan(node)
355 : *
356 : * Recalculates the values of any scan keys whose value depends on
357 : * information known at runtime, then rescans the indexed relation.
358 : *
359 : * Updating the scan key was formerly done separately in
360 : * ExecUpdateIndexScanKeys. Integrating it into ReScan makes
361 : * rescans of indices and relations/general streams more uniform.
362 : * ----------------------------------------------------------------
363 : */
364 : void
365 152059 : ExecReScanIndexOnlyScan(IndexOnlyScanState *node)
366 : {
367 : /*
368 : * If we are doing runtime key calculations (ie, any of the index key
369 : * values weren't simple Consts), compute the new key values. But first,
370 : * reset the context so we don't leak memory as each outer tuple is
371 : * scanned. Note this assumes that we will recalculate *all* runtime keys
372 : * on each call.
373 : */
374 152059 : if (node->ioss_NumRuntimeKeys != 0)
375 : {
376 151926 : ExprContext *econtext = node->ioss_RuntimeContext;
377 :
378 151926 : ResetExprContext(econtext);
379 151926 : ExecIndexEvalRuntimeKeys(econtext,
380 : node->ioss_RuntimeKeys,
381 : node->ioss_NumRuntimeKeys);
382 : }
383 152059 : node->ioss_RuntimeKeysReady = true;
384 :
385 : /* reset index scan */
 : /* ioss_ScanDesc may still be NULL if the scan hasn't been started yet */
386 152059 : if (node->ioss_ScanDesc)
387 150561 : index_rescan(node->ioss_ScanDesc,
388 : node->ioss_ScanKeys, node->ioss_NumScanKeys,
389 : node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
390 :
391 152059 : ExecScanReScan(&node->ss);
392 152059 : }
393 :
394 :
395 : /* ----------------------------------------------------------------
396 : * ExecEndIndexOnlyScan
397 : * ----------------------------------------------------------------
398 : */
399 : void
400 12204 : ExecEndIndexOnlyScan(IndexOnlyScanState *node)
401 : {
402 : Relation indexRelationDesc;
403 : IndexScanDesc indexScanDesc;
404 :
405 : /*
406 : * extract information from the node
407 : */
408 12204 : indexRelationDesc = node->ioss_RelationDesc;
409 12204 : indexScanDesc = node->ioss_ScanDesc;
410 :
411 : /* Release VM buffer pin, if any. */
 : /* (this pin is acquired by the visibility-map probes in IndexOnlyNext) */
412 12204 : if (node->ioss_VMBuffer != InvalidBuffer)
413 : {
414 3727 : ReleaseBuffer(node->ioss_VMBuffer);
415 3727 : node->ioss_VMBuffer = InvalidBuffer;
416 : }
417 :
418 : /*
419 : * When ending a parallel worker, copy the statistics gathered by the
420 : * worker back into shared memory so that it can be picked up by the main
421 : * process to report in EXPLAIN ANALYZE
422 : */
423 12204 : if (node->ioss_SharedInfo != NULL && IsParallelWorker())
424 : {
425 : IndexScanInstrumentation *winstrument;
426 :
427 : Assert(ParallelWorkerNumber < node->ioss_SharedInfo->num_workers);
428 0 : winstrument = &node->ioss_SharedInfo->winstrument[ParallelWorkerNumber];
429 :
430 : /*
431 : * We have to accumulate the stats rather than performing a memcpy.
432 : * When a Gather/GatherMerge node finishes it will perform planner
433 : * shutdown on the workers. On rescan it will spin up new workers
434 : * which will have a new IndexOnlyScanState and zeroed stats.
435 : */
436 0 : winstrument->nsearches += node->ioss_Instrument.nsearches;
437 : }
438 :
439 : /*
440 : * close the index relation (no-op if we didn't open it)
441 : */
442 12204 : if (indexScanDesc)
443 6570 : index_endscan(indexScanDesc);
444 12204 : if (indexRelationDesc)
445 10453 : index_close(indexRelationDesc, NoLock);
446 12204 : }
447 :
448 : /* ----------------------------------------------------------------
449 : * ExecIndexOnlyMarkPos
450 : *
451 : * Note: we assume that no caller attempts to set a mark before having read
452 : * at least one tuple. Otherwise, ioss_ScanDesc might still be NULL.
453 : * ----------------------------------------------------------------
454 : */
455 : void
456 82019 : ExecIndexOnlyMarkPos(IndexOnlyScanState *node)
457 : {
458 82019 : EState *estate = node->ss.ps.state;
459 82019 : EPQState *epqstate = estate->es_epq_active;
460 :
461 82019 : if (epqstate != NULL)
462 : {
463 : /*
464 : * We are inside an EvalPlanQual recheck. If a test tuple exists for
465 : * this relation, then we shouldn't access the index at all. We would
466 : * instead need to save, and later restore, the state of the
467 : * relsubs_done flag, so that re-fetching the test tuple is possible.
468 : * However, given the assumption that no caller sets a mark at the
469 : * start of the scan, we can only get here with relsubs_done[i]
470 : * already set, and so no state need be saved.
471 : */
472 0 : Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
473 :
474 : Assert(scanrelid > 0);
475 0 : if (epqstate->relsubs_slot[scanrelid - 1] != NULL ||
476 0 : epqstate->relsubs_rowmark[scanrelid - 1] != NULL)
477 : {
478 : /* Verify the claim above */
479 0 : if (!epqstate->relsubs_done[scanrelid - 1])
480 0 : elog(ERROR, "unexpected ExecIndexOnlyMarkPos call in EPQ recheck")
481 0 : return;
482 : }
483 : }
484 :
 : /* not in EPQ, or no test tuple for this rel: mark the real index scan */
485 82019 : index_markpos(node->ioss_ScanDesc);
486 : }
487 :
488 : /* ----------------------------------------------------------------
489 : * ExecIndexOnlyRestrPos
490 : *
491 : * Restore the scan to a previously marked position. As with
492 : * ExecIndexOnlyMarkPos, a test tuple during EPQ recheck means the
493 : * index should not be touched at all.
494 : * ----------------------------------------------------------------
495 : */
496 : void
497 0 : ExecIndexOnlyRestrPos(IndexOnlyScanState *node)
498 : {
499 0 : EState *estate = node->ss.ps.state;
500 0 : EPQState *epqstate = estate->es_epq_active;
501 :
 : /* use the local, matching ExecIndexOnlyMarkPos, rather than re-reading
 : * estate->es_epq_active; the two are the same value */
502 0 : if (epqstate != NULL)
503 : {
504 : /* See comments in ExecIndexMarkPos */
505 0 : Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
506 :
507 : Assert(scanrelid > 0);
508 0 : if (epqstate->relsubs_slot[scanrelid - 1] != NULL ||
509 0 : epqstate->relsubs_rowmark[scanrelid - 1] != NULL)
510 : {
511 : /* Verify the claim above */
512 0 : if (!epqstate->relsubs_done[scanrelid - 1])
513 0 : elog(ERROR, "unexpected ExecIndexOnlyRestrPos call in EPQ recheck");
514 0 : return;
515 : }
516 : }
517 :
 : /* not in EPQ, or no test tuple for this rel: restore the real index scan */
518 0 : index_restrpos(node->ioss_ScanDesc);
519 : }
516 :
517 : /* ----------------------------------------------------------------
518 : * ExecInitIndexOnlyScan
519 : *
520 : * Initializes the index scan's state information, creates
521 : * scan keys, and opens the base and index relations.
522 : *
523 : * Note: index scans have 2 sets of state information because
524 : * we have to keep track of the base relation and the
525 : * index relation.
526 : * ----------------------------------------------------------------
527 : */
528 : IndexOnlyScanState *
529 12237 : ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
530 : {
531 : IndexOnlyScanState *indexstate;
532 : Relation currentRelation;
533 : Relation indexRelation;
534 : LOCKMODE lockmode;
535 : TupleDesc tupDesc;
536 : int indnkeyatts;
537 : int namecount;
538 :
539 : /*
540 : * create state structure
541 : */
542 12237 : indexstate = makeNode(IndexOnlyScanState);
543 12237 : indexstate->ss.ps.plan = (Plan *) node;
544 12237 : indexstate->ss.ps.state = estate;
545 12237 : indexstate->ss.ps.ExecProcNode = ExecIndexOnlyScan;
546 :
547 : /*
548 : * Miscellaneous initialization
549 : *
550 : * create expression context for node
551 : */
552 12237 : ExecAssignExprContext(estate, &indexstate->ss.ps);
553 :
554 : /*
555 : * open the scan relation
556 : */
557 12237 : currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
558 :
559 12237 : indexstate->ss.ss_currentRelation = currentRelation;
560 12237 : indexstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */
561 :
562 : /*
563 : * Build the scan tuple type using the indextlist generated by the
564 : * planner. We use this, rather than the index's physical tuple
565 : * descriptor, because the latter contains storage column types not the
566 : * types of the original datums. (It's the AM's responsibility to return
567 : * suitable data anyway.)
568 : */
569 12237 : tupDesc = ExecTypeFromTL(node->indextlist);
570 12237 : ExecInitScanTupleSlot(estate, &indexstate->ss, tupDesc,
571 : &TTSOpsVirtual,
572 : 0);
573 :
574 : /*
575 : * We need another slot, in a format that's suitable for the table AM, for
576 : * when we need to fetch a tuple from the table for rechecking visibility.
577 : */
578 12237 : indexstate->ioss_TableSlot =
579 12237 : ExecAllocTableSlot(&estate->es_tupleTable,
580 : RelationGetDescr(currentRelation),
581 : table_slot_callbacks(currentRelation), 0);
582 :
583 : /*
584 : * Initialize result type and projection info. The node's targetlist will
585 : * contain Vars with varno = INDEX_VAR, referencing the scan tuple.
586 : */
587 12237 : ExecInitResultTypeTL(&indexstate->ss.ps);
588 12237 : ExecAssignScanProjectionInfoWithVarno(&indexstate->ss, INDEX_VAR);
589 :
590 : /*
591 : * initialize child expressions
592 : *
593 : * Note: we don't initialize all of the indexorderby expression, only the
594 : * sub-parts corresponding to runtime keys (see below).
595 : */
596 12237 : indexstate->ss.ps.qual =
597 12237 : ExecInitQual(node->scan.plan.qual, (PlanState *) indexstate);
598 12237 : indexstate->recheckqual =
599 12237 : ExecInitQual(node->recheckqual, (PlanState *) indexstate);
600 :
601 : /*
602 : * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
603 : * here. This allows an index-advisor plugin to EXPLAIN a plan containing
604 : * references to nonexistent indexes.
605 : */
606 12237 : if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
607 1751 : return indexstate;
608 :
609 : /* Open the index relation. */
610 10486 : lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
611 10486 : indexRelation = index_open(node->indexid, lockmode);
612 10486 : indexstate->ioss_RelationDesc = indexRelation;
613 :
614 : /*
615 : * Initialize index-specific scan state
616 : */
617 10486 : indexstate->ioss_RuntimeKeysReady = false;
618 10486 : indexstate->ioss_RuntimeKeys = NULL;
619 10486 : indexstate->ioss_NumRuntimeKeys = 0;
620 :
621 : /*
622 : * build the index scan keys from the index qualification
623 : */
624 10486 : ExecIndexBuildScanKeys((PlanState *) indexstate,
625 : indexRelation,
626 : node->indexqual,
627 : false,
628 10486 : &indexstate->ioss_ScanKeys,
629 : &indexstate->ioss_NumScanKeys,
630 : &indexstate->ioss_RuntimeKeys,
631 : &indexstate->ioss_NumRuntimeKeys,
632 : NULL, /* no ArrayKeys */
633 : NULL);
634 :
635 : /*
636 : * any ORDER BY exprs have to be turned into scankeys in the same way
637 : */
638 10486 : ExecIndexBuildScanKeys((PlanState *) indexstate,
639 : indexRelation,
640 : node->indexorderby,
641 : true,
642 10486 : &indexstate->ioss_OrderByKeys,
643 : &indexstate->ioss_NumOrderByKeys,
644 : &indexstate->ioss_RuntimeKeys,
645 : &indexstate->ioss_NumRuntimeKeys,
646 : NULL, /* no ArrayKeys */
647 : NULL);
648 :
649 : /*
650 : * If we have runtime keys, we need an ExprContext to evaluate them. The
651 : * node's standard context won't do because we want to reset that context
652 : * for every tuple. So, build another context just like the other one...
653 : * -tgl 7/11/00
654 : */
655 10486 : if (indexstate->ioss_NumRuntimeKeys != 0)
656 : {
657 3204 : ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;
658 :
659 3204 : ExecAssignExprContext(estate, &indexstate->ss.ps);
660 3204 : indexstate->ioss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
661 3204 : indexstate->ss.ps.ps_ExprContext = stdecontext;
662 : }
663 : else
664 : {
665 7282 : indexstate->ioss_RuntimeContext = NULL;
666 : }
667 :
668 10486 : indexstate->ioss_NameCStringAttNums = NULL;
669 10486 : indnkeyatts = indexRelation->rd_index->indnkeyatts;
 : /* number of name-typed key columns stored as cstring in the index */
670 10486 : namecount = 0;
671 :
672 : /*
673 : * The "name" type for btree uses text_ops which results in storing
674 : * cstrings in the indexed keys rather than names. Here we detect that in
675 : * a generic way in case other index AMs want to do the same optimization.
676 : * Check for opclasses with an opcintype of NAMEOID and an index tuple
677 : * descriptor with CSTRINGOID. If any of these are found, create an array
678 : * marking the index attribute number of each of them. StoreIndexTuple()
679 : * handles copying the name Datums into a NAMEDATALEN-byte allocation.
680 : */
681 :
682 : /* First, count the number of such index keys */
683 24298 : for (int attnum = 0; attnum < indnkeyatts; attnum++)
684 : {
685 13812 : if (TupleDescAttr(indexRelation->rd_att, attnum)->atttypid == CSTRINGOID &&
686 1747 : indexRelation->rd_opcintype[attnum] == NAMEOID)
687 1747 : namecount++;
688 : }
689 :
690 10486 : if (namecount > 0)
691 : {
692 1747 : int idx = 0;
693 :
694 : /*
695 : * Now create an array to mark the attribute numbers of the keys that
696 : * need to be converted from cstring to name.
697 : */
698 1747 : indexstate->ioss_NameCStringAttNums = palloc_array(AttrNumber, namecount);
699 :
 : /* Second pass: record each matching key's attribute number */
700 5297 : for (int attnum = 0; attnum < indnkeyatts; attnum++)
701 : {
702 3550 : if (TupleDescAttr(indexRelation->rd_att, attnum)->atttypid == CSTRINGOID &&
703 1747 : indexRelation->rd_opcintype[attnum] == NAMEOID)
704 1747 : indexstate->ioss_NameCStringAttNums[idx++] = (AttrNumber) attnum;
705 : }
706 : }
707 :
708 10486 : indexstate->ioss_NameCStringCount = namecount;
709 :
710 : /*
711 : * all done.
712 : */
713 10486 : return indexstate;
714 : }
715 :
716 : /* ----------------------------------------------------------------
717 : * Parallel Index-only Scan Support
718 : * ----------------------------------------------------------------
719 : */
720 :
721 : /* ----------------------------------------------------------------
722 : * ExecIndexOnlyScanEstimate
723 : *
724 : * Compute the amount of space we'll need in the parallel
725 : * query DSM, and inform pcxt->estimator about our needs.
726 : * ----------------------------------------------------------------
727 : */
728 : void
729 42 : ExecIndexOnlyScanEstimate(IndexOnlyScanState *node,
730 : ParallelContext *pcxt)
731 : {
732 42 : EState *estate = node->ss.ps.state;
733 42 : bool instrument = (node->ss.ps.instrument != NULL);
734 42 : bool parallel_aware = node->ss.ps.plan->parallel_aware;
735 :
736 42 : if (!instrument && !parallel_aware)
737 : {
738 : /* No DSM required by the scan */
739 12 : return;
740 : }
741 :
742 30 : node->ioss_PscanLen = index_parallelscan_estimate(node->ioss_RelationDesc,
743 : node->ioss_NumScanKeys,
744 : node->ioss_NumOrderByKeys,
745 : estate->es_snapshot,
746 : instrument, parallel_aware,
747 : pcxt->nworkers);
 : /* one chunk for the scan descriptor, keyed by plan_node_id later */
748 30 : shm_toc_estimate_chunk(&pcxt->estimator, node->ioss_PscanLen);
749 30 : shm_toc_estimate_keys(&pcxt->estimator, 1);
750 : }
751 :
752 : /* ----------------------------------------------------------------
753 : * ExecIndexOnlyScanInitializeDSM
754 : *
755 : * Set up a parallel index-only scan descriptor.
756 : * ----------------------------------------------------------------
757 : */
758 : void
759 42 : ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node,
760 : ParallelContext *pcxt)
761 : {
762 42 : EState *estate = node->ss.ps.state;
763 : ParallelIndexScanDesc piscan;
764 42 : bool instrument = node->ss.ps.instrument != NULL;
765 42 : bool parallel_aware = node->ss.ps.plan->parallel_aware;
766 :
767 42 : if (!instrument && !parallel_aware)
768 : {
769 : /* No DSM required by the scan */
770 12 : return;
771 : }
772 :
 : /* allocate and initialize the parallel scan descriptor in the DSM */
773 30 : piscan = shm_toc_allocate(pcxt->toc, node->ioss_PscanLen);
774 30 : index_parallelscan_initialize(node->ss.ss_currentRelation,
775 : node->ioss_RelationDesc,
776 : estate->es_snapshot,
777 : instrument, parallel_aware, pcxt->nworkers,
778 : &node->ioss_SharedInfo, piscan);
779 30 : shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan);
780 :
781 30 : if (!parallel_aware)
782 : {
783 : /* Only here to initialize SharedInfo in DSM */
784 0 : return;
785 : }
786 :
787 30 : node->ioss_ScanDesc =
788 30 : index_beginscan_parallel(node->ss.ss_currentRelation,
789 : node->ioss_RelationDesc,
790 : &node->ioss_Instrument,
791 : node->ioss_NumScanKeys,
792 : node->ioss_NumOrderByKeys,
793 : piscan);
794 30 : node->ioss_ScanDesc->xs_want_itup = true;
795 30 : node->ioss_VMBuffer = InvalidBuffer;
796 :
797 : /*
798 : * If no run-time keys to calculate or they are ready, go ahead and pass
799 : * the scankeys to the index AM.
800 : */
801 30 : if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
802 30 : index_rescan(node->ioss_ScanDesc,
803 : node->ioss_ScanKeys, node->ioss_NumScanKeys,
804 : node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
805 : }
806 :
807 : /* ----------------------------------------------------------------
808 : * ExecIndexOnlyScanReInitializeDSM
809 : *
810 : * Reset shared state before beginning a fresh scan.
811 : * ----------------------------------------------------------------
812 : */
813 : void
814 8 : ExecIndexOnlyScanReInitializeDSM(IndexOnlyScanState *node,
815 : ParallelContext *pcxt)
816 : {
817 : Assert(node->ss.ps.plan->parallel_aware);
 : /* the descriptor itself survives; only the shared scan state is reset */
818 8 : index_parallelrescan(node->ioss_ScanDesc);
819 8 : }
820 :
821 : /* ----------------------------------------------------------------
822 : * ExecIndexOnlyScanInitializeWorker
823 : *
824 : * Copy relevant information from TOC into planstate.
825 : * ----------------------------------------------------------------
826 : */
827 : void
828 168 : ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node,
829 : ParallelWorkerContext *pwcxt)
830 : {
831 : ParallelIndexScanDesc piscan;
832 168 : bool instrument = node->ss.ps.instrument != NULL;
833 168 : bool parallel_aware = node->ss.ps.plan->parallel_aware;
834 :
835 168 : if (!instrument && !parallel_aware)
836 : {
837 : /* No DSM required by the scan */
838 32 : return;
839 : }
840 :
 : /* find the scan descriptor the leader placed in the DSM TOC */
841 136 : piscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
842 :
843 136 : if (instrument)
844 0 : node->ioss_SharedInfo = (SharedIndexScanInstrumentation *)
845 0 : OffsetToPointer(piscan, piscan->ps_offset_ins);
846 :
847 136 : if (!parallel_aware)
848 : {
849 : /* Only here to set up worker node's SharedInfo */
850 0 : return;
851 : }
852 :
853 136 : node->ioss_ScanDesc =
854 136 : index_beginscan_parallel(node->ss.ss_currentRelation,
855 : node->ioss_RelationDesc,
856 : &node->ioss_Instrument,
857 : node->ioss_NumScanKeys,
858 : node->ioss_NumOrderByKeys,
859 : piscan);
860 136 : node->ioss_ScanDesc->xs_want_itup = true;
861 :
862 : /*
863 : * If no run-time keys to calculate or they are ready, go ahead and pass
864 : * the scankeys to the index AM.
865 : */
866 136 : if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
867 136 : index_rescan(node->ioss_ScanDesc,
868 : node->ioss_ScanKeys, node->ioss_NumScanKeys,
869 : node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
870 : }
871 :
872 : /* ----------------------------------------------------------------
873 : * ExecIndexOnlyScanRetrieveInstrumentation
874 : *
875 : * Transfer index-only scan statistics from DSM to private memory.
876 : * ----------------------------------------------------------------
877 : */
878 : void
879 0 : ExecIndexOnlyScanRetrieveInstrumentation(IndexOnlyScanState *node)
880 : {
881 0 : SharedIndexScanInstrumentation *SharedInfo = node->ioss_SharedInfo;
882 : size_t size;
883 :
884 0 : if (SharedInfo == NULL)
885 0 : return;
886 :
887 : /* Create a copy of SharedInfo in backend-local memory */
 : /* winstrument is a trailing array sized by num_workers, hence the
 : * offsetof-based size computation */
888 0 : size = offsetof(SharedIndexScanInstrumentation, winstrument) +
889 0 : SharedInfo->num_workers * sizeof(IndexScanInstrumentation);
890 0 : node->ioss_SharedInfo = palloc(size);
891 0 : memcpy(node->ioss_SharedInfo, SharedInfo, size);
892 : }
|