Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeIndexonlyscan.c
4 : * Routines to support index-only scans
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/executor/nodeIndexonlyscan.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : /*
16 : * INTERFACE ROUTINES
17 : * ExecIndexOnlyScan scans an index
18 : * IndexOnlyNext retrieve next tuple
19 : * ExecInitIndexOnlyScan creates and initializes state info.
20 : * ExecReScanIndexOnlyScan rescans the indexed relation.
21 : * ExecEndIndexOnlyScan releases all storage.
22 : * ExecIndexOnlyMarkPos marks scan position.
23 : * ExecIndexOnlyRestrPos restores scan position.
24 : * ExecIndexOnlyScanEstimate estimates DSM space needed for
25 : * parallel index-only scan
26 : * ExecIndexOnlyScanInitializeDSM initialize DSM for parallel
27 : * index-only scan
28 : * ExecIndexOnlyScanReInitializeDSM reinitialize DSM for fresh scan
29 : * ExecIndexOnlyScanInitializeWorker attach to DSM info in parallel worker
30 : */
31 : #include "postgres.h"
32 :
33 : #include "access/genam.h"
34 : #include "access/relscan.h"
35 : #include "access/tableam.h"
36 : #include "access/tupdesc.h"
37 : #include "access/visibilitymap.h"
38 : #include "catalog/pg_type.h"
39 : #include "executor/executor.h"
40 : #include "executor/instrument.h"
41 : #include "executor/nodeIndexonlyscan.h"
42 : #include "executor/nodeIndexscan.h"
43 : #include "miscadmin.h"
44 : #include "storage/bufmgr.h"
45 : #include "storage/predicate.h"
46 : #include "utils/builtins.h"
47 : #include "utils/rel.h"
48 :
49 :
50 : static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node);
51 : static void StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
52 : IndexTuple itup, TupleDesc itupdesc);
53 :
54 :
/* ----------------------------------------------------------------
 *		IndexOnlyNext
 *
 *		Retrieve a tuple from the IndexOnlyScan node's index.
 *
 * Workhorse for ExecIndexOnlyScan: returns the next matching tuple in the
 * scan tuple slot, or an empty slot at end of scan.  Starts the underlying
 * index scan lazily on first call (for the non-parallel, or
 * planned-parallel-but-run-serially, cases).
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
IndexOnlyNext(IndexOnlyScanState *node)
{
	EState	   *estate;
	ExprContext *econtext;
	ScanDirection direction;
	IndexScanDesc scandesc;
	TupleTableSlot *slot;
	ItemPointer tid;

	/*
	 * extract necessary information from index scan node
	 */
	estate = node->ss.ps.state;

	/*
	 * Determine which direction to scan the index in based on the plan's scan
	 * direction and the current direction of execution.
	 */
	direction = ScanDirectionCombine(estate->es_direction,
									 ((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir);
	scandesc = node->ioss_ScanDesc;
	econtext = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;

	if (scandesc == NULL)
	{
		/*
		 * We reach here if the index only scan is not parallel, or if we're
		 * serially executing an index only scan that was planned to be
		 * parallel.
		 */
		scandesc = index_beginscan(node->ss.ss_currentRelation,
								   node->ioss_RelationDesc,
								   estate->es_snapshot,
								   node->ioss_Instrument,
								   node->ioss_NumScanKeys,
								   node->ioss_NumOrderByKeys,
								   ScanRelIsReadOnly(&node->ss) ?
								   SO_HINT_REL_READ_ONLY : SO_NONE);

		node->ioss_ScanDesc = scandesc;

		/* Set it up for index-only scan */
		node->ioss_ScanDesc->xs_want_itup = true;
		node->ioss_VMBuffer = InvalidBuffer;

		/*
		 * If no run-time keys to calculate or they are ready, go ahead and
		 * pass the scankeys to the index AM.
		 */
		if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
			index_rescan(scandesc,
						 node->ioss_ScanKeys,
						 node->ioss_NumScanKeys,
						 node->ioss_OrderByKeys,
						 node->ioss_NumOrderByKeys);
	}

	/*
	 * OK, now that we have what we need, fetch the next tuple.
	 */
	while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
	{
		bool		tuple_from_heap = false;

		CHECK_FOR_INTERRUPTS();

		/*
		 * We can skip the heap fetch if the TID references a heap page on
		 * which all tuples are known visible to everybody.  In any case,
		 * we'll use the index tuple not the heap tuple as the data source.
		 *
		 * Note on Memory Ordering Effects: visibilitymap_get_status does not
		 * lock the visibility map buffer, and therefore the result we read
		 * here could be slightly stale.  However, it can't be stale enough to
		 * matter.
		 *
		 * We need to detect clearing a VM bit due to an insert right away,
		 * because the tuple is present in the index page but not visible. The
		 * reading of the TID by this scan (using a shared lock on the index
		 * buffer) is serialized with the insert of the TID into the index
		 * (using an exclusive lock on the index buffer). Because the VM bit
		 * is cleared before updating the index, and locking/unlocking of the
		 * index page acts as a full memory barrier, we are sure to see the
		 * cleared bit if we see a recently-inserted TID.
		 *
		 * Deletes do not update the index page (only VACUUM will clear out
		 * the TID), so the clearing of the VM bit by a delete is not
		 * serialized with this test below, and we may see a value that is
		 * significantly stale. However, we don't care about the delete right
		 * away, because the tuple is still visible until the deleting
		 * transaction commits or the statement ends (if it's our
		 * transaction). In either case, the lock on the VM buffer will have
		 * been released (acting as a write barrier) after clearing the bit.
		 * And for us to have a snapshot that includes the deleting
		 * transaction (making the tuple invisible), we must have acquired
		 * ProcArrayLock after that time, acting as a read barrier.
		 *
		 * It's worth going through this complexity to avoid needing to lock
		 * the VM buffer, which could cause significant contention.
		 */
		if (!VM_ALL_VISIBLE(scandesc->heapRelation,
							ItemPointerGetBlockNumber(tid),
							&node->ioss_VMBuffer))
		{
			/*
			 * Rats, we have to visit the heap to check visibility.
			 */
			InstrCountTuples2(node, 1);
			if (!index_fetch_heap(scandesc, node->ioss_TableSlot))
				continue;		/* no visible tuple, try next index entry */

			/* We only needed the visibility check, not the tuple itself */
			ExecClearTuple(node->ioss_TableSlot);

			/*
			 * Only MVCC snapshots are supported here, so there should be no
			 * need to keep following the HOT chain once a visible entry has
			 * been found.  If we did want to allow that, we'd need to keep
			 * more state to remember not to call index_getnext_tid next time.
			 */
			if (scandesc->xs_heap_continue)
				elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");

			/*
			 * Note: at this point we are holding a pin on the heap page, as
			 * recorded in scandesc->xs_cbuf.  We could release that pin now,
			 * but it's not clear whether it's a win to do so.  The next index
			 * entry might require a visit to the same heap page.
			 */

			tuple_from_heap = true;
		}

		/*
		 * Fill the scan tuple slot with data from the index.  This might be
		 * provided in either HeapTuple or IndexTuple format.  Conceivably an
		 * index AM might fill both fields, in which case we prefer the heap
		 * format, since it's probably a bit cheaper to fill a slot from.
		 */
		if (scandesc->xs_hitup)
		{
			/*
			 * We don't take the trouble to verify that the provided tuple has
			 * exactly the slot's format, but it seems worth doing a quick
			 * check on the number of fields.
			 */
			Assert(slot->tts_tupleDescriptor->natts ==
				   scandesc->xs_hitupdesc->natts);
			ExecForceStoreHeapTuple(scandesc->xs_hitup, slot, false);
		}
		else if (scandesc->xs_itup)
			StoreIndexTuple(node, slot, scandesc->xs_itup, scandesc->xs_itupdesc);
		else
			elog(ERROR, "no data returned for index-only scan");

		/*
		 * If the index was lossy, we have to recheck the index quals.
		 */
		if (scandesc->xs_recheck)
		{
			econtext->ecxt_scantuple = slot;
			if (!ExecQualAndReset(node->recheckqual, econtext))
			{
				/* Fails recheck, so drop it and loop back for another */
				InstrCountFiltered2(node, 1);
				continue;
			}
		}

		/*
		 * We don't currently support rechecking ORDER BY distances.  (In
		 * principle, if the index can support retrieval of the originally
		 * indexed value, it should be able to produce an exact distance
		 * calculation too.  So it's not clear that adding code here for
		 * recheck/re-sort would be worth the trouble.  But we should at least
		 * throw an error if someone tries it.)
		 */
		if (scandesc->numberOfOrderBys > 0 && scandesc->xs_recheckorderby)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("lossy distance functions are not supported in index-only scans")));

		/*
		 * If we didn't access the heap, then we'll need to take a predicate
		 * lock explicitly, as if we had.  For now we do that at page level.
		 */
		if (!tuple_from_heap)
			PredicateLockPage(scandesc->heapRelation,
							  ItemPointerGetBlockNumber(tid),
							  estate->es_snapshot);

		return slot;
	}

	/*
	 * if we get here it means the index scan failed so we are at the end of
	 * the scan..
	 */
	return ExecClearTuple(slot);
}
263 :
264 : /*
265 : * StoreIndexTuple
266 : * Fill the slot with data from the index tuple.
267 : *
268 : * At some point this might be generally-useful functionality, but
269 : * right now we don't need it elsewhere.
270 : */
271 : static void
272 2702575 : StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
273 : IndexTuple itup, TupleDesc itupdesc)
274 : {
275 : /*
276 : * Note: we must use the tupdesc supplied by the AM in index_deform_tuple,
277 : * not the slot's tupdesc, in case the latter has different datatypes
278 : * (this happens for btree name_ops in particular). They'd better have
279 : * the same number of columns though, as well as being datatype-compatible
280 : * which is something we can't so easily check.
281 : */
282 : Assert(slot->tts_tupleDescriptor->natts == itupdesc->natts);
283 :
284 2702575 : ExecClearTuple(slot);
285 2702575 : index_deform_tuple(itup, itupdesc, slot->tts_values, slot->tts_isnull);
286 :
287 : /*
288 : * Copy all name columns stored as cstrings back into a NAMEDATALEN byte
289 : * sized allocation. We mark this branch as unlikely as generally "name"
290 : * is used only for the system catalogs and this would have to be a user
291 : * query running on those or some other user table with an index on a name
292 : * column.
293 : */
294 2702575 : if (unlikely(node->ioss_NameCStringAttNums != NULL))
295 : {
296 1961 : int attcount = node->ioss_NameCStringCount;
297 :
298 3922 : for (int idx = 0; idx < attcount; idx++)
299 : {
300 1961 : int attnum = node->ioss_NameCStringAttNums[idx];
301 : Name name;
302 :
303 : /* skip null Datums */
304 1961 : if (slot->tts_isnull[attnum])
305 0 : continue;
306 :
307 : /* allocate the NAMEDATALEN and copy the datum into that memory */
308 1961 : name = (Name) MemoryContextAlloc(node->ss.ps.ps_ExprContext->ecxt_per_tuple_memory,
309 : NAMEDATALEN);
310 :
311 : /* use namestrcpy to zero-pad all trailing bytes */
312 1961 : namestrcpy(name, DatumGetCString(slot->tts_values[attnum]));
313 1961 : slot->tts_values[attnum] = NameGetDatum(name);
314 : }
315 : }
316 :
317 2702575 : ExecStoreVirtualTuple(slot);
318 2702575 : }
319 :
/*
 * IndexOnlyRecheck -- access method routine to recheck a tuple in EvalPlanQual
 *
 * This can't really happen, since an index can't supply CTID which would
 * be necessary data for any potential EvalPlanQual target relation.  If it
 * did happen, the EPQ code would pass us the wrong data, namely a heap
 * tuple not an index tuple.  So throw an error.
 */
static bool
IndexOnlyRecheck(IndexOnlyScanState *node, TupleTableSlot *slot)
{
	elog(ERROR, "EvalPlanQual recheck is not supported in index-only scans");
	return false;				/* keep compiler quiet */
}
334 :
335 : /* ----------------------------------------------------------------
336 : * ExecIndexOnlyScan(node)
337 : * ----------------------------------------------------------------
338 : */
339 : static TupleTableSlot *
340 3604501 : ExecIndexOnlyScan(PlanState *pstate)
341 : {
342 3604501 : IndexOnlyScanState *node = castNode(IndexOnlyScanState, pstate);
343 :
344 : /*
345 : * If we have runtime keys and they've not already been set up, do it now.
346 : */
347 3604501 : if (node->ioss_NumRuntimeKeys != 0 && !node->ioss_RuntimeKeysReady)
348 372 : ExecReScan((PlanState *) node);
349 :
350 3604501 : return ExecScan(&node->ss,
351 : (ExecScanAccessMtd) IndexOnlyNext,
352 : (ExecScanRecheckMtd) IndexOnlyRecheck);
353 : }
354 :
355 : /* ----------------------------------------------------------------
356 : * ExecReScanIndexOnlyScan(node)
357 : *
358 : * Recalculates the values of any scan keys whose value depends on
359 : * information known at runtime, then rescans the indexed relation.
360 : *
361 : * Updating the scan key was formerly done separately in
362 : * ExecUpdateIndexScanKeys. Integrating it into ReScan makes
363 : * rescans of indices and relations/general streams more uniform.
364 : * ----------------------------------------------------------------
365 : */
366 : void
367 151278 : ExecReScanIndexOnlyScan(IndexOnlyScanState *node)
368 : {
369 : /*
370 : * If we are doing runtime key calculations (ie, any of the index key
371 : * values weren't simple Consts), compute the new key values. But first,
372 : * reset the context so we don't leak memory as each outer tuple is
373 : * scanned. Note this assumes that we will recalculate *all* runtime keys
374 : * on each call.
375 : */
376 151278 : if (node->ioss_NumRuntimeKeys != 0)
377 : {
378 151145 : ExprContext *econtext = node->ioss_RuntimeContext;
379 :
380 151145 : ResetExprContext(econtext);
381 151145 : ExecIndexEvalRuntimeKeys(econtext,
382 : node->ioss_RuntimeKeys,
383 : node->ioss_NumRuntimeKeys);
384 : }
385 151278 : node->ioss_RuntimeKeysReady = true;
386 :
387 : /* reset index scan */
388 151278 : if (node->ioss_ScanDesc)
389 149773 : index_rescan(node->ioss_ScanDesc,
390 : node->ioss_ScanKeys, node->ioss_NumScanKeys,
391 : node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
392 :
393 151278 : ExecScanReScan(&node->ss);
394 151278 : }
395 :
396 :
/* ----------------------------------------------------------------
 *		ExecEndIndexOnlyScan
 *
 * Shut down the scan: release the visibility-map buffer pin, flush any
 * per-worker instrumentation back to shared memory, and close the index
 * scan and index relation.
 * ----------------------------------------------------------------
 */
void
ExecEndIndexOnlyScan(IndexOnlyScanState *node)
{
	Relation	indexRelationDesc;
	IndexScanDesc indexScanDesc;

	/*
	 * extract information from the node
	 */
	indexRelationDesc = node->ioss_RelationDesc;
	indexScanDesc = node->ioss_ScanDesc;

	/* Release VM buffer pin, if any. */
	if (node->ioss_VMBuffer != InvalidBuffer)
	{
		ReleaseBuffer(node->ioss_VMBuffer);
		node->ioss_VMBuffer = InvalidBuffer;
	}

	/*
	 * When ending a parallel worker, copy the statistics gathered by the
	 * worker back into shared memory so that it can be picked up by the main
	 * process to report in EXPLAIN ANALYZE
	 */
	if (node->ioss_SharedInfo != NULL && IsParallelWorker())
	{
		IndexScanInstrumentation *winstrument;

		Assert(ParallelWorkerNumber < node->ioss_SharedInfo->num_workers);
		winstrument = &node->ioss_SharedInfo->winstrument[ParallelWorkerNumber];

		/*
		 * We have to accumulate the stats rather than performing a memcpy.
		 * When a Gather/GatherMerge node finishes it will perform planner
		 * shutdown on the workers.  On rescan it will spin up new workers
		 * which will have a new IndexOnlyScanState and zeroed stats.
		 */
		winstrument->nsearches += node->ioss_Instrument->nsearches;
	}

	/*
	 * close the index relation (no-op if we didn't open it)
	 */
	if (indexScanDesc)
		index_endscan(indexScanDesc);
	if (indexRelationDesc)
		index_close(indexRelationDesc, NoLock);
}
449 :
/* ----------------------------------------------------------------
 *		ExecIndexOnlyMarkPos
 *
 * Note: we assume that no caller attempts to set a mark before having read
 * at least one tuple.  Otherwise, ioss_ScanDesc might still be NULL.
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyMarkPos(IndexOnlyScanState *node)
{
	EState	   *estate = node->ss.ps.state;
	EPQState   *epqstate = estate->es_epq_active;

	if (epqstate != NULL)
	{
		/*
		 * We are inside an EvalPlanQual recheck.  If a test tuple exists for
		 * this relation, then we shouldn't access the index at all.  We would
		 * instead need to save, and later restore, the state of the
		 * relsubs_done flag, so that re-fetching the test tuple is possible.
		 * However, given the assumption that no caller sets a mark at the
		 * start of the scan, we can only get here with relsubs_done[i]
		 * already set, and so no state need be saved.
		 */
		Index		scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;

		Assert(scanrelid > 0);
		if (epqstate->relsubs_slot[scanrelid - 1] != NULL ||
			epqstate->relsubs_rowmark[scanrelid - 1] != NULL)
		{
			/* Verify the claim above */
			if (!epqstate->relsubs_done[scanrelid - 1])
				elog(ERROR, "unexpected ExecIndexOnlyMarkPos call in EPQ recheck");
			return;
		}
	}

	index_markpos(node->ioss_ScanDesc);
}
489 :
490 : /* ----------------------------------------------------------------
491 : * ExecIndexOnlyRestrPos
492 : * ----------------------------------------------------------------
493 : */
494 : void
495 0 : ExecIndexOnlyRestrPos(IndexOnlyScanState *node)
496 : {
497 0 : EState *estate = node->ss.ps.state;
498 0 : EPQState *epqstate = estate->es_epq_active;
499 :
500 0 : if (estate->es_epq_active != NULL)
501 : {
502 : /* See comments in ExecIndexMarkPos */
503 0 : Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
504 :
505 : Assert(scanrelid > 0);
506 0 : if (epqstate->relsubs_slot[scanrelid - 1] != NULL ||
507 0 : epqstate->relsubs_rowmark[scanrelid - 1] != NULL)
508 : {
509 : /* Verify the claim above */
510 0 : if (!epqstate->relsubs_done[scanrelid - 1])
511 0 : elog(ERROR, "unexpected ExecIndexOnlyRestrPos call in EPQ recheck");
512 0 : return;
513 : }
514 : }
515 :
516 0 : index_restrpos(node->ioss_ScanDesc);
517 : }
518 :
/* ----------------------------------------------------------------
 *		ExecInitIndexOnlyScan
 *
 *		Initializes the index scan's state information, creates
 *		scan keys, and opens the base and index relations.
 *
 *		Note: index scans have 2 sets of state information because
 *			  we have to keep track of the base relation and the
 *			  index relation.
 * ----------------------------------------------------------------
 */
IndexOnlyScanState *
ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
{
	IndexOnlyScanState *indexstate;
	Relation	currentRelation;
	Relation	indexRelation;
	LOCKMODE	lockmode;
	TupleDesc	tupDesc;
	int			indnkeyatts;
	int			namecount;

	/*
	 * create state structure
	 */
	indexstate = makeNode(IndexOnlyScanState);
	indexstate->ss.ps.plan = (Plan *) node;
	indexstate->ss.ps.state = estate;
	indexstate->ss.ps.ExecProcNode = ExecIndexOnlyScan;

	/*
	 * Miscellaneous initialization
	 *
	 * create expression context for node
	 */
	ExecAssignExprContext(estate, &indexstate->ss.ps);

	/*
	 * open the scan relation
	 */
	currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);

	indexstate->ss.ss_currentRelation = currentRelation;
	indexstate->ss.ss_currentScanDesc = NULL;	/* no heap scan here */

	/*
	 * Build the scan tuple type using the indextlist generated by the
	 * planner.  We use this, rather than the index's physical tuple
	 * descriptor, because the latter contains storage column types not the
	 * types of the original datums.  (It's the AM's responsibility to return
	 * suitable data anyway.)
	 */
	tupDesc = ExecTypeFromTL(node->indextlist);
	ExecInitScanTupleSlot(estate, &indexstate->ss, tupDesc,
						  &TTSOpsVirtual,
						  0);

	/*
	 * We need another slot, in a format that's suitable for the table AM, for
	 * when we need to fetch a tuple from the table for rechecking visibility.
	 */
	indexstate->ioss_TableSlot =
		ExecAllocTableSlot(&estate->es_tupleTable,
						   RelationGetDescr(currentRelation),
						   table_slot_callbacks(currentRelation), 0);

	/*
	 * Initialize result type and projection info.  The node's targetlist will
	 * contain Vars with varno = INDEX_VAR, referencing the scan tuple.
	 */
	ExecInitResultTypeTL(&indexstate->ss.ps);
	ExecAssignScanProjectionInfoWithVarno(&indexstate->ss, INDEX_VAR);

	/*
	 * initialize child expressions
	 *
	 * Note: we don't initialize all of the indexorderby expression, only the
	 * sub-parts corresponding to runtime keys (see below).
	 */
	indexstate->ss.ps.qual =
		ExecInitQual(node->scan.plan.qual, (PlanState *) indexstate);
	indexstate->recheckqual =
		ExecInitQual(node->recheckqual, (PlanState *) indexstate);

	/*
	 * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
	 * here.  This allows an index-advisor plugin to EXPLAIN a plan containing
	 * references to nonexistent indexes.
	 */
	if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
		return indexstate;

	/* Set up instrumentation of index-only scans if requested */
	if (estate->es_instrument)
		indexstate->ioss_Instrument = palloc0_object(IndexScanInstrumentation);

	/* Open the index relation. */
	lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
	indexRelation = index_open(node->indexid, lockmode);
	indexstate->ioss_RelationDesc = indexRelation;

	/*
	 * Initialize index-specific scan state
	 */
	indexstate->ioss_RuntimeKeysReady = false;
	indexstate->ioss_RuntimeKeys = NULL;
	indexstate->ioss_NumRuntimeKeys = 0;

	/*
	 * build the index scan keys from the index qualification
	 */
	ExecIndexBuildScanKeys((PlanState *) indexstate,
						   indexRelation,
						   node->indexqual,
						   false,
						   &indexstate->ioss_ScanKeys,
						   &indexstate->ioss_NumScanKeys,
						   &indexstate->ioss_RuntimeKeys,
						   &indexstate->ioss_NumRuntimeKeys,
						   NULL,	/* no ArrayKeys */
						   NULL);

	/*
	 * any ORDER BY exprs have to be turned into scankeys in the same way
	 */
	ExecIndexBuildScanKeys((PlanState *) indexstate,
						   indexRelation,
						   node->indexorderby,
						   true,
						   &indexstate->ioss_OrderByKeys,
						   &indexstate->ioss_NumOrderByKeys,
						   &indexstate->ioss_RuntimeKeys,
						   &indexstate->ioss_NumRuntimeKeys,
						   NULL,	/* no ArrayKeys */
						   NULL);

	/*
	 * If we have runtime keys, we need an ExprContext to evaluate them.  The
	 * node's standard context won't do because we want to reset that context
	 * for every tuple.  So, build another context just like the other one...
	 * -tgl 7/11/00
	 */
	if (indexstate->ioss_NumRuntimeKeys != 0)
	{
		ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;

		ExecAssignExprContext(estate, &indexstate->ss.ps);
		indexstate->ioss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
		indexstate->ss.ps.ps_ExprContext = stdecontext;
	}
	else
	{
		indexstate->ioss_RuntimeContext = NULL;
	}

	indexstate->ioss_NameCStringAttNums = NULL;
	indnkeyatts = indexRelation->rd_index->indnkeyatts;
	namecount = 0;

	/*
	 * The "name" type for btree uses text_ops which results in storing
	 * cstrings in the indexed keys rather than names.  Here we detect that in
	 * a generic way in case other index AMs want to do the same optimization.
	 * Check for opclasses with an opcintype of NAMEOID and an index tuple
	 * descriptor with CSTRINGOID.  If any of these are found, create an array
	 * marking the index attribute number of each of them.  StoreIndexTuple()
	 * handles copying the name Datums into a NAMEDATALEN-byte allocation.
	 */

	/* First, count the number of such index keys */
	for (int attnum = 0; attnum < indnkeyatts; attnum++)
	{
		if (TupleDescAttr(indexRelation->rd_att, attnum)->atttypid == CSTRINGOID &&
			indexRelation->rd_opcintype[attnum] == NAMEOID)
			namecount++;
	}

	if (namecount > 0)
	{
		int			idx = 0;

		/*
		 * Now create an array to mark the attribute numbers of the keys that
		 * need to be converted from cstring to name.
		 */
		indexstate->ioss_NameCStringAttNums = palloc_array(AttrNumber, namecount);

		for (int attnum = 0; attnum < indnkeyatts; attnum++)
		{
			if (TupleDescAttr(indexRelation->rd_att, attnum)->atttypid == CSTRINGOID &&
				indexRelation->rd_opcintype[attnum] == NAMEOID)
				indexstate->ioss_NameCStringAttNums[idx++] = (AttrNumber) attnum;
		}
	}

	indexstate->ioss_NameCStringCount = namecount;

	/*
	 * all done.
	 */
	return indexstate;
}
721 :
722 : /* ----------------------------------------------------------------
723 : * Parallel Index-only Scan Support
724 : * ----------------------------------------------------------------
725 : */
726 :
727 : /* ----------------------------------------------------------------
728 : * ExecIndexOnlyScanEstimate
729 : *
730 : * Compute the amount of space we'll need in the parallel
731 : * query DSM, and inform pcxt->estimator about our needs.
732 : * ----------------------------------------------------------------
733 : */
734 : void
735 30 : ExecIndexOnlyScanEstimate(IndexOnlyScanState *node,
736 : ParallelContext *pcxt)
737 : {
738 30 : EState *estate = node->ss.ps.state;
739 :
740 30 : node->ioss_PscanLen = index_parallelscan_estimate(node->ioss_RelationDesc,
741 : node->ioss_NumScanKeys,
742 : node->ioss_NumOrderByKeys,
743 : estate->es_snapshot);
744 30 : shm_toc_estimate_chunk(&pcxt->estimator, node->ioss_PscanLen);
745 30 : shm_toc_estimate_keys(&pcxt->estimator, 1);
746 30 : }
747 :
/* ----------------------------------------------------------------
 *		ExecIndexOnlyScanInitializeDSM
 *
 *		Set up a parallel index-only scan descriptor.
 *
 * Runs in the leader: allocates the parallel scan descriptor in the DSM,
 * publishes it under this plan node's ID, and begins the leader's own
 * participation in the scan.
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node,
							   ParallelContext *pcxt)
{
	EState	   *estate = node->ss.ps.state;
	ParallelIndexScanDesc piscan;

	piscan = shm_toc_allocate(pcxt->toc, node->ioss_PscanLen);
	index_parallelscan_initialize(node->ss.ss_currentRelation,
								  node->ioss_RelationDesc,
								  estate->es_snapshot,
								  piscan);
	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan);

	node->ioss_ScanDesc =
		index_beginscan_parallel(node->ss.ss_currentRelation,
								 node->ioss_RelationDesc,
								 node->ioss_Instrument,
								 node->ioss_NumScanKeys,
								 node->ioss_NumOrderByKeys,
								 piscan,
								 ScanRelIsReadOnly(&node->ss) ?
								 SO_HINT_REL_READ_ONLY : SO_NONE);
	/* request index tuples, as in the non-parallel case */
	node->ioss_ScanDesc->xs_want_itup = true;
	node->ioss_VMBuffer = InvalidBuffer;

	/*
	 * If no run-time keys to calculate or they are ready, go ahead and pass
	 * the scankeys to the index AM.
	 */
	if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
		index_rescan(node->ioss_ScanDesc,
					 node->ioss_ScanKeys, node->ioss_NumScanKeys,
					 node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
}
789 :
/* ----------------------------------------------------------------
 *		ExecIndexOnlyScanReInitializeDSM
 *
 *		Reset shared state before beginning a fresh scan.
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyScanReInitializeDSM(IndexOnlyScanState *node,
								 ParallelContext *pcxt)
{
	/* only parallel-aware nodes keep shared scan state to reset */
	Assert(node->ss.ps.plan->parallel_aware);
	index_parallelrescan(node->ioss_ScanDesc);
}
803 :
/* ----------------------------------------------------------------
 *		ExecIndexOnlyScanInitializeWorker
 *
 *		Copy relevant information from TOC into planstate.
 *
 * Runs in each parallel worker: looks up the leader-published parallel
 * scan descriptor and joins the scan with it.
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node,
								  ParallelWorkerContext *pwcxt)
{
	ParallelIndexScanDesc piscan;

	piscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);

	node->ioss_ScanDesc =
		index_beginscan_parallel(node->ss.ss_currentRelation,
								 node->ioss_RelationDesc,
								 node->ioss_Instrument,
								 node->ioss_NumScanKeys,
								 node->ioss_NumOrderByKeys,
								 piscan,
								 ScanRelIsReadOnly(&node->ss) ?
								 SO_HINT_REL_READ_ONLY : SO_NONE);
	/* request index tuples, as in the non-parallel case */
	node->ioss_ScanDesc->xs_want_itup = true;

	/*
	 * If no run-time keys to calculate or they are ready, go ahead and pass
	 * the scankeys to the index AM.
	 */
	if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
		index_rescan(node->ioss_ScanDesc,
					 node->ioss_ScanKeys, node->ioss_NumScanKeys,
					 node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
}
838 :
839 : /*
840 : * Compute the amount of space we'll need for the shared instrumentation and
841 : * inform pcxt->estimator.
842 : */
843 : void
844 42 : ExecIndexOnlyScanInstrumentEstimate(IndexOnlyScanState *node,
845 : ParallelContext *pcxt)
846 : {
847 : Size size;
848 :
849 42 : if (!node->ss.ps.instrument || pcxt->nworkers == 0)
850 42 : return;
851 :
852 : /*
853 : * This size calculation is trivial enough that we don't bother saving it
854 : * in the IndexOnlyScanState. We'll recalculate the needed size in
855 : * ExecIndexOnlyScanInstrumentInitDSM().
856 : */
857 0 : size = add_size(offsetof(SharedIndexScanInstrumentation, winstrument),
858 0 : mul_size(pcxt->nworkers, sizeof(IndexScanInstrumentation)));
859 0 : shm_toc_estimate_chunk(&pcxt->estimator, size);
860 0 : shm_toc_estimate_keys(&pcxt->estimator, 1);
861 : }
862 :
/*
 * Set up parallel index-only scan instrumentation.
 *
 * Runs in the leader: allocates and zeroes the shared instrumentation
 * area in the DSM and publishes it for the workers to find.
 */
void
ExecIndexOnlyScanInstrumentInitDSM(IndexOnlyScanState *node,
								   ParallelContext *pcxt)
{
	Size		size;

	/* must match the guard in ExecIndexOnlyScanInstrumentEstimate */
	if (!node->ss.ps.instrument || pcxt->nworkers == 0)
		return;

	size = add_size(offsetof(SharedIndexScanInstrumentation, winstrument),
					mul_size(pcxt->nworkers, sizeof(IndexScanInstrumentation)));
	node->ioss_SharedInfo =
		(SharedIndexScanInstrumentation *) shm_toc_allocate(pcxt->toc, size);

	/* Each per-worker area must start out as zeroes */
	memset(node->ioss_SharedInfo, 0, size);
	node->ioss_SharedInfo->num_workers = pcxt->nworkers;
	shm_toc_insert(pcxt->toc,
				   node->ss.ps.plan->plan_node_id +
				   PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET,
				   node->ioss_SharedInfo);
}
888 :
889 : /*
890 : * Look up and save the location of the shared instrumentation.
891 : */
892 : void
893 168 : ExecIndexOnlyScanInstrumentInitWorker(IndexOnlyScanState *node,
894 : ParallelWorkerContext *pwcxt)
895 : {
896 168 : if (!node->ss.ps.instrument)
897 168 : return;
898 :
899 0 : node->ioss_SharedInfo = (SharedIndexScanInstrumentation *)
900 0 : shm_toc_lookup(pwcxt->toc,
901 0 : node->ss.ps.plan->plan_node_id +
902 : PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET,
903 : false);
904 : }
905 :
906 : /* ----------------------------------------------------------------
907 : * ExecIndexOnlyScanRetrieveInstrumentation
908 : *
909 : * Transfer index-only scan statistics from DSM to private memory.
910 : * ----------------------------------------------------------------
911 : */
912 : void
913 0 : ExecIndexOnlyScanRetrieveInstrumentation(IndexOnlyScanState *node)
914 : {
915 0 : SharedIndexScanInstrumentation *SharedInfo = node->ioss_SharedInfo;
916 : size_t size;
917 :
918 0 : if (SharedInfo == NULL)
919 0 : return;
920 :
921 : /* Create a copy of SharedInfo in backend-local memory */
922 0 : size = offsetof(SharedIndexScanInstrumentation, winstrument) +
923 0 : SharedInfo->num_workers * sizeof(IndexScanInstrumentation);
924 0 : node->ioss_SharedInfo = palloc(size);
925 0 : memcpy(node->ioss_SharedInfo, SharedInfo, size);
926 : }
|