Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeTidrangescan.c
4 : * Routines to support TID range scans of relations
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/executor/nodeTidrangescan.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/relscan.h"
18 : #include "access/sysattr.h"
19 : #include "access/tableam.h"
20 : #include "catalog/pg_operator.h"
21 : #include "executor/executor.h"
22 : #include "executor/nodeTidrangescan.h"
23 : #include "nodes/nodeFuncs.h"
24 : #include "utils/rel.h"
25 :
26 :
/*
 * IsCTIDVar
 *		True if 'node' is a non-NULL Var whose attribute number is the CTID
 *		system column.
 *
 * Looking at varattno alone is enough: every Var appearing in this scan's
 * quals must belong to the scanned table.  (In a parameterized scan that
 * references another table's CTID, that other Var would already have been
 * replaced by a Param before reaching the executor.)
 */
#define IsCTIDVar(node) \
	((node) != NULL && IsA((node), Var) && \
	 ((Var *) (node))->varattno == SelfItemPointerAttributeNumber)
37 :
/* Which end of the TID range a TidOpExpr constrains */
typedef enum TidExprType
{
	TIDEXPR_UPPER_BOUND,		/* qual limits the highest TID to return */
	TIDEXPR_LOWER_BOUND,		/* qual limits the lowest TID to return */
} TidExprType;
43 :
44 : /* Upper or lower range bound for scan */
45 : typedef struct TidOpExpr
46 : {
47 : TidExprType exprtype; /* type of op; lower or upper */
48 : ExprState *exprstate; /* ExprState for a TID-yielding subexpr */
49 : bool inclusive; /* whether op is inclusive */
50 : } TidOpExpr;
51 :
52 : /*
53 : * For the given 'expr', build and return an appropriate TidOpExpr taking into
54 : * account the expr's operator and operand order.
55 : */
56 : static TidOpExpr *
57 1087 : MakeTidOpExpr(OpExpr *expr, TidRangeScanState *tidstate)
58 : {
59 1087 : Node *arg1 = get_leftop((Expr *) expr);
60 1087 : Node *arg2 = get_rightop((Expr *) expr);
61 1087 : ExprState *exprstate = NULL;
62 1087 : bool invert = false;
63 : TidOpExpr *tidopexpr;
64 :
65 1087 : if (IsCTIDVar(arg1))
66 1069 : exprstate = ExecInitExpr((Expr *) arg2, &tidstate->ss.ps);
67 18 : else if (IsCTIDVar(arg2))
68 : {
69 18 : exprstate = ExecInitExpr((Expr *) arg1, &tidstate->ss.ps);
70 18 : invert = true;
71 : }
72 : else
73 0 : elog(ERROR, "could not identify CTID variable");
74 :
75 1087 : tidopexpr = palloc_object(TidOpExpr);
76 1087 : tidopexpr->inclusive = false; /* for now */
77 :
78 1087 : switch (expr->opno)
79 : {
80 21 : case TIDLessEqOperator:
81 21 : tidopexpr->inclusive = true;
82 : pg_fallthrough;
83 122 : case TIDLessOperator:
84 122 : tidopexpr->exprtype = invert ? TIDEXPR_LOWER_BOUND : TIDEXPR_UPPER_BOUND;
85 122 : break;
86 897 : case TIDGreaterEqOperator:
87 897 : tidopexpr->inclusive = true;
88 : pg_fallthrough;
89 965 : case TIDGreaterOperator:
90 965 : tidopexpr->exprtype = invert ? TIDEXPR_UPPER_BOUND : TIDEXPR_LOWER_BOUND;
91 965 : break;
92 0 : default:
93 0 : elog(ERROR, "could not identify CTID operator");
94 : }
95 :
96 1087 : tidopexpr->exprstate = exprstate;
97 :
98 1087 : return tidopexpr;
99 : }
100 :
101 : /*
102 : * Extract the qual subexpressions that yield TIDs to search for,
103 : * and compile them into ExprStates if they're ordinary expressions.
104 : */
105 : static void
106 1051 : TidExprListCreate(TidRangeScanState *tidrangestate)
107 : {
108 1051 : TidRangeScan *node = (TidRangeScan *) tidrangestate->ss.ps.plan;
109 1051 : List *tidexprs = NIL;
110 : ListCell *l;
111 :
112 2138 : foreach(l, node->tidrangequals)
113 : {
114 1087 : OpExpr *opexpr = lfirst(l);
115 : TidOpExpr *tidopexpr;
116 :
117 1087 : if (!IsA(opexpr, OpExpr))
118 0 : elog(ERROR, "could not identify CTID expression");
119 :
120 1087 : tidopexpr = MakeTidOpExpr(opexpr, tidrangestate);
121 1087 : tidexprs = lappend(tidexprs, tidopexpr);
122 : }
123 :
124 1051 : tidrangestate->trss_tidexprs = tidexprs;
125 1051 : }
126 :
127 : /* ----------------------------------------------------------------
128 : * TidRangeEval
129 : *
130 : * Compute and set node's block and offset range to scan by evaluating
131 : * node->trss_tidexprs. Returns false if we detect the range cannot
132 : * contain any tuples. Returns true if it's possible for the range to
133 : * contain tuples. We don't bother validating that trss_mintid is less
134 : * than or equal to trss_maxtid, as the scan_set_tidrange() table AM
135 : * function will handle that.
136 : * ----------------------------------------------------------------
137 : */
138 : static bool
139 1039 : TidRangeEval(TidRangeScanState *node)
140 : {
141 1039 : ExprContext *econtext = node->ss.ps.ps_ExprContext;
142 : ItemPointerData lowerBound;
143 : ItemPointerData upperBound;
144 : ListCell *l;
145 :
146 : /*
147 : * Set the upper and lower bounds to the absolute limits of the range of
148 : * the ItemPointer type. Below we'll try to narrow this range on either
149 : * side by looking at the TidOpExprs.
150 : */
151 1039 : ItemPointerSet(&lowerBound, 0, 0);
152 1039 : ItemPointerSet(&upperBound, InvalidBlockNumber, PG_UINT16_MAX);
153 :
154 2102 : foreach(l, node->trss_tidexprs)
155 : {
156 1066 : TidOpExpr *tidopexpr = (TidOpExpr *) lfirst(l);
157 : ItemPointer itemptr;
158 : bool isNull;
159 :
160 : /* Evaluate this bound. */
161 : itemptr = (ItemPointer)
162 1066 : DatumGetPointer(ExecEvalExprSwitchContext(tidopexpr->exprstate,
163 : econtext,
164 : &isNull));
165 :
166 : /* If the bound is NULL, *nothing* matches the qual. */
167 1066 : if (isNull)
168 3 : return false;
169 :
170 1063 : if (tidopexpr->exprtype == TIDEXPR_LOWER_BOUND)
171 : {
172 : ItemPointerData lb;
173 :
174 929 : ItemPointerCopy(itemptr, &lb);
175 :
176 : /*
177 : * Normalize non-inclusive ranges to become inclusive. The
178 : * resulting ItemPointer here may not be a valid item pointer.
179 : */
180 929 : if (!tidopexpr->inclusive)
181 53 : ItemPointerInc(&lb);
182 :
183 : /* Check if we can narrow the range using this qual */
184 929 : if (ItemPointerCompare(&lb, &lowerBound) > 0)
185 929 : ItemPointerCopy(&lb, &lowerBound);
186 : }
187 :
188 134 : else if (tidopexpr->exprtype == TIDEXPR_UPPER_BOUND)
189 : {
190 : ItemPointerData ub;
191 :
192 134 : ItemPointerCopy(itemptr, &ub);
193 :
194 : /*
195 : * Normalize non-inclusive ranges to become inclusive. The
196 : * resulting ItemPointer here may not be a valid item pointer.
197 : */
198 134 : if (!tidopexpr->inclusive)
199 77 : ItemPointerDec(&ub);
200 :
201 : /* Check if we can narrow the range using this qual */
202 134 : if (ItemPointerCompare(&ub, &upperBound) < 0)
203 134 : ItemPointerCopy(&ub, &upperBound);
204 : }
205 : }
206 :
207 1036 : ItemPointerCopy(&lowerBound, &node->trss_mintid);
208 1036 : ItemPointerCopy(&upperBound, &node->trss_maxtid);
209 :
210 1036 : return true;
211 : }
212 :
213 : /* ----------------------------------------------------------------
214 : * TidRangeNext
215 : *
216 : * Retrieve a tuple from the TidRangeScan node's currentRelation
217 : * using the TIDs in the TidRangeScanState information.
218 : *
219 : * ----------------------------------------------------------------
220 : */
221 : static TupleTableSlot *
222 5639 : TidRangeNext(TidRangeScanState *node)
223 : {
224 : TableScanDesc scandesc;
225 : EState *estate;
226 : ScanDirection direction;
227 : TupleTableSlot *slot;
228 :
229 : /*
230 : * extract necessary information from TID scan node
231 : */
232 5639 : scandesc = node->ss.ss_currentScanDesc;
233 5639 : estate = node->ss.ps.state;
234 5639 : slot = node->ss.ss_ScanTupleSlot;
235 5639 : direction = estate->es_direction;
236 :
237 5639 : if (!node->trss_inScan)
238 : {
239 : /* First time through, compute TID range to scan */
240 1038 : if (!TidRangeEval(node))
241 3 : return NULL;
242 :
243 1035 : if (scandesc == NULL)
244 : {
245 930 : scandesc = table_beginscan_tidrange(node->ss.ss_currentRelation,
246 : estate->es_snapshot,
247 : &node->trss_mintid,
248 : &node->trss_maxtid);
249 930 : node->ss.ss_currentScanDesc = scandesc;
250 : }
251 : else
252 : {
253 : /* rescan with the updated TID range */
254 105 : table_rescan_tidrange(scandesc, &node->trss_mintid,
255 : &node->trss_maxtid);
256 : }
257 :
258 1035 : node->trss_inScan = true;
259 : }
260 :
261 : /* Fetch the next tuple. */
262 5636 : if (!table_scan_getnextslot_tidrange(scandesc, direction, slot))
263 : {
264 160 : node->trss_inScan = false;
265 160 : ExecClearTuple(slot);
266 : }
267 :
268 5628 : return slot;
269 : }
270 :
271 : /*
272 : * TidRangeRecheck -- access method routine to recheck a tuple in EvalPlanQual
273 : */
274 : static bool
275 1 : TidRangeRecheck(TidRangeScanState *node, TupleTableSlot *slot)
276 : {
277 1 : if (!TidRangeEval(node))
278 0 : return false;
279 :
280 : Assert(ItemPointerIsValid(&slot->tts_tid));
281 :
282 : /* Recheck the ctid is still within range */
283 2 : if (ItemPointerCompare(&slot->tts_tid, &node->trss_mintid) < 0 ||
284 1 : ItemPointerCompare(&slot->tts_tid, &node->trss_maxtid) > 0)
285 1 : return false;
286 :
287 0 : return true;
288 : }
289 :
290 : /* ----------------------------------------------------------------
291 : * ExecTidRangeScan(node)
292 : *
293 : * Scans the relation using tids and returns the next qualifying tuple.
294 : * We call the ExecScan() routine and pass it the appropriate
295 : * access method functions.
296 : *
297 : * Conditions:
298 : * -- the "cursor" maintained by the AMI is positioned at the tuple
299 : * returned previously.
300 : *
301 : * Initial States:
302 : * -- the relation indicated is opened for TID range scanning.
303 : * ----------------------------------------------------------------
304 : */
305 : static TupleTableSlot *
306 5640 : ExecTidRangeScan(PlanState *pstate)
307 : {
308 5640 : TidRangeScanState *node = castNode(TidRangeScanState, pstate);
309 :
310 5640 : return ExecScan(&node->ss,
311 : (ExecScanAccessMtd) TidRangeNext,
312 : (ExecScanRecheckMtd) TidRangeRecheck);
313 : }
314 :
315 : /* ----------------------------------------------------------------
316 : * ExecReScanTidRangeScan(node)
317 : * ----------------------------------------------------------------
318 : */
319 : void
320 48 : ExecReScanTidRangeScan(TidRangeScanState *node)
321 : {
322 : /* mark scan as not in progress, and tid range list as not computed yet */
323 48 : node->trss_inScan = false;
324 :
325 : /*
326 : * We must wait until TidRangeNext before calling table_rescan_tidrange.
327 : */
328 48 : ExecScanReScan(&node->ss);
329 48 : }
330 :
331 : /* ----------------------------------------------------------------
332 : * ExecEndTidRangeScan
333 : *
334 : * Releases any storage allocated through C routines.
335 : * Returns nothing.
336 : * ----------------------------------------------------------------
337 : */
338 : void
339 182 : ExecEndTidRangeScan(TidRangeScanState *node)
340 : {
341 182 : TableScanDesc scan = node->ss.ss_currentScanDesc;
342 :
343 182 : if (scan != NULL)
344 121 : table_endscan(scan);
345 182 : }
346 :
347 : /* ----------------------------------------------------------------
348 : * ExecInitTidRangeScan
349 : *
350 : * Initializes the tid range scan's state information, creates
351 : * scan keys, and opens the scan relation.
352 : *
353 : * Parameters:
354 : * node: TidRangeScan node produced by the planner.
355 : * estate: the execution state initialized in InitPlan.
356 : * ----------------------------------------------------------------
357 : */
358 : TidRangeScanState *
359 1051 : ExecInitTidRangeScan(TidRangeScan *node, EState *estate, int eflags)
360 : {
361 : TidRangeScanState *tidrangestate;
362 : Relation currentRelation;
363 :
364 : /*
365 : * create state structure
366 : */
367 1051 : tidrangestate = makeNode(TidRangeScanState);
368 1051 : tidrangestate->ss.ps.plan = (Plan *) node;
369 1051 : tidrangestate->ss.ps.state = estate;
370 1051 : tidrangestate->ss.ps.ExecProcNode = ExecTidRangeScan;
371 :
372 : /*
373 : * Miscellaneous initialization
374 : *
375 : * create expression context for node
376 : */
377 1051 : ExecAssignExprContext(estate, &tidrangestate->ss.ps);
378 :
379 : /*
380 : * mark scan as not in progress, and TID range as not computed yet
381 : */
382 1051 : tidrangestate->trss_inScan = false;
383 :
384 : /*
385 : * open the scan relation
386 : */
387 1051 : currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
388 :
389 1051 : tidrangestate->ss.ss_currentRelation = currentRelation;
390 1051 : tidrangestate->ss.ss_currentScanDesc = NULL; /* no table scan here */
391 :
392 : /*
393 : * get the scan type from the relation descriptor.
394 : */
395 1051 : ExecInitScanTupleSlot(estate, &tidrangestate->ss,
396 : RelationGetDescr(currentRelation),
397 : table_slot_callbacks(currentRelation));
398 :
399 : /*
400 : * Initialize result type and projection.
401 : */
402 1051 : ExecInitResultTypeTL(&tidrangestate->ss.ps);
403 1051 : ExecAssignScanProjectionInfo(&tidrangestate->ss);
404 :
405 : /*
406 : * initialize child expressions
407 : */
408 1051 : tidrangestate->ss.ps.qual =
409 1051 : ExecInitQual(node->scan.plan.qual, (PlanState *) tidrangestate);
410 :
411 1051 : TidExprListCreate(tidrangestate);
412 :
413 : /*
414 : * all done.
415 : */
416 1051 : return tidrangestate;
417 : }
418 :
419 : /* ----------------------------------------------------------------
420 : * Parallel Scan Support
421 : * ----------------------------------------------------------------
422 : */
423 :
424 : /* ----------------------------------------------------------------
425 : * ExecTidRangeScanEstimate
426 : *
427 : * Compute the amount of space we'll need in the parallel
428 : * query DSM, and inform pcxt->estimator about our needs.
429 : * ----------------------------------------------------------------
430 : */
431 : void
432 12 : ExecTidRangeScanEstimate(TidRangeScanState *node, ParallelContext *pcxt)
433 : {
434 12 : EState *estate = node->ss.ps.state;
435 :
436 12 : node->trss_pscanlen =
437 12 : table_parallelscan_estimate(node->ss.ss_currentRelation,
438 : estate->es_snapshot);
439 12 : shm_toc_estimate_chunk(&pcxt->estimator, node->trss_pscanlen);
440 12 : shm_toc_estimate_keys(&pcxt->estimator, 1);
441 12 : }
442 :
443 : /* ----------------------------------------------------------------
444 : * ExecTidRangeScanInitializeDSM
445 : *
446 : * Set up a parallel TID range scan descriptor.
447 : * ----------------------------------------------------------------
448 : */
449 : void
450 12 : ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt)
451 : {
452 12 : EState *estate = node->ss.ps.state;
453 : ParallelTableScanDesc pscan;
454 :
455 12 : pscan = shm_toc_allocate(pcxt->toc, node->trss_pscanlen);
456 12 : table_parallelscan_initialize(node->ss.ss_currentRelation,
457 : pscan,
458 : estate->es_snapshot);
459 12 : shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan);
460 12 : node->ss.ss_currentScanDesc =
461 12 : table_beginscan_parallel_tidrange(node->ss.ss_currentRelation,
462 : pscan);
463 12 : }
464 :
465 : /* ----------------------------------------------------------------
466 : * ExecTidRangeScanReInitializeDSM
467 : *
468 : * Reset shared state before beginning a fresh scan.
469 : * ----------------------------------------------------------------
470 : */
471 : void
472 0 : ExecTidRangeScanReInitializeDSM(TidRangeScanState *node,
473 : ParallelContext *pcxt)
474 : {
475 : ParallelTableScanDesc pscan;
476 :
477 0 : pscan = node->ss.ss_currentScanDesc->rs_parallel;
478 0 : table_parallelscan_reinitialize(node->ss.ss_currentRelation, pscan);
479 0 : }
480 :
481 : /* ----------------------------------------------------------------
482 : * ExecTidRangeScanInitializeWorker
483 : *
484 : * Copy relevant information from TOC into planstate.
485 : * ----------------------------------------------------------------
486 : */
487 : void
488 48 : ExecTidRangeScanInitializeWorker(TidRangeScanState *node,
489 : ParallelWorkerContext *pwcxt)
490 : {
491 : ParallelTableScanDesc pscan;
492 :
493 48 : pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
494 48 : node->ss.ss_currentScanDesc =
495 48 : table_beginscan_parallel_tidrange(node->ss.ss_currentRelation,
496 : pscan);
497 48 : }
|