Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeSeqscan.c
4 : * Support routines for sequential scans of relations.
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/executor/nodeSeqscan.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : /*
16 : * INTERFACE ROUTINES
17 : * ExecSeqScan sequentially scans a relation.
18 : * SeqNext retrieves the next tuple in sequential order.
19 : * ExecInitSeqScan creates and initializes a seqscan node.
20 : * ExecEndSeqScan releases any storage allocated.
21 : * ExecReScanSeqScan rescans the relation
22 : *
23 : * ExecSeqScanEstimate estimates DSM space needed for parallel scan
24 : * ExecSeqScanInitializeDSM initialize DSM for parallel scan
25 : * ExecSeqScanReInitializeDSM reinitialize DSM for fresh parallel scan
26 : * ExecSeqScanInitializeWorker attach to DSM info in parallel worker
27 : */
28 : #include "postgres.h"
29 :
30 : #include "access/relscan.h"
31 : #include "access/tableam.h"
32 : #include "executor/execParallel.h"
33 : #include "executor/execScan.h"
34 : #include "executor/executor.h"
35 : #include "executor/nodeSeqscan.h"
36 : #include "utils/rel.h"
37 :
38 : static TupleTableSlot *SeqNext(SeqScanState *node);
39 :
40 : /* ----------------------------------------------------------------
41 : * Scan Support
42 : * ----------------------------------------------------------------
43 : */
44 :
45 : /* ----------------------------------------------------------------
46 : * SeqNext
47 : *
48 : * This is a workhorse for ExecSeqScan
49 : * ----------------------------------------------------------------
50 : */
51 : static pg_attribute_always_inline TupleTableSlot *
52 62254514 : SeqNext(SeqScanState *node)
53 : {
54 : TableScanDesc scandesc;
55 : EState *estate;
56 : ScanDirection direction;
57 : TupleTableSlot *slot;
58 :
59 : /*
60 : * get information from the estate and scan state
61 : */
62 62254514 : scandesc = node->ss.ss_currentScanDesc;
63 62254514 : estate = node->ss.ps.state;
64 62254514 : direction = estate->es_direction;
65 62254514 : slot = node->ss.ss_ScanTupleSlot;
66 :
67 62254514 : if (scandesc == NULL)
68 : {
69 130761 : uint32 flags = SO_NONE;
70 :
71 130761 : if (ScanRelIsReadOnly(&node->ss))
72 115389 : flags |= SO_HINT_REL_READ_ONLY;
73 :
74 130761 : if (estate->es_instrument & INSTRUMENT_IO)
75 8 : flags |= SO_SCAN_INSTRUMENT;
76 :
77 : /*
78 : * We reach here if the scan is not parallel, or if we're serially
79 : * executing a scan that was planned to be parallel.
80 : */
81 130761 : scandesc = table_beginscan(node->ss.ss_currentRelation,
82 : estate->es_snapshot,
83 : 0, NULL, flags);
84 130761 : node->ss.ss_currentScanDesc = scandesc;
85 : }
86 :
87 : /*
88 : * get the next tuple from the table
89 : */
90 62254514 : if (table_scan_getnextslot(scandesc, direction, slot))
91 61277345 : return slot;
92 977144 : return NULL;
93 : }
94 :
95 : /*
96 : * SeqRecheck -- access method routine to recheck a tuple in EvalPlanQual
97 : */
98 : static pg_attribute_always_inline bool
99 130 : SeqRecheck(SeqScanState *node, TupleTableSlot *slot)
100 : {
101 : /*
102 : * Note that unlike IndexScan, SeqScan never use keys in heap_beginscan
103 : * (and this is very bad) - so, here we do not check are keys ok or not.
104 : */
105 130 : return true;
106 : }
107 :
108 : /* ----------------------------------------------------------------
109 : * ExecSeqScan(node)
110 : *
111 : * Scans the relation sequentially and returns the next qualifying
112 : * tuple. This variant is used when there is no es_epq_active, no qual
113 : * and no projection. Passing const-NULLs for these to ExecScanExtended
114 : * allows the compiler to eliminate the additional code that would
115 : * ordinarily be required for the evaluation of these.
116 : * ----------------------------------------------------------------
117 : */
118 : static TupleTableSlot *
119 10173875 : ExecSeqScan(PlanState *pstate)
120 : {
121 10173875 : SeqScanState *node = castNode(SeqScanState, pstate);
122 :
123 : Assert(pstate->state->es_epq_active == NULL);
124 : Assert(pstate->qual == NULL);
125 : Assert(pstate->ps_ProjInfo == NULL);
126 :
127 10173875 : return ExecScanExtended(&node->ss,
128 : (ExecScanAccessMtd) SeqNext,
129 : (ExecScanRecheckMtd) SeqRecheck,
130 : NULL,
131 : NULL,
132 : NULL);
133 : }
134 :
135 : /*
136 : * Variant of ExecSeqScan() but when qual evaluation is required.
137 : */
138 : static TupleTableSlot *
139 4509968 : ExecSeqScanWithQual(PlanState *pstate)
140 : {
141 4509968 : SeqScanState *node = castNode(SeqScanState, pstate);
142 :
143 : /*
144 : * Use pg_assume() for != NULL tests to make the compiler realize no
145 : * runtime check for the field is needed in ExecScanExtended().
146 : */
147 : Assert(pstate->state->es_epq_active == NULL);
148 4509968 : pg_assume(pstate->qual != NULL);
149 : Assert(pstate->ps_ProjInfo == NULL);
150 :
151 4509968 : return ExecScanExtended(&node->ss,
152 : (ExecScanAccessMtd) SeqNext,
153 : (ExecScanRecheckMtd) SeqRecheck,
154 : NULL,
155 : pstate->qual,
156 : NULL);
157 : }
158 :
159 : /*
160 : * Variant of ExecSeqScan() but when projection is required.
161 : */
162 : static TupleTableSlot *
163 18284067 : ExecSeqScanWithProject(PlanState *pstate)
164 : {
165 18284067 : SeqScanState *node = castNode(SeqScanState, pstate);
166 :
167 : Assert(pstate->state->es_epq_active == NULL);
168 : Assert(pstate->qual == NULL);
169 18284067 : pg_assume(pstate->ps_ProjInfo != NULL);
170 :
171 18284067 : return ExecScanExtended(&node->ss,
172 : (ExecScanAccessMtd) SeqNext,
173 : (ExecScanRecheckMtd) SeqRecheck,
174 : NULL,
175 : NULL,
176 : pstate->ps_ProjInfo);
177 : }
178 :
179 : /*
180 : * Variant of ExecSeqScan() but when qual evaluation and projection are
181 : * required.
182 : */
183 : static TupleTableSlot *
184 5309301 : ExecSeqScanWithQualProject(PlanState *pstate)
185 : {
186 5309301 : SeqScanState *node = castNode(SeqScanState, pstate);
187 :
188 : Assert(pstate->state->es_epq_active == NULL);
189 5309301 : pg_assume(pstate->qual != NULL);
190 5309301 : pg_assume(pstate->ps_ProjInfo != NULL);
191 :
192 5309301 : return ExecScanExtended(&node->ss,
193 : (ExecScanAccessMtd) SeqNext,
194 : (ExecScanRecheckMtd) SeqRecheck,
195 : NULL,
196 : pstate->qual,
197 : pstate->ps_ProjInfo);
198 : }
199 :
200 : /*
201 : * Variant of ExecSeqScan for when EPQ evaluation is required. We don't
202 : * bother adding variants of this for with/without qual and projection as
203 : * EPQ doesn't seem as exciting a case to optimize for.
204 : */
205 : static TupleTableSlot *
206 259 : ExecSeqScanEPQ(PlanState *pstate)
207 : {
208 259 : SeqScanState *node = castNode(SeqScanState, pstate);
209 :
210 259 : return ExecScan(&node->ss,
211 : (ExecScanAccessMtd) SeqNext,
212 : (ExecScanRecheckMtd) SeqRecheck);
213 : }
214 :
215 : /* ----------------------------------------------------------------
216 : * ExecInitSeqScan
217 : * ----------------------------------------------------------------
218 : */
219 : SeqScanState *
220 168246 : ExecInitSeqScan(SeqScan *node, EState *estate, int eflags)
221 : {
222 : SeqScanState *scanstate;
223 :
224 : /*
225 : * Once upon a time it was possible to have an outerPlan of a SeqScan, but
226 : * not any more.
227 : */
228 : Assert(outerPlan(node) == NULL);
229 : Assert(innerPlan(node) == NULL);
230 :
231 : /*
232 : * create state structure
233 : */
234 168246 : scanstate = makeNode(SeqScanState);
235 168246 : scanstate->ss.ps.plan = (Plan *) node;
236 168246 : scanstate->ss.ps.state = estate;
237 :
238 : /*
239 : * Miscellaneous initialization
240 : *
241 : * create expression context for node
242 : */
243 168246 : ExecAssignExprContext(estate, &scanstate->ss.ps);
244 :
245 : /*
246 : * open the scan relation
247 : */
248 168238 : scanstate->ss.ss_currentRelation =
249 168246 : ExecOpenScanRelation(estate,
250 : node->scan.scanrelid,
251 : eflags);
252 :
253 : /* and create slot with the appropriate rowtype */
254 168238 : ExecInitScanTupleSlot(estate, &scanstate->ss,
255 168238 : RelationGetDescr(scanstate->ss.ss_currentRelation),
256 : table_slot_callbacks(scanstate->ss.ss_currentRelation),
257 : TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS);
258 :
259 : /*
260 : * Initialize result type and projection.
261 : */
262 168238 : ExecInitResultTypeTL(&scanstate->ss.ps);
263 168238 : ExecAssignScanProjectionInfo(&scanstate->ss);
264 :
265 : /*
266 : * initialize child expressions
267 : */
268 168238 : scanstate->ss.ps.qual =
269 168238 : ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
270 :
271 : /*
272 : * When EvalPlanQual() is not in use, assign ExecProcNode for this node
273 : * based on the presence of qual and projection. Each ExecSeqScan*()
274 : * variant is optimized for the specific combination of these conditions.
275 : */
276 168238 : if (scanstate->ss.ps.state->es_epq_active != NULL)
277 144 : scanstate->ss.ps.ExecProcNode = ExecSeqScanEPQ;
278 168094 : else if (scanstate->ss.ps.qual == NULL)
279 : {
280 86545 : if (scanstate->ss.ps.ps_ProjInfo == NULL)
281 41054 : scanstate->ss.ps.ExecProcNode = ExecSeqScan;
282 : else
283 45491 : scanstate->ss.ps.ExecProcNode = ExecSeqScanWithProject;
284 : }
285 : else
286 : {
287 81549 : if (scanstate->ss.ps.ps_ProjInfo == NULL)
288 38496 : scanstate->ss.ps.ExecProcNode = ExecSeqScanWithQual;
289 : else
290 43053 : scanstate->ss.ps.ExecProcNode = ExecSeqScanWithQualProject;
291 : }
292 :
293 168238 : return scanstate;
294 : }
295 :
296 : /* ----------------------------------------------------------------
297 : * ExecEndSeqScan
298 : *
299 : * frees any storage allocated through C routines.
300 : * ----------------------------------------------------------------
301 : */
302 : void
303 166397 : ExecEndSeqScan(SeqScanState *node)
304 : {
305 : TableScanDesc scanDesc;
306 :
307 : /*
308 : * get information from node
309 : */
310 166397 : scanDesc = node->ss.ss_currentScanDesc;
311 :
312 : /*
313 : * Collect I/O stats for this process into shared instrumentation.
314 : */
315 166397 : if (node->sinstrument != NULL && IsParallelWorker())
316 : {
317 : SeqScanInstrumentation *si;
318 :
319 : Assert(ParallelWorkerNumber < node->sinstrument->num_workers);
320 0 : si = &node->sinstrument->sinstrument[ParallelWorkerNumber];
321 :
322 0 : if (scanDesc && scanDesc->rs_instrument)
323 : {
324 0 : AccumulateIOStats(&si->stats.io, &scanDesc->rs_instrument->io);
325 : }
326 : }
327 :
328 : /*
329 : * close heap scan
330 : */
331 166397 : if (scanDesc != NULL)
332 133392 : table_endscan(scanDesc);
333 166397 : }
334 :
335 : /* ----------------------------------------------------------------
336 : * Join Support
337 : * ----------------------------------------------------------------
338 : */
339 :
340 : /* ----------------------------------------------------------------
341 : * ExecReScanSeqScan
342 : *
343 : * Rescans the relation.
344 : * ----------------------------------------------------------------
345 : */
346 : void
347 875946 : ExecReScanSeqScan(SeqScanState *node)
348 : {
349 : TableScanDesc scan;
350 :
351 875946 : scan = node->ss.ss_currentScanDesc;
352 :
353 875946 : if (scan != NULL)
354 860392 : table_rescan(scan, /* scan desc */
355 : NULL); /* new scan keys */
356 :
357 875946 : ExecScanReScan((ScanState *) node);
358 875946 : }
359 :
360 : /* ----------------------------------------------------------------
361 : * Parallel Scan Support
362 : * ----------------------------------------------------------------
363 : */
364 :
365 : /* ----------------------------------------------------------------
366 : * ExecSeqScanEstimate
367 : *
368 : * Compute the amount of space we'll need in the parallel
369 : * query DSM, and inform pcxt->estimator about our needs.
370 : * ----------------------------------------------------------------
371 : */
372 : void
373 1190 : ExecSeqScanEstimate(SeqScanState *node,
374 : ParallelContext *pcxt)
375 : {
376 1190 : EState *estate = node->ss.ps.state;
377 :
378 1190 : node->pscan_len = table_parallelscan_estimate(node->ss.ss_currentRelation,
379 : estate->es_snapshot);
380 1190 : shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len);
381 1190 : shm_toc_estimate_keys(&pcxt->estimator, 1);
382 1190 : }
383 :
384 : /* ----------------------------------------------------------------
385 : * ExecSeqScanInitializeDSM
386 : *
387 : * Set up a parallel heap scan descriptor.
388 : * ----------------------------------------------------------------
389 : */
390 : void
391 1190 : ExecSeqScanInitializeDSM(SeqScanState *node,
392 : ParallelContext *pcxt)
393 : {
394 1190 : EState *estate = node->ss.ps.state;
395 : ParallelTableScanDesc pscan;
396 1190 : uint32 flags = SO_NONE;
397 :
398 1190 : if (ScanRelIsReadOnly(&node->ss))
399 1190 : flags |= SO_HINT_REL_READ_ONLY;
400 :
401 1190 : if (estate->es_instrument & INSTRUMENT_IO)
402 0 : flags |= SO_SCAN_INSTRUMENT;
403 :
404 1190 : pscan = shm_toc_allocate(pcxt->toc, node->pscan_len);
405 1190 : table_parallelscan_initialize(node->ss.ss_currentRelation,
406 : pscan,
407 : estate->es_snapshot);
408 1190 : shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan);
409 :
410 1190 : node->ss.ss_currentScanDesc =
411 1190 : table_beginscan_parallel(node->ss.ss_currentRelation, pscan, flags);
412 1190 : }
413 :
414 : /* ----------------------------------------------------------------
415 : * ExecSeqScanReInitializeDSM
416 : *
417 : * Reset shared state before beginning a fresh scan.
418 : * ----------------------------------------------------------------
419 : */
420 : void
421 152 : ExecSeqScanReInitializeDSM(SeqScanState *node,
422 : ParallelContext *pcxt)
423 : {
424 : ParallelTableScanDesc pscan;
425 :
426 152 : pscan = node->ss.ss_currentScanDesc->rs_parallel;
427 152 : table_parallelscan_reinitialize(node->ss.ss_currentRelation, pscan);
428 152 : }
429 :
430 : /* ----------------------------------------------------------------
431 : * ExecSeqScanInitializeWorker
432 : *
433 : * Copy relevant information from TOC into planstate.
434 : * ----------------------------------------------------------------
435 : */
436 : void
437 2971 : ExecSeqScanInitializeWorker(SeqScanState *node,
438 : ParallelWorkerContext *pwcxt)
439 : {
440 : ParallelTableScanDesc pscan;
441 2971 : uint32 flags = SO_NONE;
442 :
443 2971 : if (ScanRelIsReadOnly(&node->ss))
444 2971 : flags |= SO_HINT_REL_READ_ONLY;
445 :
446 2971 : if (node->ss.ps.state->es_instrument & INSTRUMENT_IO)
447 0 : flags |= SO_SCAN_INSTRUMENT;
448 :
449 2971 : pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
450 2971 : node->ss.ss_currentScanDesc =
451 2971 : table_beginscan_parallel(node->ss.ss_currentRelation, pscan, flags);
452 2971 : }
453 :
454 : /*
455 : * Compute the amount of space we'll need for the shared instrumentation and
456 : * inform pcxt->estimator.
457 : */
458 : void
459 1528 : ExecSeqScanInstrumentEstimate(SeqScanState *node, ParallelContext *pcxt)
460 : {
461 1528 : EState *estate = node->ss.ps.state;
462 : Size size;
463 :
464 1528 : if ((estate->es_instrument & INSTRUMENT_IO) == 0 || pcxt->nworkers == 0)
465 1528 : return;
466 :
467 0 : size = add_size(offsetof(SharedSeqScanInstrumentation, sinstrument),
468 0 : mul_size(pcxt->nworkers, sizeof(SeqScanInstrumentation)));
469 :
470 0 : shm_toc_estimate_chunk(&pcxt->estimator, size);
471 0 : shm_toc_estimate_keys(&pcxt->estimator, 1);
472 : }
473 :
474 : /*
475 : * Set up parallel sequential scan instrumentation.
476 : */
477 : void
478 1528 : ExecSeqScanInstrumentInitDSM(SeqScanState *node, ParallelContext *pcxt)
479 : {
480 1528 : EState *estate = node->ss.ps.state;
481 : SharedSeqScanInstrumentation *sinstrument;
482 : Size size;
483 :
484 1528 : if ((estate->es_instrument & INSTRUMENT_IO) == 0 || pcxt->nworkers == 0)
485 1528 : return;
486 :
487 0 : size = add_size(offsetof(SharedSeqScanInstrumentation, sinstrument),
488 0 : mul_size(pcxt->nworkers, sizeof(SeqScanInstrumentation)));
489 0 : sinstrument = shm_toc_allocate(pcxt->toc, size);
490 0 : memset(sinstrument, 0, size);
491 0 : sinstrument->num_workers = pcxt->nworkers;
492 0 : shm_toc_insert(pcxt->toc,
493 0 : node->ss.ps.plan->plan_node_id +
494 : PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET,
495 : sinstrument);
496 0 : node->sinstrument = sinstrument;
497 : }
498 :
499 : /*
500 : * Look up and save the location of the shared instrumentation.
501 : */
502 : void
503 3749 : ExecSeqScanInstrumentInitWorker(SeqScanState *node,
504 : ParallelWorkerContext *pwcxt)
505 : {
506 3749 : EState *estate = node->ss.ps.state;
507 :
508 3749 : if ((estate->es_instrument & INSTRUMENT_IO) == 0)
509 3749 : return;
510 :
511 0 : node->sinstrument = shm_toc_lookup(pwcxt->toc,
512 0 : node->ss.ps.plan->plan_node_id +
513 : PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET,
514 : false);
515 : }
516 :
517 : /*
518 : * Transfer sequential scan instrumentation from DSM to private memory.
519 : */
520 : void
521 232 : ExecSeqScanRetrieveInstrumentation(SeqScanState *node)
522 : {
523 232 : SharedSeqScanInstrumentation *sinstrument = node->sinstrument;
524 : Size size;
525 :
526 232 : if (sinstrument == NULL)
527 232 : return;
528 :
529 0 : size = offsetof(SharedSeqScanInstrumentation, sinstrument)
530 0 : + sinstrument->num_workers * sizeof(SeqScanInstrumentation);
531 :
532 0 : node->sinstrument = palloc(size);
533 0 : memcpy(node->sinstrument, sinstrument, size);
534 : }
|