Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeSamplescan.c
4 : * Support routines for sample scans of relations (table sampling).
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/executor/nodeSamplescan.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/relscan.h"
18 : #include "access/tableam.h"
19 : #include "access/tsmapi.h"
20 : #include "common/pg_prng.h"
21 : #include "executor/executor.h"
22 : #include "executor/nodeSamplescan.h"
23 : #include "utils/fmgrprotos.h"
24 : #include "utils/rel.h"
25 :
/*
 * Forward declarations for file-local routines.  SampleNext is the per-tuple
 * access method handed to ExecScan; tablesample_init and tablesample_getnext
 * are the helpers it calls, and both are defined further down in this file.
 */
26 : static TupleTableSlot *SampleNext(SampleScanState *node);
27 : static void tablesample_init(SampleScanState *scanstate);
28 : static TupleTableSlot *tablesample_getnext(SampleScanState *scanstate);
29 :
30 : /* ----------------------------------------------------------------
31 : * Scan Support
32 : * ----------------------------------------------------------------
33 : */
34 :
35 : /* ----------------------------------------------------------------
36 : * SampleNext
37 : *
38 : * This is a workhorse for ExecSampleScan
39 : * ----------------------------------------------------------------
40 : */
41 : static TupleTableSlot *
42 160843 : SampleNext(SampleScanState *node)
43 : {
44 : /*
45 : * if this is first call within a scan, initialize
46 : */
47 160843 : if (!node->begun)
48 139 : tablesample_init(node);
49 :
50 : /*
51 : * get the next tuple, and store it in our result slot
52 : */
53 160817 : return tablesample_getnext(node);
54 : }
55 :
56 : /*
57 : * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual
58 : */
59 : static bool
60 0 : SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
61 : {
62 : /*
63 : * No need to recheck for SampleScan, since like SeqScan we don't pass any
64 : * checkable keys to heap_beginscan.
65 : */
66 0 : return true;
67 : }
68 :
69 : /* ----------------------------------------------------------------
70 : * ExecSampleScan(node)
71 : *
72 : * Scans the relation using the sampling method and returns
73 : * the next qualifying tuple.
74 : * We call the ExecScan() routine and pass it the appropriate
75 : * access method functions.
76 : * ----------------------------------------------------------------
77 : */
78 : static TupleTableSlot *
79 160839 : ExecSampleScan(PlanState *pstate)
80 : {
81 160839 : SampleScanState *node = castNode(SampleScanState, pstate);
82 :
83 160839 : return ExecScan(&node->ss,
84 : (ExecScanAccessMtd) SampleNext,
85 : (ExecScanRecheckMtd) SampleRecheck);
86 : }
87 :
88 : /* ----------------------------------------------------------------
89 : * ExecInitSampleScan
90 : * ----------------------------------------------------------------
91 : */
92 : SampleScanState *
93 198 : ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
94 : {
95 : SampleScanState *scanstate;
96 198 : TableSampleClause *tsc = node->tablesample;
97 : TsmRoutine *tsm;
98 :
99 : Assert(outerPlan(node) == NULL);
100 : Assert(innerPlan(node) == NULL);
101 :
102 : /*
103 : * create state structure
104 : */
105 198 : scanstate = makeNode(SampleScanState);
106 198 : scanstate->ss.ps.plan = (Plan *) node;
107 198 : scanstate->ss.ps.state = estate;
108 198 : scanstate->ss.ps.ExecProcNode = ExecSampleScan;
109 :
110 : /*
111 : * Miscellaneous initialization
112 : *
113 : * create expression context for node
114 : */
115 198 : ExecAssignExprContext(estate, &scanstate->ss.ps);
116 :
117 : /*
118 : * open the scan relation
119 : */
120 198 : scanstate->ss.ss_currentRelation =
121 198 : ExecOpenScanRelation(estate,
122 : node->scan.scanrelid,
123 : eflags);
124 :
125 : /* we won't set up the HeapScanDesc till later */
126 198 : scanstate->ss.ss_currentScanDesc = NULL;
127 :
128 : /* and create slot with appropriate rowtype */
129 198 : ExecInitScanTupleSlot(estate, &scanstate->ss,
130 198 : RelationGetDescr(scanstate->ss.ss_currentRelation),
131 : table_slot_callbacks(scanstate->ss.ss_currentRelation),
132 : TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS);
133 :
134 : /*
135 : * Initialize result type and projection.
136 : */
137 198 : ExecInitResultTypeTL(&scanstate->ss.ps);
138 198 : ExecAssignScanProjectionInfo(&scanstate->ss);
139 :
140 : /*
141 : * initialize child expressions
142 : */
143 198 : scanstate->ss.ps.qual =
144 198 : ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
145 :
146 198 : scanstate->args = ExecInitExprList(tsc->args, (PlanState *) scanstate);
147 198 : scanstate->repeatable =
148 198 : ExecInitExpr(tsc->repeatable, (PlanState *) scanstate);
149 :
150 : /*
151 : * If we don't have a REPEATABLE clause, select a random seed. We want to
152 : * do this just once, since the seed shouldn't change over rescans.
153 : */
154 198 : if (tsc->repeatable == NULL)
155 110 : scanstate->seed = pg_prng_uint32(&pg_global_prng_state);
156 :
157 : /*
158 : * Finally, initialize the TABLESAMPLE method handler.
159 : */
160 198 : tsm = GetTsmRoutine(tsc->tsmhandler);
161 198 : scanstate->tsmroutine = tsm;
162 198 : scanstate->tsm_state = NULL;
163 :
164 198 : if (tsm->InitSampleScan)
165 198 : tsm->InitSampleScan(scanstate, eflags);
166 :
167 : /* We'll do BeginSampleScan later; we can't evaluate params yet */
168 198 : scanstate->begun = false;
169 :
170 198 : return scanstate;
171 : }
172 :
173 : /* ----------------------------------------------------------------
174 : * ExecEndSampleScan
175 : *
176 : * frees any storage allocated through C routines.
177 : * ----------------------------------------------------------------
178 : */
179 : void
180 172 : ExecEndSampleScan(SampleScanState *node)
181 : {
182 : /*
183 : * Tell sampling function that we finished the scan.
184 : */
185 172 : if (node->tsmroutine->EndSampleScan)
186 0 : node->tsmroutine->EndSampleScan(node);
187 :
188 : /*
189 : * close heap scan
190 : */
191 172 : if (node->ss.ss_currentScanDesc)
192 94 : table_endscan(node->ss.ss_currentScanDesc);
193 172 : }
194 :
195 : /* ----------------------------------------------------------------
196 : * ExecReScanSampleScan
197 : *
198 : * Rescans the relation.
199 : *
200 : * ----------------------------------------------------------------
201 : */
202 : void
203 37 : ExecReScanSampleScan(SampleScanState *node)
204 : {
205 : /* Remember we need to do BeginSampleScan again (if we did it at all) */
206 37 : node->begun = false;
207 37 : node->done = false;
208 37 : node->haveblock = false;
209 37 : node->donetuples = 0;
210 :
211 37 : ExecScanReScan(&node->ss);
212 37 : }
213 :
214 :
215 : /*
216 : * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
217 : */
218 : static void
219 139 : tablesample_init(SampleScanState *scanstate)
220 : {
221 139 : TsmRoutine *tsm = scanstate->tsmroutine;
222 139 : ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
223 : Datum *params;
224 : Datum datum;
225 : bool isnull;
226 : uint32 seed;
227 : bool allow_sync;
228 : int i;
229 : ListCell *arg;
230 :
231 139 : scanstate->donetuples = 0;
232 139 : params = palloc_array(Datum, list_length(scanstate->args));
233 :
234 139 : i = 0;
235 274 : foreach(arg, scanstate->args)
236 : {
237 139 : ExprState *argstate = (ExprState *) lfirst(arg);
238 :
239 139 : params[i] = ExecEvalExprSwitchContext(argstate,
240 : econtext,
241 : &isnull);
242 139 : if (isnull)
243 4 : ereport(ERROR,
244 : (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
245 : errmsg("TABLESAMPLE parameter cannot be null")));
246 135 : i++;
247 : }
248 :
249 135 : if (scanstate->repeatable)
250 : {
251 48 : datum = ExecEvalExprSwitchContext(scanstate->repeatable,
252 : econtext,
253 : &isnull);
254 48 : if (isnull)
255 4 : ereport(ERROR,
256 : (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
257 : errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
258 :
259 : /*
260 : * The REPEATABLE parameter has been coerced to float8 by the parser.
261 : * The reason for using float8 at the SQL level is that it will
262 : * produce unsurprising results both for users used to databases that
263 : * accept only integers in the REPEATABLE clause and for those who
264 : * might expect that REPEATABLE works like setseed() (a float in the
265 : * range from -1 to 1).
266 : *
267 : * We use hashfloat8() to convert the supplied value into a suitable
268 : * seed. For regression-testing purposes, that has the convenient
269 : * property that REPEATABLE(0) gives a machine-independent result.
270 : */
271 44 : seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
272 : }
273 : else
274 : {
275 : /* Use the seed selected by ExecInitSampleScan */
276 87 : seed = scanstate->seed;
277 : }
278 :
279 : /* Set default values for params that BeginSampleScan can adjust */
280 131 : scanstate->use_bulkread = true;
281 131 : scanstate->use_pagemode = true;
282 :
283 : /* Let tablesample method do its thing */
284 131 : tsm->BeginSampleScan(scanstate,
285 : params,
286 131 : list_length(scanstate->args),
287 : seed);
288 :
289 : /* We'll use syncscan if there's no NextSampleBlock function */
290 113 : allow_sync = (tsm->NextSampleBlock == NULL);
291 :
292 : /* Now we can create or reset the HeapScanDesc */
293 113 : if (scanstate->ss.ss_currentScanDesc == NULL)
294 : {
295 94 : scanstate->ss.ss_currentScanDesc =
296 94 : table_beginscan_sampling(scanstate->ss.ss_currentRelation,
297 94 : scanstate->ss.ps.state->es_snapshot,
298 : 0, NULL,
299 94 : scanstate->use_bulkread,
300 : allow_sync,
301 94 : scanstate->use_pagemode);
302 : }
303 : else
304 : {
305 19 : table_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
306 19 : scanstate->use_bulkread,
307 : allow_sync,
308 19 : scanstate->use_pagemode);
309 : }
310 :
311 113 : pfree(params);
312 :
313 : /* And we're initialized. */
314 113 : scanstate->begun = true;
315 113 : }
316 :
317 : /*
318 : * Get next tuple from TABLESAMPLE method.
319 : */
320 : static TupleTableSlot *
321 160817 : tablesample_getnext(SampleScanState *scanstate)
322 : {
323 160817 : TableScanDesc scan = scanstate->ss.ss_currentScanDesc;
324 160817 : TupleTableSlot *slot = scanstate->ss.ss_ScanTupleSlot;
325 :
326 160817 : ExecClearTuple(slot);
327 :
328 160817 : if (scanstate->done)
329 0 : return NULL;
330 :
331 : for (;;)
332 : {
333 169292 : if (!scanstate->haveblock)
334 : {
335 8588 : if (!table_scan_sample_next_block(scan, scanstate))
336 : {
337 109 : scanstate->haveblock = false;
338 109 : scanstate->done = true;
339 :
340 : /* exhausted relation */
341 109 : return NULL;
342 : }
343 :
344 8479 : scanstate->haveblock = true;
345 : }
346 :
347 169183 : if (!table_scan_sample_next_tuple(scan, scanstate, slot))
348 : {
349 : /*
350 : * If we get here, it means we've exhausted the items on this page
351 : * and it's time to move to the next.
352 : */
353 8475 : scanstate->haveblock = false;
354 8475 : continue;
355 : }
356 :
357 : /* Found visible tuple, return it. */
358 160708 : break;
359 : }
360 :
361 160708 : scanstate->donetuples++;
362 :
363 160708 : return slot;
364 : }
|