Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeSort.c
4 : * Routines to handle sorting of relations.
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/executor/nodeSort.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include "access/parallel.h"
19 : #include "executor/execdebug.h"
20 : #include "executor/nodeSort.h"
21 : #include "miscadmin.h"
22 : #include "utils/tuplesort.h"
23 :
24 :
25 : /* ----------------------------------------------------------------
26 : * ExecSort
27 : *
28 : * Sorts tuples from the outer subtree of the node using tuplesort,
29 : * which saves the results in a temporary file or memory. After the
30 : * initial call, returns a tuple from the file with each call.
31 : *
32 : * There are two distinct ways that this sort can be performed:
33 : *
34 : * 1) When the result is a single column we perform a Datum sort.
35 : *
36 : * 2) When the result contains multiple columns we perform a tuple sort.
37 : *
38 : * We could do this by always performing a tuple sort, however sorting
39 : * Datums only can be significantly faster than sorting tuples,
40 : * especially when the Datums are of a pass-by-value type.
41 : *
42 : * Conditions:
43 : * -- none.
44 : *
45 : * Initial States:
46 : * -- the outer child is prepared to return the first tuple.
47 : * ----------------------------------------------------------------
48 : */
49 : static TupleTableSlot *
50 10532584 : ExecSort(PlanState *pstate)
51 : {
52 10532584 : SortState *node = castNode(SortState, pstate);
53 : EState *estate;
54 : ScanDirection dir;
55 : Tuplesortstate *tuplesortstate;
56 : TupleTableSlot *slot;
57 :
58 10532584 : CHECK_FOR_INTERRUPTS();
59 :
60 : /*
61 : * get state info from node
62 : */
63 : SO1_printf("ExecSort: %s\n",
64 : "entering routine");
65 :
66 10532584 : estate = node->ss.ps.state;
67 10532584 : dir = estate->es_direction;
68 10532584 : tuplesortstate = (Tuplesortstate *) node->tuplesortstate;
69 :
70 : /*
71 : * If first time through, read all tuples from outer plan and pass them to
72 : * tuplesort.c. Subsequent calls just fetch tuples from tuplesort.
73 : */
74 :
75 10532584 : if (!node->sort_Done)
76 : {
77 109076 : Sort *plannode = (Sort *) node->ss.ps.plan;
78 : PlanState *outerNode;
79 : TupleDesc tupDesc;
80 109076 : int tuplesortopts = TUPLESORT_NONE;
81 :
82 : SO1_printf("ExecSort: %s\n",
83 : "sorting subplan");
84 :
85 : /*
86 : * Want to scan subplan in the forward direction while creating the
87 : * sorted data.
88 : */
89 109076 : estate->es_direction = ForwardScanDirection;
90 :
91 : /*
92 : * Initialize tuplesort module.
93 : */
94 : SO1_printf("ExecSort: %s\n",
95 : "calling tuplesort_begin");
96 :
97 109076 : outerNode = outerPlanState(node);
98 109076 : tupDesc = ExecGetResultType(outerNode);
99 :
100 109076 : if (node->randomAccess)
101 5608 : tuplesortopts |= TUPLESORT_RANDOMACCESS;
102 109076 : if (node->bounded)
103 938 : tuplesortopts |= TUPLESORT_ALLOWBOUNDED;
104 :
105 109076 : if (node->datumSort)
106 5028 : tuplesortstate = tuplesort_begin_datum(TupleDescAttr(tupDesc, 0)->atttypid,
107 5028 : plannode->sortOperators[0],
108 5028 : plannode->collations[0],
109 5028 : plannode->nullsFirst[0],
110 : work_mem,
111 : NULL,
112 : tuplesortopts);
113 : else
114 104048 : tuplesortstate = tuplesort_begin_heap(tupDesc,
115 : plannode->numCols,
116 : plannode->sortColIdx,
117 : plannode->sortOperators,
118 : plannode->collations,
119 : plannode->nullsFirst,
120 : work_mem,
121 : NULL,
122 : tuplesortopts);
123 109064 : if (node->bounded)
124 938 : tuplesort_set_bound(tuplesortstate, node->bound);
125 109064 : node->tuplesortstate = tuplesortstate;
126 :
127 : /*
128 : * Scan the subplan and feed all the tuples to tuplesort using the
129 : * appropriate method based on the type of sort we're doing.
130 : */
131 109064 : if (node->datumSort)
132 : {
133 : for (;;)
134 : {
135 1527264 : slot = ExecProcNode(outerNode);
136 :
137 1527260 : if (TupIsNull(slot))
138 : break;
139 1522236 : slot_getsomeattrs(slot, 1);
140 1522236 : tuplesort_putdatum(tuplesortstate,
141 1522236 : slot->tts_values[0],
142 1522236 : slot->tts_isnull[0]);
143 : }
144 : }
145 : else
146 : {
147 : for (;;)
148 : {
149 10704326 : slot = ExecProcNode(outerNode);
150 :
151 10704326 : if (TupIsNull(slot))
152 : break;
153 10600290 : tuplesort_puttupleslot(tuplesortstate, slot);
154 : }
155 : }
156 :
157 : /*
158 : * Complete the sort.
159 : */
160 109060 : tuplesort_performsort(tuplesortstate);
161 :
162 : /*
163 : * restore to user specified direction
164 : */
165 109060 : estate->es_direction = dir;
166 :
167 : /*
168 : * finally set the sorted flag to true
169 : */
170 109060 : node->sort_Done = true;
171 109060 : node->bounded_Done = node->bounded;
172 109060 : node->bound_Done = node->bound;
173 109060 : if (node->shared_info && node->am_worker)
174 : {
175 : TuplesortInstrumentation *si;
176 :
177 : Assert(IsParallelWorker());
178 : Assert(ParallelWorkerNumber <= node->shared_info->num_workers);
179 96 : si = &node->shared_info->sinstrument[ParallelWorkerNumber];
180 96 : tuplesort_get_stats(tuplesortstate, si);
181 : }
182 : SO1_printf("ExecSort: %s\n", "sorting done");
183 : }
184 :
185 : SO1_printf("ExecSort: %s\n",
186 : "retrieving tuple from tuplesort");
187 :
188 10532568 : slot = node->ss.ps.ps_ResultTupleSlot;
189 :
190 : /*
191 : * Fetch the next sorted item from the appropriate tuplesort function. For
192 : * datum sorts we must manage the slot ourselves and leave it clear when
193 : * tuplesort_getdatum returns false to indicate there are no more datums.
194 : * For tuple sorts, tuplesort_gettupleslot manages the slot for us and
195 : * empties the slot when it runs out of tuples.
196 : */
197 10532568 : if (node->datumSort)
198 : {
199 1242008 : ExecClearTuple(slot);
200 1242008 : if (tuplesort_getdatum(tuplesortstate, ScanDirectionIsForward(dir),
201 : false, &(slot->tts_values[0]),
202 : &(slot->tts_isnull[0]), NULL))
203 1237202 : ExecStoreVirtualTuple(slot);
204 : }
205 : else
206 9290560 : (void) tuplesort_gettupleslot(tuplesortstate,
207 : ScanDirectionIsForward(dir),
208 : false, slot, NULL);
209 :
210 10532568 : return slot;
211 : }
212 :
213 : /* ----------------------------------------------------------------
214 : * ExecInitSort
215 : *
216 : * Creates the run-time state information for the sort node
217 : * produced by the planner and initializes its outer subtree.
218 : * ----------------------------------------------------------------
219 : */
220 : SortState *
221 69380 : ExecInitSort(Sort *node, EState *estate, int eflags)
222 : {
223 : SortState *sortstate;
224 : TupleDesc outerTupDesc;
225 :
226 : SO1_printf("ExecInitSort: %s\n",
227 : "initializing sort node");
228 :
229 : /*
230 : * create state structure
231 : */
232 69380 : sortstate = makeNode(SortState);
233 69380 : sortstate->ss.ps.plan = (Plan *) node;
234 69380 : sortstate->ss.ps.state = estate;
235 69380 : sortstate->ss.ps.ExecProcNode = ExecSort;
236 :
237 : /*
238 : * We must have random access to the sort output to do backward scan or
239 : * mark/restore. We also prefer to materialize the sort output if we
240 : * might be called on to rewind and replay it many times.
241 : */
242 69380 : sortstate->randomAccess = (eflags & (EXEC_FLAG_REWIND |
243 : EXEC_FLAG_BACKWARD |
244 69380 : EXEC_FLAG_MARK)) != 0;
245 :
246 69380 : sortstate->bounded = false;
247 69380 : sortstate->sort_Done = false;
248 69380 : sortstate->tuplesortstate = NULL;
249 :
250 : /*
251 : * Miscellaneous initialization
252 : *
253 : * Sort nodes don't initialize their ExprContexts because they never call
254 : * ExecQual or ExecProject.
255 : */
256 :
257 : /*
258 : * initialize child nodes
259 : *
260 : * We shield the child node from the need to support REWIND, BACKWARD, or
261 : * MARK/RESTORE.
262 : */
263 69380 : eflags &= ~(EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK);
264 :
265 69380 : outerPlanState(sortstate) = ExecInitNode(outerPlan(node), estate, eflags);
266 :
267 : /*
268 : * Initialize scan slot and type.
269 : */
270 69374 : ExecCreateScanSlotFromOuterPlan(estate, &sortstate->ss, &TTSOpsVirtual);
271 :
272 : /*
273 : * Initialize return slot and type. No need to initialize projection info
274 : * because this node doesn't do projections.
275 : */
276 69374 : ExecInitResultTupleSlotTL(&sortstate->ss.ps, &TTSOpsMinimalTuple);
277 69374 : sortstate->ss.ps.ps_ProjInfo = NULL;
278 :
279 69374 : outerTupDesc = ExecGetResultType(outerPlanState(sortstate));
280 :
281 : /*
282 : * We perform a Datum sort when we're sorting just a single column,
283 : * otherwise we perform a tuple sort.
284 : */
285 69374 : if (outerTupDesc->natts == 1)
286 8258 : sortstate->datumSort = true;
287 : else
288 61116 : sortstate->datumSort = false;
289 :
290 : SO1_printf("ExecInitSort: %s\n",
291 : "sort node initialized");
292 :
293 69374 : return sortstate;
294 : }
295 :
296 : /* ----------------------------------------------------------------
297 : * ExecEndSort(node)
298 : * ----------------------------------------------------------------
299 : */
300 : void
301 69226 : ExecEndSort(SortState *node)
302 : {
303 : SO1_printf("ExecEndSort: %s\n",
304 : "shutting down sort node");
305 :
306 : /*
307 : * Release tuplesort resources
308 : */
309 69226 : if (node->tuplesortstate != NULL)
310 61332 : tuplesort_end((Tuplesortstate *) node->tuplesortstate);
311 69226 : node->tuplesortstate = NULL;
312 :
313 : /*
314 : * shut down the subplan
315 : */
316 69226 : ExecEndNode(outerPlanState(node));
317 :
318 : SO1_printf("ExecEndSort: %s\n",
319 : "sort node shutdown");
320 69226 : }
321 :
322 : /* ----------------------------------------------------------------
323 : * ExecSortMarkPos
324 : *
325 : * Calls tuplesort to save the current position in the sorted file.
326 : * ----------------------------------------------------------------
327 : */
328 : void
329 578350 : ExecSortMarkPos(SortState *node)
330 : {
331 : /*
332 : * if we haven't sorted yet, just return
333 : */
334 578350 : if (!node->sort_Done)
335 0 : return;
336 :
337 578350 : tuplesort_markpos((Tuplesortstate *) node->tuplesortstate);
338 : }
339 :
340 : /* ----------------------------------------------------------------
341 : * ExecSortRestrPos
342 : *
343 : * Calls tuplesort to restore the last saved sort file position.
344 : * ----------------------------------------------------------------
345 : */
346 : void
347 31206 : ExecSortRestrPos(SortState *node)
348 : {
349 : /*
350 : * if we haven't sorted yet, just return.
351 : */
352 31206 : if (!node->sort_Done)
353 0 : return;
354 :
355 : /*
356 : * restore the scan to the previously marked position
357 : */
358 31206 : tuplesort_restorepos((Tuplesortstate *) node->tuplesortstate);
359 : }
360 :
361 : void
362 48550 : ExecReScanSort(SortState *node)
363 : {
364 48550 : PlanState *outerPlan = outerPlanState(node);
365 :
366 : /*
367 : * If we haven't sorted yet, just return. If outerplan's chgParam is not
368 : * NULL then it will be re-scanned by ExecProcNode, else no reason to
369 : * re-scan it at all.
370 : */
371 48550 : if (!node->sort_Done)
372 920 : return;
373 :
374 : /* must drop pointer to sort result tuple */
375 47630 : ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
376 :
377 : /*
378 : * If subnode is to be rescanned then we forget previous sort results; we
379 : * have to re-read the subplan and re-sort. Also must re-sort if the
380 : * bounded-sort parameters changed or we didn't select randomAccess.
381 : *
382 : * Otherwise we can just rewind and rescan the sorted output.
383 : */
384 47630 : if (outerPlan->chgParam != NULL ||
385 496 : node->bounded != node->bounded_Done ||
386 496 : node->bound != node->bound_Done ||
387 442 : !node->randomAccess)
388 : {
389 47596 : node->sort_Done = false;
390 47596 : tuplesort_end((Tuplesortstate *) node->tuplesortstate);
391 47596 : node->tuplesortstate = NULL;
392 :
393 : /*
394 : * if chgParam of subnode is not null then plan will be re-scanned by
395 : * first ExecProcNode.
396 : */
397 47596 : if (outerPlan->chgParam == NULL)
398 462 : ExecReScan(outerPlan);
399 : }
400 : else
401 34 : tuplesort_rescan((Tuplesortstate *) node->tuplesortstate);
402 : }
403 :
404 : /* ----------------------------------------------------------------
405 : * Parallel Query Support
406 : * ----------------------------------------------------------------
407 : */
408 :
409 : /* ----------------------------------------------------------------
410 : * ExecSortEstimate
411 : *
412 : * Estimate space required to propagate sort statistics.
413 : * ----------------------------------------------------------------
414 : */
415 : void
416 152 : ExecSortEstimate(SortState *node, ParallelContext *pcxt)
417 : {
418 : Size size;
419 :
420 : /* don't need this if not instrumenting or no workers */
421 152 : if (!node->ss.ps.instrument || pcxt->nworkers == 0)
422 140 : return;
423 :
424 12 : size = mul_size(pcxt->nworkers, sizeof(TuplesortInstrumentation));
425 12 : size = add_size(size, offsetof(SharedSortInfo, sinstrument));
426 12 : shm_toc_estimate_chunk(&pcxt->estimator, size);
427 12 : shm_toc_estimate_keys(&pcxt->estimator, 1);
428 : }
429 :
430 : /* ----------------------------------------------------------------
431 : * ExecSortInitializeDSM
432 : *
433 : * Initialize DSM space for sort statistics.
434 : * ----------------------------------------------------------------
435 : */
436 : void
437 152 : ExecSortInitializeDSM(SortState *node, ParallelContext *pcxt)
438 : {
439 : Size size;
440 :
441 : /* don't need this if not instrumenting or no workers */
442 152 : if (!node->ss.ps.instrument || pcxt->nworkers == 0)
443 140 : return;
444 :
445 12 : size = offsetof(SharedSortInfo, sinstrument)
446 12 : + pcxt->nworkers * sizeof(TuplesortInstrumentation);
447 12 : node->shared_info = shm_toc_allocate(pcxt->toc, size);
448 : /* ensure any unfilled slots will contain zeroes */
449 12 : memset(node->shared_info, 0, size);
450 12 : node->shared_info->num_workers = pcxt->nworkers;
451 12 : shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id,
452 12 : node->shared_info);
453 : }
454 :
455 : /* ----------------------------------------------------------------
456 : * ExecSortInitializeWorker
457 : *
458 : * Attach worker to DSM space for sort statistics.
459 : * ----------------------------------------------------------------
460 : */
461 : void
462 452 : ExecSortInitializeWorker(SortState *node, ParallelWorkerContext *pwcxt)
463 : {
464 452 : node->shared_info =
465 452 : shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, true);
466 452 : node->am_worker = true;
467 452 : }
468 :
469 : /* ----------------------------------------------------------------
470 : * ExecSortRetrieveInstrumentation
471 : *
472 : * Transfer sort statistics from DSM to private memory.
473 : * ----------------------------------------------------------------
474 : */
475 : void
476 12 : ExecSortRetrieveInstrumentation(SortState *node)
477 : {
478 : Size size;
479 : SharedSortInfo *si;
480 :
481 12 : if (node->shared_info == NULL)
482 0 : return;
483 :
484 12 : size = offsetof(SharedSortInfo, sinstrument)
485 12 : + node->shared_info->num_workers * sizeof(TuplesortInstrumentation);
486 12 : si = palloc(size);
487 12 : memcpy(si, node->shared_info, size);
488 12 : node->shared_info = si;
489 : }
|