Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeSort.c
4 : * Routines to handle sorting of relations.
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/executor/nodeSort.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include "access/parallel.h"
19 : #include "executor/execdebug.h"
20 : #include "executor/nodeSort.h"
21 : #include "miscadmin.h"
22 : #include "utils/tuplesort.h"
23 :
24 :
25 : /* ----------------------------------------------------------------
26 : * ExecSort
27 : *
28 : * Sorts tuples from the outer subtree of the node using tuplesort,
29 : * which saves the results in a temporary file or memory. After the
30 : * initial call, returns a tuple from the file with each call.
31 : *
32 : * There are two distinct ways that this sort can be performed:
33 : *
34 : * 1) When the result is a single column we perform a Datum sort.
35 : *
36 : * 2) When the result contains multiple columns we perform a tuple sort.
37 : *
38 : * We could do this by always performing a tuple sort, however sorting
39 : * Datums only can be significantly faster than sorting tuples,
40 : * especially when the Datums are of a pass-by-value type.
41 : *
42 : * Conditions:
43 : * -- none.
44 : *
45 : * Initial States:
46 : * -- the outer child is prepared to return the first tuple.
47 : * ----------------------------------------------------------------
48 : */
49 : static TupleTableSlot *
50 9749274 : ExecSort(PlanState *pstate)
51 : {
52 9749274 : SortState *node = castNode(SortState, pstate);
53 : EState *estate;
54 : ScanDirection dir;
55 : Tuplesortstate *tuplesortstate;
56 : TupleTableSlot *slot;
57 :
58 9749274 : CHECK_FOR_INTERRUPTS();
59 :
60 : /*
61 : * get state info from node
62 : */
63 : SO1_printf("ExecSort: %s\n",
64 : "entering routine");
65 :
66 9749274 : estate = node->ss.ps.state;
67 9749274 : dir = estate->es_direction;
68 9749274 : tuplesortstate = (Tuplesortstate *) node->tuplesortstate;
69 :
70 : /*
71 : * If first time through, read all tuples from outer plan and pass them to
72 : * tuplesort.c. Subsequent calls just fetch tuples from tuplesort.
73 : */
74 :
75 9749274 : if (!node->sort_Done)
76 : {
77 80132 : Sort *plannode = (Sort *) node->ss.ps.plan;
78 : PlanState *outerNode;
79 : TupleDesc tupDesc;
80 80132 : int tuplesortopts = TUPLESORT_NONE;
81 :
82 : SO1_printf("ExecSort: %s\n",
83 : "sorting subplan");
84 :
85 : /*
86 : * Want to scan subplan in the forward direction while creating the
87 : * sorted data.
88 : */
89 80132 : estate->es_direction = ForwardScanDirection;
90 :
91 : /*
92 : * Initialize tuplesort module.
93 : */
94 : SO1_printf("ExecSort: %s\n",
95 : "calling tuplesort_begin");
96 :
97 80132 : outerNode = outerPlanState(node);
98 80132 : tupDesc = ExecGetResultType(outerNode);
99 :
100 80132 : if (node->randomAccess)
101 3682 : tuplesortopts |= TUPLESORT_RANDOMACCESS;
102 80132 : if (node->bounded)
103 1462 : tuplesortopts |= TUPLESORT_ALLOWBOUNDED;
104 :
105 80132 : if (node->datumSort)
106 4220 : tuplesortstate = tuplesort_begin_datum(TupleDescAttr(tupDesc, 0)->atttypid,
107 4220 : plannode->sortOperators[0],
108 4220 : plannode->collations[0],
109 4220 : plannode->nullsFirst[0],
110 : work_mem,
111 : NULL,
112 : tuplesortopts);
113 : else
114 75912 : tuplesortstate = tuplesort_begin_heap(tupDesc,
115 : plannode->numCols,
116 : plannode->sortColIdx,
117 : plannode->sortOperators,
118 : plannode->collations,
119 : plannode->nullsFirst,
120 : work_mem,
121 : NULL,
122 : tuplesortopts);
123 80120 : if (node->bounded)
124 1462 : tuplesort_set_bound(tuplesortstate, node->bound);
125 80120 : node->tuplesortstate = (void *) tuplesortstate;
126 :
127 : /*
128 : * Scan the subplan and feed all the tuples to tuplesort using the
129 : * appropriate method based on the type of sort we're doing.
130 : */
131 80120 : if (node->datumSort)
132 : {
133 : for (;;)
134 : {
135 1475798 : slot = ExecProcNode(outerNode);
136 :
137 1475792 : if (TupIsNull(slot))
138 : break;
139 1471578 : slot_getsomeattrs(slot, 1);
140 1471578 : tuplesort_putdatum(tuplesortstate,
141 1471578 : slot->tts_values[0],
142 1471578 : slot->tts_isnull[0]);
143 : }
144 : }
145 : else
146 : {
147 : for (;;)
148 : {
149 9954000 : slot = ExecProcNode(outerNode);
150 :
151 9954000 : if (TupIsNull(slot))
152 : break;
153 9878100 : tuplesort_puttupleslot(tuplesortstate, slot);
154 : }
155 : }
156 :
157 : /*
158 : * Complete the sort.
159 : */
160 80114 : tuplesort_performsort(tuplesortstate);
161 :
162 : /*
163 : * restore to user specified direction
164 : */
165 80114 : estate->es_direction = dir;
166 :
167 : /*
168 : * finally set the sorted flag to true
169 : */
170 80114 : node->sort_Done = true;
171 80114 : node->bounded_Done = node->bounded;
172 80114 : node->bound_Done = node->bound;
173 80114 : if (node->shared_info && node->am_worker)
174 : {
175 : TuplesortInstrumentation *si;
176 :
177 : Assert(IsParallelWorker());
178 : Assert(ParallelWorkerNumber <= node->shared_info->num_workers);
179 96 : si = &node->shared_info->sinstrument[ParallelWorkerNumber];
180 96 : tuplesort_get_stats(tuplesortstate, si);
181 : }
182 : SO1_printf("ExecSort: %s\n", "sorting done");
183 : }
184 :
185 : SO1_printf("ExecSort: %s\n",
186 : "retrieving tuple from tuplesort");
187 :
188 9749256 : slot = node->ss.ps.ps_ResultTupleSlot;
189 :
190 : /*
191 : * Fetch the next sorted item from the appropriate tuplesort function. For
192 : * datum sorts we must manage the slot ourselves and leave it clear when
193 : * tuplesort_getdatum returns false to indicate there are no more datums.
194 : * For tuple sorts, tuplesort_gettupleslot manages the slot for us and
195 : * empties the slot when it runs out of tuples.
196 : */
197 9749256 : if (node->datumSort)
198 : {
199 1185108 : ExecClearTuple(slot);
200 1185108 : if (tuplesort_getdatum(tuplesortstate, ScanDirectionIsForward(dir),
201 : false, &(slot->tts_values[0]),
202 : &(slot->tts_isnull[0]), NULL))
203 1181178 : ExecStoreVirtualTuple(slot);
204 : }
205 : else
206 8564148 : (void) tuplesort_gettupleslot(tuplesortstate,
207 : ScanDirectionIsForward(dir),
208 : false, slot, NULL);
209 :
210 9749256 : return slot;
211 : }
212 :
213 : /* ----------------------------------------------------------------
214 : * ExecInitSort
215 : *
216 : * Creates the run-time state information for the sort node
217 : * produced by the planner and initializes its outer subtree.
218 : * ----------------------------------------------------------------
219 : */
220 : SortState *
221 50562 : ExecInitSort(Sort *node, EState *estate, int eflags)
222 : {
223 : SortState *sortstate;
224 : TupleDesc outerTupDesc;
225 :
226 : SO1_printf("ExecInitSort: %s\n",
227 : "initializing sort node");
228 :
229 : /*
230 : * create state structure
231 : */
232 50562 : sortstate = makeNode(SortState);
233 50562 : sortstate->ss.ps.plan = (Plan *) node;
234 50562 : sortstate->ss.ps.state = estate;
235 50562 : sortstate->ss.ps.ExecProcNode = ExecSort;
236 :
237 : /*
238 : * We must have random access to the sort output to do backward scan or
239 : * mark/restore. We also prefer to materialize the sort output if we
240 : * might be called on to rewind and replay it many times.
241 : */
242 50562 : sortstate->randomAccess = (eflags & (EXEC_FLAG_REWIND |
243 : EXEC_FLAG_BACKWARD |
244 50562 : EXEC_FLAG_MARK)) != 0;
245 :
246 50562 : sortstate->bounded = false;
247 50562 : sortstate->sort_Done = false;
248 50562 : sortstate->tuplesortstate = NULL;
249 :
250 : /*
251 : * Miscellaneous initialization
252 : *
253 : * Sort nodes don't initialize their ExprContexts because they never call
254 : * ExecQual or ExecProject.
255 : */
256 :
257 : /*
258 : * initialize child nodes
259 : *
260 : * We shield the child node from the need to support REWIND, BACKWARD, or
261 : * MARK/RESTORE.
262 : */
263 50562 : eflags &= ~(EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK);
264 :
265 50562 : outerPlanState(sortstate) = ExecInitNode(outerPlan(node), estate, eflags);
266 :
267 : /*
268 : * Initialize scan slot and type.
269 : */
270 50556 : ExecCreateScanSlotFromOuterPlan(estate, &sortstate->ss, &TTSOpsVirtual);
271 :
272 : /*
273 : * Initialize return slot and type. No need to initialize projection info
274 : * because this node doesn't do projections.
275 : */
276 50556 : ExecInitResultTupleSlotTL(&sortstate->ss.ps, &TTSOpsMinimalTuple);
277 50556 : sortstate->ss.ps.ps_ProjInfo = NULL;
278 :
279 50556 : outerTupDesc = ExecGetResultType(outerPlanState(sortstate));
280 :
281 : /*
282 : * We perform a Datum sort when we're sorting just a single column,
283 : * otherwise we perform a tuple sort.
284 : */
285 50556 : if (outerTupDesc->natts == 1)
286 6672 : sortstate->datumSort = true;
287 : else
288 43884 : sortstate->datumSort = false;
289 :
290 : SO1_printf("ExecInitSort: %s\n",
291 : "sort node initialized");
292 :
293 50556 : return sortstate;
294 : }
295 :
296 : /* ----------------------------------------------------------------
297 : * ExecEndSort(node)
298 : * ----------------------------------------------------------------
299 : */
300 : void
301 50484 : ExecEndSort(SortState *node)
302 : {
303 : SO1_printf("ExecEndSort: %s\n",
304 : "shutting down sort node");
305 :
306 : /*
307 : * clean out the tuple table
308 : */
309 50484 : ExecClearTuple(node->ss.ss_ScanTupleSlot);
310 : /* must drop pointer to sort result tuple */
311 50484 : ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
312 :
313 : /*
314 : * Release tuplesort resources
315 : */
316 50484 : if (node->tuplesortstate != NULL)
317 44302 : tuplesort_end((Tuplesortstate *) node->tuplesortstate);
318 50484 : node->tuplesortstate = NULL;
319 :
320 : /*
321 : * shut down the subplan
322 : */
323 50484 : ExecEndNode(outerPlanState(node));
324 :
325 : SO1_printf("ExecEndSort: %s\n",
326 : "sort node shutdown");
327 50484 : }
328 :
329 : /* ----------------------------------------------------------------
330 : * ExecSortMarkPos
331 : *
332 : * Calls tuplesort to save the current position in the sorted file.
333 : * ----------------------------------------------------------------
334 : */
335 : void
336 569522 : ExecSortMarkPos(SortState *node)
337 : {
338 : /*
339 : * if we haven't sorted yet, just return
340 : */
341 569522 : if (!node->sort_Done)
342 0 : return;
343 :
344 569522 : tuplesort_markpos((Tuplesortstate *) node->tuplesortstate);
345 : }
346 :
347 : /* ----------------------------------------------------------------
348 : * ExecSortRestrPos
349 : *
350 : * Calls tuplesort to restore the last saved sort file position.
351 : * ----------------------------------------------------------------
352 : */
353 : void
354 29208 : ExecSortRestrPos(SortState *node)
355 : {
356 : /*
357 : * if we haven't sorted yet, just return.
358 : */
359 29208 : if (!node->sort_Done)
360 0 : return;
361 :
362 : /*
363 : * restore the scan to the previously marked position
364 : */
365 29208 : tuplesort_restorepos((Tuplesortstate *) node->tuplesortstate);
366 : }
367 :
368 : void
369 36494 : ExecReScanSort(SortState *node)
370 : {
371 36494 : PlanState *outerPlan = outerPlanState(node);
372 :
373 : /*
374 : * If we haven't sorted yet, just return. If outerplan's chgParam is not
375 : * NULL then it will be re-scanned by ExecProcNode, else no reason to
376 : * re-scan it at all.
377 : */
378 36494 : if (!node->sort_Done)
379 702 : return;
380 :
381 : /* must drop pointer to sort result tuple */
382 35792 : ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
383 :
384 : /*
385 : * If subnode is to be rescanned then we forget previous sort results; we
386 : * have to re-read the subplan and re-sort. Also must re-sort if the
387 : * bounded-sort parameters changed or we didn't select randomAccess.
388 : *
389 : * Otherwise we can just rewind and rescan the sorted output.
390 : */
391 35792 : if (outerPlan->chgParam != NULL ||
392 448 : node->bounded != node->bounded_Done ||
393 448 : node->bound != node->bound_Done ||
394 394 : !node->randomAccess)
395 : {
396 35758 : node->sort_Done = false;
397 35758 : tuplesort_end((Tuplesortstate *) node->tuplesortstate);
398 35758 : node->tuplesortstate = NULL;
399 :
400 : /*
401 : * if chgParam of subnode is not null then plan will be re-scanned by
402 : * first ExecProcNode.
403 : */
404 35758 : if (outerPlan->chgParam == NULL)
405 414 : ExecReScan(outerPlan);
406 : }
407 : else
408 34 : tuplesort_rescan((Tuplesortstate *) node->tuplesortstate);
409 : }
410 :
411 : /* ----------------------------------------------------------------
412 : * Parallel Query Support
413 : * ----------------------------------------------------------------
414 : */
415 :
416 : /* ----------------------------------------------------------------
417 : * ExecSortEstimate
418 : *
419 : * Estimate space required to propagate sort statistics.
420 : * ----------------------------------------------------------------
421 : */
422 : void
423 140 : ExecSortEstimate(SortState *node, ParallelContext *pcxt)
424 : {
425 : Size size;
426 :
427 : /* don't need this if not instrumenting or no workers */
428 140 : if (!node->ss.ps.instrument || pcxt->nworkers == 0)
429 128 : return;
430 :
431 12 : size = mul_size(pcxt->nworkers, sizeof(TuplesortInstrumentation));
432 12 : size = add_size(size, offsetof(SharedSortInfo, sinstrument));
433 12 : shm_toc_estimate_chunk(&pcxt->estimator, size);
434 12 : shm_toc_estimate_keys(&pcxt->estimator, 1);
435 : }
436 :
437 : /* ----------------------------------------------------------------
438 : * ExecSortInitializeDSM
439 : *
440 : * Initialize DSM space for sort statistics.
441 : * ----------------------------------------------------------------
442 : */
443 : void
444 140 : ExecSortInitializeDSM(SortState *node, ParallelContext *pcxt)
445 : {
446 : Size size;
447 :
448 : /* don't need this if not instrumenting or no workers */
449 140 : if (!node->ss.ps.instrument || pcxt->nworkers == 0)
450 128 : return;
451 :
452 12 : size = offsetof(SharedSortInfo, sinstrument)
453 12 : + pcxt->nworkers * sizeof(TuplesortInstrumentation);
454 12 : node->shared_info = shm_toc_allocate(pcxt->toc, size);
455 : /* ensure any unfilled slots will contain zeroes */
456 12 : memset(node->shared_info, 0, size);
457 12 : node->shared_info->num_workers = pcxt->nworkers;
458 12 : shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id,
459 12 : node->shared_info);
460 : }
461 :
462 : /* ----------------------------------------------------------------
463 : * ExecSortInitializeWorker
464 : *
465 : * Attach worker to DSM space for sort statistics.
466 : * ----------------------------------------------------------------
467 : */
468 : void
469 428 : ExecSortInitializeWorker(SortState *node, ParallelWorkerContext *pwcxt)
470 : {
471 428 : node->shared_info =
472 428 : shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, true);
473 428 : node->am_worker = true;
474 428 : }
475 :
476 : /* ----------------------------------------------------------------
477 : * ExecSortRetrieveInstrumentation
478 : *
479 : * Transfer sort statistics from DSM to private memory.
480 : * ----------------------------------------------------------------
481 : */
482 : void
483 12 : ExecSortRetrieveInstrumentation(SortState *node)
484 : {
485 : Size size;
486 : SharedSortInfo *si;
487 :
488 12 : if (node->shared_info == NULL)
489 0 : return;
490 :
491 12 : size = offsetof(SharedSortInfo, sinstrument)
492 12 : + node->shared_info->num_workers * sizeof(TuplesortInstrumentation);
493 12 : si = palloc(size);
494 12 : memcpy(si, node->shared_info, size);
495 12 : node->shared_info = si;
496 : }
|