Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeSetOp.c
4 : * Routines to handle INTERSECT and EXCEPT selection
5 : *
6 : * The input of a SetOp node consists of tuples from two relations,
7 : * which have been combined into one dataset, with a junk attribute added
8 : * that shows which relation each tuple came from. In SETOP_SORTED mode,
9 : * the input has furthermore been sorted according to all the grouping
10 : * columns (ie, all the non-junk attributes). The SetOp node scans each
11 : * group of identical tuples to determine how many came from each input
12 : * relation. Then it is a simple matter to emit the output demanded by the
13 : * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
14 : *
15 : * In SETOP_HASHED mode, the input is delivered in no particular order,
16 : * except that we know all the tuples from one input relation will come before
17 : * all the tuples of the other. The planner guarantees that the first input
18 : * relation is the left-hand one for EXCEPT, and tries to make the smaller
19 : * input relation come first for INTERSECT. We build a hash table in memory
20 : * with one entry for each group of identical tuples, and count the number of
21 : * tuples in the group from each relation. After seeing all the input, we
22 : * scan the hashtable and generate the correct output using those counts.
23 : * We can avoid making hashtable entries for any tuples appearing only in the
24 : * second input relation, since they cannot result in any output.
25 : *
26 : * This node type is not used for UNION or UNION ALL, since those can be
27 : * implemented more cheaply (there's no need for the junk attribute to
28 : * identify the source relation).
29 : *
30 : * Note that SetOp does no qual checking nor projection. The delivered
31 : * output tuples are just copies of the first-to-arrive tuple in each
32 : * input group.
33 : *
34 : *
35 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
36 : * Portions Copyright (c) 1994, Regents of the University of California
37 : *
38 : *
39 : * IDENTIFICATION
40 : * src/backend/executor/nodeSetOp.c
41 : *
42 : *-------------------------------------------------------------------------
43 : */
44 :
45 : #include "postgres.h"
46 :
47 : #include "access/htup_details.h"
48 : #include "executor/executor.h"
49 : #include "executor/nodeSetOp.h"
50 : #include "miscadmin.h"
51 : #include "utils/memutils.h"
52 :
53 :
54 : /*
55 : * SetOpStatePerGroupData - per-group working state
56 : *
57 : * These values are working state that is initialized at the start of
58 : * an input tuple group and updated for each input tuple.
59 : *
60 : * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
61 : * the plan state node. In SETOP_HASHED mode, the hash table contains one
62 : * of these for each tuple group.
63 : */
64 : typedef struct SetOpStatePerGroupData
65 : {
66 : long numLeft; /* number of left-input dups in group */
67 : long numRight; /* number of right-input dups in group */
68 : } SetOpStatePerGroupData;
69 :
70 :
71 : static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
72 : static void setop_fill_hash_table(SetOpState *setopstate);
73 : static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);
74 :
75 :
76 : /*
77 : * Initialize state for a new group of input values.
78 : */
79 : static inline void
80 472052 : initialize_counts(SetOpStatePerGroup pergroup)
81 : {
82 472052 : pergroup->numLeft = pergroup->numRight = 0;
83 472052 : }
84 :
85 : /*
86 : * Advance the appropriate counter for one input tuple.
87 : */
88 : static inline void
89 973750 : advance_counts(SetOpStatePerGroup pergroup, int flag)
90 : {
91 973750 : if (flag)
92 501356 : pergroup->numRight++;
93 : else
94 472394 : pergroup->numLeft++;
95 973750 : }
96 :
97 : /*
98 : * Fetch the "flag" column from an input tuple.
99 : * This is an integer column with value 0 for left side, 1 for right side.
100 : */
101 : static int
102 1004554 : fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
103 : {
104 1004554 : SetOp *node = (SetOp *) setopstate->ps.plan;
105 : int flag;
106 : bool isNull;
107 :
108 1004554 : flag = DatumGetInt32(slot_getattr(inputslot,
109 1004554 : node->flagColIdx,
110 : &isNull));
111 : Assert(!isNull);
112 : Assert(flag == 0 || flag == 1);
113 1004554 : return flag;
114 : }
115 :
116 : /*
117 : * Initialize the hash table to empty.
118 : */
119 : static void
120 488 : build_hash_table(SetOpState *setopstate)
121 : {
122 488 : SetOp *node = (SetOp *) setopstate->ps.plan;
123 488 : ExprContext *econtext = setopstate->ps.ps_ExprContext;
124 488 : TupleDesc desc = ExecGetResultType(outerPlanState(setopstate));
125 :
126 : Assert(node->strategy == SETOP_HASHED);
127 : Assert(node->numGroups > 0);
128 :
129 976 : setopstate->hashtable = BuildTupleHashTableExt(&setopstate->ps,
130 : desc,
131 : node->numCols,
132 : node->dupColIdx,
133 488 : setopstate->eqfuncoids,
134 : setopstate->hashfunctions,
135 : node->dupCollations,
136 : node->numGroups,
137 : 0,
138 488 : setopstate->ps.state->es_query_cxt,
139 : setopstate->tableContext,
140 : econtext->ecxt_per_tuple_memory,
141 : false);
142 488 : }
143 :
144 : /*
145 : * We've completed processing a tuple group. Decide how many copies (if any)
146 : * of its representative row to emit, and store the count into numOutput.
147 : * This logic is straight from the SQL92 specification.
148 : */
149 : static void
150 472052 : set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
151 : {
152 472052 : SetOp *plannode = (SetOp *) setopstate->ps.plan;
153 :
154 472052 : switch (plannode->cmd)
155 : {
156 90336 : case SETOPCMD_INTERSECT:
157 90336 : if (pergroup->numLeft > 0 && pergroup->numRight > 0)
158 60192 : setopstate->numOutput = 1;
159 : else
160 30144 : setopstate->numOutput = 0;
161 90336 : break;
162 36 : case SETOPCMD_INTERSECT_ALL:
163 36 : setopstate->numOutput =
164 36 : (pergroup->numLeft < pergroup->numRight) ?
165 36 : pergroup->numLeft : pergroup->numRight;
166 36 : break;
167 369596 : case SETOPCMD_EXCEPT:
168 369596 : if (pergroup->numLeft > 0 && pergroup->numRight == 0)
169 822 : setopstate->numOutput = 1;
170 : else
171 368774 : setopstate->numOutput = 0;
172 369596 : break;
173 12084 : case SETOPCMD_EXCEPT_ALL:
174 12084 : setopstate->numOutput =
175 12084 : (pergroup->numLeft < pergroup->numRight) ?
176 12084 : 0 : (pergroup->numLeft - pergroup->numRight);
177 12084 : break;
178 0 : default:
179 0 : elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
180 : break;
181 : }
182 472052 : }
183 :
184 :
185 : /* ----------------------------------------------------------------
186 : * ExecSetOp
187 : * ----------------------------------------------------------------
188 : */
189 : static TupleTableSlot * /* return: a tuple or NULL */
190 62306 : ExecSetOp(PlanState *pstate)
191 : {
192 62306 : SetOpState *node = castNode(SetOpState, pstate);
193 62306 : SetOp *plannode = (SetOp *) node->ps.plan;
194 62306 : TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;
195 :
196 62306 : CHECK_FOR_INTERRUPTS();
197 :
198 : /*
199 : * If the previously-returned tuple needs to be returned more than once,
200 : * keep returning it.
201 : */
202 62306 : if (node->numOutput > 0)
203 : {
204 48 : node->numOutput--;
205 48 : return resultTupleSlot;
206 : }
207 :
208 : /* Otherwise, we're done if we are out of groups */
209 62258 : if (node->setop_done)
210 30 : return NULL;
211 :
212 : /* Fetch the next tuple group according to the correct strategy */
213 62228 : if (plannode->strategy == SETOP_HASHED)
214 : {
215 32048 : if (!node->table_filled)
216 1046 : setop_fill_hash_table(node);
217 32048 : return setop_retrieve_hash_table(node);
218 : }
219 : else
220 30180 : return setop_retrieve_direct(node);
221 : }
222 :
223 : /*
224 : * ExecSetOp for non-hashed case
225 : */
226 : static TupleTableSlot *
227 30180 : setop_retrieve_direct(SetOpState *setopstate)
228 : {
229 : PlanState *outerPlan;
230 : SetOpStatePerGroup pergroup;
231 : TupleTableSlot *outerslot;
232 : TupleTableSlot *resultTupleSlot;
233 30180 : ExprContext *econtext = setopstate->ps.ps_ExprContext;
234 :
235 : /*
236 : * get state info from node
237 : */
238 30180 : outerPlan = outerPlanState(setopstate);
239 30180 : pergroup = (SetOpStatePerGroup) setopstate->pergroup;
240 30180 : resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
241 :
242 : /*
243 : * We loop retrieving groups until we find one we should return
244 : */
245 120330 : while (!setopstate->setop_done)
246 : {
247 : /*
248 : * If we don't already have the first tuple of the new group, fetch it
249 : * from the outer plan.
250 : */
251 120252 : if (setopstate->grp_firstTuple == NULL)
252 : {
253 108 : outerslot = ExecProcNode(outerPlan);
254 108 : if (!TupIsNull(outerslot))
255 : {
256 : /* Make a copy of the first input tuple */
257 108 : setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
258 : }
259 : else
260 : {
261 : /* outer plan produced no tuples at all */
262 0 : setopstate->setop_done = true;
263 0 : return NULL;
264 : }
265 : }
266 :
267 : /*
268 : * Store the copied first input tuple in the tuple table slot reserved
269 : * for it. The tuple will be deleted when it is cleared from the
270 : * slot.
271 : */
272 120252 : ExecStoreHeapTuple(setopstate->grp_firstTuple,
273 : resultTupleSlot,
274 : true);
275 120252 : setopstate->grp_firstTuple = NULL; /* don't keep two pointers */
276 :
277 : /* Initialize working state for a new input tuple group */
278 120252 : initialize_counts(pergroup);
279 :
280 : /* Count the first input tuple */
281 120252 : advance_counts(pergroup,
282 : fetch_tuple_flag(setopstate, resultTupleSlot));
283 :
284 : /*
285 : * Scan the outer plan until we exhaust it or cross a group boundary.
286 : */
287 : for (;;)
288 : {
289 240474 : outerslot = ExecProcNode(outerPlan);
290 240474 : if (TupIsNull(outerslot))
291 : {
292 : /* no more outer-plan tuples available */
293 108 : setopstate->setop_done = true;
294 108 : break;
295 : }
296 :
297 : /*
298 : * Check whether we've crossed a group boundary.
299 : */
300 240366 : econtext->ecxt_outertuple = resultTupleSlot;
301 240366 : econtext->ecxt_innertuple = outerslot;
302 :
303 240366 : if (!ExecQualAndReset(setopstate->eqfunction, econtext))
304 : {
305 : /*
306 : * Save the first input tuple of the next group.
307 : */
308 120144 : setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
309 120144 : break;
310 : }
311 :
312 : /* Still in same group, so count this tuple */
313 120222 : advance_counts(pergroup,
314 : fetch_tuple_flag(setopstate, outerslot));
315 : }
316 :
317 : /*
318 : * Done scanning input tuple group. See if we should emit any copies
319 : * of result tuple, and if so return the first copy.
320 : */
321 120252 : set_output_count(setopstate, pergroup);
322 :
323 120252 : if (setopstate->numOutput > 0)
324 : {
325 30102 : setopstate->numOutput--;
326 30102 : return resultTupleSlot;
327 : }
328 : }
329 :
330 : /* No more groups */
331 78 : ExecClearTuple(resultTupleSlot);
332 78 : return NULL;
333 : }
334 :
335 : /*
336 : * ExecSetOp for hashed case: phase 1, read input and build hash table
337 : */
338 : static void
339 1046 : setop_fill_hash_table(SetOpState *setopstate)
340 : {
341 1046 : SetOp *node = (SetOp *) setopstate->ps.plan;
342 : PlanState *outerPlan;
343 : int firstFlag;
344 : bool in_first_rel PG_USED_FOR_ASSERTS_ONLY;
345 1046 : ExprContext *econtext = setopstate->ps.ps_ExprContext;
346 :
347 : /*
348 : * get state info from node
349 : */
350 1046 : outerPlan = outerPlanState(setopstate);
351 1046 : firstFlag = node->firstFlag;
352 : /* verify planner didn't mess up */
353 : Assert(firstFlag == 0 ||
354 : (firstFlag == 1 &&
355 : (node->cmd == SETOPCMD_INTERSECT ||
356 : node->cmd == SETOPCMD_INTERSECT_ALL)));
357 :
358 : /*
359 : * Process each outer-plan tuple, and then fetch the next one, until we
360 : * exhaust the outer plan.
361 : */
362 1046 : in_first_rel = true;
363 : for (;;)
364 764080 : {
365 : TupleTableSlot *outerslot;
366 : int flag;
367 : TupleHashEntryData *entry;
368 : bool isnew;
369 :
370 765126 : outerslot = ExecProcNode(outerPlan);
371 765126 : if (TupIsNull(outerslot))
372 : break;
373 :
374 : /* Identify whether it's left or right input */
375 764080 : flag = fetch_tuple_flag(setopstate, outerslot);
376 :
377 764080 : if (flag == firstFlag)
378 : {
379 : /* (still) in first input relation */
380 : Assert(in_first_rel);
381 :
382 : /* Find or build hashtable entry for this tuple's group */
383 382130 : entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
384 : &isnew, NULL);
385 :
386 : /* If new tuple group, initialize counts */
387 382130 : if (isnew)
388 : {
389 351800 : entry->additional = (SetOpStatePerGroup)
390 351800 : MemoryContextAlloc(setopstate->hashtable->tablecxt,
391 : sizeof(SetOpStatePerGroupData));
392 351800 : initialize_counts((SetOpStatePerGroup) entry->additional);
393 : }
394 :
395 : /* Advance the counts */
396 382130 : advance_counts((SetOpStatePerGroup) entry->additional, flag);
397 : }
398 : else
399 : {
400 : /* reached second relation */
401 381950 : in_first_rel = false;
402 :
403 : /* For tuples not seen previously, do not make hashtable entry */
404 381950 : entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
405 : NULL, NULL);
406 :
407 : /* Advance the counts if entry is already present */
408 381950 : if (entry)
409 351146 : advance_counts((SetOpStatePerGroup) entry->additional, flag);
410 : }
411 :
412 : /* Must reset expression context after each hashtable lookup */
413 764080 : ResetExprContext(econtext);
414 : }
415 :
416 1046 : setopstate->table_filled = true;
417 : /* Initialize to walk the hash table */
418 1046 : ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
419 1046 : }
420 :
421 : /*
422 : * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
423 : */
424 : static TupleTableSlot *
425 32048 : setop_retrieve_hash_table(SetOpState *setopstate)
426 : {
427 : TupleHashEntryData *entry;
428 : TupleTableSlot *resultTupleSlot;
429 :
430 : /*
431 : * get state info from node
432 : */
433 32048 : resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
434 :
435 : /*
436 : * We loop retrieving groups until we find one we should return
437 : */
438 352846 : while (!setopstate->setop_done)
439 : {
440 352846 : CHECK_FOR_INTERRUPTS();
441 :
442 : /*
443 : * Find the next entry in the hash table
444 : */
445 352846 : entry = ScanTupleHashTable(setopstate->hashtable, &setopstate->hashiter);
446 352846 : if (entry == NULL)
447 : {
448 : /* No more entries in hashtable, so done */
449 1046 : setopstate->setop_done = true;
450 1046 : return NULL;
451 : }
452 :
453 : /*
454 : * See if we should emit any copies of this tuple, and if so return
455 : * the first copy.
456 : */
457 351800 : set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);
458 :
459 351800 : if (setopstate->numOutput > 0)
460 : {
461 31002 : setopstate->numOutput--;
462 31002 : return ExecStoreMinimalTuple(entry->firstTuple,
463 : resultTupleSlot,
464 : false);
465 : }
466 : }
467 :
468 : /* No more groups */
469 0 : ExecClearTuple(resultTupleSlot);
470 0 : return NULL;
471 : }
472 :
473 : /* ----------------------------------------------------------------
474 : * ExecInitSetOp
475 : *
476 : * This initializes the setop node state structures and
477 : * the node's subplan.
478 : * ----------------------------------------------------------------
479 : */
480 : SetOpState *
481 650 : ExecInitSetOp(SetOp *node, EState *estate, int eflags)
482 : {
483 : SetOpState *setopstate;
484 : TupleDesc outerDesc;
485 :
486 : /* check for unsupported flags */
487 : Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
488 :
489 : /*
490 : * create state structure
491 : */
492 650 : setopstate = makeNode(SetOpState);
493 650 : setopstate->ps.plan = (Plan *) node;
494 650 : setopstate->ps.state = estate;
495 650 : setopstate->ps.ExecProcNode = ExecSetOp;
496 :
497 650 : setopstate->eqfuncoids = NULL;
498 650 : setopstate->hashfunctions = NULL;
499 650 : setopstate->setop_done = false;
500 650 : setopstate->numOutput = 0;
501 650 : setopstate->pergroup = NULL;
502 650 : setopstate->grp_firstTuple = NULL;
503 650 : setopstate->hashtable = NULL;
504 650 : setopstate->tableContext = NULL;
505 :
506 : /*
507 : * create expression context
508 : */
509 650 : ExecAssignExprContext(estate, &setopstate->ps);
510 :
511 : /*
512 : * If hashing, we also need a longer-lived context to store the hash
513 : * table. The table can't just be kept in the per-query context because
514 : * we want to be able to throw it away in ExecReScanSetOp.
515 : */
516 650 : if (node->strategy == SETOP_HASHED)
517 488 : setopstate->tableContext =
518 488 : AllocSetContextCreate(CurrentMemoryContext,
519 : "SetOp hash table",
520 : ALLOCSET_DEFAULT_SIZES);
521 :
522 : /*
523 : * initialize child nodes
524 : *
525 : * If we are hashing then the child plan does not need to handle REWIND
526 : * efficiently; see ExecReScanSetOp.
527 : */
528 650 : if (node->strategy == SETOP_HASHED)
529 488 : eflags &= ~EXEC_FLAG_REWIND;
530 650 : outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
531 650 : outerDesc = ExecGetResultType(outerPlanState(setopstate));
532 :
533 : /*
534 : * Initialize result slot and type. Setop nodes do no projections, so
535 : * initialize projection info for this node appropriately.
536 : */
537 650 : ExecInitResultTupleSlotTL(&setopstate->ps,
538 650 : node->strategy == SETOP_HASHED ?
539 : &TTSOpsMinimalTuple : &TTSOpsHeapTuple);
540 650 : setopstate->ps.ps_ProjInfo = NULL;
541 :
542 : /*
543 : * Precompute fmgr lookup data for inner loop. We need both equality and
544 : * hashing functions to do it by hashing, but only equality if not
545 : * hashing.
546 : */
547 650 : if (node->strategy == SETOP_HASHED)
548 488 : execTuplesHashPrepare(node->numCols,
549 488 : node->dupOperators,
550 : &setopstate->eqfuncoids,
551 : &setopstate->hashfunctions);
552 : else
553 162 : setopstate->eqfunction =
554 162 : execTuplesMatchPrepare(outerDesc,
555 : node->numCols,
556 162 : node->dupColIdx,
557 162 : node->dupOperators,
558 162 : node->dupCollations,
559 : &setopstate->ps);
560 :
561 650 : if (node->strategy == SETOP_HASHED)
562 : {
563 488 : build_hash_table(setopstate);
564 488 : setopstate->table_filled = false;
565 : }
566 : else
567 : {
568 162 : setopstate->pergroup =
569 162 : (SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
570 : }
571 :
572 650 : return setopstate;
573 : }
574 :
575 : /* ----------------------------------------------------------------
576 : * ExecEndSetOp
577 : *
578 : * This shuts down the subplan and frees resources allocated
579 : * to this node.
580 : * ----------------------------------------------------------------
581 : */
582 : void
583 650 : ExecEndSetOp(SetOpState *node)
584 : {
585 : /* free subsidiary stuff including hashtable */
586 650 : if (node->tableContext)
587 488 : MemoryContextDelete(node->tableContext);
588 :
589 650 : ExecEndNode(outerPlanState(node));
590 650 : }
591 :
592 :
593 : void
594 600 : ExecReScanSetOp(SetOpState *node)
595 : {
596 600 : PlanState *outerPlan = outerPlanState(node);
597 :
598 600 : ExecClearTuple(node->ps.ps_ResultTupleSlot);
599 600 : node->setop_done = false;
600 600 : node->numOutput = 0;
601 :
602 600 : if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
603 : {
604 : /*
605 : * In the hashed case, if we haven't yet built the hash table then we
606 : * can just return; nothing done yet, so nothing to undo. If subnode's
607 : * chgParam is not NULL then it will be re-scanned by ExecProcNode,
608 : * else no reason to re-scan it at all.
609 : */
610 600 : if (!node->table_filled)
611 6 : return;
612 :
613 : /*
614 : * If we do have the hash table and the subplan does not have any
615 : * parameter changes, then we can just rescan the existing hash table;
616 : * no need to build it again.
617 : */
618 594 : if (outerPlan->chgParam == NULL)
619 : {
620 0 : ResetTupleHashIterator(node->hashtable, &node->hashiter);
621 0 : return;
622 : }
623 : }
624 :
625 : /* Release first tuple of group, if we have made a copy */
626 594 : if (node->grp_firstTuple != NULL)
627 : {
628 0 : heap_freetuple(node->grp_firstTuple);
629 0 : node->grp_firstTuple = NULL;
630 : }
631 :
632 : /* Release any hashtable storage */
633 594 : if (node->tableContext)
634 594 : MemoryContextReset(node->tableContext);
635 :
636 : /* And rebuild empty hashtable if needed */
637 594 : if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
638 : {
639 594 : ResetTupleHashTable(node->hashtable);
640 594 : node->table_filled = false;
641 : }
642 :
643 : /*
644 : * if chgParam of subnode is not null then plan will be re-scanned by
645 : * first ExecProcNode.
646 : */
647 594 : if (outerPlan->chgParam == NULL)
648 0 : ExecReScan(outerPlan);
649 : }
|