Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * prepunion.c
4 : * Routines to plan set-operation queries. The filename is a leftover
5 : * from a time when only UNIONs were implemented.
6 : *
7 : * There are two code paths in the planner for set-operation queries.
8 : * If a subquery consists entirely of simple UNION ALL operations, it
9 : * is converted into an "append relation". Otherwise, it is handled
10 : * by the general code in this module (plan_set_operations and its
11 : * subroutines). There is some support code here for the append-relation
12 : * case, but most of the heavy lifting for that is done elsewhere,
13 : * notably in prepjointree.c and allpaths.c.
14 : *
15 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/optimizer/prep/prepunion.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include "access/htup_details.h"
27 : #include "catalog/pg_type.h"
28 : #include "miscadmin.h"
29 : #include "nodes/makefuncs.h"
30 : #include "nodes/nodeFuncs.h"
31 : #include "optimizer/cost.h"
32 : #include "optimizer/pathnode.h"
33 : #include "optimizer/paths.h"
34 : #include "optimizer/planner.h"
35 : #include "optimizer/prep.h"
36 : #include "optimizer/tlist.h"
37 : #include "parser/parse_coerce.h"
38 : #include "utils/selfuncs.h"
39 :
40 :
41 : static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
42 : SetOperationStmt *parentOp,
43 : List *colTypes, List *colCollations,
44 : List *refnames_tlist,
45 : List **pTargetList,
46 : bool *istrivial_tlist);
47 : static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
48 : PlannerInfo *root,
49 : List *refnames_tlist,
50 : List **pTargetList);
51 : static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
52 : bool trivial_tlist, List *child_tlist,
53 : List *interesting_pathkeys,
54 : double *pNumGroups);
55 : static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
56 : List *refnames_tlist,
57 : List **pTargetList);
58 : static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
59 : List *refnames_tlist,
60 : List **pTargetList);
61 : static List *plan_union_children(PlannerInfo *root,
62 : SetOperationStmt *top_union,
63 : List *refnames_tlist,
64 : List **tlist_list,
65 : List **istrivial_tlist);
66 : static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
67 : static List *generate_setop_tlist(List *colTypes, List *colCollations,
68 : Index varno,
69 : bool hack_constants,
70 : List *input_tlist,
71 : List *refnames_tlist,
72 : bool *trivial_tlist);
73 : static List *generate_append_tlist(List *colTypes, List *colCollations,
74 : List *input_tlists,
75 : List *refnames_tlist);
76 : static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
77 :
78 :
79 : /*
80 : * plan_set_operations
81 : *
82 : * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
83 : *
84 : * This routine only deals with the setOperations tree of the given query.
85 : * Any top-level ORDER BY requested in root->parse->sortClause will be handled
86 : * when we return to grouping_planner; likewise for LIMIT.
87 : *
88 : * What we return is an "upperrel" RelOptInfo containing at least one Path
89 : * that implements the set-operation tree. In addition, root->processed_tlist
90 : * receives a targetlist representing the output of the topmost setop node.
91 : */
92 : RelOptInfo *
93 5728 : plan_set_operations(PlannerInfo *root)
94 : {
95 5728 : Query *parse = root->parse;
96 5728 : SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
97 : Node *node;
98 : RangeTblEntry *leftmostRTE;
99 : Query *leftmostQuery;
100 : RelOptInfo *setop_rel;
101 : List *top_tlist;
102 :
103 : Assert(topop);
104 :
105 : /* check for unsupported stuff */
106 : Assert(parse->jointree->fromlist == NIL);
107 : Assert(parse->jointree->quals == NULL);
108 : Assert(parse->groupClause == NIL);
109 : Assert(parse->havingQual == NULL);
110 : Assert(parse->windowClause == NIL);
111 : Assert(parse->distinctClause == NIL);
112 :
113 : /*
114 : * In the outer query level, equivalence classes are limited to classes
115 : * which define that the top-level target entry is equivalent to the
116 : * corresponding child target entry. There won't be any equivalence class
117 : * merging. Mark that merging is complete to allow us to make pathkeys.
118 : */
119 : Assert(root->eq_classes == NIL);
120 5728 : root->ec_merging_done = true;
121 :
122 : /*
123 : * We'll need to build RelOptInfos for each of the leaf subqueries, which
124 : * are RTE_SUBQUERY rangetable entries in this Query. Prepare the index
125 : * arrays for those, and for AppendRelInfos in case they're needed.
126 : */
127 5728 : setup_simple_rel_arrays(root);
128 :
129 : /*
130 : * Find the leftmost component Query. We need to use its column names for
131 : * all generated tlists (else SELECT INTO won't work right).
132 : */
133 5728 : node = topop->larg;
134 9418 : while (node && IsA(node, SetOperationStmt))
135 3690 : node = ((SetOperationStmt *) node)->larg;
136 : Assert(node && IsA(node, RangeTblRef));
137 5728 : leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
138 5728 : leftmostQuery = leftmostRTE->subquery;
139 : Assert(leftmostQuery != NULL);
140 :
141 : /*
142 : * If the topmost node is a recursive union, it needs special processing.
143 : */
144 5728 : if (root->hasRecursion)
145 : {
146 826 : setop_rel = generate_recursion_path(topop, root,
147 : leftmostQuery->targetList,
148 : &top_tlist);
149 : }
150 : else
151 : {
152 : bool trivial_tlist;
153 :
154 : /*
155 : * Recurse on setOperations tree to generate paths for set ops. The
156 : * final output paths should have just the column types shown as the
157 : * output from the top-level node.
158 : */
159 4902 : setop_rel = recurse_set_operations((Node *) topop, root,
160 : NULL, /* no parent */
161 : topop->colTypes, topop->colCollations,
162 : leftmostQuery->targetList,
163 : &top_tlist,
164 : &trivial_tlist);
165 : }
166 :
167 : /* Must return the built tlist into root->processed_tlist. */
168 5722 : root->processed_tlist = top_tlist;
169 :
170 5722 : return setop_rel;
171 : }
172 :
173 : /*
174 : * recurse_set_operations
175 : * Recursively handle one step in a tree of set operations
176 : *
177 : * setOp: current step (could be a SetOperationStmt or a leaf RangeTblRef)
178 : * parentOp: parent step, or NULL if none (but see below)
179 : * colTypes: OID list of set-op's result column datatypes
180 : * colCollations: OID list of set-op's result column collations
181 : * refnames_tlist: targetlist to take column names from
182 : *
183 : * parentOp should be passed as NULL unless that step is interested in
184 : * getting sorted output from this step. ("Sorted" means "sorted according
185 : * to the default btree opclasses of the result column datatypes".)
186 : *
187 : * Returns a RelOptInfo for the subtree, as well as these output parameters:
188 : * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
189 : * *istrivial_tlist: true if, and only if, datatypes between parent and child
190 : * match.
191 : *
192 : * If setOp is a leaf node, this function plans the sub-query but does
193 : * not populate the pathlist of the returned RelOptInfo. The caller will
194 : * generate SubqueryScan paths using useful path(s) of the subquery (see
195 : * build_setop_child_paths). But this function does build the paths for
196 : * set-operation nodes.
197 : *
198 : * The pTargetList output parameter is mostly redundant with the pathtarget
199 : * of the returned RelOptInfo, but for the moment we need it because much of
200 : * the logic in this file depends on flag columns being marked resjunk.
201 : * XXX Now that there are no flag columns and hence no resjunk columns, we
202 : * could probably refactor this file to deal only in pathtargets.
203 : *
204 : * We don't have to care about typmods here: the only allowed difference
205 : * between set-op input and output typmods is input is a specific typmod
206 : * and output is -1, and that does not require a coercion.
207 : */
208 : static RelOptInfo *
209 20270 : recurse_set_operations(Node *setOp, PlannerInfo *root,
210 : SetOperationStmt *parentOp,
211 : List *colTypes, List *colCollations,
212 : List *refnames_tlist,
213 : List **pTargetList,
214 : bool *istrivial_tlist)
215 : {
216 : RelOptInfo *rel;
217 :
218 20270 : *istrivial_tlist = true; /* for now */
219 :
220 : /* Guard against stack overflow due to overly complex setop nests */
221 20270 : check_stack_depth();
222 :
223 20270 : if (IsA(setOp, RangeTblRef))
224 : {
225 15212 : RangeTblRef *rtr = (RangeTblRef *) setOp;
226 15212 : RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
227 15212 : Query *subquery = rte->subquery;
228 : PlannerInfo *subroot;
229 : List *tlist;
230 : bool trivial_tlist;
231 :
232 : Assert(subquery != NULL);
233 :
234 : /* Build a RelOptInfo for this leaf subquery. */
235 15212 : rel = build_simple_rel(root, rtr->rtindex, NULL);
236 :
237 : /* plan_params should not be in use in current query level */
238 : Assert(root->plan_params == NIL);
239 :
240 : /*
241 : * Generate a subroot and Paths for the subquery. If we have a
242 : * parentOp, pass that down to encourage subquery_planner to consider
243 : * suitably-sorted Paths.
244 : */
245 15212 : subroot = rel->subroot = subquery_planner(root->glob, subquery, root,
246 : false, root->tuple_fraction,
247 : parentOp);
248 :
249 : /*
250 : * It should not be possible for the primitive query to contain any
251 : * cross-references to other primitive queries in the setop tree.
252 : */
253 15212 : if (root->plan_params)
254 0 : elog(ERROR, "unexpected outer reference in set operation subquery");
255 :
256 : /* Figure out the appropriate target list for this subquery. */
257 15212 : tlist = generate_setop_tlist(colTypes, colCollations,
258 15212 : rtr->rtindex,
259 : true,
260 : subroot->processed_tlist,
261 : refnames_tlist,
262 : &trivial_tlist);
263 15212 : rel->reltarget = create_pathtarget(root, tlist);
264 :
265 : /* Return the fully-fledged tlist to caller, too */
266 15212 : *pTargetList = tlist;
267 15212 : *istrivial_tlist = trivial_tlist;
268 : }
269 5058 : else if (IsA(setOp, SetOperationStmt))
270 : {
271 5058 : SetOperationStmt *op = (SetOperationStmt *) setOp;
272 :
273 : /* UNIONs are much different from INTERSECT/EXCEPT */
274 5058 : if (op->op == SETOP_UNION)
275 4372 : rel = generate_union_paths(op, root,
276 : refnames_tlist,
277 : pTargetList);
278 : else
279 686 : rel = generate_nonunion_paths(op, root,
280 : refnames_tlist,
281 : pTargetList);
282 :
283 : /*
284 : * If necessary, add a Result node to project the caller-requested
285 : * output columns.
286 : *
287 : * XXX you don't really want to know about this: setrefs.c will apply
288 : * fix_upper_expr() to the Result node's tlist. This would fail if the
289 : * Vars generated by generate_setop_tlist() were not exactly equal()
290 : * to the corresponding tlist entries of the subplan. However, since
291 : * the subplan was generated by generate_union_paths() or
292 : * generate_nonunion_paths(), and hence its tlist was generated by
293 : * generate_append_tlist() or generate_setop_tlist(), this will work.
294 : * We just tell generate_setop_tlist() to use varno 0.
295 : */
296 5058 : if (!tlist_same_datatypes(*pTargetList, colTypes, false) ||
297 5046 : !tlist_same_collations(*pTargetList, colCollations, false))
298 : {
299 : PathTarget *target;
300 : bool trivial_tlist;
301 : ListCell *lc;
302 :
303 12 : *pTargetList = generate_setop_tlist(colTypes, colCollations,
304 : 0,
305 : false,
306 : *pTargetList,
307 : refnames_tlist,
308 : &trivial_tlist);
309 12 : *istrivial_tlist = trivial_tlist;
310 12 : target = create_pathtarget(root, *pTargetList);
311 :
312 : /* Apply projection to each path */
313 24 : foreach(lc, rel->pathlist)
314 : {
315 12 : Path *subpath = (Path *) lfirst(lc);
316 : Path *path;
317 :
318 : Assert(subpath->param_info == NULL);
319 12 : path = apply_projection_to_path(root, subpath->parent,
320 : subpath, target);
321 : /* If we had to add a Result, path is different from subpath */
322 12 : if (path != subpath)
323 12 : lfirst(lc) = path;
324 : }
325 :
326 : /* Apply projection to each partial path */
327 12 : foreach(lc, rel->partial_pathlist)
328 : {
329 0 : Path *subpath = (Path *) lfirst(lc);
330 : Path *path;
331 :
332 : Assert(subpath->param_info == NULL);
333 :
334 : /* avoid apply_projection_to_path, in case of multiple refs */
335 0 : path = (Path *) create_projection_path(root, subpath->parent,
336 : subpath, target);
337 0 : lfirst(lc) = path;
338 : }
339 : }
340 5058 : postprocess_setop_rel(root, rel);
341 : }
342 : else
343 : {
344 0 : elog(ERROR, "unrecognized node type: %d",
345 : (int) nodeTag(setOp));
346 : *pTargetList = NIL;
347 : rel = NULL; /* keep compiler quiet */
348 : }
349 :
350 20270 : return rel;
351 : }
352 :
353 : /*
354 : * Generate paths for a recursive UNION node
355 : */
356 : static RelOptInfo *
357 826 : generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
358 : List *refnames_tlist,
359 : List **pTargetList)
360 : {
361 : RelOptInfo *result_rel;
362 : Path *path;
363 : RelOptInfo *lrel,
364 : *rrel;
365 : Path *lpath;
366 : Path *rpath;
367 : List *lpath_tlist;
368 : bool lpath_trivial_tlist;
369 : List *rpath_tlist;
370 : bool rpath_trivial_tlist;
371 : List *tlist;
372 : List *groupList;
373 : double dNumGroups;
374 :
375 : /* Parser should have rejected other cases */
376 826 : if (setOp->op != SETOP_UNION)
377 0 : elog(ERROR, "only UNION queries can be recursive");
378 : /* Worktable ID should be assigned */
379 : Assert(root->wt_param_id >= 0);
380 :
381 : /*
382 : * Unlike a regular UNION node, process the left and right inputs
383 : * separately without any intention of combining them into one Append.
384 : */
385 826 : lrel = recurse_set_operations(setOp->larg, root,
386 : NULL, /* no value in sorted results */
387 : setOp->colTypes, setOp->colCollations,
388 : refnames_tlist,
389 : &lpath_tlist,
390 : &lpath_trivial_tlist);
391 826 : if (lrel->rtekind == RTE_SUBQUERY)
392 826 : build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
393 : NIL, NULL);
394 826 : lpath = lrel->cheapest_total_path;
395 : /* The right path will want to look at the left one ... */
396 826 : root->non_recursive_path = lpath;
397 826 : rrel = recurse_set_operations(setOp->rarg, root,
398 : NULL, /* no value in sorted results */
399 : setOp->colTypes, setOp->colCollations,
400 : refnames_tlist,
401 : &rpath_tlist,
402 : &rpath_trivial_tlist);
403 826 : if (rrel->rtekind == RTE_SUBQUERY)
404 820 : build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
405 : NIL, NULL);
406 826 : rpath = rrel->cheapest_total_path;
407 826 : root->non_recursive_path = NULL;
408 :
409 : /*
410 : * Generate tlist for RecursiveUnion path node --- same as in Append cases
411 : */
412 826 : tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations,
413 826 : list_make2(lpath_tlist, rpath_tlist),
414 : refnames_tlist);
415 :
416 826 : *pTargetList = tlist;
417 :
418 : /* Build result relation. */
419 826 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
420 826 : bms_union(lrel->relids, rrel->relids));
421 826 : result_rel->reltarget = create_pathtarget(root, tlist);
422 :
423 : /*
424 : * If UNION, identify the grouping operators
425 : */
426 826 : if (setOp->all)
427 : {
428 452 : groupList = NIL;
429 452 : dNumGroups = 0;
430 : }
431 : else
432 : {
433 : /* Identify the grouping semantics */
434 374 : groupList = generate_setop_grouplist(setOp, tlist);
435 :
436 : /* We only support hashing here */
437 374 : if (!grouping_is_hashable(groupList))
438 6 : ereport(ERROR,
439 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
440 : errmsg("could not implement recursive UNION"),
441 : errdetail("All column datatypes must be hashable.")));
442 :
443 : /*
444 : * For the moment, take the number of distinct groups as equal to the
445 : * total input size, ie, the worst case.
446 : */
447 368 : dNumGroups = lpath->rows + rpath->rows * 10;
448 : }
449 :
450 : /*
451 : * And make the path node.
452 : */
453 820 : path = (Path *) create_recursiveunion_path(root,
454 : result_rel,
455 : lpath,
456 : rpath,
457 820 : result_rel->reltarget,
458 : groupList,
459 : root->wt_param_id,
460 : dNumGroups);
461 :
462 820 : add_path(result_rel, path);
463 820 : postprocess_setop_rel(root, result_rel);
464 820 : return result_rel;
465 : }
466 :
467 : /*
468 : * build_setop_child_paths
469 : * Build paths for the set op child relation denoted by 'rel'.
470 : *
471 : * 'rel' is an RTE_SUBQUERY relation. We have already generated paths within
472 : * the subquery's subroot; the task here is to create SubqueryScan paths for
473 : * 'rel', representing scans of the useful subquery paths.
474 : *
475 : * interesting_pathkeys: if not NIL, also include paths that suit these
476 : * pathkeys, sorting any unsorted paths as required.
477 : * *pNumGroups: if not NULL, we estimate the number of distinct groups
478 : * in the result, and store it there.
479 : */
480 : static void
481 15212 : build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
482 : bool trivial_tlist, List *child_tlist,
483 : List *interesting_pathkeys, double *pNumGroups)
484 : {
485 : RelOptInfo *final_rel;
486 15212 : List *setop_pathkeys = rel->subroot->setop_pathkeys;
487 : ListCell *lc;
488 :
489 : /* it can't be a set op child rel if it's not a subquery */
490 : Assert(rel->rtekind == RTE_SUBQUERY);
491 :
492 : /* when sorting is needed, add child rel equivalences */
493 15212 : if (interesting_pathkeys != NIL)
494 11778 : add_setop_child_rel_equivalences(root,
495 : rel,
496 : child_tlist,
497 : interesting_pathkeys);
498 :
499 : /*
500 : * Mark rel with estimated output rows, width, etc. Note that we have to
501 : * do this before generating outer-query paths, else cost_subqueryscan is
502 : * not happy.
503 : */
504 15212 : set_subquery_size_estimates(root, rel);
505 :
506 : /*
507 : * Since we may want to add a partial path to this relation, we must set
508 : * its consider_parallel flag correctly.
509 : */
510 15212 : final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
511 15212 : rel->consider_parallel = final_rel->consider_parallel;
512 :
513 : /* Generate subquery scan paths for any interesting path in final_rel */
514 39602 : foreach(lc, final_rel->pathlist)
515 : {
516 24390 : Path *subpath = (Path *) lfirst(lc);
517 : List *pathkeys;
518 24390 : Path *cheapest_input_path = final_rel->cheapest_total_path;
519 : bool is_sorted;
520 : int presorted_keys;
521 :
522 : /*
523 : * Include the cheapest path as-is so that the set operation can be
524 : * cheaply implemented using a method which does not require the input
525 : * to be sorted.
526 : */
527 24390 : if (subpath == cheapest_input_path)
528 : {
529 : /* Convert subpath's pathkeys to outer representation */
530 15212 : pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
531 : make_tlist_from_pathtarget(subpath->pathtarget));
532 :
533 : /* Generate outer path using this subpath */
534 15212 : add_path(rel, (Path *) create_subqueryscan_path(root,
535 : rel,
536 : subpath,
537 : trivial_tlist,
538 : pathkeys,
539 : NULL));
540 : }
541 :
542 : /* skip dealing with sorted paths if the setop doesn't need them */
543 24390 : if (interesting_pathkeys == NIL)
544 3490 : continue;
545 :
546 : /*
547 : * Create paths to suit final sort order required for setop_pathkeys.
548 : * Here we'll sort the cheapest input path (if not sorted already) and
549 : * incremental sort any paths which are partially sorted.
550 : */
551 20912 : is_sorted = pathkeys_count_contained_in(setop_pathkeys,
552 : subpath->pathkeys,
553 : &presorted_keys);
554 :
555 20912 : if (!is_sorted)
556 : {
557 13780 : double limittuples = rel->subroot->limit_tuples;
558 :
559 : /*
560 : * Try at least sorting the cheapest path and also try
561 : * incrementally sorting any path which is partially sorted
562 : * already (no need to deal with paths which have presorted keys
563 : * when incremental sort is disabled unless it's the cheapest
564 : * input path).
565 : */
566 13780 : if (subpath != cheapest_input_path &&
567 3090 : (presorted_keys == 0 || !enable_incremental_sort))
568 12 : continue;
569 :
570 : /*
571 : * We've no need to consider both a sort and incremental sort.
572 : * We'll just do a sort if there are no presorted keys and an
573 : * incremental sort when there are presorted keys.
574 : */
575 13768 : if (presorted_keys == 0 || !enable_incremental_sort)
576 10594 : subpath = (Path *) create_sort_path(rel->subroot,
577 : final_rel,
578 : subpath,
579 : setop_pathkeys,
580 : limittuples);
581 : else
582 3174 : subpath = (Path *) create_incremental_sort_path(rel->subroot,
583 : final_rel,
584 : subpath,
585 : setop_pathkeys,
586 : presorted_keys,
587 : limittuples);
588 : }
589 :
590 : /*
591 : * subpath is now sorted, so add it to the pathlist. We already added
592 : * the cheapest_input_path above, so don't add it again unless we just
593 : * sorted it.
594 : */
595 20900 : if (subpath != cheapest_input_path)
596 : {
597 : /* Convert subpath's pathkeys to outer representation */
598 19812 : pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
599 : make_tlist_from_pathtarget(subpath->pathtarget));
600 :
601 : /* Generate outer path using this subpath */
602 19812 : add_path(rel, (Path *) create_subqueryscan_path(root,
603 : rel,
604 : subpath,
605 : trivial_tlist,
606 : pathkeys,
607 : NULL));
608 : }
609 : }
610 :
611 : /* if consider_parallel is false, there should be no partial paths */
612 : Assert(final_rel->consider_parallel ||
613 : final_rel->partial_pathlist == NIL);
614 :
615 : /*
616 : * If we have a partial path for the child relation, we can use that to
617 : * build a partial path for this relation. But there's no point in
618 : * considering any path but the cheapest.
619 : */
620 15212 : if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
621 10364 : final_rel->partial_pathlist != NIL)
622 : {
623 : Path *partial_subpath;
624 : Path *partial_path;
625 :
626 12 : partial_subpath = linitial(final_rel->partial_pathlist);
627 : partial_path = (Path *)
628 12 : create_subqueryscan_path(root, rel, partial_subpath,
629 : trivial_tlist,
630 : NIL, NULL);
631 12 : add_partial_path(rel, partial_path);
632 : }
633 :
634 15212 : postprocess_setop_rel(root, rel);
635 :
636 : /*
637 : * Estimate number of groups if caller wants it. If the subquery used
638 : * grouping or aggregation, its output is probably mostly unique anyway;
639 : * otherwise do statistical estimation.
640 : *
641 : * XXX you don't really want to know about this: we do the estimation
642 : * using the subroot->parse's original targetlist expressions, not the
643 : * subroot->processed_tlist which might seem more appropriate. The reason
644 : * is that if the subquery is itself a setop, it may return a
645 : * processed_tlist containing "varno 0" Vars generated by
646 : * generate_append_tlist, and those would confuse estimate_num_groups
647 : * mightily. We ought to get rid of the "varno 0" hack, but that requires
648 : * a redesign of the parsetree representation of setops, so that there can
649 : * be an RTE corresponding to each setop's output. Note, we use this not
650 : * subquery's targetlist but subroot->parse's targetlist, because it was
651 : * revised by self-join removal. subquery's targetlist might contain the
652 : * references to the removed relids.
653 : */
654 15212 : if (pNumGroups)
655 : {
656 1342 : PlannerInfo *subroot = rel->subroot;
657 1342 : Query *subquery = subroot->parse;
658 :
659 1342 : if (subquery->groupClause || subquery->groupingSets ||
660 1342 : subquery->distinctClause || subroot->hasHavingQual ||
661 1330 : subquery->hasAggs)
662 12 : *pNumGroups = rel->cheapest_total_path->rows;
663 : else
664 1330 : *pNumGroups = estimate_num_groups(subroot,
665 1330 : get_tlist_exprs(subroot->parse->targetList, false),
666 1330 : rel->cheapest_total_path->rows,
667 : NULL,
668 : NULL);
669 : }
670 15212 : }
671 :
672 : /*
673 : * Generate paths for a UNION or UNION ALL node
674 : */
675 : static RelOptInfo *
676 4372 : generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
677 : List *refnames_tlist,
678 : List **pTargetList)
679 : {
680 4372 : Relids relids = NULL;
681 : RelOptInfo *result_rel;
682 : ListCell *lc;
683 : ListCell *lc2;
684 : ListCell *lc3;
685 4372 : List *cheapest_pathlist = NIL;
686 4372 : List *ordered_pathlist = NIL;
687 4372 : List *partial_pathlist = NIL;
688 4372 : bool partial_paths_valid = true;
689 4372 : bool consider_parallel = true;
690 : List *rellist;
691 : List *tlist_list;
692 : List *trivial_tlist_list;
693 : List *tlist;
694 4372 : List *groupList = NIL;
695 : Path *apath;
696 4372 : Path *gpath = NULL;
697 4372 : bool try_sorted = false;
698 4372 : List *union_pathkeys = NIL;
699 :
700 : /*
701 : * If any of my children are identical UNION nodes (same op, all-flag, and
702 : * colTypes/colCollations) then they can be merged into this node so that
703 : * we generate only one Append/MergeAppend and unique-ification for the
704 : * lot. Recurse to find such nodes.
705 : */
706 4372 : rellist = plan_union_children(root,
707 : op,
708 : refnames_tlist,
709 : &tlist_list,
710 : &trivial_tlist_list);
711 :
712 : /*
713 : * Generate tlist for Append/MergeAppend plan node.
714 : *
715 : * The tlist for an Append plan isn't important as far as the Append is
716 : * concerned, but we must make it look real anyway for the benefit of the
717 : * next plan level up.
718 : */
719 4372 : tlist = generate_append_tlist(op->colTypes, op->colCollations,
720 : tlist_list, refnames_tlist);
721 4372 : *pTargetList = tlist;
722 :
723 : /* For UNIONs (not UNION ALL), try sorting, if sorting is possible */
724 4372 : if (!op->all)
725 : {
726 : /* Identify the grouping semantics */
727 3784 : groupList = generate_setop_grouplist(op, tlist);
728 :
729 3784 : if (grouping_is_sortable(op->groupClauses))
730 : {
731 3692 : try_sorted = true;
732 : /* Determine the pathkeys for sorting by the whole target list */
733 3692 : union_pathkeys = make_pathkeys_for_sortclauses(root, groupList,
734 : tlist);
735 :
736 3692 : root->query_pathkeys = union_pathkeys;
737 : }
738 : }
739 :
740 : /*
741 : * Now that we've got the append target list, we can build the union child
742 : * paths.
743 : */
744 16716 : forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
745 : {
746 12344 : RelOptInfo *rel = lfirst(lc);
747 12344 : bool trivial_tlist = lfirst_int(lc2);
748 12344 : List *child_tlist = lfirst_node(List, lc3);
749 :
750 : /* only build paths for the union children */
751 12344 : if (rel->rtekind == RTE_SUBQUERY)
752 12224 : build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
753 : union_pathkeys, NULL);
754 : }
755 :
756 : /* Build path lists and relid set. */
757 16716 : foreach(lc, rellist)
758 : {
759 12344 : RelOptInfo *rel = lfirst(lc);
760 : Path *ordered_path;
761 :
762 12344 : cheapest_pathlist = lappend(cheapest_pathlist,
763 12344 : rel->cheapest_total_path);
764 :
765 12344 : if (try_sorted)
766 : {
767 3948 : ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
768 : union_pathkeys,
769 : NULL,
770 : TOTAL_COST,
771 : false);
772 :
773 3948 : if (ordered_path != NULL)
774 476 : ordered_pathlist = lappend(ordered_pathlist, ordered_path);
775 : else
776 : {
777 : /*
778 : * If we can't find a sorted path, just give up trying to
779 : * generate a list of correctly sorted child paths. This can
780 : * happen when type coercion was added to the targetlist due
781 : * to mismatching types from the union children.
782 : */
783 3472 : try_sorted = false;
784 : }
785 : }
786 :
787 12344 : if (consider_parallel)
788 : {
789 8780 : if (!rel->consider_parallel)
790 : {
791 3318 : consider_parallel = false;
792 3318 : partial_paths_valid = false;
793 : }
794 5462 : else if (rel->partial_pathlist == NIL)
795 5450 : partial_paths_valid = false;
796 : else
797 12 : partial_pathlist = lappend(partial_pathlist,
798 12 : linitial(rel->partial_pathlist));
799 : }
800 :
801 12344 : relids = bms_union(relids, rel->relids);
802 : }
803 :
804 : /* Build result relation. */
805 4372 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
806 4372 : result_rel->reltarget = create_pathtarget(root, tlist);
807 4372 : result_rel->consider_parallel = consider_parallel;
808 4372 : result_rel->consider_startup = (root->tuple_fraction > 0);
809 :
810 : /*
811 : * Append the child results together using the cheapest paths from each
812 : * union child.
813 : */
814 4372 : apath = (Path *) create_append_path(root, result_rel, cheapest_pathlist,
815 : NIL, NIL, NULL, 0, false, -1);
816 :
817 : /*
818 : * Estimate number of groups. For now we just assume the output is unique
819 : * --- this is certainly true for the UNION case, and we want worst-case
820 : * estimates anyway.
821 : */
822 4372 : result_rel->rows = apath->rows;
823 :
824 : /*
825 : * Now consider doing the same thing using the partial paths plus Append
826 : * plus Gather.
827 : */
828 4372 : if (partial_paths_valid)
829 : {
830 : Path *papath;
831 6 : int parallel_workers = 0;
832 :
833 : /* Find the highest number of workers requested for any subpath. */
834 18 : foreach(lc, partial_pathlist)
835 : {
836 12 : Path *subpath = lfirst(lc);
837 :
838 12 : parallel_workers = Max(parallel_workers,
839 : subpath->parallel_workers);
840 : }
841 : Assert(parallel_workers > 0);
842 :
843 : /*
844 : * If the use of parallel append is permitted, always request at least
845 : * log2(# of children) paths. We assume it can be useful to have
846 : * extra workers in this case because they will be spread out across
847 : * the children. The precise formula is just a guess; see
848 : * add_paths_to_append_rel.
849 : */
850 6 : if (enable_parallel_append)
851 : {
852 6 : parallel_workers = Max(parallel_workers,
853 : pg_leftmost_one_pos32(list_length(partial_pathlist)) + 1);
854 6 : parallel_workers = Min(parallel_workers,
855 : max_parallel_workers_per_gather);
856 : }
857 : Assert(parallel_workers > 0);
858 :
859 : papath = (Path *)
860 6 : create_append_path(root, result_rel, NIL, partial_pathlist,
861 : NIL, NULL, parallel_workers,
862 : enable_parallel_append, -1);
863 : gpath = (Path *)
864 6 : create_gather_path(root, result_rel, papath,
865 6 : result_rel->reltarget, NULL, NULL);
866 : }
867 :
868 4372 : if (!op->all)
869 : {
870 : double dNumGroups;
871 3784 : bool can_sort = grouping_is_sortable(groupList);
872 3784 : bool can_hash = grouping_is_hashable(groupList);
873 :
874 : /*
875 : * XXX for the moment, take the number of distinct groups as equal to
876 : * the total input size, i.e., the worst case. This is too
877 : * conservative, but it's not clear how to get a decent estimate of
878 : * the true size. One should note as well the propensity of novices
879 : * to write UNION rather than UNION ALL even when they don't expect
880 : * any duplicates...
881 : */
882 3784 : dNumGroups = apath->rows;
883 :
884 3784 : if (can_hash)
885 : {
886 : Path *path;
887 :
888 : /*
889 : * Try a hash aggregate plan on 'apath'. This is the cheapest
890 : * available path containing each append child.
891 : */
892 3712 : path = (Path *) create_agg_path(root,
893 : result_rel,
894 : apath,
895 : create_pathtarget(root, tlist),
896 : AGG_HASHED,
897 : AGGSPLIT_SIMPLE,
898 : groupList,
899 : NIL,
900 : NULL,
901 : dNumGroups);
902 3712 : add_path(result_rel, path);
903 :
904 : /* Try hash aggregate on the Gather path, if valid */
905 3712 : if (gpath != NULL)
906 : {
907 : /* Hashed aggregate plan --- no sort needed */
908 6 : path = (Path *) create_agg_path(root,
909 : result_rel,
910 : gpath,
911 : create_pathtarget(root, tlist),
912 : AGG_HASHED,
913 : AGGSPLIT_SIMPLE,
914 : groupList,
915 : NIL,
916 : NULL,
917 : dNumGroups);
918 6 : add_path(result_rel, path);
919 : }
920 : }
921 :
922 3784 : if (can_sort)
923 : {
924 3692 : Path *path = apath;
925 :
926 : /* Try Sort -> Unique on the Append path */
927 3692 : if (groupList != NIL)
928 3662 : path = (Path *) create_sort_path(root, result_rel, path,
929 : make_pathkeys_for_sortclauses(root, groupList, tlist),
930 : -1.0);
931 :
932 3692 : path = (Path *) create_upper_unique_path(root,
933 : result_rel,
934 : path,
935 3692 : list_length(path->pathkeys),
936 : dNumGroups);
937 :
938 3692 : add_path(result_rel, path);
939 :
940 : /* Try Sort -> Unique on the Gather path, if set */
941 3692 : if (gpath != NULL)
942 : {
943 6 : path = gpath;
944 :
945 6 : path = (Path *) create_sort_path(root, result_rel, path,
946 : make_pathkeys_for_sortclauses(root, groupList, tlist),
947 : -1.0);
948 :
949 6 : path = (Path *) create_upper_unique_path(root,
950 : result_rel,
951 : path,
952 6 : list_length(path->pathkeys),
953 : dNumGroups);
954 6 : add_path(result_rel, path);
955 : }
956 : }
957 :
958 : /*
959 : * Try making a MergeAppend path if we managed to find a path with the
960 : * correct pathkeys in each union child query.
961 : */
962 3784 : if (try_sorted && groupList != NIL)
963 : {
964 : Path *path;
965 :
966 190 : path = (Path *) create_merge_append_path(root,
967 : result_rel,
968 : ordered_pathlist,
969 : union_pathkeys,
970 : NULL);
971 :
972 : /* and make the MergeAppend unique */
973 190 : path = (Path *) create_upper_unique_path(root,
974 : result_rel,
975 : path,
976 : list_length(tlist),
977 : dNumGroups);
978 :
979 190 : add_path(result_rel, path);
980 : }
981 : }
982 : else
983 : {
984 : /* UNION ALL */
985 588 : add_path(result_rel, apath);
986 :
987 588 : if (gpath != NULL)
988 0 : add_path(result_rel, gpath);
989 : }
990 :
991 4372 : return result_rel;
992 : }
993 :
994 : /*
995 : * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
996 : */
997 : static RelOptInfo *
998 686 : generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
999 : List *refnames_tlist,
1000 : List **pTargetList)
1001 : {
1002 : RelOptInfo *result_rel;
1003 : RelOptInfo *lrel,
1004 : *rrel;
1005 686 : double save_fraction = root->tuple_fraction;
1006 : Path *lpath,
1007 : *rpath,
1008 : *path;
1009 : List *lpath_tlist,
1010 : *rpath_tlist,
1011 : *tlist,
1012 : *groupList;
1013 : bool lpath_trivial_tlist,
1014 : rpath_trivial_tlist,
1015 : result_trivial_tlist;
1016 686 : List *nonunion_pathkeys = NIL;
1017 : double dLeftGroups,
1018 : dRightGroups,
1019 : dNumGroups,
1020 : dNumOutputRows;
1021 : bool can_sort;
1022 : bool can_hash;
1023 : SetOpCmd cmd;
1024 :
1025 : /*
1026 : * Tell children to fetch all tuples.
1027 : */
1028 686 : root->tuple_fraction = 0.0;
1029 :
1030 : /* Recurse on children */
1031 686 : lrel = recurse_set_operations(op->larg, root,
1032 : op,
1033 : op->colTypes, op->colCollations,
1034 : refnames_tlist,
1035 : &lpath_tlist,
1036 : &lpath_trivial_tlist);
1037 :
1038 686 : rrel = recurse_set_operations(op->rarg, root,
1039 : op,
1040 : op->colTypes, op->colCollations,
1041 : refnames_tlist,
1042 : &rpath_tlist,
1043 : &rpath_trivial_tlist);
1044 :
1045 : /*
1046 : * Generate tlist for SetOp plan node.
1047 : *
1048 : * The tlist for a SetOp plan isn't important so far as the SetOp is
1049 : * concerned, but we must make it look real anyway for the benefit of the
1050 : * next plan level up.
1051 : */
1052 686 : tlist = generate_setop_tlist(op->colTypes, op->colCollations,
1053 : 0, false, lpath_tlist, refnames_tlist,
1054 : &result_trivial_tlist);
1055 :
1056 : /* We should not have needed any type coercions in the tlist */
1057 : Assert(result_trivial_tlist);
1058 :
1059 686 : *pTargetList = tlist;
1060 :
1061 : /* Identify the grouping semantics */
1062 686 : groupList = generate_setop_grouplist(op, tlist);
1063 :
1064 : /* Check whether the operators support sorting or hashing */
1065 686 : can_sort = grouping_is_sortable(groupList);
1066 686 : can_hash = grouping_is_hashable(groupList);
1067 686 : if (!can_sort && !can_hash)
1068 0 : ereport(ERROR,
1069 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1070 : /* translator: %s is INTERSECT or EXCEPT */
1071 : errmsg("could not implement %s",
1072 : (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT"),
1073 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1074 :
1075 686 : if (can_sort)
1076 : {
1077 : /* Determine the pathkeys for sorting by the whole target list */
1078 686 : nonunion_pathkeys = make_pathkeys_for_sortclauses(root, groupList,
1079 : tlist);
1080 :
1081 686 : root->query_pathkeys = nonunion_pathkeys;
1082 : }
1083 :
1084 : /*
1085 : * Now that we've got all that info, we can build the child paths.
1086 : */
1087 686 : if (lrel->rtekind == RTE_SUBQUERY)
1088 662 : build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
1089 : nonunion_pathkeys, &dLeftGroups);
1090 : else
1091 24 : dLeftGroups = lrel->rows;
1092 686 : if (rrel->rtekind == RTE_SUBQUERY)
1093 680 : build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
1094 : nonunion_pathkeys, &dRightGroups);
1095 : else
1096 6 : dRightGroups = rrel->rows;
1097 :
1098 : /* Undo effects of forcing tuple_fraction to 0 */
1099 686 : root->tuple_fraction = save_fraction;
1100 :
1101 : /*
1102 : * For EXCEPT, we must put the left input first. For INTERSECT, either
1103 : * order should give the same results, and we prefer to put the smaller
1104 : * input first in order to (a) minimize the size of the hash table in the
1105 : * hashing case, and (b) improve our chances of exploiting the executor's
1106 : * fast path for empty left-hand input. "Smaller" means the one with the
1107 : * fewer groups.
1108 : */
1109 686 : if (op->op != SETOP_EXCEPT && dLeftGroups > dRightGroups)
1110 : {
1111 : /* need to swap the two inputs */
1112 : RelOptInfo *tmprel;
1113 : List *tmplist;
1114 : double tmpd;
1115 :
1116 30 : tmprel = lrel;
1117 30 : lrel = rrel;
1118 30 : rrel = tmprel;
1119 30 : tmplist = lpath_tlist;
1120 30 : lpath_tlist = rpath_tlist;
1121 30 : rpath_tlist = tmplist;
1122 30 : tmpd = dLeftGroups;
1123 30 : dLeftGroups = dRightGroups;
1124 30 : dRightGroups = tmpd;
1125 : }
1126 :
1127 686 : lpath = lrel->cheapest_total_path;
1128 686 : rpath = rrel->cheapest_total_path;
1129 :
1130 : /* Build result relation. */
1131 686 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
1132 686 : bms_union(lrel->relids, rrel->relids));
1133 686 : result_rel->reltarget = create_pathtarget(root, tlist);
1134 :
1135 : /*
1136 : * Estimate number of distinct groups that we'll need hashtable entries
1137 : * for; this is the size of the left-hand input for EXCEPT, or the smaller
1138 : * input for INTERSECT. Also estimate the number of eventual output rows.
1139 : * In non-ALL cases, we estimate each group produces one output row; in
1140 : * ALL cases use the relevant relation size. These are worst-case
1141 : * estimates, of course, but we need to be conservative.
1142 : */
1143 686 : if (op->op == SETOP_EXCEPT)
1144 : {
1145 458 : dNumGroups = dLeftGroups;
1146 458 : dNumOutputRows = op->all ? lpath->rows : dNumGroups;
1147 : }
1148 : else
1149 : {
1150 228 : dNumGroups = dLeftGroups;
1151 228 : dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
1152 : }
1153 686 : result_rel->rows = dNumOutputRows;
1154 :
1155 : /* Select the SetOpCmd type */
1156 686 : switch (op->op)
1157 : {
1158 228 : case SETOP_INTERSECT:
1159 228 : cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
1160 228 : break;
1161 458 : case SETOP_EXCEPT:
1162 458 : cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
1163 458 : break;
1164 0 : default:
1165 0 : elog(ERROR, "unrecognized set op: %d", (int) op->op);
1166 : cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
1167 : break;
1168 : }
1169 :
1170 : /*
1171 : * If we can hash, that just requires a SetOp atop the cheapest inputs.
1172 : */
1173 686 : if (can_hash)
1174 : {
1175 626 : path = (Path *) create_setop_path(root,
1176 : result_rel,
1177 : lpath,
1178 : rpath,
1179 : cmd,
1180 : SETOP_HASHED,
1181 : groupList,
1182 : dNumGroups,
1183 : dNumOutputRows);
1184 626 : add_path(result_rel, path);
1185 : }
1186 :
1187 : /*
1188 : * If we can sort, generate the cheapest sorted input paths, and add a
1189 : * SetOp atop those.
1190 : */
1191 686 : if (can_sort)
1192 : {
1193 : List *pathkeys;
1194 : Path *slpath,
1195 : *srpath;
1196 :
1197 : /* First the left input ... */
1198 686 : pathkeys = make_pathkeys_for_sortclauses(root,
1199 : groupList,
1200 : lpath_tlist);
1201 686 : if (pathkeys_contained_in(pathkeys, lpath->pathkeys))
1202 96 : slpath = lpath; /* cheapest path is already sorted */
1203 : else
1204 : {
1205 590 : slpath = get_cheapest_path_for_pathkeys(lrel->pathlist,
1206 : nonunion_pathkeys,
1207 : NULL,
1208 : TOTAL_COST,
1209 : false);
1210 : /* Subquery failed to produce any presorted paths? */
1211 590 : if (slpath == NULL)
1212 168 : slpath = (Path *) create_sort_path(root,
1213 : lpath->parent,
1214 : lpath,
1215 : pathkeys,
1216 : -1.0);
1217 : }
1218 :
1219 : /* and now the same for the right. */
1220 686 : pathkeys = make_pathkeys_for_sortclauses(root,
1221 : groupList,
1222 : rpath_tlist);
1223 686 : if (pathkeys_contained_in(pathkeys, rpath->pathkeys))
1224 108 : srpath = rpath; /* cheapest path is already sorted */
1225 : else
1226 : {
1227 578 : srpath = get_cheapest_path_for_pathkeys(rrel->pathlist,
1228 : nonunion_pathkeys,
1229 : NULL,
1230 : TOTAL_COST,
1231 : false);
1232 : /* Subquery failed to produce any presorted paths? */
1233 578 : if (srpath == NULL)
1234 174 : srpath = (Path *) create_sort_path(root,
1235 : rpath->parent,
1236 : rpath,
1237 : pathkeys,
1238 : -1.0);
1239 : }
1240 :
1241 686 : path = (Path *) create_setop_path(root,
1242 : result_rel,
1243 : slpath,
1244 : srpath,
1245 : cmd,
1246 : SETOP_SORTED,
1247 : groupList,
1248 : dNumGroups,
1249 : dNumOutputRows);
1250 686 : add_path(result_rel, path);
1251 : }
1252 :
1253 686 : return result_rel;
1254 : }
1255 :
1256 : /*
1257 : * Pull up children of a UNION node that are identically-propertied UNIONs,
1258 : * and perform planning of the queries underneath the N-way UNION.
1259 : *
1260 : * The result is a list of RelOptInfos containing Paths for sub-nodes, with
1261 : * one entry for each descendant that is a leaf query or non-identical setop.
1262 : * We also return parallel lists of the childrens' targetlists and
1263 : * is-trivial-tlist flags.
1264 : *
1265 : * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
1266 : * output rows will be lost anyway.
1267 : */
1268 : static List *
1269 4372 : plan_union_children(PlannerInfo *root,
1270 : SetOperationStmt *top_union,
1271 : List *refnames_tlist,
1272 : List **tlist_list,
1273 : List **istrivial_tlist)
1274 : {
1275 4372 : List *pending_rels = list_make1(top_union);
1276 4372 : List *result = NIL;
1277 : List *child_tlist;
1278 : bool trivial_tlist;
1279 :
1280 4372 : *tlist_list = NIL;
1281 4372 : *istrivial_tlist = NIL;
1282 :
1283 24688 : while (pending_rels != NIL)
1284 : {
1285 20316 : Node *setOp = linitial(pending_rels);
1286 :
1287 20316 : pending_rels = list_delete_first(pending_rels);
1288 :
1289 20316 : if (IsA(setOp, SetOperationStmt))
1290 : {
1291 8092 : SetOperationStmt *op = (SetOperationStmt *) setOp;
1292 :
1293 8092 : if (op->op == top_union->op &&
1294 15986 : (op->all == top_union->all || op->all) &&
1295 15956 : equal(op->colTypes, top_union->colTypes) &&
1296 7972 : equal(op->colCollations, top_union->colCollations))
1297 : {
1298 : /* Same UNION, so fold children into parent */
1299 7972 : pending_rels = lcons(op->rarg, pending_rels);
1300 7972 : pending_rels = lcons(op->larg, pending_rels);
1301 7972 : continue;
1302 : }
1303 : }
1304 :
1305 : /*
1306 : * Not same, so plan this child separately.
1307 : *
1308 : * If top_union isn't a UNION ALL, then we are interested in sorted
1309 : * output from the child, so pass top_union as parentOp. Note that
1310 : * this isn't necessarily the child node's immediate SetOperationStmt
1311 : * parent, but that's fine: it's the effective parent.
1312 : */
1313 12344 : result = lappend(result, recurse_set_operations(setOp, root,
1314 12344 : top_union->all ? NULL : top_union,
1315 : top_union->colTypes,
1316 : top_union->colCollations,
1317 : refnames_tlist,
1318 : &child_tlist,
1319 : &trivial_tlist));
1320 12344 : *tlist_list = lappend(*tlist_list, child_tlist);
1321 12344 : *istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
1322 : }
1323 :
1324 4372 : return result;
1325 : }
1326 :
1327 : /*
1328 : * postprocess_setop_rel - perform steps required after adding paths
1329 : */
1330 : static void
1331 21090 : postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
1332 : {
1333 : /*
1334 : * We don't currently worry about allowing FDWs to contribute paths to
1335 : * this relation, but give extensions a chance.
1336 : */
1337 21090 : if (create_upper_paths_hook)
1338 0 : (*create_upper_paths_hook) (root, UPPERREL_SETOP,
1339 : NULL, rel, NULL);
1340 :
1341 : /* Select cheapest path */
1342 21090 : set_cheapest(rel);
1343 21090 : }
1344 :
1345 : /*
1346 : * Generate targetlist for a set-operation plan node
1347 : *
1348 : * colTypes: OID list of set-op's result column datatypes
1349 : * colCollations: OID list of set-op's result column collations
1350 : * varno: varno to use in generated Vars
1351 : * hack_constants: true to copy up constants (see comments in code)
1352 : * input_tlist: targetlist of this node's input node
1353 : * refnames_tlist: targetlist to take column names from
1354 : * trivial_tlist: output parameter, set to true if targetlist is trivial
1355 : */
1356 : static List *
1357 15910 : generate_setop_tlist(List *colTypes, List *colCollations,
1358 : Index varno,
1359 : bool hack_constants,
1360 : List *input_tlist,
1361 : List *refnames_tlist,
1362 : bool *trivial_tlist)
1363 : {
1364 15910 : List *tlist = NIL;
1365 15910 : int resno = 1;
1366 : ListCell *ctlc,
1367 : *cclc,
1368 : *itlc,
1369 : *rtlc;
1370 : TargetEntry *tle;
1371 : Node *expr;
1372 :
1373 15910 : *trivial_tlist = true; /* until proven differently */
1374 :
1375 66138 : forfour(ctlc, colTypes, cclc, colCollations,
1376 : itlc, input_tlist, rtlc, refnames_tlist)
1377 : {
1378 50228 : Oid colType = lfirst_oid(ctlc);
1379 50228 : Oid colColl = lfirst_oid(cclc);
1380 50228 : TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
1381 50228 : TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
1382 :
1383 : Assert(inputtle->resno == resno);
1384 : Assert(reftle->resno == resno);
1385 : Assert(!inputtle->resjunk);
1386 : Assert(!reftle->resjunk);
1387 :
1388 : /*
1389 : * Generate columns referencing input columns and having appropriate
1390 : * data types and column names. Insert datatype coercions where
1391 : * necessary.
1392 : *
1393 : * HACK: constants in the input's targetlist are copied up as-is
1394 : * rather than being referenced as subquery outputs. This is mainly
1395 : * to ensure that when we try to coerce them to the output column's
1396 : * datatype, the right things happen for UNKNOWN constants. But do
1397 : * this only at the first level of subquery-scan plans; we don't want
1398 : * phony constants appearing in the output tlists of upper-level
1399 : * nodes!
1400 : *
1401 : * Note that copying a constant doesn't in itself require us to mark
1402 : * the tlist nontrivial; see trivial_subqueryscan() in setrefs.c.
1403 : */
1404 50228 : if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
1405 15628 : expr = (Node *) inputtle->expr;
1406 : else
1407 138400 : expr = (Node *) makeVar(varno,
1408 34600 : inputtle->resno,
1409 34600 : exprType((Node *) inputtle->expr),
1410 34600 : exprTypmod((Node *) inputtle->expr),
1411 34600 : exprCollation((Node *) inputtle->expr),
1412 : 0);
1413 :
1414 50228 : if (exprType(expr) != colType)
1415 : {
1416 : /*
1417 : * Note: it's not really cool to be applying coerce_to_common_type
1418 : * here; one notable point is that assign_expr_collations never
1419 : * gets run on any generated nodes. For the moment that's not a
1420 : * problem because we force the correct exposed collation below.
1421 : * It would likely be best to make the parser generate the correct
1422 : * output tlist for every set-op to begin with, though.
1423 : */
1424 1486 : expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */
1425 : expr,
1426 : colType,
1427 : "UNION/INTERSECT/EXCEPT");
1428 1486 : *trivial_tlist = false; /* the coercion makes it not trivial */
1429 : }
1430 :
1431 : /*
1432 : * Ensure the tlist entry's exposed collation matches the set-op. This
1433 : * is necessary because plan_set_operations() reports the result
1434 : * ordering as a list of SortGroupClauses, which don't carry collation
1435 : * themselves but just refer to tlist entries. If we don't show the
1436 : * right collation then planner.c might do the wrong thing in
1437 : * higher-level queries.
1438 : *
1439 : * Note we use RelabelType, not CollateExpr, since this expression
1440 : * will reach the executor without any further processing.
1441 : */
1442 50228 : if (exprCollation(expr) != colColl)
1443 : {
1444 13018 : expr = applyRelabelType(expr,
1445 : exprType(expr), exprTypmod(expr), colColl,
1446 : COERCE_IMPLICIT_CAST, -1, false);
1447 13018 : *trivial_tlist = false; /* the relabel makes it not trivial */
1448 : }
1449 :
1450 100456 : tle = makeTargetEntry((Expr *) expr,
1451 50228 : (AttrNumber) resno++,
1452 50228 : pstrdup(reftle->resname),
1453 : false);
1454 :
1455 : /*
1456 : * By convention, all output columns in a setop tree have
1457 : * ressortgroupref equal to their resno. In some cases the ref isn't
1458 : * needed, but this is a cleaner way than modifying the tlist later.
1459 : */
1460 50228 : tle->ressortgroupref = tle->resno;
1461 :
1462 50228 : tlist = lappend(tlist, tle);
1463 : }
1464 :
1465 15910 : return tlist;
1466 : }
1467 :
1468 : /*
1469 : * Generate targetlist for a set-operation Append node
1470 : *
1471 : * colTypes: OID list of set-op's result column datatypes
1472 : * colCollations: OID list of set-op's result column collations
1473 : * input_tlists: list of tlists for sub-plans of the Append
1474 : * refnames_tlist: targetlist to take column names from
1475 : *
1476 : * The entries in the Append's targetlist should always be simple Vars;
1477 : * we just have to make sure they have the right datatypes/typmods/collations.
1478 : * The Vars are always generated with varno 0.
1479 : *
1480 : * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
1481 : * cannot figure out a realistic width for the tlist we make here. But we
1482 : * ought to refactor this code to produce a PathTarget directly, anyway.
1483 : */
1484 : static List *
1485 5198 : generate_append_tlist(List *colTypes, List *colCollations,
1486 : List *input_tlists,
1487 : List *refnames_tlist)
1488 : {
1489 5198 : List *tlist = NIL;
1490 5198 : int resno = 1;
1491 : ListCell *curColType;
1492 : ListCell *curColCollation;
1493 : ListCell *ref_tl_item;
1494 : int colindex;
1495 : TargetEntry *tle;
1496 : Node *expr;
1497 : ListCell *tlistl;
1498 : int32 *colTypmods;
1499 :
1500 : /*
1501 : * First extract typmods to use.
1502 : *
1503 : * If the inputs all agree on type and typmod of a particular column, use
1504 : * that typmod; else use -1.
1505 : */
1506 5198 : colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
1507 :
1508 19194 : foreach(tlistl, input_tlists)
1509 : {
1510 13996 : List *subtlist = (List *) lfirst(tlistl);
1511 : ListCell *subtlistl;
1512 :
1513 13996 : curColType = list_head(colTypes);
1514 13996 : colindex = 0;
1515 55584 : foreach(subtlistl, subtlist)
1516 : {
1517 41588 : TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
1518 :
1519 : Assert(!subtle->resjunk);
1520 : Assert(curColType != NULL);
1521 41588 : if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
1522 : {
1523 : /* If first subplan, copy the typmod; else compare */
1524 41588 : int32 subtypmod = exprTypmod((Node *) subtle->expr);
1525 :
1526 41588 : if (tlistl == list_head(input_tlists))
1527 14628 : colTypmods[colindex] = subtypmod;
1528 26960 : else if (subtypmod != colTypmods[colindex])
1529 12 : colTypmods[colindex] = -1;
1530 : }
1531 : else
1532 : {
1533 : /* types disagree, so force typmod to -1 */
1534 0 : colTypmods[colindex] = -1;
1535 : }
1536 41588 : curColType = lnext(colTypes, curColType);
1537 41588 : colindex++;
1538 : }
1539 : Assert(curColType == NULL);
1540 : }
1541 :
1542 : /*
1543 : * Now we can build the tlist for the Append.
1544 : */
1545 5198 : colindex = 0;
1546 19826 : forthree(curColType, colTypes, curColCollation, colCollations,
1547 : ref_tl_item, refnames_tlist)
1548 : {
1549 14628 : Oid colType = lfirst_oid(curColType);
1550 14628 : int32 colTypmod = colTypmods[colindex++];
1551 14628 : Oid colColl = lfirst_oid(curColCollation);
1552 14628 : TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
1553 :
1554 : Assert(reftle->resno == resno);
1555 : Assert(!reftle->resjunk);
1556 14628 : expr = (Node *) makeVar(0,
1557 : resno,
1558 : colType,
1559 : colTypmod,
1560 : colColl,
1561 : 0);
1562 29256 : tle = makeTargetEntry((Expr *) expr,
1563 14628 : (AttrNumber) resno++,
1564 14628 : pstrdup(reftle->resname),
1565 : false);
1566 :
1567 : /*
1568 : * By convention, all output columns in a setop tree have
1569 : * ressortgroupref equal to their resno. In some cases the ref isn't
1570 : * needed, but this is a cleaner way than modifying the tlist later.
1571 : */
1572 14628 : tle->ressortgroupref = tle->resno;
1573 :
1574 14628 : tlist = lappend(tlist, tle);
1575 : }
1576 :
1577 5198 : pfree(colTypmods);
1578 :
1579 5198 : return tlist;
1580 : }
1581 :
1582 : /*
1583 : * generate_setop_grouplist
1584 : * Build a SortGroupClause list defining the sort/grouping properties
1585 : * of the setop's output columns.
1586 : *
1587 : * Parse analysis already determined the properties and built a suitable
1588 : * list, except that the entries do not have sortgrouprefs set because
1589 : * the parser output representation doesn't include a tlist for each
1590 : * setop. So what we need to do here is copy that list and install
1591 : * proper sortgrouprefs into it (copying those from the targetlist).
1592 : */
1593 : static List *
1594 4844 : generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
1595 : {
1596 4844 : List *grouplist = copyObject(op->groupClauses);
1597 : ListCell *lg;
1598 : ListCell *lt;
1599 :
1600 4844 : lg = list_head(grouplist);
1601 19330 : foreach(lt, targetlist)
1602 : {
1603 14486 : TargetEntry *tle = (TargetEntry *) lfirst(lt);
1604 : SortGroupClause *sgc;
1605 :
1606 : Assert(!tle->resjunk);
1607 :
1608 : /* non-resjunk columns should have sortgroupref = resno */
1609 : Assert(tle->ressortgroupref == tle->resno);
1610 :
1611 : /* non-resjunk columns should have grouping clauses */
1612 : Assert(lg != NULL);
1613 14486 : sgc = (SortGroupClause *) lfirst(lg);
1614 14486 : lg = lnext(grouplist, lg);
1615 : Assert(sgc->tleSortGroupRef == 0);
1616 :
1617 14486 : sgc->tleSortGroupRef = tle->ressortgroupref;
1618 : }
1619 : Assert(lg == NULL);
1620 4844 : return grouplist;
1621 : }
|