Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * prepunion.c
4 : * Routines to plan set-operation queries. The filename is a leftover
5 : * from a time when only UNIONs were implemented.
6 : *
7 : * There are two code paths in the planner for set-operation queries.
8 : * If a subquery consists entirely of simple UNION ALL operations, it
9 : * is converted into an "append relation". Otherwise, it is handled
10 : * by the general code in this module (plan_set_operations and its
11 : * subroutines). There is some support code here for the append-relation
12 : * case, but most of the heavy lifting for that is done elsewhere,
13 : * notably in prepjointree.c and allpaths.c.
14 : *
15 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/optimizer/prep/prepunion.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include "access/htup_details.h"
27 : #include "catalog/pg_type.h"
28 : #include "miscadmin.h"
29 : #include "nodes/makefuncs.h"
30 : #include "nodes/nodeFuncs.h"
31 : #include "optimizer/cost.h"
32 : #include "optimizer/pathnode.h"
33 : #include "optimizer/paths.h"
34 : #include "optimizer/planner.h"
35 : #include "optimizer/prep.h"
36 : #include "optimizer/tlist.h"
37 : #include "parser/parse_coerce.h"
38 : #include "utils/selfuncs.h"
39 :
40 :
41 : static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
42 : SetOperationStmt *parentOp,
43 : List *colTypes, List *colCollations,
44 : List *refnames_tlist,
45 : List **pTargetList,
46 : bool *istrivial_tlist);
47 : static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
48 : PlannerInfo *root,
49 : List *refnames_tlist,
50 : List **pTargetList);
51 : static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
52 : bool trivial_tlist, List *child_tlist,
53 : List *interesting_pathkeys,
54 : double *pNumGroups);
55 : static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
56 : List *refnames_tlist,
57 : List **pTargetList);
58 : static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
59 : List *refnames_tlist,
60 : List **pTargetList);
61 : static List *plan_union_children(PlannerInfo *root,
62 : SetOperationStmt *top_union,
63 : List *refnames_tlist,
64 : List **tlist_list,
65 : List **istrivial_tlist);
66 : static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
67 : static List *generate_setop_tlist(List *colTypes, List *colCollations,
68 : Index varno,
69 : bool hack_constants,
70 : List *input_tlist,
71 : List *refnames_tlist,
72 : bool *trivial_tlist);
73 : static List *generate_append_tlist(List *colTypes, List *colCollations,
74 : List *input_tlists,
75 : List *refnames_tlist);
76 : static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
77 :
78 :
79 : /*
80 : * plan_set_operations
81 : *
82 : * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
83 : *
84 : * This routine only deals with the setOperations tree of the given query.
85 : * Any top-level ORDER BY requested in root->parse->sortClause will be handled
86 : * when we return to grouping_planner; likewise for LIMIT.
87 : *
88 : * What we return is an "upperrel" RelOptInfo containing at least one Path
89 : * that implements the set-operation tree. In addition, root->processed_tlist
90 : * receives a targetlist representing the output of the topmost setop node.
91 : */
92 : RelOptInfo *
93 5504 : plan_set_operations(PlannerInfo *root)
94 : {
95 5504 : Query *parse = root->parse;
96 5504 : SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
97 : Node *node;
98 : RangeTblEntry *leftmostRTE;
99 : Query *leftmostQuery;
100 : RelOptInfo *setop_rel;
101 : List *top_tlist;
102 :
103 : Assert(topop);
104 :
105 : /* check for unsupported stuff */
106 : Assert(parse->jointree->fromlist == NIL);
107 : Assert(parse->jointree->quals == NULL);
108 : Assert(parse->groupClause == NIL);
109 : Assert(parse->havingQual == NULL);
110 : Assert(parse->windowClause == NIL);
111 : Assert(parse->distinctClause == NIL);
112 :
113 : /*
114 : * In the outer query level, equivalence classes are limited to classes
115 : * which define that the top-level target entry is equivalent to the
116 : * corresponding child target entry. There won't be any equivalence class
117 : * merging. Mark that merging is complete to allow us to make pathkeys.
118 : */
119 : Assert(root->eq_classes == NIL);
120 5504 : root->ec_merging_done = true;
121 :
122 : /*
123 : * We'll need to build RelOptInfos for each of the leaf subqueries, which
124 : * are RTE_SUBQUERY rangetable entries in this Query. Prepare the index
125 : * arrays for those, and for AppendRelInfos in case they're needed.
126 : */
127 5504 : setup_simple_rel_arrays(root);
128 :
129 : /*
130 : * Find the leftmost component Query. We need to use its column names for
131 : * all generated tlists (else SELECT INTO won't work right).
132 : */
133 5504 : node = topop->larg;
134 9000 : while (node && IsA(node, SetOperationStmt))
135 3496 : node = ((SetOperationStmt *) node)->larg;
136 : Assert(node && IsA(node, RangeTblRef));
137 5504 : leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
138 5504 : leftmostQuery = leftmostRTE->subquery;
139 : Assert(leftmostQuery != NULL);
140 :
141 : /*
142 : * If the topmost node is a recursive union, it needs special processing.
143 : */
144 5504 : if (root->hasRecursion)
145 : {
146 822 : setop_rel = generate_recursion_path(topop, root,
147 : leftmostQuery->targetList,
148 : &top_tlist);
149 : }
150 : else
151 : {
152 : bool trivial_tlist;
153 :
154 : /*
155 : * Recurse on setOperations tree to generate paths for set ops. The
156 : * final output paths should have just the column types shown as the
157 : * output from the top-level node.
158 : */
159 4682 : setop_rel = recurse_set_operations((Node *) topop, root,
160 : NULL, /* no parent */
161 : topop->colTypes, topop->colCollations,
162 : leftmostQuery->targetList,
163 : &top_tlist,
164 : &trivial_tlist);
165 : }
166 :
167 : /* Must return the built tlist into root->processed_tlist. */
168 5498 : root->processed_tlist = top_tlist;
169 :
170 5498 : return setop_rel;
171 : }
172 :
173 : /*
174 : * recurse_set_operations
175 : * Recursively handle one step in a tree of set operations
176 : *
177 : * setOp: current step (could be a SetOperationStmt or a leaf RangeTblRef)
178 : * parentOp: parent step, or NULL if none (but see below)
179 : * colTypes: OID list of set-op's result column datatypes
180 : * colCollations: OID list of set-op's result column collations
181 : * refnames_tlist: targetlist to take column names from
182 : *
183 : * parentOp should be passed as NULL unless that step is interested in
184 : * getting sorted output from this step. ("Sorted" means "sorted according
185 : * to the default btree opclasses of the result column datatypes".)
186 : *
187 : * Returns a RelOptInfo for the subtree, as well as these output parameters:
188 : * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
189 : * *istrivial_tlist: true if, and only if, datatypes between parent and child
190 : * match.
191 : *
192 : * If setOp is a leaf node, this function plans the sub-query but does
193 : * not populate the pathlist of the returned RelOptInfo. The caller will
194 : * generate SubqueryScan paths using useful path(s) of the subquery (see
195 : * build_setop_child_paths). But this function does build the paths for
196 : * set-operation nodes.
197 : *
198 : * The pTargetList output parameter is mostly redundant with the pathtarget
199 : * of the returned RelOptInfo, but for the moment we need it because much of
200 : * the logic in this file depends on flag columns being marked resjunk.
201 : * XXX Now that there are no flag columns and hence no resjunk columns, we
202 : * could probably refactor this file to deal only in pathtargets.
203 : *
204 : * We don't have to care about typmods here: the only allowed difference
205 : * between set-op input and output typmods is input is a specific typmod
206 : * and output is -1, and that does not require a coercion.
207 : */
208 : static RelOptInfo *
209 19408 : recurse_set_operations(Node *setOp, PlannerInfo *root,
210 : SetOperationStmt *parentOp,
211 : List *colTypes, List *colCollations,
212 : List *refnames_tlist,
213 : List **pTargetList,
214 : bool *istrivial_tlist)
215 : {
216 : RelOptInfo *rel;
217 :
218 19408 : *istrivial_tlist = true; /* for now */
219 :
220 : /* Guard against stack overflow due to overly complex setop nests */
221 19408 : check_stack_depth();
222 :
223 19408 : if (IsA(setOp, RangeTblRef))
224 : {
225 14570 : RangeTblRef *rtr = (RangeTblRef *) setOp;
226 14570 : RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
227 14570 : Query *subquery = rte->subquery;
228 : PlannerInfo *subroot;
229 : List *tlist;
230 : bool trivial_tlist;
231 :
232 : Assert(subquery != NULL);
233 :
234 : /* Build a RelOptInfo for this leaf subquery. */
235 14570 : rel = build_simple_rel(root, rtr->rtindex, NULL);
236 :
237 : /* plan_params should not be in use in current query level */
238 : Assert(root->plan_params == NIL);
239 :
240 : /*
241 : * Generate a subroot and Paths for the subquery. If we have a
242 : * parentOp, pass that down to encourage subquery_planner to consider
243 : * suitably-sorted Paths.
244 : */
245 14570 : subroot = rel->subroot = subquery_planner(root->glob, subquery, root,
246 : false, root->tuple_fraction,
247 : parentOp);
248 :
249 : /*
250 : * It should not be possible for the primitive query to contain any
251 : * cross-references to other primitive queries in the setop tree.
252 : */
253 14570 : if (root->plan_params)
254 0 : elog(ERROR, "unexpected outer reference in set operation subquery");
255 :
256 : /* Figure out the appropriate target list for this subquery. */
257 14570 : tlist = generate_setop_tlist(colTypes, colCollations,
258 14570 : rtr->rtindex,
259 : true,
260 : subroot->processed_tlist,
261 : refnames_tlist,
262 : &trivial_tlist);
263 14570 : rel->reltarget = create_pathtarget(root, tlist);
264 :
265 : /* Return the fully-fledged tlist to caller, too */
266 14570 : *pTargetList = tlist;
267 14570 : *istrivial_tlist = trivial_tlist;
268 : }
269 4838 : else if (IsA(setOp, SetOperationStmt))
270 : {
271 4838 : SetOperationStmt *op = (SetOperationStmt *) setOp;
272 :
273 : /* UNIONs are much different from INTERSECT/EXCEPT */
274 4838 : if (op->op == SETOP_UNION)
275 4158 : rel = generate_union_paths(op, root,
276 : refnames_tlist,
277 : pTargetList);
278 : else
279 680 : rel = generate_nonunion_paths(op, root,
280 : refnames_tlist,
281 : pTargetList);
282 :
283 : /*
284 : * If necessary, add a Result node to project the caller-requested
285 : * output columns.
286 : *
287 : * XXX you don't really want to know about this: setrefs.c will apply
288 : * fix_upper_expr() to the Result node's tlist. This would fail if the
289 : * Vars generated by generate_setop_tlist() were not exactly equal()
290 : * to the corresponding tlist entries of the subplan. However, since
291 : * the subplan was generated by generate_union_paths() or
292 : * generate_nonunion_paths(), and hence its tlist was generated by
293 : * generate_append_tlist() or generate_setop_tlist(), this will work.
294 : * We just tell generate_setop_tlist() to use varno 0.
295 : */
296 4838 : if (!tlist_same_datatypes(*pTargetList, colTypes, false) ||
297 4826 : !tlist_same_collations(*pTargetList, colCollations, false))
298 : {
299 : PathTarget *target;
300 : bool trivial_tlist;
301 : ListCell *lc;
302 :
303 12 : *pTargetList = generate_setop_tlist(colTypes, colCollations,
304 : 0,
305 : false,
306 : *pTargetList,
307 : refnames_tlist,
308 : &trivial_tlist);
309 12 : *istrivial_tlist = trivial_tlist;
310 12 : target = create_pathtarget(root, *pTargetList);
311 :
312 : /* Apply projection to each path */
313 24 : foreach(lc, rel->pathlist)
314 : {
315 12 : Path *subpath = (Path *) lfirst(lc);
316 : Path *path;
317 :
318 : Assert(subpath->param_info == NULL);
319 12 : path = apply_projection_to_path(root, subpath->parent,
320 : subpath, target);
321 : /* If we had to add a Result, path is different from subpath */
322 12 : if (path != subpath)
323 12 : lfirst(lc) = path;
324 : }
325 :
326 : /* Apply projection to each partial path */
327 12 : foreach(lc, rel->partial_pathlist)
328 : {
329 0 : Path *subpath = (Path *) lfirst(lc);
330 : Path *path;
331 :
332 : Assert(subpath->param_info == NULL);
333 :
334 : /* avoid apply_projection_to_path, in case of multiple refs */
335 0 : path = (Path *) create_projection_path(root, subpath->parent,
336 : subpath, target);
337 0 : lfirst(lc) = path;
338 : }
339 : }
340 4838 : postprocess_setop_rel(root, rel);
341 : }
342 : else
343 : {
344 0 : elog(ERROR, "unrecognized node type: %d",
345 : (int) nodeTag(setOp));
346 : *pTargetList = NIL;
347 : rel = NULL; /* keep compiler quiet */
348 : }
349 :
350 19408 : return rel;
351 : }
352 :
353 : /*
354 : * Generate paths for a recursive UNION node
355 : */
356 : static RelOptInfo *
357 822 : generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
358 : List *refnames_tlist,
359 : List **pTargetList)
360 : {
361 : RelOptInfo *result_rel;
362 : Path *path;
363 : RelOptInfo *lrel,
364 : *rrel;
365 : Path *lpath;
366 : Path *rpath;
367 : List *lpath_tlist;
368 : bool lpath_trivial_tlist;
369 : List *rpath_tlist;
370 : bool rpath_trivial_tlist;
371 : List *tlist;
372 : List *groupList;
373 : double dNumGroups;
374 :
375 : /* Parser should have rejected other cases */
376 822 : if (setOp->op != SETOP_UNION)
377 0 : elog(ERROR, "only UNION queries can be recursive");
378 : /* Worktable ID should be assigned */
379 : Assert(root->wt_param_id >= 0);
380 :
381 : /*
382 : * Unlike a regular UNION node, process the left and right inputs
383 : * separately without any intention of combining them into one Append.
384 : */
385 822 : lrel = recurse_set_operations(setOp->larg, root,
386 : NULL, /* no value in sorted results */
387 : setOp->colTypes, setOp->colCollations,
388 : refnames_tlist,
389 : &lpath_tlist,
390 : &lpath_trivial_tlist);
391 822 : if (lrel->rtekind == RTE_SUBQUERY)
392 822 : build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
393 : NIL, NULL);
394 822 : lpath = lrel->cheapest_total_path;
395 : /* The right path will want to look at the left one ... */
396 822 : root->non_recursive_path = lpath;
397 822 : rrel = recurse_set_operations(setOp->rarg, root,
398 : NULL, /* no value in sorted results */
399 : setOp->colTypes, setOp->colCollations,
400 : refnames_tlist,
401 : &rpath_tlist,
402 : &rpath_trivial_tlist);
403 822 : if (rrel->rtekind == RTE_SUBQUERY)
404 816 : build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
405 : NIL, NULL);
406 822 : rpath = rrel->cheapest_total_path;
407 822 : root->non_recursive_path = NULL;
408 :
409 : /*
410 : * Generate tlist for RecursiveUnion path node --- same as in Append cases
411 : */
412 822 : tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations,
413 822 : list_make2(lpath_tlist, rpath_tlist),
414 : refnames_tlist);
415 :
416 822 : *pTargetList = tlist;
417 :
418 : /* Build result relation. */
419 822 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
420 822 : bms_union(lrel->relids, rrel->relids));
421 822 : result_rel->reltarget = create_pathtarget(root, tlist);
422 :
423 : /*
424 : * If UNION, identify the grouping operators
425 : */
426 822 : if (setOp->all)
427 : {
428 452 : groupList = NIL;
429 452 : dNumGroups = 0;
430 : }
431 : else
432 : {
433 : /* Identify the grouping semantics */
434 370 : groupList = generate_setop_grouplist(setOp, tlist);
435 :
436 : /* We only support hashing here */
437 370 : if (!grouping_is_hashable(groupList))
438 6 : ereport(ERROR,
439 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
440 : errmsg("could not implement recursive UNION"),
441 : errdetail("All column datatypes must be hashable.")));
442 :
443 : /*
444 : * For the moment, take the number of distinct groups as equal to the
445 : * total input size, ie, the worst case.
446 : */
447 364 : dNumGroups = lpath->rows + rpath->rows * 10;
448 : }
449 :
450 : /*
451 : * And make the path node.
452 : */
453 816 : path = (Path *) create_recursiveunion_path(root,
454 : result_rel,
455 : lpath,
456 : rpath,
457 816 : result_rel->reltarget,
458 : groupList,
459 : root->wt_param_id,
460 : dNumGroups);
461 :
462 816 : add_path(result_rel, path);
463 816 : postprocess_setop_rel(root, result_rel);
464 816 : return result_rel;
465 : }
466 :
467 : /*
468 : * build_setop_child_paths
469 : * Build paths for the set op child relation denoted by 'rel'.
470 : *
471 : * 'rel' is an RTE_SUBQUERY relation. We have already generated paths within
472 : * the subquery's subroot; the task here is to create SubqueryScan paths for
473 : * 'rel', representing scans of the useful subquery paths.
474 : *
475 : * interesting_pathkeys: if not NIL, also include paths that suit these
476 : * pathkeys, sorting any unsorted paths as required.
477 : * *pNumGroups: if not NULL, we estimate the number of distinct groups
478 : * in the result, and store it there.
479 : */
480 : static void
481 14570 : build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
482 : bool trivial_tlist, List *child_tlist,
483 : List *interesting_pathkeys, double *pNumGroups)
484 : {
485 : RelOptInfo *final_rel;
486 14570 : List *setop_pathkeys = rel->subroot->setop_pathkeys;
487 : ListCell *lc;
488 :
489 : /* it can't be a set op child rel if it's not a subquery */
490 : Assert(rel->rtekind == RTE_SUBQUERY);
491 :
492 : /* when sorting is needed, add child rel equivalences */
493 14570 : if (interesting_pathkeys != NIL)
494 11168 : add_setop_child_rel_equivalences(root,
495 : rel,
496 : child_tlist,
497 : interesting_pathkeys);
498 :
499 : /*
500 : * Mark rel with estimated output rows, width, etc. Note that we have to
501 : * do this before generating outer-query paths, else cost_subqueryscan is
502 : * not happy.
503 : */
504 14570 : set_subquery_size_estimates(root, rel);
505 :
506 : /*
507 : * Since we may want to add a partial path to this relation, we must set
508 : * its consider_parallel flag correctly.
509 : */
510 14570 : final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
511 14570 : rel->consider_parallel = final_rel->consider_parallel;
512 :
513 : /* Generate subquery scan paths for any interesting path in final_rel */
514 37742 : foreach(lc, final_rel->pathlist)
515 : {
516 23172 : Path *subpath = (Path *) lfirst(lc);
517 : List *pathkeys;
518 23172 : Path *cheapest_input_path = final_rel->cheapest_total_path;
519 : bool is_sorted;
520 : int presorted_keys;
521 :
522 : /*
523 : * Include the cheapest path as-is so that the set operation can be
524 : * cheaply implemented using a method which does not require the input
525 : * to be sorted.
526 : */
527 23172 : if (subpath == cheapest_input_path)
528 : {
529 : /* Convert subpath's pathkeys to outer representation */
530 14570 : pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
531 : make_tlist_from_pathtarget(subpath->pathtarget));
532 :
533 : /* Generate outer path using this subpath */
534 14570 : add_path(rel, (Path *) create_subqueryscan_path(root,
535 : rel,
536 : subpath,
537 : trivial_tlist,
538 : pathkeys,
539 : NULL));
540 : }
541 :
542 : /* skip dealing with sorted paths if the setop doesn't need them */
543 23172 : if (interesting_pathkeys == NIL)
544 3458 : continue;
545 :
546 : /*
547 : * Create paths to suit final sort order required for setop_pathkeys.
548 : * Here we'll sort the cheapest input path (if not sorted already) and
549 : * incremental sort any paths which are partially sorted.
550 : */
551 19726 : is_sorted = pathkeys_count_contained_in(setop_pathkeys,
552 : subpath->pathkeys,
553 : &presorted_keys);
554 :
555 19726 : if (!is_sorted)
556 : {
557 12970 : double limittuples = rel->subroot->limit_tuples;
558 :
559 : /*
560 : * Try at least sorting the cheapest path and also try
561 : * incrementally sorting any path which is partially sorted
562 : * already (no need to deal with paths which have presorted keys
563 : * when incremental sort is disabled unless it's the cheapest
564 : * input path).
565 : */
566 12970 : if (subpath != cheapest_input_path &&
567 2898 : (presorted_keys == 0 || !enable_incremental_sort))
568 12 : continue;
569 :
570 : /*
571 : * We've no need to consider both a sort and incremental sort.
572 : * We'll just do a sort if there are no presorted keys and an
573 : * incremental sort when there are presorted keys.
574 : */
575 12958 : if (presorted_keys == 0 || !enable_incremental_sort)
576 9976 : subpath = (Path *) create_sort_path(rel->subroot,
577 : final_rel,
578 : subpath,
579 : setop_pathkeys,
580 : limittuples);
581 : else
582 2982 : subpath = (Path *) create_incremental_sort_path(rel->subroot,
583 : final_rel,
584 : subpath,
585 : setop_pathkeys,
586 : presorted_keys,
587 : limittuples);
588 : }
589 :
590 : /*
591 : * subpath is now sorted, so add it to the pathlist. We already added
592 : * the cheapest_input_path above, so don't add it again unless we just
593 : * sorted it.
594 : */
595 19714 : if (subpath != cheapest_input_path)
596 : {
597 : /* Convert subpath's pathkeys to outer representation */
598 18618 : pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
599 : make_tlist_from_pathtarget(subpath->pathtarget));
600 :
601 : /* Generate outer path using this subpath */
602 18618 : add_path(rel, (Path *) create_subqueryscan_path(root,
603 : rel,
604 : subpath,
605 : trivial_tlist,
606 : pathkeys,
607 : NULL));
608 : }
609 : }
610 :
611 : /* if consider_parallel is false, there should be no partial paths */
612 : Assert(final_rel->consider_parallel ||
613 : final_rel->partial_pathlist == NIL);
614 :
615 : /*
616 : * If we have a partial path for the child relation, we can use that to
617 : * build a partial path for this relation. But there's no point in
618 : * considering any path but the cheapest.
619 : */
620 14570 : if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
621 9940 : final_rel->partial_pathlist != NIL)
622 : {
623 : Path *partial_subpath;
624 : Path *partial_path;
625 :
626 12 : partial_subpath = linitial(final_rel->partial_pathlist);
627 : partial_path = (Path *)
628 12 : create_subqueryscan_path(root, rel, partial_subpath,
629 : trivial_tlist,
630 : NIL, NULL);
631 12 : add_partial_path(rel, partial_path);
632 : }
633 :
634 14570 : postprocess_setop_rel(root, rel);
635 :
636 : /*
637 : * Estimate number of groups if caller wants it. If the subquery used
638 : * grouping or aggregation, its output is probably mostly unique anyway;
639 : * otherwise do statistical estimation.
640 : *
641 : * XXX you don't really want to know about this: we do the estimation
642 : * using the subquery's original targetlist expressions, not the
643 : * subroot->processed_tlist which might seem more appropriate. The reason
644 : * is that if the subquery is itself a setop, it may return a
645 : * processed_tlist containing "varno 0" Vars generated by
646 : * generate_append_tlist, and those would confuse estimate_num_groups
647 : * mightily. We ought to get rid of the "varno 0" hack, but that requires
648 : * a redesign of the parsetree representation of setops, so that there can
649 : * be an RTE corresponding to each setop's output.
650 : */
651 14570 : if (pNumGroups)
652 : {
653 1330 : PlannerInfo *subroot = rel->subroot;
654 1330 : Query *subquery = subroot->parse;
655 :
656 1330 : if (subquery->groupClause || subquery->groupingSets ||
657 1330 : subquery->distinctClause || subroot->hasHavingQual ||
658 1318 : subquery->hasAggs)
659 12 : *pNumGroups = rel->cheapest_total_path->rows;
660 : else
661 1318 : *pNumGroups = estimate_num_groups(subroot,
662 : get_tlist_exprs(subquery->targetList, false),
663 1318 : rel->cheapest_total_path->rows,
664 : NULL,
665 : NULL);
666 : }
667 14570 : }
668 :
669 : /*
670 : * Generate paths for a UNION or UNION ALL node
671 : */
672 : static RelOptInfo *
673 4158 : generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
674 : List *refnames_tlist,
675 : List **pTargetList)
676 : {
677 4158 : Relids relids = NULL;
678 : RelOptInfo *result_rel;
679 : ListCell *lc;
680 : ListCell *lc2;
681 : ListCell *lc3;
682 4158 : List *cheapest_pathlist = NIL;
683 4158 : List *ordered_pathlist = NIL;
684 4158 : List *partial_pathlist = NIL;
685 4158 : bool partial_paths_valid = true;
686 4158 : bool consider_parallel = true;
687 : List *rellist;
688 : List *tlist_list;
689 : List *trivial_tlist_list;
690 : List *tlist;
691 4158 : List *groupList = NIL;
692 : Path *apath;
693 4158 : Path *gpath = NULL;
694 4158 : bool try_sorted = false;
695 4158 : List *union_pathkeys = NIL;
696 :
697 : /*
698 : * If any of my children are identical UNION nodes (same op, all-flag, and
699 : * colTypes/colCollations) then they can be merged into this node so that
700 : * we generate only one Append/MergeAppend and unique-ification for the
701 : * lot. Recurse to find such nodes.
702 : */
703 4158 : rellist = plan_union_children(root,
704 : op,
705 : refnames_tlist,
706 : &tlist_list,
707 : &trivial_tlist_list);
708 :
709 : /*
710 : * Generate tlist for Append/MergeAppend plan node.
711 : *
712 : * The tlist for an Append plan isn't important as far as the Append is
713 : * concerned, but we must make it look real anyway for the benefit of the
714 : * next plan level up.
715 : */
716 4158 : tlist = generate_append_tlist(op->colTypes, op->colCollations,
717 : tlist_list, refnames_tlist);
718 4158 : *pTargetList = tlist;
719 :
720 : /* For UNIONs (not UNION ALL), try sorting, if sorting is possible */
721 4158 : if (!op->all)
722 : {
723 : /* Identify the grouping semantics */
724 3578 : groupList = generate_setop_grouplist(op, tlist);
725 :
726 3578 : if (grouping_is_sortable(op->groupClauses))
727 : {
728 3486 : try_sorted = true;
729 : /* Determine the pathkeys for sorting by the whole target list */
730 3486 : union_pathkeys = make_pathkeys_for_sortclauses(root, groupList,
731 : tlist);
732 :
733 3486 : root->query_pathkeys = union_pathkeys;
734 : }
735 : }
736 :
737 : /*
738 : * Now that we've got the append target list, we can build the union child
739 : * paths.
740 : */
741 15880 : forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
742 : {
743 11722 : RelOptInfo *rel = lfirst(lc);
744 11722 : bool trivial_tlist = lfirst_int(lc2);
745 11722 : List *child_tlist = lfirst_node(List, lc3);
746 :
747 : /* only build paths for the union children */
748 11722 : if (rel->rtekind == RTE_SUBQUERY)
749 11602 : build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
750 : union_pathkeys, NULL);
751 : }
752 :
753 : /* Build path lists and relid set. */
754 15880 : foreach(lc, rellist)
755 : {
756 11722 : RelOptInfo *rel = lfirst(lc);
757 : Path *ordered_path;
758 :
759 11722 : cheapest_pathlist = lappend(cheapest_pathlist,
760 11722 : rel->cheapest_total_path);
761 :
762 11722 : if (try_sorted)
763 : {
764 3742 : ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
765 : union_pathkeys,
766 : NULL,
767 : TOTAL_COST,
768 : false);
769 :
770 3742 : if (ordered_path != NULL)
771 476 : ordered_pathlist = lappend(ordered_pathlist, ordered_path);
772 : else
773 : {
774 : /*
775 : * If we can't find a sorted path, just give up trying to
776 : * generate a list of correctly sorted child paths. This can
777 : * happen when type coercion was added to the targetlist due
778 : * to mismatching types from the union children.
779 : */
780 3266 : try_sorted = false;
781 : }
782 : }
783 :
784 11722 : if (consider_parallel)
785 : {
786 8358 : if (!rel->consider_parallel)
787 : {
788 3112 : consider_parallel = false;
789 3112 : partial_paths_valid = false;
790 : }
791 5246 : else if (rel->partial_pathlist == NIL)
792 5234 : partial_paths_valid = false;
793 : else
794 12 : partial_pathlist = lappend(partial_pathlist,
795 12 : linitial(rel->partial_pathlist));
796 : }
797 :
798 11722 : relids = bms_union(relids, rel->relids);
799 : }
800 :
801 : /* Build result relation. */
802 4158 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
803 4158 : result_rel->reltarget = create_pathtarget(root, tlist);
804 4158 : result_rel->consider_parallel = consider_parallel;
805 4158 : result_rel->consider_startup = (root->tuple_fraction > 0);
806 :
807 : /*
808 : * Append the child results together using the cheapest paths from each
809 : * union child.
810 : */
811 4158 : apath = (Path *) create_append_path(root, result_rel, cheapest_pathlist,
812 : NIL, NIL, NULL, 0, false, -1);
813 :
814 : /*
815 : * Estimate number of groups. For now we just assume the output is unique
816 : * --- this is certainly true for the UNION case, and we want worst-case
817 : * estimates anyway.
818 : */
819 4158 : result_rel->rows = apath->rows;
820 :
821 : /*
822 : * Now consider doing the same thing using the partial paths plus Append
823 : * plus Gather.
824 : */
825 4158 : if (partial_paths_valid)
826 : {
827 : Path *papath;
828 6 : int parallel_workers = 0;
829 :
830 : /* Find the highest number of workers requested for any subpath. */
831 18 : foreach(lc, partial_pathlist)
832 : {
833 12 : Path *subpath = lfirst(lc);
834 :
835 12 : parallel_workers = Max(parallel_workers,
836 : subpath->parallel_workers);
837 : }
838 : Assert(parallel_workers > 0);
839 :
840 : /*
841 : * If the use of parallel append is permitted, always request at least
842 : * log2(# of children) paths. We assume it can be useful to have
843 : * extra workers in this case because they will be spread out across
844 : * the children. The precise formula is just a guess; see
845 : * add_paths_to_append_rel.
846 : */
847 6 : if (enable_parallel_append)
848 : {
849 6 : parallel_workers = Max(parallel_workers,
850 : pg_leftmost_one_pos32(list_length(partial_pathlist)) + 1);
851 6 : parallel_workers = Min(parallel_workers,
852 : max_parallel_workers_per_gather);
853 : }
854 : Assert(parallel_workers > 0);
855 :
856 : papath = (Path *)
857 6 : create_append_path(root, result_rel, NIL, partial_pathlist,
858 : NIL, NULL, parallel_workers,
859 : enable_parallel_append, -1);
860 : gpath = (Path *)
861 6 : create_gather_path(root, result_rel, papath,
862 6 : result_rel->reltarget, NULL, NULL);
863 : }
864 :
865 4158 : if (!op->all)
866 : {
867 : double dNumGroups;
868 3578 : bool can_sort = grouping_is_sortable(groupList);
869 3578 : bool can_hash = grouping_is_hashable(groupList);
870 :
871 : /*
872 : * XXX for the moment, take the number of distinct groups as equal to
873 : * the total input size, i.e., the worst case. This is too
874 : * conservative, but it's not clear how to get a decent estimate of
875 : * the true size. One should note as well the propensity of novices
876 : * to write UNION rather than UNION ALL even when they don't expect
877 : * any duplicates...
878 : */
879 3578 : dNumGroups = apath->rows;
880 :
881 3578 : if (can_hash)
882 : {
883 : Path *path;
884 :
885 : /*
886 : * Try a hash aggregate plan on 'apath'. This is the cheapest
887 : * available path containing each append child.
888 : */
889 3506 : path = (Path *) create_agg_path(root,
890 : result_rel,
891 : apath,
892 : create_pathtarget(root, tlist),
893 : AGG_HASHED,
894 : AGGSPLIT_SIMPLE,
895 : groupList,
896 : NIL,
897 : NULL,
898 : dNumGroups);
899 3506 : add_path(result_rel, path);
900 :
901 : /* Try hash aggregate on the Gather path, if valid */
902 3506 : if (gpath != NULL)
903 : {
904 : /* Hashed aggregate plan --- no sort needed */
905 6 : path = (Path *) create_agg_path(root,
906 : result_rel,
907 : gpath,
908 : create_pathtarget(root, tlist),
909 : AGG_HASHED,
910 : AGGSPLIT_SIMPLE,
911 : groupList,
912 : NIL,
913 : NULL,
914 : dNumGroups);
915 6 : add_path(result_rel, path);
916 : }
917 : }
918 :
919 3578 : if (can_sort)
920 : {
921 3486 : Path *path = apath;
922 :
923 : /* Try Sort -> Unique on the Append path */
924 3486 : if (groupList != NIL)
925 3456 : path = (Path *) create_sort_path(root, result_rel, path,
926 : make_pathkeys_for_sortclauses(root, groupList, tlist),
927 : -1.0);
928 :
929 3486 : path = (Path *) create_upper_unique_path(root,
930 : result_rel,
931 : path,
932 3486 : list_length(path->pathkeys),
933 : dNumGroups);
934 :
935 3486 : add_path(result_rel, path);
936 :
937 : /* Try Sort -> Unique on the Gather path, if set */
938 3486 : if (gpath != NULL)
939 : {
940 6 : path = gpath;
941 :
942 6 : path = (Path *) create_sort_path(root, result_rel, path,
943 : make_pathkeys_for_sortclauses(root, groupList, tlist),
944 : -1.0);
945 :
946 6 : path = (Path *) create_upper_unique_path(root,
947 : result_rel,
948 : path,
949 6 : list_length(path->pathkeys),
950 : dNumGroups);
951 6 : add_path(result_rel, path);
952 : }
953 : }
954 :
955 : /*
956 : * Try making a MergeAppend path if we managed to find a path with the
957 : * correct pathkeys in each union child query.
958 : */
959 3578 : if (try_sorted && groupList != NIL)
960 : {
961 : Path *path;
962 :
963 190 : path = (Path *) create_merge_append_path(root,
964 : result_rel,
965 : ordered_pathlist,
966 : union_pathkeys,
967 : NULL);
968 :
969 : /* and make the MergeAppend unique */
970 190 : path = (Path *) create_upper_unique_path(root,
971 : result_rel,
972 : path,
973 : list_length(tlist),
974 : dNumGroups);
975 :
976 190 : add_path(result_rel, path);
977 : }
978 : }
979 : else
980 : {
981 : /* UNION ALL */
982 580 : add_path(result_rel, apath);
983 :
984 580 : if (gpath != NULL)
985 0 : add_path(result_rel, gpath);
986 : }
987 :
988 4158 : return result_rel;
989 : }
990 :
991 : /*
992 : * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
993 : */
994 : static RelOptInfo *
995 680 : generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
996 : List *refnames_tlist,
997 : List **pTargetList)
998 : {
999 : RelOptInfo *result_rel;
1000 : RelOptInfo *lrel,
1001 : *rrel;
1002 680 : double save_fraction = root->tuple_fraction;
1003 : Path *lpath,
1004 : *rpath,
1005 : *path;
1006 : List *lpath_tlist,
1007 : *rpath_tlist,
1008 : *tlist,
1009 : *groupList;
1010 : bool lpath_trivial_tlist,
1011 : rpath_trivial_tlist,
1012 : result_trivial_tlist;
1013 680 : List *nonunion_pathkeys = NIL;
1014 : double dLeftGroups,
1015 : dRightGroups,
1016 : dNumGroups,
1017 : dNumOutputRows;
1018 : bool can_sort;
1019 : bool can_hash;
1020 : SetOpCmd cmd;
1021 :
1022 : /*
1023 : * Tell children to fetch all tuples.
1024 : */
1025 680 : root->tuple_fraction = 0.0;
1026 :
1027 : /* Recurse on children */
1028 680 : lrel = recurse_set_operations(op->larg, root,
1029 : op,
1030 : op->colTypes, op->colCollations,
1031 : refnames_tlist,
1032 : &lpath_tlist,
1033 : &lpath_trivial_tlist);
1034 :
1035 680 : rrel = recurse_set_operations(op->rarg, root,
1036 : op,
1037 : op->colTypes, op->colCollations,
1038 : refnames_tlist,
1039 : &rpath_tlist,
1040 : &rpath_trivial_tlist);
1041 :
1042 : /*
1043 : * Generate tlist for SetOp plan node.
1044 : *
1045 : * The tlist for a SetOp plan isn't important so far as the SetOp is
1046 : * concerned, but we must make it look real anyway for the benefit of the
1047 : * next plan level up.
1048 : */
1049 680 : tlist = generate_setop_tlist(op->colTypes, op->colCollations,
1050 : 0, false, lpath_tlist, refnames_tlist,
1051 : &result_trivial_tlist);
1052 :
1053 : /* We should not have needed any type coercions in the tlist */
1054 : Assert(result_trivial_tlist);
1055 :
1056 680 : *pTargetList = tlist;
1057 :
1058 : /* Identify the grouping semantics */
1059 680 : groupList = generate_setop_grouplist(op, tlist);
1060 :
1061 : /* Check whether the operators support sorting or hashing */
1062 680 : can_sort = grouping_is_sortable(groupList);
1063 680 : can_hash = grouping_is_hashable(groupList);
1064 680 : if (!can_sort && !can_hash)
1065 0 : ereport(ERROR,
1066 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1067 : /* translator: %s is INTERSECT or EXCEPT */
1068 : errmsg("could not implement %s",
1069 : (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT"),
1070 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1071 :
1072 680 : if (can_sort)
1073 : {
1074 : /* Determine the pathkeys for sorting by the whole target list */
1075 680 : nonunion_pathkeys = make_pathkeys_for_sortclauses(root, groupList,
1076 : tlist);
1077 :
1078 680 : root->query_pathkeys = nonunion_pathkeys;
1079 : }
1080 :
1081 : /*
1082 : * Now that we've got all that info, we can build the child paths.
1083 : */
1084 680 : if (lrel->rtekind == RTE_SUBQUERY)
1085 656 : build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
1086 : nonunion_pathkeys, &dLeftGroups);
1087 : else
1088 24 : dLeftGroups = lrel->rows;
1089 680 : if (rrel->rtekind == RTE_SUBQUERY)
1090 674 : build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
1091 : nonunion_pathkeys, &dRightGroups);
1092 : else
1093 6 : dRightGroups = rrel->rows;
1094 :
1095 : /* Undo effects of forcing tuple_fraction to 0 */
1096 680 : root->tuple_fraction = save_fraction;
1097 :
1098 : /*
1099 : * For EXCEPT, we must put the left input first. For INTERSECT, either
1100 : * order should give the same results, and we prefer to put the smaller
1101 : * input first in order to (a) minimize the size of the hash table in the
1102 : * hashing case, and (b) improve our chances of exploiting the executor's
1103 : * fast path for empty left-hand input. "Smaller" means the one with the
1104 : * fewer groups.
1105 : */
1106 680 : if (op->op != SETOP_EXCEPT && dLeftGroups > dRightGroups)
1107 : {
1108 : /* need to swap the two inputs */
1109 : RelOptInfo *tmprel;
1110 : List *tmplist;
1111 : double tmpd;
1112 :
1113 30 : tmprel = lrel;
1114 30 : lrel = rrel;
1115 30 : rrel = tmprel;
1116 30 : tmplist = lpath_tlist;
1117 30 : lpath_tlist = rpath_tlist;
1118 30 : rpath_tlist = tmplist;
1119 30 : tmpd = dLeftGroups;
1120 30 : dLeftGroups = dRightGroups;
1121 30 : dRightGroups = tmpd;
1122 : }
1123 :
1124 680 : lpath = lrel->cheapest_total_path;
1125 680 : rpath = rrel->cheapest_total_path;
1126 :
1127 : /* Build result relation. */
1128 680 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
1129 680 : bms_union(lrel->relids, rrel->relids));
1130 680 : result_rel->reltarget = create_pathtarget(root, tlist);
1131 :
1132 : /*
1133 : * Estimate number of distinct groups that we'll need hashtable entries
1134 : * for; this is the size of the left-hand input for EXCEPT, or the smaller
1135 : * input for INTERSECT. Also estimate the number of eventual output rows.
1136 : * In non-ALL cases, we estimate each group produces one output row; in
1137 : * ALL cases use the relevant relation size. These are worst-case
1138 : * estimates, of course, but we need to be conservative.
1139 : */
1140 680 : if (op->op == SETOP_EXCEPT)
1141 : {
1142 452 : dNumGroups = dLeftGroups;
1143 452 : dNumOutputRows = op->all ? lpath->rows : dNumGroups;
1144 : }
1145 : else
1146 : {
1147 228 : dNumGroups = dLeftGroups;
1148 228 : dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
1149 : }
1150 680 : result_rel->rows = dNumOutputRows;
1151 :
1152 : /* Select the SetOpCmd type */
1153 680 : switch (op->op)
1154 : {
1155 228 : case SETOP_INTERSECT:
1156 228 : cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
1157 228 : break;
1158 452 : case SETOP_EXCEPT:
1159 452 : cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
1160 452 : break;
1161 0 : default:
1162 0 : elog(ERROR, "unrecognized set op: %d", (int) op->op);
1163 : cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
1164 : break;
1165 : }
1166 :
1167 : /*
1168 : * If we can hash, that just requires a SetOp atop the cheapest inputs.
1169 : */
1170 680 : if (can_hash)
1171 : {
1172 620 : path = (Path *) create_setop_path(root,
1173 : result_rel,
1174 : lpath,
1175 : rpath,
1176 : cmd,
1177 : SETOP_HASHED,
1178 : groupList,
1179 : dNumGroups,
1180 : dNumOutputRows);
1181 620 : add_path(result_rel, path);
1182 : }
1183 :
1184 : /*
1185 : * If we can sort, generate the cheapest sorted input paths, and add a
1186 : * SetOp atop those.
1187 : */
1188 680 : if (can_sort)
1189 : {
1190 : List *pathkeys;
1191 : Path *slpath,
1192 : *srpath;
1193 :
1194 : /* First the left input ... */
1195 680 : pathkeys = make_pathkeys_for_sortclauses(root,
1196 : groupList,
1197 : lpath_tlist);
1198 680 : if (pathkeys_contained_in(pathkeys, lpath->pathkeys))
1199 96 : slpath = lpath; /* cheapest path is already sorted */
1200 : else
1201 : {
1202 584 : slpath = get_cheapest_path_for_pathkeys(lrel->pathlist,
1203 : nonunion_pathkeys,
1204 : NULL,
1205 : TOTAL_COST,
1206 : false);
1207 : /* Subquery failed to produce any presorted paths? */
1208 584 : if (slpath == NULL)
1209 168 : slpath = (Path *) create_sort_path(root,
1210 : lpath->parent,
1211 : lpath,
1212 : pathkeys,
1213 : -1.0);
1214 : }
1215 :
1216 : /* and now the same for the right. */
1217 680 : pathkeys = make_pathkeys_for_sortclauses(root,
1218 : groupList,
1219 : rpath_tlist);
1220 680 : if (pathkeys_contained_in(pathkeys, rpath->pathkeys))
1221 108 : srpath = rpath; /* cheapest path is already sorted */
1222 : else
1223 : {
1224 572 : srpath = get_cheapest_path_for_pathkeys(rrel->pathlist,
1225 : nonunion_pathkeys,
1226 : NULL,
1227 : TOTAL_COST,
1228 : false);
1229 : /* Subquery failed to produce any presorted paths? */
1230 572 : if (srpath == NULL)
1231 174 : srpath = (Path *) create_sort_path(root,
1232 : rpath->parent,
1233 : rpath,
1234 : pathkeys,
1235 : -1.0);
1236 : }
1237 :
1238 680 : path = (Path *) create_setop_path(root,
1239 : result_rel,
1240 : slpath,
1241 : srpath,
1242 : cmd,
1243 : SETOP_SORTED,
1244 : groupList,
1245 : dNumGroups,
1246 : dNumOutputRows);
1247 680 : add_path(result_rel, path);
1248 : }
1249 :
1250 680 : return result_rel;
1251 : }
1252 :
1253 : /*
1254 : * Pull up children of a UNION node that are identically-propertied UNIONs,
1255 : * and perform planning of the queries underneath the N-way UNION.
1256 : *
1257 : * The result is a list of RelOptInfos containing Paths for sub-nodes, with
1258 : * one entry for each descendant that is a leaf query or non-identical setop.
1259 : * We also return parallel lists of the childrens' targetlists and
1260 : * is-trivial-tlist flags.
1261 : *
1262 : * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
1263 : * output rows will be lost anyway.
1264 : */
1265 : static List *
1266 4158 : plan_union_children(PlannerInfo *root,
1267 : SetOperationStmt *top_union,
1268 : List *refnames_tlist,
1269 : List **tlist_list,
1270 : List **istrivial_tlist)
1271 : {
1272 4158 : List *pending_rels = list_make1(top_union);
1273 4158 : List *result = NIL;
1274 : List *child_tlist;
1275 : bool trivial_tlist;
1276 :
1277 4158 : *tlist_list = NIL;
1278 4158 : *istrivial_tlist = NIL;
1279 :
1280 23444 : while (pending_rels != NIL)
1281 : {
1282 19286 : Node *setOp = linitial(pending_rels);
1283 :
1284 19286 : pending_rels = list_delete_first(pending_rels);
1285 :
1286 19286 : if (IsA(setOp, SetOperationStmt))
1287 : {
1288 7684 : SetOperationStmt *op = (SetOperationStmt *) setOp;
1289 :
1290 7684 : if (op->op == top_union->op &&
1291 15170 : (op->all == top_union->all || op->all) &&
1292 15140 : equal(op->colTypes, top_union->colTypes) &&
1293 7564 : equal(op->colCollations, top_union->colCollations))
1294 : {
1295 : /* Same UNION, so fold children into parent */
1296 7564 : pending_rels = lcons(op->rarg, pending_rels);
1297 7564 : pending_rels = lcons(op->larg, pending_rels);
1298 7564 : continue;
1299 : }
1300 : }
1301 :
1302 : /*
1303 : * Not same, so plan this child separately.
1304 : *
1305 : * If top_union isn't a UNION ALL, then we are interested in sorted
1306 : * output from the child, so pass top_union as parentOp. Note that
1307 : * this isn't necessarily the child node's immediate SetOperationStmt
1308 : * parent, but that's fine: it's the effective parent.
1309 : */
1310 11722 : result = lappend(result, recurse_set_operations(setOp, root,
1311 11722 : top_union->all ? NULL : top_union,
1312 : top_union->colTypes,
1313 : top_union->colCollations,
1314 : refnames_tlist,
1315 : &child_tlist,
1316 : &trivial_tlist));
1317 11722 : *tlist_list = lappend(*tlist_list, child_tlist);
1318 11722 : *istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
1319 : }
1320 :
1321 4158 : return result;
1322 : }
1323 :
1324 : /*
1325 : * postprocess_setop_rel - perform steps required after adding paths
1326 : */
1327 : static void
1328 20224 : postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
1329 : {
1330 : /*
1331 : * We don't currently worry about allowing FDWs to contribute paths to
1332 : * this relation, but give extensions a chance.
1333 : */
1334 20224 : if (create_upper_paths_hook)
1335 0 : (*create_upper_paths_hook) (root, UPPERREL_SETOP,
1336 : NULL, rel, NULL);
1337 :
1338 : /* Select cheapest path */
1339 20224 : set_cheapest(rel);
1340 20224 : }
1341 :
1342 : /*
1343 : * Generate targetlist for a set-operation plan node
1344 : *
1345 : * colTypes: OID list of set-op's result column datatypes
1346 : * colCollations: OID list of set-op's result column collations
1347 : * varno: varno to use in generated Vars
1348 : * hack_constants: true to copy up constants (see comments in code)
1349 : * input_tlist: targetlist of this node's input node
1350 : * refnames_tlist: targetlist to take column names from
1351 : * trivial_tlist: output parameter, set to true if targetlist is trivial
1352 : */
1353 : static List *
1354 15262 : generate_setop_tlist(List *colTypes, List *colCollations,
1355 : Index varno,
1356 : bool hack_constants,
1357 : List *input_tlist,
1358 : List *refnames_tlist,
1359 : bool *trivial_tlist)
1360 : {
1361 15262 : List *tlist = NIL;
1362 15262 : int resno = 1;
1363 : ListCell *ctlc,
1364 : *cclc,
1365 : *itlc,
1366 : *rtlc;
1367 : TargetEntry *tle;
1368 : Node *expr;
1369 :
1370 15262 : *trivial_tlist = true; /* until proven differently */
1371 :
1372 63638 : forfour(ctlc, colTypes, cclc, colCollations,
1373 : itlc, input_tlist, rtlc, refnames_tlist)
1374 : {
1375 48376 : Oid colType = lfirst_oid(ctlc);
1376 48376 : Oid colColl = lfirst_oid(cclc);
1377 48376 : TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
1378 48376 : TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
1379 :
1380 : Assert(inputtle->resno == resno);
1381 : Assert(reftle->resno == resno);
1382 : Assert(!inputtle->resjunk);
1383 : Assert(!reftle->resjunk);
1384 :
1385 : /*
1386 : * Generate columns referencing input columns and having appropriate
1387 : * data types and column names. Insert datatype coercions where
1388 : * necessary.
1389 : *
1390 : * HACK: constants in the input's targetlist are copied up as-is
1391 : * rather than being referenced as subquery outputs. This is mainly
1392 : * to ensure that when we try to coerce them to the output column's
1393 : * datatype, the right things happen for UNKNOWN constants. But do
1394 : * this only at the first level of subquery-scan plans; we don't want
1395 : * phony constants appearing in the output tlists of upper-level
1396 : * nodes!
1397 : *
1398 : * Note that copying a constant doesn't in itself require us to mark
1399 : * the tlist nontrivial; see trivial_subqueryscan() in setrefs.c.
1400 : */
1401 48376 : if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
1402 14874 : expr = (Node *) inputtle->expr;
1403 : else
1404 134008 : expr = (Node *) makeVar(varno,
1405 33502 : inputtle->resno,
1406 33502 : exprType((Node *) inputtle->expr),
1407 33502 : exprTypmod((Node *) inputtle->expr),
1408 33502 : exprCollation((Node *) inputtle->expr),
1409 : 0);
1410 :
1411 48376 : if (exprType(expr) != colType)
1412 : {
1413 : /*
1414 : * Note: it's not really cool to be applying coerce_to_common_type
1415 : * here; one notable point is that assign_expr_collations never
1416 : * gets run on any generated nodes. For the moment that's not a
1417 : * problem because we force the correct exposed collation below.
1418 : * It would likely be best to make the parser generate the correct
1419 : * output tlist for every set-op to begin with, though.
1420 : */
1421 1482 : expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */
1422 : expr,
1423 : colType,
1424 : "UNION/INTERSECT/EXCEPT");
1425 1482 : *trivial_tlist = false; /* the coercion makes it not trivial */
1426 : }
1427 :
1428 : /*
1429 : * Ensure the tlist entry's exposed collation matches the set-op. This
1430 : * is necessary because plan_set_operations() reports the result
1431 : * ordering as a list of SortGroupClauses, which don't carry collation
1432 : * themselves but just refer to tlist entries. If we don't show the
1433 : * right collation then planner.c might do the wrong thing in
1434 : * higher-level queries.
1435 : *
1436 : * Note we use RelabelType, not CollateExpr, since this expression
1437 : * will reach the executor without any further processing.
1438 : */
1439 48376 : if (exprCollation(expr) != colColl)
1440 : {
1441 12270 : expr = applyRelabelType(expr,
1442 : exprType(expr), exprTypmod(expr), colColl,
1443 : COERCE_IMPLICIT_CAST, -1, false);
1444 12270 : *trivial_tlist = false; /* the relabel makes it not trivial */
1445 : }
1446 :
1447 96752 : tle = makeTargetEntry((Expr *) expr,
1448 48376 : (AttrNumber) resno++,
1449 48376 : pstrdup(reftle->resname),
1450 : false);
1451 :
1452 : /*
1453 : * By convention, all output columns in a setop tree have
1454 : * ressortgroupref equal to their resno. In some cases the ref isn't
1455 : * needed, but this is a cleaner way than modifying the tlist later.
1456 : */
1457 48376 : tle->ressortgroupref = tle->resno;
1458 :
1459 48376 : tlist = lappend(tlist, tle);
1460 : }
1461 :
1462 15262 : return tlist;
1463 : }
1464 :
1465 : /*
1466 : * Generate targetlist for a set-operation Append node
1467 : *
1468 : * colTypes: OID list of set-op's result column datatypes
1469 : * colCollations: OID list of set-op's result column collations
1470 : * input_tlists: list of tlists for sub-plans of the Append
1471 : * refnames_tlist: targetlist to take column names from
1472 : *
1473 : * The entries in the Append's targetlist should always be simple Vars;
1474 : * we just have to make sure they have the right datatypes/typmods/collations.
1475 : * The Vars are always generated with varno 0.
1476 : *
1477 : * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
1478 : * cannot figure out a realistic width for the tlist we make here. But we
1479 : * ought to refactor this code to produce a PathTarget directly, anyway.
1480 : */
1481 : static List *
1482 4980 : generate_append_tlist(List *colTypes, List *colCollations,
1483 : List *input_tlists,
1484 : List *refnames_tlist)
1485 : {
1486 4980 : List *tlist = NIL;
1487 4980 : int resno = 1;
1488 : ListCell *curColType;
1489 : ListCell *curColCollation;
1490 : ListCell *ref_tl_item;
1491 : int colindex;
1492 : TargetEntry *tle;
1493 : Node *expr;
1494 : ListCell *tlistl;
1495 : int32 *colTypmods;
1496 :
1497 : /*
1498 : * First extract typmods to use.
1499 : *
1500 : * If the inputs all agree on type and typmod of a particular column, use
1501 : * that typmod; else use -1.
1502 : */
1503 4980 : colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
1504 :
1505 18346 : foreach(tlistl, input_tlists)
1506 : {
1507 13366 : List *subtlist = (List *) lfirst(tlistl);
1508 : ListCell *subtlistl;
1509 :
1510 13366 : curColType = list_head(colTypes);
1511 13366 : colindex = 0;
1512 53120 : foreach(subtlistl, subtlist)
1513 : {
1514 39754 : TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
1515 :
1516 : Assert(!subtle->resjunk);
1517 : Assert(curColType != NULL);
1518 39754 : if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
1519 : {
1520 : /* If first subplan, copy the typmod; else compare */
1521 39754 : int32 subtypmod = exprTypmod((Node *) subtle->expr);
1522 :
1523 39754 : if (tlistl == list_head(input_tlists))
1524 13998 : colTypmods[colindex] = subtypmod;
1525 25756 : else if (subtypmod != colTypmods[colindex])
1526 12 : colTypmods[colindex] = -1;
1527 : }
1528 : else
1529 : {
1530 : /* types disagree, so force typmod to -1 */
1531 0 : colTypmods[colindex] = -1;
1532 : }
1533 39754 : curColType = lnext(colTypes, curColType);
1534 39754 : colindex++;
1535 : }
1536 : Assert(curColType == NULL);
1537 : }
1538 :
1539 : /*
1540 : * Now we can build the tlist for the Append.
1541 : */
1542 4980 : colindex = 0;
1543 18978 : forthree(curColType, colTypes, curColCollation, colCollations,
1544 : ref_tl_item, refnames_tlist)
1545 : {
1546 13998 : Oid colType = lfirst_oid(curColType);
1547 13998 : int32 colTypmod = colTypmods[colindex++];
1548 13998 : Oid colColl = lfirst_oid(curColCollation);
1549 13998 : TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
1550 :
1551 : Assert(reftle->resno == resno);
1552 : Assert(!reftle->resjunk);
1553 13998 : expr = (Node *) makeVar(0,
1554 : resno,
1555 : colType,
1556 : colTypmod,
1557 : colColl,
1558 : 0);
1559 27996 : tle = makeTargetEntry((Expr *) expr,
1560 13998 : (AttrNumber) resno++,
1561 13998 : pstrdup(reftle->resname),
1562 : false);
1563 :
1564 : /*
1565 : * By convention, all output columns in a setop tree have
1566 : * ressortgroupref equal to their resno. In some cases the ref isn't
1567 : * needed, but this is a cleaner way than modifying the tlist later.
1568 : */
1569 13998 : tle->ressortgroupref = tle->resno;
1570 :
1571 13998 : tlist = lappend(tlist, tle);
1572 : }
1573 :
1574 4980 : pfree(colTypmods);
1575 :
1576 4980 : return tlist;
1577 : }
1578 :
1579 : /*
1580 : * generate_setop_grouplist
1581 : * Build a SortGroupClause list defining the sort/grouping properties
1582 : * of the setop's output columns.
1583 : *
1584 : * Parse analysis already determined the properties and built a suitable
1585 : * list, except that the entries do not have sortgrouprefs set because
1586 : * the parser output representation doesn't include a tlist for each
1587 : * setop. So what we need to do here is copy that list and install
1588 : * proper sortgrouprefs into it (copying those from the targetlist).
1589 : */
1590 : static List *
1591 4628 : generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
1592 : {
1593 4628 : List *grouplist = copyObject(op->groupClauses);
1594 : ListCell *lg;
1595 : ListCell *lt;
1596 :
1597 4628 : lg = list_head(grouplist);
1598 18518 : foreach(lt, targetlist)
1599 : {
1600 13890 : TargetEntry *tle = (TargetEntry *) lfirst(lt);
1601 : SortGroupClause *sgc;
1602 :
1603 : Assert(!tle->resjunk);
1604 :
1605 : /* non-resjunk columns should have sortgroupref = resno */
1606 : Assert(tle->ressortgroupref == tle->resno);
1607 :
1608 : /* non-resjunk columns should have grouping clauses */
1609 : Assert(lg != NULL);
1610 13890 : sgc = (SortGroupClause *) lfirst(lg);
1611 13890 : lg = lnext(grouplist, lg);
1612 : Assert(sgc->tleSortGroupRef == 0);
1613 :
1614 13890 : sgc->tleSortGroupRef = tle->ressortgroupref;
1615 : }
1616 : Assert(lg == NULL);
1617 4628 : return grouplist;
1618 : }
|