Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * prepunion.c
4 : * Routines to plan set-operation queries. The filename is a leftover
5 : * from a time when only UNIONs were implemented.
6 : *
7 : * There are two code paths in the planner for set-operation queries.
8 : * If a subquery consists entirely of simple UNION ALL operations, it
9 : * is converted into an "append relation". Otherwise, it is handled
10 : * by the general code in this module (plan_set_operations and its
11 : * subroutines). There is some support code here for the append-relation
12 : * case, but most of the heavy lifting for that is done elsewhere,
13 : * notably in prepjointree.c and allpaths.c.
14 : *
15 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/optimizer/prep/prepunion.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include <math.h>
27 :
28 : #include "access/htup_details.h"
29 : #include "catalog/pg_type.h"
30 : #include "miscadmin.h"
31 : #include "nodes/makefuncs.h"
32 : #include "nodes/nodeFuncs.h"
33 : #include "optimizer/cost.h"
34 : #include "optimizer/pathnode.h"
35 : #include "optimizer/paths.h"
36 : #include "optimizer/planner.h"
37 : #include "optimizer/prep.h"
38 : #include "optimizer/tlist.h"
39 : #include "parser/parse_coerce.h"
40 : #include "port/pg_bitutils.h"
41 : #include "utils/selfuncs.h"
42 :
43 :
44 : static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
45 : SetOperationStmt *parentOp,
46 : List *colTypes, List *colCollations,
47 : List *refnames_tlist,
48 : List **pTargetList,
49 : bool *istrivial_tlist);
50 : static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
51 : PlannerInfo *root,
52 : List *refnames_tlist,
53 : List **pTargetList);
54 : static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
55 : bool trivial_tlist, List *child_tlist,
56 : List *interesting_pathkeys,
57 : double *pNumGroups);
58 : static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
59 : List *refnames_tlist,
60 : List **pTargetList);
61 : static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
62 : List *refnames_tlist,
63 : List **pTargetList);
64 : static List *plan_union_children(PlannerInfo *root,
65 : SetOperationStmt *top_union,
66 : List *refnames_tlist,
67 : List **tlist_list,
68 : List **istrivial_tlist);
69 : static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
70 : static List *generate_setop_tlist(List *colTypes, List *colCollations,
71 : Index varno,
72 : bool hack_constants,
73 : List *input_tlist,
74 : List *refnames_tlist,
75 : bool *trivial_tlist);
76 : static List *generate_append_tlist(List *colTypes, List *colCollations,
77 : List *input_tlists,
78 : List *refnames_tlist);
79 : static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
80 : static PathTarget *create_setop_pathtarget(PlannerInfo *root, List *tlist,
81 : List *child_pathlist);
82 :
83 :
84 : /*
85 : * plan_set_operations
86 : *
87 : * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
88 : *
89 : * This routine only deals with the setOperations tree of the given query.
90 : * Any top-level ORDER BY requested in root->parse->sortClause will be handled
91 : * when we return to grouping_planner; likewise for LIMIT.
92 : *
93 : * What we return is an "upperrel" RelOptInfo containing at least one Path
94 : * that implements the set-operation tree. In addition, root->processed_tlist
95 : * receives a targetlist representing the output of the topmost setop node.
96 : */
97 : RelOptInfo *
98 4914 : plan_set_operations(PlannerInfo *root)
99 : {
100 4914 : Query *parse = root->parse;
101 4914 : SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
102 : Node *node;
103 : RangeTblEntry *leftmostRTE;
104 : Query *leftmostQuery;
105 : RelOptInfo *setop_rel;
106 : List *top_tlist;
107 :
108 : Assert(topop);
109 :
110 : /* check for unsupported stuff */
111 : Assert(parse->jointree->fromlist == NIL);
112 : Assert(parse->jointree->quals == NULL);
113 : Assert(parse->groupClause == NIL);
114 : Assert(parse->havingQual == NULL);
115 : Assert(parse->windowClause == NIL);
116 : Assert(parse->distinctClause == NIL);
117 :
118 : /*
119 : * In the outer query level, equivalence classes are limited to classes
120 : * which define that the top-level target entry is equivalent to the
121 : * corresponding child target entry. There won't be any equivalence class
122 : * merging. Mark that merging is complete to allow us to make pathkeys.
123 : */
124 : Assert(root->eq_classes == NIL);
125 4914 : root->ec_merging_done = true;
126 :
127 : /*
128 : * We'll need to build RelOptInfos for each of the leaf subqueries, which
129 : * are RTE_SUBQUERY rangetable entries in this Query. Prepare the index
130 : * arrays for those, and for AppendRelInfos in case they're needed.
131 : */
132 4914 : setup_simple_rel_arrays(root);
133 :
134 : /*
135 : * Find the leftmost component Query. We need to use its column names for
136 : * all generated tlists (else SELECT INTO won't work right).
137 : */
138 4914 : node = topop->larg;
139 8267 : while (node && IsA(node, SetOperationStmt))
140 3353 : node = ((SetOperationStmt *) node)->larg;
141 : Assert(node && IsA(node, RangeTblRef));
142 4914 : leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
143 4914 : leftmostQuery = leftmostRTE->subquery;
144 : Assert(leftmostQuery != NULL);
145 :
146 : /*
147 : * If the topmost node is a recursive union, it needs special processing.
148 : */
149 4914 : if (root->hasRecursion)
150 : {
151 692 : setop_rel = generate_recursion_path(topop, root,
152 : leftmostQuery->targetList,
153 : &top_tlist);
154 : }
155 : else
156 : {
157 : bool trivial_tlist;
158 :
159 : /*
160 : * Recurse on setOperations tree to generate paths for set ops. The
161 : * final output paths should have just the column types shown as the
162 : * output from the top-level node.
163 : */
164 4222 : setop_rel = recurse_set_operations((Node *) topop, root,
165 : NULL, /* no parent */
166 : topop->colTypes, topop->colCollations,
167 : leftmostQuery->targetList,
168 : &top_tlist,
169 : &trivial_tlist);
170 : }
171 :
172 : /* Must return the built tlist into root->processed_tlist. */
173 4910 : root->processed_tlist = top_tlist;
174 :
175 4910 : return setop_rel;
176 : }
177 :
178 : /*
179 : * recurse_set_operations
180 : * Recursively handle one step in a tree of set operations
181 : *
182 : * setOp: current step (could be a SetOperationStmt or a leaf RangeTblRef)
183 : * parentOp: parent step, or NULL if none (but see below)
184 : * colTypes: OID list of set-op's result column datatypes
185 : * colCollations: OID list of set-op's result column collations
186 : * refnames_tlist: targetlist to take column names from
187 : *
188 : * parentOp should be passed as NULL unless that step is interested in
189 : * getting sorted output from this step. ("Sorted" means "sorted according
190 : * to the default btree opclasses of the result column datatypes".)
191 : *
192 : * Returns a RelOptInfo for the subtree, as well as these output parameters:
193 : * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
194 : * *istrivial_tlist: true if, and only if, datatypes between parent and child
195 : * match.
196 : *
197 : * If setOp is a leaf node, this function plans the sub-query but does
198 : * not populate the pathlist of the returned RelOptInfo. The caller will
199 : * generate SubqueryScan paths using useful path(s) of the subquery (see
200 : * build_setop_child_paths). But this function does build the paths for
201 : * set-operation nodes.
202 : *
203 : * The pTargetList output parameter is mostly redundant with the pathtarget
204 : * of the returned RelOptInfo, but for the moment we need it because much of
205 : * the logic in this file depends on flag columns being marked resjunk.
206 : * XXX Now that there are no flag columns and hence no resjunk columns, we
207 : * could probably refactor this file to deal only in pathtargets.
208 : *
209 : * We don't have to care about typmods here: the only allowed difference
210 : * between set-op input and output typmods is input is a specific typmod
211 : * and output is -1, and that does not require a coercion.
212 : */
213 : static RelOptInfo *
214 17603 : recurse_set_operations(Node *setOp, PlannerInfo *root,
215 : SetOperationStmt *parentOp,
216 : List *colTypes, List *colCollations,
217 : List *refnames_tlist,
218 : List **pTargetList,
219 : bool *istrivial_tlist)
220 : {
221 : RelOptInfo *rel;
222 :
223 17603 : *istrivial_tlist = true; /* for now */
224 :
225 : /* Guard against stack overflow due to overly complex setop nests */
226 17603 : check_stack_depth();
227 :
228 17603 : if (IsA(setOp, RangeTblRef))
229 : {
230 13241 : RangeTblRef *rtr = (RangeTblRef *) setOp;
231 13241 : RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
232 13241 : Query *subquery = rte->subquery;
233 : PlannerInfo *subroot;
234 : List *tlist;
235 : bool trivial_tlist;
236 : char *plan_name;
237 :
238 : Assert(subquery != NULL);
239 :
240 : /* Build a RelOptInfo for this leaf subquery. */
241 13241 : rel = build_simple_rel(root, rtr->rtindex, NULL);
242 :
243 : /* plan_params should not be in use in current query level */
244 : Assert(root->plan_params == NIL);
245 :
246 : /*
247 : * Generate a subroot and Paths for the subquery. If we have a
248 : * parentOp, pass that down to encourage subquery_planner to consider
249 : * suitably-sorted Paths.
250 : */
251 13241 : plan_name = choose_plan_name(root->glob, "setop", true);
252 13241 : subroot = rel->subroot = subquery_planner(root->glob, subquery,
253 : plan_name, root,
254 : false, root->tuple_fraction,
255 : parentOp);
256 :
257 : /*
258 : * It should not be possible for the primitive query to contain any
259 : * cross-references to other primitive queries in the setop tree.
260 : */
261 13241 : if (root->plan_params)
262 0 : elog(ERROR, "unexpected outer reference in set operation subquery");
263 :
264 : /* Figure out the appropriate target list for this subquery. */
265 13241 : tlist = generate_setop_tlist(colTypes, colCollations,
266 13241 : rtr->rtindex,
267 : true,
268 : subroot->processed_tlist,
269 : refnames_tlist,
270 : &trivial_tlist);
271 13241 : rel->reltarget = create_pathtarget(root, tlist);
272 :
273 : /* Return the fully-fledged tlist to caller, too */
274 13241 : *pTargetList = tlist;
275 13241 : *istrivial_tlist = trivial_tlist;
276 : }
277 4362 : else if (IsA(setOp, SetOperationStmt))
278 : {
279 4362 : SetOperationStmt *op = (SetOperationStmt *) setOp;
280 :
281 : /* UNIONs are much different from INTERSECT/EXCEPT */
282 4362 : if (op->op == SETOP_UNION)
283 3738 : rel = generate_union_paths(op, root,
284 : refnames_tlist,
285 : pTargetList);
286 : else
287 624 : rel = generate_nonunion_paths(op, root,
288 : refnames_tlist,
289 : pTargetList);
290 :
291 : /*
292 : * If necessary, add a Result node to project the caller-requested
293 : * output columns.
294 : *
295 : * XXX you don't really want to know about this: setrefs.c will apply
296 : * fix_upper_expr() to the Result node's tlist. This would fail if the
297 : * Vars generated by generate_setop_tlist() were not exactly equal()
298 : * to the corresponding tlist entries of the subplan. However, since
299 : * the subplan was generated by generate_union_paths() or
300 : * generate_nonunion_paths(), and hence its tlist was generated by
301 : * generate_append_tlist() or generate_setop_tlist(), this will work.
302 : * We just tell generate_setop_tlist() to use varno 0.
303 : */
304 4362 : if (!tlist_same_datatypes(*pTargetList, colTypes, false) ||
305 4352 : !tlist_same_collations(*pTargetList, colCollations, false))
306 : {
307 : PathTarget *target;
308 : bool trivial_tlist;
309 : ListCell *lc;
310 :
311 10 : *pTargetList = generate_setop_tlist(colTypes, colCollations,
312 : 0,
313 : false,
314 : *pTargetList,
315 : refnames_tlist,
316 : &trivial_tlist);
317 10 : *istrivial_tlist = trivial_tlist;
318 10 : target = create_pathtarget(root, *pTargetList);
319 :
320 : /* Apply projection to each path */
321 20 : foreach(lc, rel->pathlist)
322 : {
323 10 : Path *subpath = (Path *) lfirst(lc);
324 : Path *path;
325 :
326 : Assert(subpath->param_info == NULL);
327 10 : path = apply_projection_to_path(root, subpath->parent,
328 : subpath, target);
329 : /* If we had to add a Result, path is different from subpath */
330 10 : if (path != subpath)
331 10 : lfirst(lc) = path;
332 : }
333 :
334 : /* Apply projection to each partial path */
335 10 : foreach(lc, rel->partial_pathlist)
336 : {
337 0 : Path *subpath = (Path *) lfirst(lc);
338 : Path *path;
339 :
340 : Assert(subpath->param_info == NULL);
341 :
342 : /* avoid apply_projection_to_path, in case of multiple refs */
343 0 : path = (Path *) create_projection_path(root, subpath->parent,
344 : subpath, target);
345 0 : lfirst(lc) = path;
346 : }
347 : }
348 4362 : postprocess_setop_rel(root, rel);
349 : }
350 : else
351 : {
352 0 : elog(ERROR, "unrecognized node type: %d",
353 : (int) nodeTag(setOp));
354 : *pTargetList = NIL;
355 : rel = NULL; /* keep compiler quiet */
356 : }
357 :
358 17603 : return rel;
359 : }
360 :
361 : /*
362 : * Generate paths for a recursive UNION node
363 : */
364 : static RelOptInfo *
365 692 : generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
366 : List *refnames_tlist,
367 : List **pTargetList)
368 : {
369 : RelOptInfo *result_rel;
370 : Path *path;
371 : RelOptInfo *lrel,
372 : *rrel;
373 : Path *lpath;
374 : Path *rpath;
375 : List *lpath_tlist;
376 : bool lpath_trivial_tlist;
377 : List *rpath_tlist;
378 : bool rpath_trivial_tlist;
379 : List *tlist;
380 : List *groupList;
381 : double dNumGroups;
382 :
383 : /* Parser should have rejected other cases */
384 692 : if (setOp->op != SETOP_UNION)
385 0 : elog(ERROR, "only UNION queries can be recursive");
386 : /* Worktable ID should be assigned */
387 : Assert(root->wt_param_id >= 0);
388 :
389 : /*
390 : * Unlike a regular UNION node, process the left and right inputs
391 : * separately without any intention of combining them into one Append.
392 : */
393 692 : lrel = recurse_set_operations(setOp->larg, root,
394 : NULL, /* no value in sorted results */
395 : setOp->colTypes, setOp->colCollations,
396 : refnames_tlist,
397 : &lpath_tlist,
398 : &lpath_trivial_tlist);
399 692 : if (lrel->rtekind == RTE_SUBQUERY)
400 692 : build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
401 : NIL, NULL);
402 692 : lpath = lrel->cheapest_total_path;
403 : /* The right path will want to look at the left one ... */
404 692 : root->non_recursive_path = lpath;
405 692 : rrel = recurse_set_operations(setOp->rarg, root,
406 : NULL, /* no value in sorted results */
407 : setOp->colTypes, setOp->colCollations,
408 : refnames_tlist,
409 : &rpath_tlist,
410 : &rpath_trivial_tlist);
411 692 : if (rrel->rtekind == RTE_SUBQUERY)
412 687 : build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
413 : NIL, NULL);
414 692 : rpath = rrel->cheapest_total_path;
415 692 : root->non_recursive_path = NULL;
416 :
417 : /*
418 : * Generate tlist for RecursiveUnion path node --- same as in Append cases
419 : */
420 692 : tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations,
421 692 : list_make2(lpath_tlist, rpath_tlist),
422 : refnames_tlist);
423 :
424 692 : *pTargetList = tlist;
425 :
426 : /* Build result relation. */
427 692 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
428 692 : bms_union(lrel->relids, rrel->relids));
429 692 : result_rel->reltarget = create_pathtarget(root, tlist);
430 :
431 : /*
432 : * If UNION, identify the grouping operators
433 : */
434 692 : if (setOp->all)
435 : {
436 402 : groupList = NIL;
437 402 : dNumGroups = 0;
438 : }
439 : else
440 : {
441 : /* Identify the grouping semantics */
442 290 : groupList = generate_setop_grouplist(setOp, tlist);
443 :
444 : /* We only support hashing here */
445 290 : if (!grouping_is_hashable(groupList))
446 4 : ereport(ERROR,
447 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
448 : errmsg("could not implement recursive UNION"),
449 : errdetail("All column datatypes must be hashable.")));
450 :
451 : /*
452 : * For the moment, take the number of distinct groups as equal to the
453 : * total input size, ie, the worst case.
454 : */
455 286 : dNumGroups = lpath->rows + rpath->rows * 10;
456 : }
457 :
458 : /*
459 : * And make the path node.
460 : */
461 688 : path = (Path *) create_recursiveunion_path(root,
462 : result_rel,
463 : lpath,
464 : rpath,
465 688 : result_rel->reltarget,
466 : groupList,
467 : root->wt_param_id,
468 : dNumGroups);
469 :
470 688 : add_path(result_rel, path);
471 688 : postprocess_setop_rel(root, result_rel);
472 688 : return result_rel;
473 : }
474 :
475 : /*
476 : * build_setop_child_paths
477 : * Build paths for the set op child relation denoted by 'rel'.
478 : *
479 : * 'rel' is an RTE_SUBQUERY relation. We have already generated paths within
480 : * the subquery's subroot; the task here is to create SubqueryScan paths for
481 : * 'rel', representing scans of the useful subquery paths.
482 : *
483 : * interesting_pathkeys: if not NIL, also include paths that suit these
484 : * pathkeys, sorting any unsorted paths as required.
485 : * *pNumGroups: if not NULL, we estimate the number of distinct groups
486 : * in the result, and store it there.
487 : */
488 : static void
489 13241 : build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
490 : bool trivial_tlist, List *child_tlist,
491 : List *interesting_pathkeys, double *pNumGroups)
492 : {
493 : RelOptInfo *final_rel;
494 13241 : List *setop_pathkeys = rel->subroot->setop_pathkeys;
495 : ListCell *lc;
496 :
497 : /* it can't be a set op child rel if it's not a subquery */
498 : Assert(rel->rtekind == RTE_SUBQUERY);
499 :
500 : /* when sorting is needed, add child rel equivalences */
501 13241 : if (interesting_pathkeys != NIL)
502 10153 : add_setop_child_rel_equivalences(root,
503 : rel,
504 : child_tlist,
505 : interesting_pathkeys);
506 :
507 : /*
508 : * Mark rel with estimated output rows, width, etc. Note that we have to
509 : * do this before generating outer-query paths, else cost_subqueryscan is
510 : * not happy.
511 : */
512 13241 : set_subquery_size_estimates(root, rel);
513 :
514 : /*
515 : * Since we may want to add a partial path to this relation, we must set
516 : * its consider_parallel flag correctly.
517 : */
518 13241 : final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
519 13241 : rel->consider_parallel = final_rel->consider_parallel;
520 :
521 : /* Generate subquery scan paths for any interesting path in final_rel */
522 34477 : foreach(lc, final_rel->pathlist)
523 : {
524 21236 : Path *subpath = (Path *) lfirst(lc);
525 : List *pathkeys;
526 21236 : Path *cheapest_input_path = final_rel->cheapest_total_path;
527 : bool is_sorted;
528 : int presorted_keys;
529 :
530 : /* If the input rel is dummy, propagate that to this query level */
531 21236 : if (is_dummy_rel(final_rel))
532 : {
533 68 : mark_dummy_rel(rel);
534 3184 : continue;
535 : }
536 :
537 : /*
538 : * Include the cheapest path as-is so that the set operation can be
539 : * cheaply implemented using a method which does not require the input
540 : * to be sorted.
541 : */
542 21168 : if (subpath == cheapest_input_path)
543 : {
544 : /* Convert subpath's pathkeys to outer representation */
545 13173 : pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
546 : make_tlist_from_pathtarget(subpath->pathtarget));
547 :
548 : /* Generate outer path using this subpath */
549 13173 : add_path(rel, (Path *) create_subqueryscan_path(root,
550 : rel,
551 : subpath,
552 : trivial_tlist,
553 : pathkeys,
554 : NULL));
555 : }
556 :
557 : /* skip dealing with sorted paths if the setop doesn't need them */
558 21168 : if (interesting_pathkeys == NIL)
559 3106 : continue;
560 :
561 : /*
562 : * Create paths to suit final sort order required for setop_pathkeys.
563 : * Here we'll sort the cheapest input path (if not sorted already) and
564 : * incremental sort any paths which are partially sorted.
565 : */
566 18062 : is_sorted = pathkeys_count_contained_in(setop_pathkeys,
567 : subpath->pathkeys,
568 : &presorted_keys);
569 :
570 18062 : if (!is_sorted)
571 : {
572 11985 : double limittuples = rel->subroot->limit_tuples;
573 :
574 : /*
575 : * Try at least sorting the cheapest path and also try
576 : * incrementally sorting any path which is partially sorted
577 : * already (no need to deal with paths which have presorted keys
578 : * when incremental sort is disabled unless it's the cheapest
579 : * input path).
580 : */
581 11985 : if (subpath != cheapest_input_path &&
582 2666 : (presorted_keys == 0 || !enable_incremental_sort))
583 10 : continue;
584 :
585 : /*
586 : * We've no need to consider both a sort and incremental sort.
587 : * We'll just do a sort if there are no presorted keys and an
588 : * incremental sort when there are presorted keys.
589 : */
590 11975 : if (presorted_keys == 0 || !enable_incremental_sort)
591 9319 : subpath = (Path *) create_sort_path(rel->subroot,
592 : final_rel,
593 : subpath,
594 : setop_pathkeys,
595 : limittuples);
596 : else
597 2656 : subpath = (Path *) create_incremental_sort_path(rel->subroot,
598 : final_rel,
599 : subpath,
600 : setop_pathkeys,
601 : presorted_keys,
602 : limittuples);
603 : }
604 :
605 : /*
606 : * subpath is now sorted, so add it to the pathlist. We already added
607 : * the cheapest_input_path above, so don't add it again unless we just
608 : * sorted it.
609 : */
610 18052 : if (subpath != cheapest_input_path)
611 : {
612 : /* Convert subpath's pathkeys to outer representation */
613 17278 : pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
614 : make_tlist_from_pathtarget(subpath->pathtarget));
615 :
616 : /* Generate outer path using this subpath */
617 17278 : add_path(rel, (Path *) create_subqueryscan_path(root,
618 : rel,
619 : subpath,
620 : trivial_tlist,
621 : pathkeys,
622 : NULL));
623 : }
624 : }
625 :
626 : /* if consider_parallel is false, there should be no partial paths */
627 : Assert(final_rel->consider_parallel ||
628 : final_rel->partial_pathlist == NIL);
629 :
630 : /*
631 : * If we have a partial path for the child relation, we can use that to
632 : * build a partial path for this relation. But there's no point in
633 : * considering any path but the cheapest.
634 : */
635 13241 : if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
636 9207 : final_rel->partial_pathlist != NIL)
637 : {
638 : Path *partial_subpath;
639 : Path *partial_path;
640 :
641 15 : partial_subpath = linitial(final_rel->partial_pathlist);
642 : partial_path = (Path *)
643 15 : create_subqueryscan_path(root, rel, partial_subpath,
644 : trivial_tlist,
645 : NIL, NULL);
646 15 : add_partial_path(rel, partial_path);
647 : }
648 :
649 13241 : postprocess_setop_rel(root, rel);
650 :
651 : /*
652 : * Estimate number of groups if caller wants it. If the subquery used
653 : * grouping or aggregation, its output is probably mostly unique anyway;
654 : * otherwise do statistical estimation.
655 : *
656 : * XXX you don't really want to know about this: we do the estimation
657 : * using the subroot->parse's original targetlist expressions, not the
658 : * subroot->processed_tlist which might seem more appropriate. The reason
659 : * is that if the subquery is itself a setop, it may return a
660 : * processed_tlist containing "varno 0" Vars generated by
661 : * generate_append_tlist, and those would confuse estimate_num_groups
662 : * mightily. We ought to get rid of the "varno 0" hack, but that requires
663 : * a redesign of the parsetree representation of setops, so that there can
664 : * be an RTE corresponding to each setop's output. Note, we use this not
665 : * subquery's targetlist but subroot->parse's targetlist, because it was
666 : * revised by self-join removal. subquery's targetlist might contain the
667 : * references to the removed relids.
668 : */
669 13241 : if (pNumGroups)
670 : {
671 1218 : PlannerInfo *subroot = rel->subroot;
672 1218 : Query *subquery = subroot->parse;
673 :
674 1218 : if (subquery->groupClause || subquery->groupingSets ||
675 1218 : subquery->distinctClause || subroot->hasHavingQual ||
676 1208 : subquery->hasAggs)
677 10 : *pNumGroups = rel->cheapest_total_path->rows;
678 : else
679 1208 : *pNumGroups = estimate_num_groups(subroot,
680 1208 : get_tlist_exprs(subroot->parse->targetList, false),
681 1208 : rel->cheapest_total_path->rows,
682 : NULL,
683 : NULL);
684 : }
685 13241 : }
686 :
687 : /*
688 : * Generate paths for a UNION or UNION ALL node
689 : */
690 : static RelOptInfo *
691 3738 : generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
692 : List *refnames_tlist,
693 : List **pTargetList)
694 : {
695 3738 : Relids relids = NULL;
696 : RelOptInfo *result_rel;
697 : ListCell *lc;
698 : ListCell *lc2;
699 : ListCell *lc3;
700 3738 : AppendPathInput cheapest = {0};
701 3738 : AppendPathInput ordered = {0};
702 3738 : AppendPathInput partial = {0};
703 3738 : bool partial_paths_valid = true;
704 3738 : bool consider_parallel = true;
705 : List *rellist;
706 : List *tlist_list;
707 : List *trivial_tlist_list;
708 : List *tlist;
709 3738 : List *groupList = NIL;
710 : Path *apath;
711 3738 : Path *gpath = NULL;
712 3738 : bool try_sorted = false;
713 3738 : List *union_pathkeys = NIL;
714 :
715 : /*
716 : * If any of my children are identical UNION nodes (same op, all-flag, and
717 : * colTypes/colCollations) then they can be merged into this node so that
718 : * we generate only one Append/MergeAppend and unique-ification for the
719 : * lot. Recurse to find such nodes.
720 : */
721 3738 : rellist = plan_union_children(root,
722 : op,
723 : refnames_tlist,
724 : &tlist_list,
725 : &trivial_tlist_list);
726 :
727 : /*
728 : * Generate tlist for Append/MergeAppend plan node.
729 : *
730 : * The tlist for an Append plan isn't important as far as the Append is
731 : * concerned, but we must make it look real anyway for the benefit of the
732 : * next plan level up.
733 : */
734 3738 : tlist = generate_append_tlist(op->colTypes, op->colCollations,
735 : tlist_list, refnames_tlist);
736 3738 : *pTargetList = tlist;
737 :
738 : /* For UNIONs (not UNION ALL), try sorting, if sorting is possible */
739 3738 : if (!op->all)
740 : {
741 : /* Identify the grouping semantics */
742 3244 : groupList = generate_setop_grouplist(op, tlist);
743 :
744 3244 : if (grouping_is_sortable(op->groupClauses))
745 : {
746 3190 : try_sorted = true;
747 : /* Determine the pathkeys for sorting by the whole target list */
748 3190 : union_pathkeys = make_pathkeys_for_sortclauses(root, groupList,
749 : tlist);
750 :
751 3190 : root->query_pathkeys = union_pathkeys;
752 : }
753 : }
754 :
755 : /*
756 : * Now that we've got the append target list, we can build the union child
757 : * paths.
758 : */
759 14487 : forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
760 : {
761 10749 : RelOptInfo *rel = lfirst(lc);
762 10749 : bool trivial_tlist = lfirst_int(lc2);
763 10749 : List *child_tlist = lfirst_node(List, lc3);
764 :
765 : /* only build paths for the union children */
766 10749 : if (rel->rtekind == RTE_SUBQUERY)
767 10644 : build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
768 : union_pathkeys, NULL);
769 : }
770 :
771 : /* Build path lists and relid set. */
772 14487 : foreach(lc, rellist)
773 : {
774 10749 : RelOptInfo *rel = lfirst(lc);
775 : Path *ordered_path;
776 :
777 : /*
778 : * Record the relids so that we can identify the correct
779 : * UPPERREL_SETOP RelOptInfo below.
780 : */
781 10749 : relids = bms_add_members(relids, rel->relids);
782 :
783 : /* Skip any UNION children that are proven not to yield any rows */
784 10749 : if (is_dummy_rel(rel))
785 38 : continue;
786 :
787 21422 : cheapest.subpaths = lappend(cheapest.subpaths,
788 10711 : rel->cheapest_total_path);
789 :
790 10711 : if (try_sorted)
791 : {
792 3437 : ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
793 : union_pathkeys,
794 : NULL,
795 : TOTAL_COST,
796 : false);
797 :
798 3437 : if (ordered_path != NULL)
799 489 : ordered.subpaths = lappend(ordered.subpaths, ordered_path);
800 : else
801 : {
802 : /*
803 : * If we can't find a sorted path, just give up trying to
804 : * generate a list of correctly sorted child paths. This can
805 : * happen when type coercion was added to the targetlist due
806 : * to mismatching types from the union children.
807 : */
808 2948 : try_sorted = false;
809 : }
810 : }
811 :
812 10711 : if (consider_parallel)
813 : {
814 7783 : if (!rel->consider_parallel)
815 : {
816 2810 : consider_parallel = false;
817 2810 : partial_paths_valid = false;
818 : }
819 4973 : else if (rel->partial_pathlist == NIL)
820 4963 : partial_paths_valid = false;
821 : else
822 10 : partial.partial_subpaths = lappend(partial.partial_subpaths,
823 10 : linitial(rel->partial_pathlist));
824 : }
825 : }
826 :
827 : /* Build result relation. */
828 3738 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
829 3738 : result_rel->reltarget = create_setop_pathtarget(root, tlist,
830 : cheapest.subpaths);
831 3738 : result_rel->consider_parallel = consider_parallel;
832 3738 : result_rel->consider_startup = (root->tuple_fraction > 0);
833 :
834 : /* If all UNION children were dummy rels, make the resulting rel dummy */
835 3738 : if (cheapest.subpaths == NIL)
836 : {
837 5 : mark_dummy_rel(result_rel);
838 :
839 5 : return result_rel;
840 : }
841 :
842 : /*
843 : * Append the child results together using the cheapest paths from each
844 : * union child.
845 : */
846 3733 : apath = (Path *) create_append_path(root, result_rel, cheapest,
847 : NIL, NULL, 0, false, -1);
848 :
849 : /*
850 : * Estimate number of groups. For now we just assume the output is unique
851 : * --- this is certainly true for the UNION case, and we want worst-case
852 : * estimates anyway.
853 : */
854 3733 : result_rel->rows = apath->rows;
855 :
856 : /*
857 : * Now consider doing the same thing using the partial paths plus Append
858 : * plus Gather.
859 : */
860 3733 : if (partial_paths_valid)
861 : {
862 : Path *papath;
863 5 : int parallel_workers = 0;
864 :
865 : /* Find the highest number of workers requested for any subpath. */
866 15 : foreach(lc, partial.partial_subpaths)
867 : {
868 10 : Path *subpath = lfirst(lc);
869 :
870 10 : parallel_workers = Max(parallel_workers,
871 : subpath->parallel_workers);
872 : }
873 : Assert(parallel_workers > 0);
874 :
875 : /*
876 : * If the use of parallel append is permitted, always request at least
877 : * log2(# of children) paths. We assume it can be useful to have
878 : * extra workers in this case because they will be spread out across
879 : * the children. The precise formula is just a guess; see
880 : * add_paths_to_append_rel.
881 : */
882 5 : if (enable_parallel_append)
883 : {
884 5 : parallel_workers = Max(parallel_workers,
885 : pg_leftmost_one_pos32(list_length(partial.partial_subpaths)) + 1);
886 5 : parallel_workers = Min(parallel_workers,
887 : max_parallel_workers_per_gather);
888 : }
889 : Assert(parallel_workers > 0);
890 :
891 : papath = (Path *)
892 5 : create_append_path(root, result_rel, partial,
893 : NIL, NULL, parallel_workers,
894 : enable_parallel_append, -1);
895 : gpath = (Path *)
896 5 : create_gather_path(root, result_rel, papath,
897 5 : result_rel->reltarget, NULL, NULL);
898 : }
899 :
900 3733 : if (!op->all)
901 : {
902 : double dNumGroups;
903 3239 : bool can_sort = grouping_is_sortable(groupList);
904 3239 : bool can_hash = grouping_is_hashable(groupList);
905 3239 : Path *first_path = linitial(cheapest.subpaths);
906 :
907 : /*
908 : * Estimate the number of UNION output rows. In the case when only a
909 : * single UNION child remains, we can use estimate_num_groups() on
910 : * that child. We must be careful not to do this when that child is
911 : * the result of some other set operation as the targetlist will
912 : * contain Vars with varno==0, which estimate_num_groups() wouldn't
913 : * like.
914 : */
915 3239 : if (list_length(cheapest.subpaths) == 1 &&
916 15 : first_path->parent->reloptkind != RELOPT_UPPER_REL)
917 : {
918 10 : dNumGroups = estimate_num_groups(root,
919 10 : first_path->pathtarget->exprs,
920 : first_path->rows,
921 : NULL,
922 : NULL);
923 : }
924 : else
925 : {
926 : /*
927 : * Otherwise, for the moment, take the number of distinct groups
928 : * as equal to the total input size, i.e., the worst case. This
929 : * is too conservative, but it's not clear how to get a decent
930 : * estimate of the true size. One should note as well the
931 : * propensity of novices to write UNION rather than UNION ALL even
932 : * when they don't expect any duplicates...
933 : */
934 3229 : dNumGroups = apath->rows;
935 : }
936 :
937 3239 : if (can_hash)
938 : {
939 : Path *path;
940 :
941 : /*
942 : * Try a hash aggregate plan on 'apath'. This is the cheapest
943 : * available path containing each append child.
944 : */
945 3179 : path = (Path *) create_agg_path(root,
946 : result_rel,
947 : apath,
948 3179 : result_rel->reltarget,
949 : AGG_HASHED,
950 : AGGSPLIT_SIMPLE,
951 : groupList,
952 : NIL,
953 : NULL,
954 : dNumGroups);
955 3179 : add_path(result_rel, path);
956 :
957 : /* Try hash aggregate on the Gather path, if valid */
958 3179 : if (gpath != NULL)
959 : {
960 : /* Hashed aggregate plan --- no sort needed */
961 5 : path = (Path *) create_agg_path(root,
962 : result_rel,
963 : gpath,
964 5 : result_rel->reltarget,
965 : AGG_HASHED,
966 : AGGSPLIT_SIMPLE,
967 : groupList,
968 : NIL,
969 : NULL,
970 : dNumGroups);
971 5 : add_path(result_rel, path);
972 : }
973 : }
974 :
975 3239 : if (can_sort)
976 : {
977 3185 : Path *path = apath;
978 :
979 : /* Try Sort -> Unique on the Append path */
980 3185 : if (groupList != NIL)
981 3160 : path = (Path *) create_sort_path(root, result_rel, path,
982 : make_pathkeys_for_sortclauses(root, groupList, tlist),
983 : -1.0);
984 :
985 3185 : path = (Path *) create_unique_path(root,
986 : result_rel,
987 : path,
988 3185 : list_length(path->pathkeys),
989 : dNumGroups);
990 :
991 3185 : add_path(result_rel, path);
992 :
993 : /* Try Sort -> Unique on the Gather path, if set */
994 3185 : if (gpath != NULL)
995 : {
996 5 : path = gpath;
997 :
998 5 : path = (Path *) create_sort_path(root, result_rel, path,
999 : make_pathkeys_for_sortclauses(root, groupList, tlist),
1000 : -1.0);
1001 :
1002 5 : path = (Path *) create_unique_path(root,
1003 : result_rel,
1004 : path,
1005 5 : list_length(path->pathkeys),
1006 : dNumGroups);
1007 5 : add_path(result_rel, path);
1008 : }
1009 : }
1010 :
1011 : /*
1012 : * Try making a MergeAppend path if we managed to find a path with the
1013 : * correct pathkeys in each union child query.
1014 : */
1015 3239 : if (try_sorted && groupList != NIL)
1016 : {
1017 : Path *path;
1018 :
1019 212 : path = (Path *) create_merge_append_path(root,
1020 : result_rel,
1021 : ordered.subpaths,
1022 : NIL,
1023 : union_pathkeys,
1024 : NULL);
1025 :
1026 : /* and make the MergeAppend unique */
1027 212 : path = (Path *) create_unique_path(root,
1028 : result_rel,
1029 : path,
1030 : list_length(tlist),
1031 : dNumGroups);
1032 :
1033 212 : add_path(result_rel, path);
1034 : }
1035 : }
1036 : else
1037 : {
1038 : /* UNION ALL */
1039 494 : add_path(result_rel, apath);
1040 :
1041 494 : if (gpath != NULL)
1042 0 : add_path(result_rel, gpath);
1043 : }
1044 :
1045 3733 : return result_rel;
1046 : }
1047 :
1048 : /*
1049 : * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
1050 : */
1051 : static RelOptInfo *
1052 624 : generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
1053 : List *refnames_tlist,
1054 : List **pTargetList)
1055 : {
1056 : RelOptInfo *result_rel;
1057 : RelOptInfo *lrel,
1058 : *rrel;
1059 624 : double save_fraction = root->tuple_fraction;
1060 : Path *lpath,
1061 : *rpath,
1062 : *path;
1063 : List *lpath_tlist,
1064 : *rpath_tlist,
1065 : *tlist,
1066 : *groupList;
1067 : bool lpath_trivial_tlist,
1068 : rpath_trivial_tlist,
1069 : result_trivial_tlist;
1070 624 : List *nonunion_pathkeys = NIL;
1071 : double dLeftGroups,
1072 : dRightGroups,
1073 : dNumGroups,
1074 : dNumOutputRows;
1075 : bool can_sort;
1076 : bool can_hash;
1077 : SetOpCmd cmd;
1078 :
1079 : /*
1080 : * Tell children to fetch all tuples.
1081 : */
1082 624 : root->tuple_fraction = 0.0;
1083 :
1084 : /* Recurse on children */
1085 624 : lrel = recurse_set_operations(op->larg, root,
1086 : op,
1087 : op->colTypes, op->colCollations,
1088 : refnames_tlist,
1089 : &lpath_tlist,
1090 : &lpath_trivial_tlist);
1091 :
1092 624 : rrel = recurse_set_operations(op->rarg, root,
1093 : op,
1094 : op->colTypes, op->colCollations,
1095 : refnames_tlist,
1096 : &rpath_tlist,
1097 : &rpath_trivial_tlist);
1098 :
1099 : /*
1100 : * Generate tlist for SetOp plan node.
1101 : *
1102 : * The tlist for a SetOp plan isn't important so far as the SetOp is
1103 : * concerned, but we must make it look real anyway for the benefit of the
1104 : * next plan level up.
1105 : */
1106 624 : tlist = generate_setop_tlist(op->colTypes, op->colCollations,
1107 : 0, false, lpath_tlist, refnames_tlist,
1108 : &result_trivial_tlist);
1109 :
1110 : /* We should not have needed any type coercions in the tlist */
1111 : Assert(result_trivial_tlist);
1112 :
1113 624 : *pTargetList = tlist;
1114 :
1115 : /* Identify the grouping semantics */
1116 624 : groupList = generate_setop_grouplist(op, tlist);
1117 :
1118 : /* Check whether the operators support sorting or hashing */
1119 624 : can_sort = grouping_is_sortable(groupList);
1120 624 : can_hash = grouping_is_hashable(groupList);
1121 624 : if (!can_sort && !can_hash)
1122 0 : ereport(ERROR,
1123 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1124 : /* translator: %s is INTERSECT or EXCEPT */
1125 : errmsg("could not implement %s",
1126 : (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT"),
1127 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1128 :
1129 624 : if (can_sort)
1130 : {
1131 : /* Determine the pathkeys for sorting by the whole target list */
1132 624 : nonunion_pathkeys = make_pathkeys_for_sortclauses(root, groupList,
1133 : tlist);
1134 :
1135 624 : root->query_pathkeys = nonunion_pathkeys;
1136 : }
1137 :
1138 : /*
1139 : * Now that we've got all that info, we can build the child paths.
1140 : */
1141 624 : if (lrel->rtekind == RTE_SUBQUERY)
1142 604 : build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
1143 : nonunion_pathkeys, &dLeftGroups);
1144 : else
1145 20 : dLeftGroups = lrel->rows;
1146 624 : if (rrel->rtekind == RTE_SUBQUERY)
1147 614 : build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
1148 : nonunion_pathkeys, &dRightGroups);
1149 : else
1150 10 : dRightGroups = rrel->rows;
1151 :
1152 : /* Undo effects of forcing tuple_fraction to 0 */
1153 624 : root->tuple_fraction = save_fraction;
1154 :
1155 : /*
1156 : * For EXCEPT, we must put the left input first. For INTERSECT, either
1157 : * order should give the same results, and we prefer to put the smaller
1158 : * input first in order to (a) minimize the size of the hash table in the
1159 : * hashing case, and (b) improve our chances of exploiting the executor's
1160 : * fast path for empty left-hand input. "Smaller" means the one with the
1161 : * fewer groups.
1162 : */
1163 624 : if (op->op != SETOP_EXCEPT && dLeftGroups > dRightGroups)
1164 : {
1165 : /* need to swap the two inputs */
1166 : RelOptInfo *tmprel;
1167 : List *tmplist;
1168 : double tmpd;
1169 :
1170 30 : tmprel = lrel;
1171 30 : lrel = rrel;
1172 30 : rrel = tmprel;
1173 30 : tmplist = lpath_tlist;
1174 30 : lpath_tlist = rpath_tlist;
1175 30 : rpath_tlist = tmplist;
1176 30 : tmpd = dLeftGroups;
1177 30 : dLeftGroups = dRightGroups;
1178 30 : dRightGroups = tmpd;
1179 : }
1180 :
1181 624 : lpath = lrel->cheapest_total_path;
1182 624 : rpath = rrel->cheapest_total_path;
1183 :
1184 : /* Build result relation. */
1185 624 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
1186 624 : bms_union(lrel->relids, rrel->relids));
1187 :
1188 : /*
1189 : * Create the PathTarget and set the width accordingly. For EXCEPT, since
1190 : * the set op result won't contain rows from the rpath, we only account
1191 : * for the width of the lpath. For INTERSECT, use both input paths.
1192 : */
1193 624 : if (op->op == SETOP_EXCEPT)
1194 419 : result_rel->reltarget = create_setop_pathtarget(root, tlist,
1195 419 : list_make1(lpath));
1196 : else
1197 205 : result_rel->reltarget = create_setop_pathtarget(root, tlist,
1198 205 : list_make2(lpath, rpath));
1199 :
1200 : /* Check for provably empty setop inputs and add short-circuit paths. */
1201 624 : if (op->op == SETOP_EXCEPT)
1202 : {
1203 : /*
1204 : * For EXCEPTs, if the left side is dummy then there's no need to
1205 : * inspect the right-hand side as scanning the right to find tuples to
1206 : * remove won't make the left-hand input any more empty.
1207 : */
1208 419 : if (is_dummy_rel(lrel))
1209 : {
1210 5 : mark_dummy_rel(result_rel);
1211 :
1212 5 : return result_rel;
1213 : }
1214 :
1215 : /* Handle EXCEPTs with dummy right input */
1216 414 : if (is_dummy_rel(rrel))
1217 : {
1218 5 : if (op->all)
1219 : {
1220 : Path *apath;
1221 5 : AppendPathInput append = {0};
1222 :
1223 5 : append.subpaths = list_make1(lpath);
1224 :
1225 : /*
1226 : * EXCEPT ALL: If the right-hand input is dummy then we can
1227 : * simply scan the left-hand input. To keep createplan.c
1228 : * happy, use a single child Append to handle the translation
1229 : * between the set op targetlist and the targetlist of the
1230 : * left input. The Append will be removed in setrefs.c.
1231 : */
1232 5 : apath = (Path *) create_append_path(root, result_rel,
1233 : append, NIL, NULL, 0,
1234 : false, -1);
1235 :
1236 5 : add_path(result_rel, apath);
1237 :
1238 5 : return result_rel;
1239 : }
1240 : else
1241 : {
1242 : /*
1243 : * To make EXCEPT with a dummy RHS work means having to
1244 : * deduplicate the left input. That could be done with
1245 : * AggPaths, but it doesn't seem worth the effort. Let the
1246 : * normal path generation code below handle this one.
1247 : */
1248 : }
1249 : }
1250 : }
1251 : else
1252 : {
1253 : /*
1254 : * For INTERSECT, if either input is a dummy rel then we can mark the
1255 : * result_rel as dummy since intersecting with an empty relation can
1256 : * never yield any results. This is true regardless of INTERSECT or
1257 : * INTERSECT ALL.
1258 : */
1259 205 : if (is_dummy_rel(lrel) || is_dummy_rel(rrel))
1260 : {
1261 15 : mark_dummy_rel(result_rel);
1262 :
1263 15 : return result_rel;
1264 : }
1265 : }
1266 :
1267 : /*
1268 : * Estimate number of distinct groups that we'll need hashtable entries
1269 : * for; this is the size of the left-hand input for EXCEPT, or the smaller
1270 : * input for INTERSECT. Also estimate the number of eventual output rows.
1271 : * In non-ALL cases, we estimate each group produces one output row; in
1272 : * ALL cases use the relevant relation size. These are worst-case
1273 : * estimates, of course, but we need to be conservative.
1274 : */
1275 599 : if (op->op == SETOP_EXCEPT)
1276 : {
1277 409 : dNumGroups = dLeftGroups;
1278 409 : dNumOutputRows = op->all ? lpath->rows : dNumGroups;
1279 : }
1280 : else
1281 : {
1282 190 : dNumGroups = dLeftGroups;
1283 190 : dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
1284 : }
1285 599 : result_rel->rows = dNumOutputRows;
1286 :
1287 : /* Select the SetOpCmd type */
1288 599 : switch (op->op)
1289 : {
1290 190 : case SETOP_INTERSECT:
1291 190 : cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
1292 190 : break;
1293 409 : case SETOP_EXCEPT:
1294 409 : cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
1295 409 : break;
1296 0 : default:
1297 0 : elog(ERROR, "unrecognized set op: %d", (int) op->op);
1298 : cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
1299 : break;
1300 : }
1301 :
1302 : /*
1303 : * If we can hash, that just requires a SetOp atop the cheapest inputs.
1304 : */
1305 599 : if (can_hash)
1306 : {
1307 549 : path = (Path *) create_setop_path(root,
1308 : result_rel,
1309 : lpath,
1310 : rpath,
1311 : cmd,
1312 : SETOP_HASHED,
1313 : groupList,
1314 : dNumGroups,
1315 : dNumOutputRows);
1316 549 : add_path(result_rel, path);
1317 : }
1318 :
1319 : /*
1320 : * If we can sort, generate the cheapest sorted input paths, and add a
1321 : * SetOp atop those.
1322 : */
1323 599 : if (can_sort)
1324 : {
1325 : List *pathkeys;
1326 : Path *slpath,
1327 : *srpath;
1328 :
1329 : /* First the left input ... */
1330 599 : pathkeys = make_pathkeys_for_sortclauses(root,
1331 : groupList,
1332 : lpath_tlist);
1333 599 : if (pathkeys_contained_in(pathkeys, lpath->pathkeys))
1334 80 : slpath = lpath; /* cheapest path is already sorted */
1335 : else
1336 : {
1337 519 : slpath = get_cheapest_path_for_pathkeys(lrel->pathlist,
1338 : nonunion_pathkeys,
1339 : NULL,
1340 : TOTAL_COST,
1341 : false);
1342 : /* Subquery failed to produce any presorted paths? */
1343 519 : if (slpath == NULL)
1344 165 : slpath = (Path *) create_sort_path(root,
1345 : lpath->parent,
1346 : lpath,
1347 : pathkeys,
1348 : -1.0);
1349 : }
1350 :
1351 : /* and now the same for the right. */
1352 599 : pathkeys = make_pathkeys_for_sortclauses(root,
1353 : groupList,
1354 : rpath_tlist);
1355 599 : if (pathkeys_contained_in(pathkeys, rpath->pathkeys))
1356 90 : srpath = rpath; /* cheapest path is already sorted */
1357 : else
1358 : {
1359 509 : srpath = get_cheapest_path_for_pathkeys(rrel->pathlist,
1360 : nonunion_pathkeys,
1361 : NULL,
1362 : TOTAL_COST,
1363 : false);
1364 : /* Subquery failed to produce any presorted paths? */
1365 509 : if (srpath == NULL)
1366 165 : srpath = (Path *) create_sort_path(root,
1367 : rpath->parent,
1368 : rpath,
1369 : pathkeys,
1370 : -1.0);
1371 : }
1372 :
1373 599 : path = (Path *) create_setop_path(root,
1374 : result_rel,
1375 : slpath,
1376 : srpath,
1377 : cmd,
1378 : SETOP_SORTED,
1379 : groupList,
1380 : dNumGroups,
1381 : dNumOutputRows);
1382 599 : add_path(result_rel, path);
1383 : }
1384 :
1385 599 : return result_rel;
1386 : }
1387 :
1388 : /*
1389 : * Pull up children of a UNION node that are identically-propertied UNIONs,
1390 : * and perform planning of the queries underneath the N-way UNION.
1391 : *
1392 : * The result is a list of RelOptInfos containing Paths for sub-nodes, with
1393 : * one entry for each descendant that is a leaf query or non-identical setop.
1394 : * We also return parallel lists of the childrens' targetlists and
1395 : * is-trivial-tlist flags.
1396 : *
1397 : * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
1398 : * output rows will be lost anyway.
1399 : */
1400 : static List *
1401 3738 : plan_union_children(PlannerInfo *root,
1402 : SetOperationStmt *top_union,
1403 : List *refnames_tlist,
1404 : List **tlist_list,
1405 : List **istrivial_tlist)
1406 : {
1407 3738 : List *pending_rels = list_make1(top_union);
1408 3738 : List *result = NIL;
1409 : List *child_tlist;
1410 : bool trivial_tlist;
1411 :
1412 3738 : *tlist_list = NIL;
1413 3738 : *istrivial_tlist = NIL;
1414 :
1415 21498 : while (pending_rels != NIL)
1416 : {
1417 17760 : Node *setOp = linitial(pending_rels);
1418 :
1419 17760 : pending_rels = list_delete_first(pending_rels);
1420 :
1421 17760 : if (IsA(setOp, SetOperationStmt))
1422 : {
1423 7116 : SetOperationStmt *op = (SetOperationStmt *) setOp;
1424 :
1425 7116 : if (op->op == top_union->op &&
1426 14057 : (op->all == top_union->all || op->all) &&
1427 14032 : equal(op->colTypes, top_union->colTypes) &&
1428 7011 : equal(op->colCollations, top_union->colCollations))
1429 : {
1430 : /* Same UNION, so fold children into parent */
1431 7011 : pending_rels = lcons(op->rarg, pending_rels);
1432 7011 : pending_rels = lcons(op->larg, pending_rels);
1433 7011 : continue;
1434 : }
1435 : }
1436 :
1437 : /*
1438 : * Not same, so plan this child separately.
1439 : *
1440 : * If top_union isn't a UNION ALL, then we are interested in sorted
1441 : * output from the child, so pass top_union as parentOp. Note that
1442 : * this isn't necessarily the child node's immediate SetOperationStmt
1443 : * parent, but that's fine: it's the effective parent.
1444 : */
1445 10749 : result = lappend(result, recurse_set_operations(setOp, root,
1446 10749 : top_union->all ? NULL : top_union,
1447 : top_union->colTypes,
1448 : top_union->colCollations,
1449 : refnames_tlist,
1450 : &child_tlist,
1451 : &trivial_tlist));
1452 10749 : *tlist_list = lappend(*tlist_list, child_tlist);
1453 10749 : *istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
1454 : }
1455 :
1456 3738 : return result;
1457 : }
1458 :
1459 : /*
1460 : * postprocess_setop_rel - perform steps required after adding paths
1461 : */
1462 : static void
1463 18291 : postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
1464 : {
1465 : /*
1466 : * We don't currently worry about allowing FDWs to contribute paths to
1467 : * this relation, but give extensions a chance.
1468 : */
1469 18291 : if (create_upper_paths_hook)
1470 0 : (*create_upper_paths_hook) (root, UPPERREL_SETOP,
1471 : NULL, rel, NULL);
1472 :
1473 : /* Select cheapest path */
1474 18291 : set_cheapest(rel);
1475 18291 : }
1476 :
1477 : /*
1478 : * Generate targetlist for a set-operation plan node
1479 : *
1480 : * colTypes: OID list of set-op's result column datatypes
1481 : * colCollations: OID list of set-op's result column collations
1482 : * varno: varno to use in generated Vars
1483 : * hack_constants: true to copy up constants (see comments in code)
1484 : * input_tlist: targetlist of this node's input node
1485 : * refnames_tlist: targetlist to take column names from
1486 : * trivial_tlist: output parameter, set to true if targetlist is trivial
1487 : */
1488 : static List *
1489 13875 : generate_setop_tlist(List *colTypes, List *colCollations,
1490 : Index varno,
1491 : bool hack_constants,
1492 : List *input_tlist,
1493 : List *refnames_tlist,
1494 : bool *trivial_tlist)
1495 : {
1496 13875 : List *tlist = NIL;
1497 13875 : int resno = 1;
1498 : ListCell *ctlc,
1499 : *cclc,
1500 : *itlc,
1501 : *rtlc;
1502 : TargetEntry *tle;
1503 : Node *expr;
1504 :
1505 13875 : *trivial_tlist = true; /* until proven differently */
1506 :
1507 56121 : forfour(ctlc, colTypes, cclc, colCollations,
1508 : itlc, input_tlist, rtlc, refnames_tlist)
1509 : {
1510 42246 : Oid colType = lfirst_oid(ctlc);
1511 42246 : Oid colColl = lfirst_oid(cclc);
1512 42246 : TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
1513 42246 : TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
1514 :
1515 : Assert(inputtle->resno == resno);
1516 : Assert(reftle->resno == resno);
1517 : Assert(!inputtle->resjunk);
1518 : Assert(!reftle->resjunk);
1519 :
1520 : /*
1521 : * Generate columns referencing input columns and having appropriate
1522 : * data types and column names. Insert datatype coercions where
1523 : * necessary.
1524 : *
1525 : * HACK: constants in the input's targetlist are copied up as-is
1526 : * rather than being referenced as subquery outputs. This is mainly
1527 : * to ensure that when we try to coerce them to the output column's
1528 : * datatype, the right things happen for UNKNOWN constants. But do
1529 : * this only at the first level of subquery-scan plans; we don't want
1530 : * phony constants appearing in the output tlists of upper-level
1531 : * nodes!
1532 : *
1533 : * Note that copying a constant doesn't in itself require us to mark
1534 : * the tlist nontrivial; see trivial_subqueryscan() in setrefs.c.
1535 : */
1536 42246 : if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
1537 13102 : expr = (Node *) inputtle->expr;
1538 : else
1539 116576 : expr = (Node *) makeVar(varno,
1540 29144 : inputtle->resno,
1541 29144 : exprType((Node *) inputtle->expr),
1542 29144 : exprTypmod((Node *) inputtle->expr),
1543 29144 : exprCollation((Node *) inputtle->expr),
1544 : 0);
1545 :
1546 42246 : if (exprType(expr) != colType)
1547 : {
1548 : /*
1549 : * Note: it's not really cool to be applying coerce_to_common_type
1550 : * here; one notable point is that assign_expr_collations never
1551 : * gets run on any generated nodes. For the moment that's not a
1552 : * problem because we force the correct exposed collation below.
1553 : * It would likely be best to make the parser generate the correct
1554 : * output tlist for every set-op to begin with, though.
1555 : */
1556 1322 : expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */
1557 : expr,
1558 : colType,
1559 : "UNION/INTERSECT/EXCEPT");
1560 1322 : *trivial_tlist = false; /* the coercion makes it not trivial */
1561 : }
1562 :
1563 : /*
1564 : * Ensure the tlist entry's exposed collation matches the set-op. This
1565 : * is necessary because plan_set_operations() reports the result
1566 : * ordering as a list of SortGroupClauses, which don't carry collation
1567 : * themselves but just refer to tlist entries. If we don't show the
1568 : * right collation then planner.c might do the wrong thing in
1569 : * higher-level queries.
1570 : *
1571 : * Note we use RelabelType, not CollateExpr, since this expression
1572 : * will reach the executor without any further processing.
1573 : */
1574 42246 : if (exprCollation(expr) != colColl)
1575 : {
1576 10885 : expr = applyRelabelType(expr,
1577 : exprType(expr), exprTypmod(expr), colColl,
1578 : COERCE_IMPLICIT_CAST, -1, false);
1579 10885 : *trivial_tlist = false; /* the relabel makes it not trivial */
1580 : }
1581 :
1582 84492 : tle = makeTargetEntry((Expr *) expr,
1583 42246 : (AttrNumber) resno++,
1584 42246 : pstrdup(reftle->resname),
1585 : false);
1586 :
1587 : /*
1588 : * By convention, all output columns in a setop tree have
1589 : * ressortgroupref equal to their resno. In some cases the ref isn't
1590 : * needed, but this is a cleaner way than modifying the tlist later.
1591 : */
1592 42246 : tle->ressortgroupref = tle->resno;
1593 :
1594 42246 : tlist = lappend(tlist, tle);
1595 : }
1596 :
1597 13875 : return tlist;
1598 : }
1599 :
1600 : /*
1601 : * Generate targetlist for a set-operation Append node
1602 : *
1603 : * colTypes: OID list of set-op's result column datatypes
1604 : * colCollations: OID list of set-op's result column collations
1605 : * input_tlists: list of tlists for sub-plans of the Append
1606 : * refnames_tlist: targetlist to take column names from
1607 : *
1608 : * The entries in the Append's targetlist should always be simple Vars;
1609 : * we just have to make sure they have the right datatypes/typmods/collations.
1610 : * The Vars are always generated with varno 0.
1611 : *
1612 : * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
1613 : * cannot figure out a realistic width for the tlist we make here. But we
1614 : * ought to refactor this code to produce a PathTarget directly, anyway.
1615 : */
1616 : static List *
1617 4430 : generate_append_tlist(List *colTypes, List *colCollations,
1618 : List *input_tlists,
1619 : List *refnames_tlist)
1620 : {
1621 4430 : List *tlist = NIL;
1622 4430 : int resno = 1;
1623 : ListCell *curColType;
1624 : ListCell *curColCollation;
1625 : ListCell *ref_tl_item;
1626 : int colindex;
1627 : TargetEntry *tle;
1628 : Node *expr;
1629 : ListCell *tlistl;
1630 : int32 *colTypmods;
1631 :
1632 : /*
1633 : * First extract typmods to use.
1634 : *
1635 : * If the inputs all agree on type and typmod of a particular column, use
1636 : * that typmod; else use -1.
1637 : */
1638 4430 : colTypmods = palloc_array(int32, list_length(colTypes));
1639 :
1640 16563 : foreach(tlistl, input_tlists)
1641 : {
1642 12133 : List *subtlist = (List *) lfirst(tlistl);
1643 : ListCell *subtlistl;
1644 :
1645 12133 : curColType = list_head(colTypes);
1646 12133 : colindex = 0;
1647 47868 : foreach(subtlistl, subtlist)
1648 : {
1649 35735 : TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
1650 :
1651 : Assert(!subtle->resjunk);
1652 : Assert(curColType != NULL);
1653 35735 : if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
1654 : {
1655 : /* If first subplan, copy the typmod; else compare */
1656 35735 : int32 subtypmod = exprTypmod((Node *) subtle->expr);
1657 :
1658 35735 : if (tlistl == list_head(input_tlists))
1659 12231 : colTypmods[colindex] = subtypmod;
1660 23504 : else if (subtypmod != colTypmods[colindex])
1661 10 : colTypmods[colindex] = -1;
1662 : }
1663 : else
1664 : {
1665 : /* types disagree, so force typmod to -1 */
1666 0 : colTypmods[colindex] = -1;
1667 : }
1668 35735 : curColType = lnext(colTypes, curColType);
1669 35735 : colindex++;
1670 : }
1671 : Assert(curColType == NULL);
1672 : }
1673 :
1674 : /*
1675 : * Now we can build the tlist for the Append.
1676 : */
1677 4430 : colindex = 0;
1678 16661 : forthree(curColType, colTypes, curColCollation, colCollations,
1679 : ref_tl_item, refnames_tlist)
1680 : {
1681 12231 : Oid colType = lfirst_oid(curColType);
1682 12231 : int32 colTypmod = colTypmods[colindex++];
1683 12231 : Oid colColl = lfirst_oid(curColCollation);
1684 12231 : TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
1685 :
1686 : Assert(reftle->resno == resno);
1687 : Assert(!reftle->resjunk);
1688 12231 : expr = (Node *) makeVar(0,
1689 : resno,
1690 : colType,
1691 : colTypmod,
1692 : colColl,
1693 : 0);
1694 24462 : tle = makeTargetEntry((Expr *) expr,
1695 12231 : (AttrNumber) resno++,
1696 12231 : pstrdup(reftle->resname),
1697 : false);
1698 :
1699 : /*
1700 : * By convention, all output columns in a setop tree have
1701 : * ressortgroupref equal to their resno. In some cases the ref isn't
1702 : * needed, but this is a cleaner way than modifying the tlist later.
1703 : */
1704 12231 : tle->ressortgroupref = tle->resno;
1705 :
1706 12231 : tlist = lappend(tlist, tle);
1707 : }
1708 :
1709 4430 : pfree(colTypmods);
1710 :
1711 4430 : return tlist;
1712 : }
1713 :
1714 : /*
1715 : * generate_setop_grouplist
1716 : * Build a SortGroupClause list defining the sort/grouping properties
1717 : * of the setop's output columns.
1718 : *
1719 : * Parse analysis already determined the properties and built a suitable
1720 : * list, except that the entries do not have sortgrouprefs set because
1721 : * the parser output representation doesn't include a tlist for each
1722 : * setop. So what we need to do here is copy that list and install
1723 : * proper sortgrouprefs into it (copying those from the targetlist).
1724 : */
1725 : static List *
1726 4158 : generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
1727 : {
1728 4158 : List *grouplist = copyObject(op->groupClauses);
1729 : ListCell *lg;
1730 : ListCell *lt;
1731 :
1732 4158 : lg = list_head(grouplist);
1733 16031 : foreach(lt, targetlist)
1734 : {
1735 11873 : TargetEntry *tle = (TargetEntry *) lfirst(lt);
1736 : SortGroupClause *sgc;
1737 :
1738 : Assert(!tle->resjunk);
1739 :
1740 : /* non-resjunk columns should have sortgroupref = resno */
1741 : Assert(tle->ressortgroupref == tle->resno);
1742 :
1743 : /* non-resjunk columns should have grouping clauses */
1744 : Assert(lg != NULL);
1745 11873 : sgc = (SortGroupClause *) lfirst(lg);
1746 11873 : lg = lnext(grouplist, lg);
1747 : Assert(sgc->tleSortGroupRef == 0);
1748 :
1749 11873 : sgc->tleSortGroupRef = tle->ressortgroupref;
1750 : }
1751 : Assert(lg == NULL);
1752 4158 : return grouplist;
1753 : }
1754 :
1755 : /*
1756 : * create_setop_pathtarget
1757 : * Do the normal create_pathtarget() work, plus set the resulting
1758 : * PathTarget's width to the average width of the Paths in child_pathlist
1759 : * weighted using the estimated row count of each path.
1760 : *
1761 : * Note: This is required because set op target lists use varno==0, which
1762 : * results in a type default width estimate rather than one that's based on
1763 : * statistics of the columns from the set op children.
1764 : */
1765 : static PathTarget *
1766 4362 : create_setop_pathtarget(PlannerInfo *root, List *tlist, List *child_pathlist)
1767 : {
1768 : PathTarget *reltarget;
1769 : ListCell *lc;
1770 4362 : double parent_rows = 0;
1771 4362 : double parent_size = 0;
1772 :
1773 4362 : reltarget = create_pathtarget(root, tlist);
1774 :
1775 : /* Calculate the total rows and total size. */
1776 15902 : foreach(lc, child_pathlist)
1777 : {
1778 11540 : Path *path = (Path *) lfirst(lc);
1779 :
1780 11540 : parent_rows += path->rows;
1781 11540 : parent_size += path->parent->reltarget->width * path->rows;
1782 : }
1783 :
1784 4362 : if (parent_rows > 0)
1785 4347 : reltarget->width = rint(parent_size / parent_rows);
1786 :
1787 4362 : return reltarget;
1788 : }
|