Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * prepunion.c
4 : * Routines to plan set-operation queries. The filename is a leftover
5 : * from a time when only UNIONs were implemented.
6 : *
7 : * There are two code paths in the planner for set-operation queries.
8 : * If a subquery consists entirely of simple UNION ALL operations, it
9 : * is converted into an "append relation". Otherwise, it is handled
10 : * by the general code in this module (plan_set_operations and its
11 : * subroutines). There is some support code here for the append-relation
12 : * case, but most of the heavy lifting for that is done elsewhere,
13 : * notably in prepjointree.c and allpaths.c.
14 : *
15 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/optimizer/prep/prepunion.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include <math.h>
27 :
28 : #include "access/htup_details.h"
29 : #include "catalog/pg_type.h"
30 : #include "miscadmin.h"
31 : #include "nodes/makefuncs.h"
32 : #include "nodes/nodeFuncs.h"
33 : #include "optimizer/cost.h"
34 : #include "optimizer/pathnode.h"
35 : #include "optimizer/paths.h"
36 : #include "optimizer/planner.h"
37 : #include "optimizer/prep.h"
38 : #include "optimizer/tlist.h"
39 : #include "parser/parse_coerce.h"
40 : #include "utils/selfuncs.h"
41 :
42 :
43 : static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
44 : SetOperationStmt *parentOp,
45 : List *colTypes, List *colCollations,
46 : List *refnames_tlist,
47 : List **pTargetList,
48 : bool *istrivial_tlist);
49 : static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
50 : PlannerInfo *root,
51 : List *refnames_tlist,
52 : List **pTargetList);
53 : static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
54 : bool trivial_tlist, List *child_tlist,
55 : List *interesting_pathkeys,
56 : double *pNumGroups);
57 : static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
58 : List *refnames_tlist,
59 : List **pTargetList);
60 : static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
61 : List *refnames_tlist,
62 : List **pTargetList);
63 : static List *plan_union_children(PlannerInfo *root,
64 : SetOperationStmt *top_union,
65 : List *refnames_tlist,
66 : List **tlist_list,
67 : List **istrivial_tlist);
68 : static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
69 : static List *generate_setop_tlist(List *colTypes, List *colCollations,
70 : Index varno,
71 : bool hack_constants,
72 : List *input_tlist,
73 : List *refnames_tlist,
74 : bool *trivial_tlist);
75 : static List *generate_append_tlist(List *colTypes, List *colCollations,
76 : List *input_tlists,
77 : List *refnames_tlist);
78 : static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
79 : static PathTarget *create_setop_pathtarget(PlannerInfo *root, List *tlist,
80 : List *child_pathlist);
81 :
82 :
83 : /*
84 : * plan_set_operations
85 : *
86 : * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
87 : *
88 : * This routine only deals with the setOperations tree of the given query.
89 : * Any top-level ORDER BY requested in root->parse->sortClause will be handled
90 : * when we return to grouping_planner; likewise for LIMIT.
91 : *
92 : * What we return is an "upperrel" RelOptInfo containing at least one Path
93 : * that implements the set-operation tree. In addition, root->processed_tlist
94 : * receives a targetlist representing the output of the topmost setop node.
95 : */
96 : RelOptInfo *
97 6112 : plan_set_operations(PlannerInfo *root)
98 : {
99 6112 : Query *parse = root->parse;
100 6112 : SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
101 : Node *node;
102 : RangeTblEntry *leftmostRTE;
103 : Query *leftmostQuery;
104 : RelOptInfo *setop_rel;
105 : List *top_tlist;
106 :
107 : Assert(topop);
108 :
109 : /* check for unsupported stuff */
110 : Assert(parse->jointree->fromlist == NIL);
111 : Assert(parse->jointree->quals == NULL);
112 : Assert(parse->groupClause == NIL);
113 : Assert(parse->havingQual == NULL);
114 : Assert(parse->windowClause == NIL);
115 : Assert(parse->distinctClause == NIL);
116 :
117 : /*
118 : * In the outer query level, equivalence classes are limited to classes
119 : * which define that the top-level target entry is equivalent to the
120 : * corresponding child target entry. There won't be any equivalence class
121 : * merging. Mark that merging is complete to allow us to make pathkeys.
122 : */
123 : Assert(root->eq_classes == NIL);
124 6112 : root->ec_merging_done = true;
125 :
126 : /*
127 : * We'll need to build RelOptInfos for each of the leaf subqueries, which
128 : * are RTE_SUBQUERY rangetable entries in this Query. Prepare the index
129 : * arrays for those, and for AppendRelInfos in case they're needed.
130 : */
131 6112 : setup_simple_rel_arrays(root);
132 :
133 : /*
134 : * Find the leftmost component Query. We need to use its column names for
135 : * all generated tlists (else SELECT INTO won't work right).
136 : */
137 6112 : node = topop->larg;
138 9898 : while (node && IsA(node, SetOperationStmt))
139 3786 : node = ((SetOperationStmt *) node)->larg;
140 : Assert(node && IsA(node, RangeTblRef));
141 6112 : leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
142 6112 : leftmostQuery = leftmostRTE->subquery;
143 : Assert(leftmostQuery != NULL);
144 :
145 : /*
146 : * If the topmost node is a recursive union, it needs special processing.
147 : */
148 6112 : if (root->hasRecursion)
149 : {
150 934 : setop_rel = generate_recursion_path(topop, root,
151 : leftmostQuery->targetList,
152 : &top_tlist);
153 : }
154 : else
155 : {
156 : bool trivial_tlist;
157 :
158 : /*
159 : * Recurse on setOperations tree to generate paths for set ops. The
160 : * final output paths should have just the column types shown as the
161 : * output from the top-level node.
162 : */
163 5178 : setop_rel = recurse_set_operations((Node *) topop, root,
164 : NULL, /* no parent */
165 : topop->colTypes, topop->colCollations,
166 : leftmostQuery->targetList,
167 : &top_tlist,
168 : &trivial_tlist);
169 : }
170 :
171 : /* Must return the built tlist into root->processed_tlist. */
172 6106 : root->processed_tlist = top_tlist;
173 :
174 6106 : return setop_rel;
175 : }
176 :
177 : /*
178 : * recurse_set_operations
179 : * Recursively handle one step in a tree of set operations
180 : *
181 : * setOp: current step (could be a SetOperationStmt or a leaf RangeTblRef)
182 : * parentOp: parent step, or NULL if none (but see below)
183 : * colTypes: OID list of set-op's result column datatypes
184 : * colCollations: OID list of set-op's result column collations
185 : * refnames_tlist: targetlist to take column names from
186 : *
187 : * parentOp should be passed as NULL unless that step is interested in
188 : * getting sorted output from this step. ("Sorted" means "sorted according
189 : * to the default btree opclasses of the result column datatypes".)
190 : *
191 : * Returns a RelOptInfo for the subtree, as well as these output parameters:
192 : * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
193 : * *istrivial_tlist: true if, and only if, datatypes between parent and child
194 : * match.
195 : *
196 : * If setOp is a leaf node, this function plans the sub-query but does
197 : * not populate the pathlist of the returned RelOptInfo. The caller will
198 : * generate SubqueryScan paths using useful path(s) of the subquery (see
199 : * build_setop_child_paths). But this function does build the paths for
200 : * set-operation nodes.
201 : *
202 : * The pTargetList output parameter is mostly redundant with the pathtarget
203 : * of the returned RelOptInfo, but for the moment we need it because much of
204 : * the logic in this file depends on flag columns being marked resjunk.
205 : * XXX Now that there are no flag columns and hence no resjunk columns, we
206 : * could probably refactor this file to deal only in pathtargets.
207 : *
208 : * We don't have to care about typmods here: the only allowed difference
209 : * between set-op input and output typmods is input is a specific typmod
210 : * and output is -1, and that does not require a coercion.
211 : */
212 : static RelOptInfo *
213 21416 : recurse_set_operations(Node *setOp, PlannerInfo *root,
214 : SetOperationStmt *parentOp,
215 : List *colTypes, List *colCollations,
216 : List *refnames_tlist,
217 : List **pTargetList,
218 : bool *istrivial_tlist)
219 : {
220 : RelOptInfo *rel;
221 :
222 21416 : *istrivial_tlist = true; /* for now */
223 :
224 : /* Guard against stack overflow due to overly complex setop nests */
225 21416 : check_stack_depth();
226 :
227 21416 : if (IsA(setOp, RangeTblRef))
228 : {
229 16076 : RangeTblRef *rtr = (RangeTblRef *) setOp;
230 16076 : RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
231 16076 : Query *subquery = rte->subquery;
232 : PlannerInfo *subroot;
233 : List *tlist;
234 : bool trivial_tlist;
235 : char *plan_name;
236 :
237 : Assert(subquery != NULL);
238 :
239 : /* Build a RelOptInfo for this leaf subquery. */
240 16076 : rel = build_simple_rel(root, rtr->rtindex, NULL);
241 :
242 : /* plan_params should not be in use in current query level */
243 : Assert(root->plan_params == NIL);
244 :
245 : /*
246 : * Generate a subroot and Paths for the subquery. If we have a
247 : * parentOp, pass that down to encourage subquery_planner to consider
248 : * suitably-sorted Paths.
249 : */
250 16076 : plan_name = choose_plan_name(root->glob, "setop", true);
251 16076 : subroot = rel->subroot = subquery_planner(root->glob, subquery,
252 : plan_name, root,
253 : false, root->tuple_fraction,
254 : parentOp);
255 :
256 : /*
257 : * It should not be possible for the primitive query to contain any
258 : * cross-references to other primitive queries in the setop tree.
259 : */
260 16076 : if (root->plan_params)
261 0 : elog(ERROR, "unexpected outer reference in set operation subquery");
262 :
263 : /* Figure out the appropriate target list for this subquery. */
264 16076 : tlist = generate_setop_tlist(colTypes, colCollations,
265 16076 : rtr->rtindex,
266 : true,
267 : subroot->processed_tlist,
268 : refnames_tlist,
269 : &trivial_tlist);
270 16076 : rel->reltarget = create_pathtarget(root, tlist);
271 :
272 : /* Return the fully-fledged tlist to caller, too */
273 16076 : *pTargetList = tlist;
274 16076 : *istrivial_tlist = trivial_tlist;
275 : }
276 5340 : else if (IsA(setOp, SetOperationStmt))
277 : {
278 5340 : SetOperationStmt *op = (SetOperationStmt *) setOp;
279 :
280 : /* UNIONs are much different from INTERSECT/EXCEPT */
281 5340 : if (op->op == SETOP_UNION)
282 4642 : rel = generate_union_paths(op, root,
283 : refnames_tlist,
284 : pTargetList);
285 : else
286 698 : rel = generate_nonunion_paths(op, root,
287 : refnames_tlist,
288 : pTargetList);
289 :
290 : /*
291 : * If necessary, add a Result node to project the caller-requested
292 : * output columns.
293 : *
294 : * XXX you don't really want to know about this: setrefs.c will apply
295 : * fix_upper_expr() to the Result node's tlist. This would fail if the
296 : * Vars generated by generate_setop_tlist() were not exactly equal()
297 : * to the corresponding tlist entries of the subplan. However, since
298 : * the subplan was generated by generate_union_paths() or
299 : * generate_nonunion_paths(), and hence its tlist was generated by
300 : * generate_append_tlist() or generate_setop_tlist(), this will work.
301 : * We just tell generate_setop_tlist() to use varno 0.
302 : */
303 5340 : if (!tlist_same_datatypes(*pTargetList, colTypes, false) ||
304 5328 : !tlist_same_collations(*pTargetList, colCollations, false))
305 : {
306 : PathTarget *target;
307 : bool trivial_tlist;
308 : ListCell *lc;
309 :
310 12 : *pTargetList = generate_setop_tlist(colTypes, colCollations,
311 : 0,
312 : false,
313 : *pTargetList,
314 : refnames_tlist,
315 : &trivial_tlist);
316 12 : *istrivial_tlist = trivial_tlist;
317 12 : target = create_pathtarget(root, *pTargetList);
318 :
319 : /* Apply projection to each path */
320 24 : foreach(lc, rel->pathlist)
321 : {
322 12 : Path *subpath = (Path *) lfirst(lc);
323 : Path *path;
324 :
325 : Assert(subpath->param_info == NULL);
326 12 : path = apply_projection_to_path(root, subpath->parent,
327 : subpath, target);
328 : /* If we had to add a Result, path is different from subpath */
329 12 : if (path != subpath)
330 12 : lfirst(lc) = path;
331 : }
332 :
333 : /* Apply projection to each partial path */
334 12 : foreach(lc, rel->partial_pathlist)
335 : {
336 0 : Path *subpath = (Path *) lfirst(lc);
337 : Path *path;
338 :
339 : Assert(subpath->param_info == NULL);
340 :
341 : /* avoid apply_projection_to_path, in case of multiple refs */
342 0 : path = (Path *) create_projection_path(root, subpath->parent,
343 : subpath, target);
344 0 : lfirst(lc) = path;
345 : }
346 : }
347 5340 : postprocess_setop_rel(root, rel);
348 : }
349 : else
350 : {
351 0 : elog(ERROR, "unrecognized node type: %d",
352 : (int) nodeTag(setOp));
353 : *pTargetList = NIL;
354 : rel = NULL; /* keep compiler quiet */
355 : }
356 :
357 21416 : return rel;
358 : }
359 :
360 : /*
361 : * Generate paths for a recursive UNION node
362 : */
363 : static RelOptInfo *
364 934 : generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
365 : List *refnames_tlist,
366 : List **pTargetList)
367 : {
368 : RelOptInfo *result_rel;
369 : Path *path;
370 : RelOptInfo *lrel,
371 : *rrel;
372 : Path *lpath;
373 : Path *rpath;
374 : List *lpath_tlist;
375 : bool lpath_trivial_tlist;
376 : List *rpath_tlist;
377 : bool rpath_trivial_tlist;
378 : List *tlist;
379 : List *groupList;
380 : double dNumGroups;
381 :
382 : /* Parser should have rejected other cases */
383 934 : if (setOp->op != SETOP_UNION)
384 0 : elog(ERROR, "only UNION queries can be recursive");
385 : /* Worktable ID should be assigned */
386 : Assert(root->wt_param_id >= 0);
387 :
388 : /*
389 : * Unlike a regular UNION node, process the left and right inputs
390 : * separately without any intention of combining them into one Append.
391 : */
392 934 : lrel = recurse_set_operations(setOp->larg, root,
393 : NULL, /* no value in sorted results */
394 : setOp->colTypes, setOp->colCollations,
395 : refnames_tlist,
396 : &lpath_tlist,
397 : &lpath_trivial_tlist);
398 934 : if (lrel->rtekind == RTE_SUBQUERY)
399 934 : build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
400 : NIL, NULL);
401 934 : lpath = lrel->cheapest_total_path;
402 : /* The right path will want to look at the left one ... */
403 934 : root->non_recursive_path = lpath;
404 934 : rrel = recurse_set_operations(setOp->rarg, root,
405 : NULL, /* no value in sorted results */
406 : setOp->colTypes, setOp->colCollations,
407 : refnames_tlist,
408 : &rpath_tlist,
409 : &rpath_trivial_tlist);
410 934 : if (rrel->rtekind == RTE_SUBQUERY)
411 928 : build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
412 : NIL, NULL);
413 934 : rpath = rrel->cheapest_total_path;
414 934 : root->non_recursive_path = NULL;
415 :
416 : /*
417 : * Generate tlist for RecursiveUnion path node --- same as in Append cases
418 : */
419 934 : tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations,
420 934 : list_make2(lpath_tlist, rpath_tlist),
421 : refnames_tlist);
422 :
423 934 : *pTargetList = tlist;
424 :
425 : /* Build result relation. */
426 934 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
427 934 : bms_union(lrel->relids, rrel->relids));
428 934 : result_rel->reltarget = create_pathtarget(root, tlist);
429 :
430 : /*
431 : * If UNION, identify the grouping operators
432 : */
433 934 : if (setOp->all)
434 : {
435 548 : groupList = NIL;
436 548 : dNumGroups = 0;
437 : }
438 : else
439 : {
440 : /* Identify the grouping semantics */
441 386 : groupList = generate_setop_grouplist(setOp, tlist);
442 :
443 : /* We only support hashing here */
444 386 : if (!grouping_is_hashable(groupList))
445 6 : ereport(ERROR,
446 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
447 : errmsg("could not implement recursive UNION"),
448 : errdetail("All column datatypes must be hashable.")));
449 :
450 : /*
451 : * For the moment, take the number of distinct groups as equal to the
452 : * total input size, ie, the worst case.
453 : */
454 380 : dNumGroups = lpath->rows + rpath->rows * 10;
455 : }
456 :
457 : /*
458 : * And make the path node.
459 : */
460 928 : path = (Path *) create_recursiveunion_path(root,
461 : result_rel,
462 : lpath,
463 : rpath,
464 928 : result_rel->reltarget,
465 : groupList,
466 : root->wt_param_id,
467 : dNumGroups);
468 :
469 928 : add_path(result_rel, path);
470 928 : postprocess_setop_rel(root, result_rel);
471 928 : return result_rel;
472 : }
473 :
474 : /*
475 : * build_setop_child_paths
476 : * Build paths for the set op child relation denoted by 'rel'.
477 : *
478 : * 'rel' is an RTE_SUBQUERY relation. We have already generated paths within
479 : * the subquery's subroot; the task here is to create SubqueryScan paths for
480 : * 'rel', representing scans of the useful subquery paths.
481 : *
482 : * interesting_pathkeys: if not NIL, also include paths that suit these
483 : * pathkeys, sorting any unsorted paths as required.
484 : * *pNumGroups: if not NULL, we estimate the number of distinct groups
485 : * in the result, and store it there.
486 : */
487 : static void
488 16076 : build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
489 : bool trivial_tlist, List *child_tlist,
490 : List *interesting_pathkeys, double *pNumGroups)
491 : {
492 : RelOptInfo *final_rel;
493 16076 : List *setop_pathkeys = rel->subroot->setop_pathkeys;
494 : ListCell *lc;
495 :
496 : /* it can't be a set op child rel if it's not a subquery */
497 : Assert(rel->rtekind == RTE_SUBQUERY);
498 :
499 : /* when sorting is needed, add child rel equivalences */
500 16076 : if (interesting_pathkeys != NIL)
501 12110 : add_setop_child_rel_equivalences(root,
502 : rel,
503 : child_tlist,
504 : interesting_pathkeys);
505 :
506 : /*
507 : * Mark rel with estimated output rows, width, etc. Note that we have to
508 : * do this before generating outer-query paths, else cost_subqueryscan is
509 : * not happy.
510 : */
511 16076 : set_subquery_size_estimates(root, rel);
512 :
513 : /*
514 : * Since we may want to add a partial path to this relation, we must set
515 : * its consider_parallel flag correctly.
516 : */
517 16076 : final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
518 16076 : rel->consider_parallel = final_rel->consider_parallel;
519 :
520 : /* Generate subquery scan paths for any interesting path in final_rel */
521 41582 : foreach(lc, final_rel->pathlist)
522 : {
523 25506 : Path *subpath = (Path *) lfirst(lc);
524 : List *pathkeys;
525 25506 : Path *cheapest_input_path = final_rel->cheapest_total_path;
526 : bool is_sorted;
527 : int presorted_keys;
528 :
529 : /* If the input rel is dummy, propagate that to this query level */
530 25506 : if (is_dummy_rel(final_rel))
531 : {
532 88 : mark_dummy_rel(rel);
533 4094 : continue;
534 : }
535 :
536 : /*
537 : * Include the cheapest path as-is so that the set operation can be
538 : * cheaply implemented using a method which does not require the input
539 : * to be sorted.
540 : */
541 25418 : if (subpath == cheapest_input_path)
542 : {
543 : /* Convert subpath's pathkeys to outer representation */
544 15988 : pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
545 : make_tlist_from_pathtarget(subpath->pathtarget));
546 :
547 : /* Generate outer path using this subpath */
548 15988 : add_path(rel, (Path *) create_subqueryscan_path(root,
549 : rel,
550 : subpath,
551 : trivial_tlist,
552 : pathkeys,
553 : NULL));
554 : }
555 :
556 : /* skip dealing with sorted paths if the setop doesn't need them */
557 25418 : if (interesting_pathkeys == NIL)
558 3994 : continue;
559 :
560 : /*
561 : * Create paths to suit final sort order required for setop_pathkeys.
562 : * Here we'll sort the cheapest input path (if not sorted already) and
563 : * incremental sort any paths which are partially sorted.
564 : */
565 21424 : is_sorted = pathkeys_count_contained_in(setop_pathkeys,
566 : subpath->pathkeys,
567 : &presorted_keys);
568 :
569 21424 : if (!is_sorted)
570 : {
571 14088 : double limittuples = rel->subroot->limit_tuples;
572 :
573 : /*
574 : * Try at least sorting the cheapest path and also try
575 : * incrementally sorting any path which is partially sorted
576 : * already (no need to deal with paths which have presorted keys
577 : * when incremental sort is disabled unless it's the cheapest
578 : * input path).
579 : */
580 14088 : if (subpath != cheapest_input_path &&
581 3126 : (presorted_keys == 0 || !enable_incremental_sort))
582 12 : continue;
583 :
584 : /*
585 : * We've no need to consider both a sort and incremental sort.
586 : * We'll just do a sort if there are no presorted keys and an
587 : * incremental sort when there are presorted keys.
588 : */
589 14076 : if (presorted_keys == 0 || !enable_incremental_sort)
590 10914 : subpath = (Path *) create_sort_path(rel->subroot,
591 : final_rel,
592 : subpath,
593 : setop_pathkeys,
594 : limittuples);
595 : else
596 3162 : subpath = (Path *) create_incremental_sort_path(rel->subroot,
597 : final_rel,
598 : subpath,
599 : setop_pathkeys,
600 : presorted_keys,
601 : limittuples);
602 : }
603 :
604 : /*
605 : * subpath is now sorted, so add it to the pathlist. We already added
606 : * the cheapest_input_path above, so don't add it again unless we just
607 : * sorted it.
608 : */
609 21412 : if (subpath != cheapest_input_path)
610 : {
611 : /* Convert subpath's pathkeys to outer representation */
612 20336 : pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
613 : make_tlist_from_pathtarget(subpath->pathtarget));
614 :
615 : /* Generate outer path using this subpath */
616 20336 : add_path(rel, (Path *) create_subqueryscan_path(root,
617 : rel,
618 : subpath,
619 : trivial_tlist,
620 : pathkeys,
621 : NULL));
622 : }
623 : }
624 :
625 : /* if consider_parallel is false, there should be no partial paths */
626 : Assert(final_rel->consider_parallel ||
627 : final_rel->partial_pathlist == NIL);
628 :
629 : /*
630 : * If we have a partial path for the child relation, we can use that to
631 : * build a partial path for this relation. But there's no point in
632 : * considering any path but the cheapest.
633 : */
634 16076 : if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
635 11152 : final_rel->partial_pathlist != NIL)
636 : {
637 : Path *partial_subpath;
638 : Path *partial_path;
639 :
640 12 : partial_subpath = linitial(final_rel->partial_pathlist);
641 : partial_path = (Path *)
642 12 : create_subqueryscan_path(root, rel, partial_subpath,
643 : trivial_tlist,
644 : NIL, NULL);
645 12 : add_partial_path(rel, partial_path);
646 : }
647 :
648 16076 : postprocess_setop_rel(root, rel);
649 :
650 : /*
651 : * Estimate number of groups if caller wants it. If the subquery used
652 : * grouping or aggregation, its output is probably mostly unique anyway;
653 : * otherwise do statistical estimation.
654 : *
655 : * XXX you don't really want to know about this: we do the estimation
656 : * using the subroot->parse's original targetlist expressions, not the
657 : * subroot->processed_tlist which might seem more appropriate. The reason
658 : * is that if the subquery is itself a setop, it may return a
659 : * processed_tlist containing "varno 0" Vars generated by
660 : * generate_append_tlist, and those would confuse estimate_num_groups
661 : * mightily. We ought to get rid of the "varno 0" hack, but that requires
662 : * a redesign of the parsetree representation of setops, so that there can
663 : * be an RTE corresponding to each setop's output. Note, we use this not
664 : * subquery's targetlist but subroot->parse's targetlist, because it was
665 : * revised by self-join removal. subquery's targetlist might contain the
666 : * references to the removed relids.
667 : */
668 16076 : if (pNumGroups)
669 : {
670 1366 : PlannerInfo *subroot = rel->subroot;
671 1366 : Query *subquery = subroot->parse;
672 :
673 1366 : if (subquery->groupClause || subquery->groupingSets ||
674 1366 : subquery->distinctClause || subroot->hasHavingQual ||
675 1354 : subquery->hasAggs)
676 12 : *pNumGroups = rel->cheapest_total_path->rows;
677 : else
678 1354 : *pNumGroups = estimate_num_groups(subroot,
679 1354 : get_tlist_exprs(subroot->parse->targetList, false),
680 1354 : rel->cheapest_total_path->rows,
681 : NULL,
682 : NULL);
683 : }
684 16076 : }
685 :
686 : /*
687 : * Generate paths for a UNION or UNION ALL node
688 : */
689 : static RelOptInfo *
690 4642 : generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
691 : List *refnames_tlist,
692 : List **pTargetList)
693 : {
694 4642 : Relids relids = NULL;
695 : RelOptInfo *result_rel;
696 : ListCell *lc;
697 : ListCell *lc2;
698 : ListCell *lc3;
699 4642 : List *cheapest_pathlist = NIL;
700 4642 : List *ordered_pathlist = NIL;
701 4642 : List *partial_pathlist = NIL;
702 4642 : bool partial_paths_valid = true;
703 4642 : bool consider_parallel = true;
704 : List *rellist;
705 : List *tlist_list;
706 : List *trivial_tlist_list;
707 : List *tlist;
708 4642 : List *groupList = NIL;
709 : Path *apath;
710 4642 : Path *gpath = NULL;
711 4642 : bool try_sorted = false;
712 4642 : List *union_pathkeys = NIL;
713 :
714 : /*
715 : * If any of my children are identical UNION nodes (same op, all-flag, and
716 : * colTypes/colCollations) then they can be merged into this node so that
717 : * we generate only one Append/MergeAppend and unique-ification for the
718 : * lot. Recurse to find such nodes.
719 : */
720 4642 : rellist = plan_union_children(root,
721 : op,
722 : refnames_tlist,
723 : &tlist_list,
724 : &trivial_tlist_list);
725 :
726 : /*
727 : * Generate tlist for Append/MergeAppend plan node.
728 : *
729 : * The tlist for an Append plan isn't important as far as the Append is
730 : * concerned, but we must make it look real anyway for the benefit of the
731 : * next plan level up.
732 : */
733 4642 : tlist = generate_append_tlist(op->colTypes, op->colCollations,
734 : tlist_list, refnames_tlist);
735 4642 : *pTargetList = tlist;
736 :
737 : /* For UNIONs (not UNION ALL), try sorting, if sorting is possible */
738 4642 : if (!op->all)
739 : {
740 : /* Identify the grouping semantics */
741 3934 : groupList = generate_setop_grouplist(op, tlist);
742 :
743 3934 : if (grouping_is_sortable(op->groupClauses))
744 : {
745 3832 : try_sorted = true;
746 : /* Determine the pathkeys for sorting by the whole target list */
747 3832 : union_pathkeys = make_pathkeys_for_sortclauses(root, groupList,
748 : tlist);
749 :
750 3832 : root->query_pathkeys = union_pathkeys;
751 : }
752 : }
753 :
754 : /*
755 : * Now that we've got the append target list, we can build the union child
756 : * paths.
757 : */
758 17616 : forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
759 : {
760 12974 : RelOptInfo *rel = lfirst(lc);
761 12974 : bool trivial_tlist = lfirst_int(lc2);
762 12974 : List *child_tlist = lfirst_node(List, lc3);
763 :
764 : /* only build paths for the union children */
765 12974 : if (rel->rtekind == RTE_SUBQUERY)
766 12848 : build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
767 : union_pathkeys, NULL);
768 : }
769 :
770 : /* Build path lists and relid set. */
771 17616 : foreach(lc, rellist)
772 : {
773 12974 : RelOptInfo *rel = lfirst(lc);
774 : Path *ordered_path;
775 :
776 : /*
777 : * Record the relids so that we can identify the correct
778 : * UPPERREL_SETOP RelOptInfo below.
779 : */
780 12974 : relids = bms_add_members(relids, rel->relids);
781 :
782 : /* Skip any UNION children that are proven not to yield any rows */
783 12974 : if (is_dummy_rel(rel))
784 52 : continue;
785 :
786 12922 : cheapest_pathlist = lappend(cheapest_pathlist,
787 12922 : rel->cheapest_total_path);
788 :
789 12922 : if (try_sorted)
790 : {
791 4154 : ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
792 : union_pathkeys,
793 : NULL,
794 : TOTAL_COST,
795 : false);
796 :
797 4154 : if (ordered_path != NULL)
798 638 : ordered_pathlist = lappend(ordered_pathlist, ordered_path);
799 : else
800 : {
801 : /*
802 : * If we can't find a sorted path, just give up trying to
803 : * generate a list of correctly sorted child paths. This can
804 : * happen when type coercion was added to the targetlist due
805 : * to mismatching types from the union children.
806 : */
807 3516 : try_sorted = false;
808 : }
809 : }
810 :
811 12922 : if (consider_parallel)
812 : {
813 9312 : if (!rel->consider_parallel)
814 : {
815 3378 : consider_parallel = false;
816 3378 : partial_paths_valid = false;
817 : }
818 5934 : else if (rel->partial_pathlist == NIL)
819 5922 : partial_paths_valid = false;
820 : else
821 12 : partial_pathlist = lappend(partial_pathlist,
822 12 : linitial(rel->partial_pathlist));
823 : }
824 : }
825 :
826 : /* Build result relation. */
827 4642 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
828 4642 : result_rel->reltarget = create_setop_pathtarget(root, tlist,
829 : cheapest_pathlist);
830 4642 : result_rel->consider_parallel = consider_parallel;
831 4642 : result_rel->consider_startup = (root->tuple_fraction > 0);
832 :
833 : /* If all UNION children were dummy rels, make the resulting rel dummy */
834 4642 : if (cheapest_pathlist == NIL)
835 : {
836 6 : mark_dummy_rel(result_rel);
837 :
838 6 : return result_rel;
839 : }
840 :
841 : /*
842 : * Append the child results together using the cheapest paths from each
843 : * union child.
844 : */
845 4636 : apath = (Path *) create_append_path(root, result_rel, cheapest_pathlist,
846 : NIL, NIL, NULL, 0, false, -1);
847 :
848 : /*
849 : * Estimate number of groups. For now we just assume the output is unique
850 : * --- this is certainly true for the UNION case, and we want worst-case
851 : * estimates anyway.
852 : */
853 4636 : result_rel->rows = apath->rows;
854 :
855 : /*
856 : * Now consider doing the same thing using the partial paths plus Append
857 : * plus Gather.
858 : */
859 4636 : if (partial_paths_valid)
860 : {
861 : Path *papath;
862 6 : int parallel_workers = 0;
863 :
864 : /* Find the highest number of workers requested for any subpath. */
865 18 : foreach(lc, partial_pathlist)
866 : {
867 12 : Path *subpath = lfirst(lc);
868 :
869 12 : parallel_workers = Max(parallel_workers,
870 : subpath->parallel_workers);
871 : }
872 : Assert(parallel_workers > 0);
873 :
874 : /*
875 : * If the use of parallel append is permitted, always request at least
876 : * log2(# of children) paths. We assume it can be useful to have
877 : * extra workers in this case because they will be spread out across
878 : * the children. The precise formula is just a guess; see
879 : * add_paths_to_append_rel.
880 : */
881 6 : if (enable_parallel_append)
882 : {
883 6 : parallel_workers = Max(parallel_workers,
884 : pg_leftmost_one_pos32(list_length(partial_pathlist)) + 1);
885 6 : parallel_workers = Min(parallel_workers,
886 : max_parallel_workers_per_gather);
887 : }
888 : Assert(parallel_workers > 0);
889 :
890 : papath = (Path *)
891 6 : create_append_path(root, result_rel, NIL, partial_pathlist,
892 : NIL, NULL, parallel_workers,
893 : enable_parallel_append, -1);
894 : gpath = (Path *)
895 6 : create_gather_path(root, result_rel, papath,
896 6 : result_rel->reltarget, NULL, NULL);
897 : }
898 :
899 4636 : if (!op->all)
900 : {
901 : double dNumGroups;
902 3928 : bool can_sort = grouping_is_sortable(groupList);
903 3928 : bool can_hash = grouping_is_hashable(groupList);
904 3928 : Path *first_path = linitial(cheapest_pathlist);
905 :
906 : /*
907 : * Estimate the number of UNION output rows. In the case when only a
908 : * single UNION child remains, we can use estimate_num_groups() on
909 : * that child. We must be careful not to do this when that child is
910 : * the result of some other set operation as the targetlist will
911 : * contain Vars with varno==0, which estimate_num_groups() wouldn't
912 : * like.
913 : */
914 3928 : if (list_length(cheapest_pathlist) == 1 &&
915 18 : first_path->parent->reloptkind != RELOPT_UPPER_REL)
916 : {
917 12 : dNumGroups = estimate_num_groups(root,
918 12 : first_path->pathtarget->exprs,
919 : first_path->rows,
920 : NULL,
921 : NULL);
922 : }
923 : else
924 : {
925 : /*
926 : * Otherwise, for the moment, take the number of distinct groups
927 : * as equal to the total input size, i.e., the worst case. This
928 : * is too conservative, but it's not clear how to get a decent
929 : * estimate of the true size. One should note as well the
930 : * propensity of novices to write UNION rather than UNION ALL even
931 : * when they don't expect any duplicates...
932 : */
933 3916 : dNumGroups = apath->rows;
934 : }
935 :
936 3928 : if (can_hash)
937 : {
938 : Path *path;
939 :
940 : /*
941 : * Try a hash aggregate plan on 'apath'. This is the cheapest
942 : * available path containing each append child.
943 : */
944 3856 : path = (Path *) create_agg_path(root,
945 : result_rel,
946 : apath,
947 3856 : result_rel->reltarget,
948 : AGG_HASHED,
949 : AGGSPLIT_SIMPLE,
950 : groupList,
951 : NIL,
952 : NULL,
953 : dNumGroups);
954 3856 : add_path(result_rel, path);
955 :
956 : /* Try hash aggregate on the Gather path, if valid */
957 3856 : if (gpath != NULL)
958 : {
959 : /* Hashed aggregate plan --- no sort needed */
960 6 : path = (Path *) create_agg_path(root,
961 : result_rel,
962 : gpath,
963 6 : result_rel->reltarget,
964 : AGG_HASHED,
965 : AGGSPLIT_SIMPLE,
966 : groupList,
967 : NIL,
968 : NULL,
969 : dNumGroups);
970 6 : add_path(result_rel, path);
971 : }
972 : }
973 :
974 3928 : if (can_sort)
975 : {
976 3826 : Path *path = apath;
977 :
978 : /* Try Sort -> Unique on the Append path */
979 3826 : if (groupList != NIL)
980 3796 : path = (Path *) create_sort_path(root, result_rel, path,
981 : make_pathkeys_for_sortclauses(root, groupList, tlist),
982 : -1.0);
983 :
984 3826 : path = (Path *) create_unique_path(root,
985 : result_rel,
986 : path,
987 3826 : list_length(path->pathkeys),
988 : dNumGroups);
989 :
990 3826 : add_path(result_rel, path);
991 :
992 : /* Try Sort -> Unique on the Gather path, if set */
993 3826 : if (gpath != NULL)
994 : {
995 6 : path = gpath;
996 :
997 6 : path = (Path *) create_sort_path(root, result_rel, path,
998 : make_pathkeys_for_sortclauses(root, groupList, tlist),
999 : -1.0);
1000 :
1001 6 : path = (Path *) create_unique_path(root,
1002 : result_rel,
1003 : path,
1004 6 : list_length(path->pathkeys),
1005 : dNumGroups);
1006 6 : add_path(result_rel, path);
1007 : }
1008 : }
1009 :
1010 : /*
1011 : * Try making a MergeAppend path if we managed to find a path with the
1012 : * correct pathkeys in each union child query.
1013 : */
1014 3928 : if (try_sorted && groupList != NIL)
1015 : {
1016 : Path *path;
1017 :
1018 280 : path = (Path *) create_merge_append_path(root,
1019 : result_rel,
1020 : ordered_pathlist,
1021 : union_pathkeys,
1022 : NULL);
1023 :
1024 : /* and make the MergeAppend unique */
1025 280 : path = (Path *) create_unique_path(root,
1026 : result_rel,
1027 : path,
1028 : list_length(tlist),
1029 : dNumGroups);
1030 :
1031 280 : add_path(result_rel, path);
1032 : }
1033 : }
1034 : else
1035 : {
1036 : /* UNION ALL */
1037 708 : add_path(result_rel, apath);
1038 :
1039 708 : if (gpath != NULL)
1040 0 : add_path(result_rel, gpath);
1041 : }
1042 :
1043 4636 : return result_rel;
1044 : }
1045 :
1046 : /*
1047 : * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
1048 : */
1049 : static RelOptInfo *
1050 698 : generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
1051 : List *refnames_tlist,
1052 : List **pTargetList)
1053 : {
1054 : RelOptInfo *result_rel;
1055 : RelOptInfo *lrel,
1056 : *rrel;
1057 698 : double save_fraction = root->tuple_fraction;
1058 : Path *lpath,
1059 : *rpath,
1060 : *path;
1061 : List *lpath_tlist,
1062 : *rpath_tlist,
1063 : *tlist,
1064 : *groupList;
1065 : bool lpath_trivial_tlist,
1066 : rpath_trivial_tlist,
1067 : result_trivial_tlist;
1068 698 : List *nonunion_pathkeys = NIL;
1069 : double dLeftGroups,
1070 : dRightGroups,
1071 : dNumGroups,
1072 : dNumOutputRows;
1073 : bool can_sort;
1074 : bool can_hash;
1075 : SetOpCmd cmd;
1076 :
1077 : /*
1078 : * Tell children to fetch all tuples.
1079 : */
1080 698 : root->tuple_fraction = 0.0;
1081 :
1082 : /* Recurse on children */
1083 698 : lrel = recurse_set_operations(op->larg, root,
1084 : op,
1085 : op->colTypes, op->colCollations,
1086 : refnames_tlist,
1087 : &lpath_tlist,
1088 : &lpath_trivial_tlist);
1089 :
1090 698 : rrel = recurse_set_operations(op->rarg, root,
1091 : op,
1092 : op->colTypes, op->colCollations,
1093 : refnames_tlist,
1094 : &rpath_tlist,
1095 : &rpath_trivial_tlist);
1096 :
1097 : /*
1098 : * Generate tlist for SetOp plan node.
1099 : *
1100 : * The tlist for a SetOp plan isn't important so far as the SetOp is
1101 : * concerned, but we must make it look real anyway for the benefit of the
1102 : * next plan level up.
1103 : */
1104 698 : tlist = generate_setop_tlist(op->colTypes, op->colCollations,
1105 : 0, false, lpath_tlist, refnames_tlist,
1106 : &result_trivial_tlist);
1107 :
1108 : /* We should not have needed any type coercions in the tlist */
1109 : Assert(result_trivial_tlist);
1110 :
1111 698 : *pTargetList = tlist;
1112 :
1113 : /* Identify the grouping semantics */
1114 698 : groupList = generate_setop_grouplist(op, tlist);
1115 :
1116 : /* Check whether the operators support sorting or hashing */
1117 698 : can_sort = grouping_is_sortable(groupList);
1118 698 : can_hash = grouping_is_hashable(groupList);
1119 698 : if (!can_sort && !can_hash)
1120 0 : ereport(ERROR,
1121 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1122 : /* translator: %s is INTERSECT or EXCEPT */
1123 : errmsg("could not implement %s",
1124 : (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT"),
1125 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1126 :
1127 698 : if (can_sort)
1128 : {
1129 : /* Determine the pathkeys for sorting by the whole target list */
1130 698 : nonunion_pathkeys = make_pathkeys_for_sortclauses(root, groupList,
1131 : tlist);
1132 :
1133 698 : root->query_pathkeys = nonunion_pathkeys;
1134 : }
1135 :
1136 : /*
1137 : * Now that we've got all that info, we can build the child paths.
1138 : */
1139 698 : if (lrel->rtekind == RTE_SUBQUERY)
1140 674 : build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
1141 : nonunion_pathkeys, &dLeftGroups);
1142 : else
1143 24 : dLeftGroups = lrel->rows;
1144 698 : if (rrel->rtekind == RTE_SUBQUERY)
1145 692 : build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
1146 : nonunion_pathkeys, &dRightGroups);
1147 : else
1148 6 : dRightGroups = rrel->rows;
1149 :
1150 : /* Undo effects of forcing tuple_fraction to 0 */
1151 698 : root->tuple_fraction = save_fraction;
1152 :
1153 : /*
1154 : * For EXCEPT, we must put the left input first. For INTERSECT, either
1155 : * order should give the same results, and we prefer to put the smaller
1156 : * input first in order to (a) minimize the size of the hash table in the
1157 : * hashing case, and (b) improve our chances of exploiting the executor's
1158 : * fast path for empty left-hand input. "Smaller" means the one with the
1159 : * fewer groups.
1160 : */
1161 698 : if (op->op != SETOP_EXCEPT && dLeftGroups > dRightGroups)
1162 : {
1163 : /* need to swap the two inputs */
1164 : RelOptInfo *tmprel;
1165 : List *tmplist;
1166 : double tmpd;
1167 :
1168 36 : tmprel = lrel;
1169 36 : lrel = rrel;
1170 36 : rrel = tmprel;
1171 36 : tmplist = lpath_tlist;
1172 36 : lpath_tlist = rpath_tlist;
1173 36 : rpath_tlist = tmplist;
1174 36 : tmpd = dLeftGroups;
1175 36 : dLeftGroups = dRightGroups;
1176 36 : dRightGroups = tmpd;
1177 : }
1178 :
1179 698 : lpath = lrel->cheapest_total_path;
1180 698 : rpath = rrel->cheapest_total_path;
1181 :
1182 : /* Build result relation. */
1183 698 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
1184 698 : bms_union(lrel->relids, rrel->relids));
1185 :
1186 : /*
1187 : * Create the PathTarget and set the width accordingly. For EXCEPT, since
1188 : * the set op result won't contain rows from the rpath, we only account
1189 : * for the width of the lpath. For INTERSECT, use both input paths.
1190 : */
1191 698 : if (op->op == SETOP_EXCEPT)
1192 452 : result_rel->reltarget = create_setop_pathtarget(root, tlist,
1193 452 : list_make1(lpath));
1194 : else
1195 246 : result_rel->reltarget = create_setop_pathtarget(root, tlist,
1196 246 : list_make2(lpath, rpath));
1197 :
1198 : /* Check for provably empty setop inputs and add short-circuit paths. */
1199 698 : if (op->op == SETOP_EXCEPT)
1200 : {
1201 : /*
1202 : * For EXCEPTs, if the left side is dummy then there's no need to
1203 : * inspect the right-hand side as scanning the right to find tuples to
1204 : * remove won't make the left-hand input any more empty.
1205 : */
1206 452 : if (is_dummy_rel(lrel))
1207 : {
1208 6 : mark_dummy_rel(result_rel);
1209 :
1210 6 : return result_rel;
1211 : }
1212 :
1213 : /* Handle EXCEPTs with dummy right input */
1214 446 : if (is_dummy_rel(rrel))
1215 : {
1216 6 : if (op->all)
1217 : {
1218 : Path *apath;
1219 :
1220 : /*
1221 : * EXCEPT ALL: If the right-hand input is dummy then we can
1222 : * simply scan the left-hand input. To keep createplan.c
1223 : * happy, use a single child Append to handle the translation
1224 : * between the set op targetlist and the targetlist of the
1225 : * left input. The Append will be removed in setrefs.c.
1226 : */
1227 6 : apath = (Path *) create_append_path(root, result_rel, list_make1(lpath),
1228 : NIL, NIL, NULL, 0, false, -1);
1229 :
1230 6 : add_path(result_rel, apath);
1231 :
1232 6 : return result_rel;
1233 : }
1234 : else
1235 : {
1236 : /*
1237 : * To make EXCEPT with a dummy RHS work means having to
1238 : * deduplicate the left input. That could be done with
1239 : * AggPaths, but it doesn't seem worth the effort. Let the
1240 : * normal path generation code below handle this one.
1241 : */
1242 : }
1243 : }
1244 : }
1245 : else
1246 : {
1247 : /*
1248 : * For INTERSECT, if either input is a dummy rel then we can mark the
1249 : * result_rel as dummy since intersecting with an empty relation can
1250 : * never yield any results. This is true regardless of INTERSECT or
1251 : * INTERSECT ALL.
1252 : */
1253 246 : if (is_dummy_rel(lrel) || is_dummy_rel(rrel))
1254 : {
1255 18 : mark_dummy_rel(result_rel);
1256 :
1257 18 : return result_rel;
1258 : }
1259 : }
1260 :
1261 : /*
1262 : * Estimate number of distinct groups that we'll need hashtable entries
1263 : * for; this is the size of the left-hand input for EXCEPT, or the smaller
1264 : * input for INTERSECT. Also estimate the number of eventual output rows.
1265 : * In non-ALL cases, we estimate each group produces one output row; in
1266 : * ALL cases use the relevant relation size. These are worst-case
1267 : * estimates, of course, but we need to be conservative.
1268 : */
1269 668 : if (op->op == SETOP_EXCEPT)
1270 : {
1271 440 : dNumGroups = dLeftGroups;
1272 440 : dNumOutputRows = op->all ? lpath->rows : dNumGroups;
1273 : }
1274 : else
1275 : {
1276 228 : dNumGroups = dLeftGroups;
1277 228 : dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
1278 : }
1279 668 : result_rel->rows = dNumOutputRows;
1280 :
1281 : /* Select the SetOpCmd type */
1282 668 : switch (op->op)
1283 : {
1284 228 : case SETOP_INTERSECT:
1285 228 : cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
1286 228 : break;
1287 440 : case SETOP_EXCEPT:
1288 440 : cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
1289 440 : break;
1290 0 : default:
1291 0 : elog(ERROR, "unrecognized set op: %d", (int) op->op);
1292 : cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
1293 : break;
1294 : }
1295 :
1296 : /*
1297 : * If we can hash, that just requires a SetOp atop the cheapest inputs.
1298 : */
1299 668 : if (can_hash)
1300 : {
1301 608 : path = (Path *) create_setop_path(root,
1302 : result_rel,
1303 : lpath,
1304 : rpath,
1305 : cmd,
1306 : SETOP_HASHED,
1307 : groupList,
1308 : dNumGroups,
1309 : dNumOutputRows);
1310 608 : add_path(result_rel, path);
1311 : }
1312 :
1313 : /*
1314 : * If we can sort, generate the cheapest sorted input paths, and add a
1315 : * SetOp atop those.
1316 : */
1317 668 : if (can_sort)
1318 : {
1319 : List *pathkeys;
1320 : Path *slpath,
1321 : *srpath;
1322 :
1323 : /* First the left input ... */
1324 668 : pathkeys = make_pathkeys_for_sortclauses(root,
1325 : groupList,
1326 : lpath_tlist);
1327 668 : if (pathkeys_contained_in(pathkeys, lpath->pathkeys))
1328 96 : slpath = lpath; /* cheapest path is already sorted */
1329 : else
1330 : {
1331 572 : slpath = get_cheapest_path_for_pathkeys(lrel->pathlist,
1332 : nonunion_pathkeys,
1333 : NULL,
1334 : TOTAL_COST,
1335 : false);
1336 : /* Subquery failed to produce any presorted paths? */
1337 572 : if (slpath == NULL)
1338 144 : slpath = (Path *) create_sort_path(root,
1339 : lpath->parent,
1340 : lpath,
1341 : pathkeys,
1342 : -1.0);
1343 : }
1344 :
1345 : /* and now the same for the right. */
1346 668 : pathkeys = make_pathkeys_for_sortclauses(root,
1347 : groupList,
1348 : rpath_tlist);
1349 668 : if (pathkeys_contained_in(pathkeys, rpath->pathkeys))
1350 108 : srpath = rpath; /* cheapest path is already sorted */
1351 : else
1352 : {
1353 560 : srpath = get_cheapest_path_for_pathkeys(rrel->pathlist,
1354 : nonunion_pathkeys,
1355 : NULL,
1356 : TOTAL_COST,
1357 : false);
1358 : /* Subquery failed to produce any presorted paths? */
1359 560 : if (srpath == NULL)
1360 150 : srpath = (Path *) create_sort_path(root,
1361 : rpath->parent,
1362 : rpath,
1363 : pathkeys,
1364 : -1.0);
1365 : }
1366 :
1367 668 : path = (Path *) create_setop_path(root,
1368 : result_rel,
1369 : slpath,
1370 : srpath,
1371 : cmd,
1372 : SETOP_SORTED,
1373 : groupList,
1374 : dNumGroups,
1375 : dNumOutputRows);
1376 668 : add_path(result_rel, path);
1377 : }
1378 :
1379 668 : return result_rel;
1380 : }
1381 :
1382 : /*
1383 : * Pull up children of a UNION node that are identically-propertied UNIONs,
1384 : * and perform planning of the queries underneath the N-way UNION.
1385 : *
1386 : * The result is a list of RelOptInfos containing Paths for sub-nodes, with
1387 : * one entry for each descendant that is a leaf query or non-identical setop.
1388 : * We also return parallel lists of the childrens' targetlists and
1389 : * is-trivial-tlist flags.
1390 : *
1391 : * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
1392 : * output rows will be lost anyway.
1393 : */
1394 : static List *
1395 4642 : plan_union_children(PlannerInfo *root,
1396 : SetOperationStmt *top_union,
1397 : List *refnames_tlist,
1398 : List **tlist_list,
1399 : List **istrivial_tlist)
1400 : {
1401 4642 : List *pending_rels = list_make1(top_union);
1402 4642 : List *result = NIL;
1403 : List *child_tlist;
1404 : bool trivial_tlist;
1405 :
1406 4642 : *tlist_list = NIL;
1407 4642 : *istrivial_tlist = NIL;
1408 :
1409 25948 : while (pending_rels != NIL)
1410 : {
1411 21306 : Node *setOp = linitial(pending_rels);
1412 :
1413 21306 : pending_rels = list_delete_first(pending_rels);
1414 :
1415 21306 : if (IsA(setOp, SetOperationStmt))
1416 : {
1417 8458 : SetOperationStmt *op = (SetOperationStmt *) setOp;
1418 :
1419 8458 : if (op->op == top_union->op &&
1420 16706 : (op->all == top_union->all || op->all) &&
1421 16676 : equal(op->colTypes, top_union->colTypes) &&
1422 8332 : equal(op->colCollations, top_union->colCollations))
1423 : {
1424 : /* Same UNION, so fold children into parent */
1425 8332 : pending_rels = lcons(op->rarg, pending_rels);
1426 8332 : pending_rels = lcons(op->larg, pending_rels);
1427 8332 : continue;
1428 : }
1429 : }
1430 :
1431 : /*
1432 : * Not same, so plan this child separately.
1433 : *
1434 : * If top_union isn't a UNION ALL, then we are interested in sorted
1435 : * output from the child, so pass top_union as parentOp. Note that
1436 : * this isn't necessarily the child node's immediate SetOperationStmt
1437 : * parent, but that's fine: it's the effective parent.
1438 : */
1439 12974 : result = lappend(result, recurse_set_operations(setOp, root,
1440 12974 : top_union->all ? NULL : top_union,
1441 : top_union->colTypes,
1442 : top_union->colCollations,
1443 : refnames_tlist,
1444 : &child_tlist,
1445 : &trivial_tlist));
1446 12974 : *tlist_list = lappend(*tlist_list, child_tlist);
1447 12974 : *istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
1448 : }
1449 :
1450 4642 : return result;
1451 : }
1452 :
1453 : /*
1454 : * postprocess_setop_rel - perform steps required after adding paths
1455 : */
1456 : static void
1457 22344 : postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
1458 : {
1459 : /*
1460 : * We don't currently worry about allowing FDWs to contribute paths to
1461 : * this relation, but give extensions a chance.
1462 : */
1463 22344 : if (create_upper_paths_hook)
1464 0 : (*create_upper_paths_hook) (root, UPPERREL_SETOP,
1465 : NULL, rel, NULL);
1466 :
1467 : /* Select cheapest path */
1468 22344 : set_cheapest(rel);
1469 22344 : }
1470 :
1471 : /*
1472 : * Generate targetlist for a set-operation plan node
1473 : *
1474 : * colTypes: OID list of set-op's result column datatypes
1475 : * colCollations: OID list of set-op's result column collations
1476 : * varno: varno to use in generated Vars
1477 : * hack_constants: true to copy up constants (see comments in code)
1478 : * input_tlist: targetlist of this node's input node
1479 : * refnames_tlist: targetlist to take column names from
1480 : * trivial_tlist: output parameter, set to true if targetlist is trivial
1481 : */
1482 : static List *
1483 16786 : generate_setop_tlist(List *colTypes, List *colCollations,
1484 : Index varno,
1485 : bool hack_constants,
1486 : List *input_tlist,
1487 : List *refnames_tlist,
1488 : bool *trivial_tlist)
1489 : {
1490 16786 : List *tlist = NIL;
1491 16786 : int resno = 1;
1492 : ListCell *ctlc,
1493 : *cclc,
1494 : *itlc,
1495 : *rtlc;
1496 : TargetEntry *tle;
1497 : Node *expr;
1498 :
1499 16786 : *trivial_tlist = true; /* until proven differently */
1500 :
1501 66698 : forfour(ctlc, colTypes, cclc, colCollations,
1502 : itlc, input_tlist, rtlc, refnames_tlist)
1503 : {
1504 49912 : Oid colType = lfirst_oid(ctlc);
1505 49912 : Oid colColl = lfirst_oid(cclc);
1506 49912 : TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
1507 49912 : TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
1508 :
1509 : Assert(inputtle->resno == resno);
1510 : Assert(reftle->resno == resno);
1511 : Assert(!inputtle->resjunk);
1512 : Assert(!reftle->resjunk);
1513 :
1514 : /*
1515 : * Generate columns referencing input columns and having appropriate
1516 : * data types and column names. Insert datatype coercions where
1517 : * necessary.
1518 : *
1519 : * HACK: constants in the input's targetlist are copied up as-is
1520 : * rather than being referenced as subquery outputs. This is mainly
1521 : * to ensure that when we try to coerce them to the output column's
1522 : * datatype, the right things happen for UNKNOWN constants. But do
1523 : * this only at the first level of subquery-scan plans; we don't want
1524 : * phony constants appearing in the output tlists of upper-level
1525 : * nodes!
1526 : *
1527 : * Note that copying a constant doesn't in itself require us to mark
1528 : * the tlist nontrivial; see trivial_subqueryscan() in setrefs.c.
1529 : */
1530 49912 : if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
1531 15926 : expr = (Node *) inputtle->expr;
1532 : else
1533 135944 : expr = (Node *) makeVar(varno,
1534 33986 : inputtle->resno,
1535 33986 : exprType((Node *) inputtle->expr),
1536 33986 : exprTypmod((Node *) inputtle->expr),
1537 33986 : exprCollation((Node *) inputtle->expr),
1538 : 0);
1539 :
1540 49912 : if (exprType(expr) != colType)
1541 : {
1542 : /*
1543 : * Note: it's not really cool to be applying coerce_to_common_type
1544 : * here; one notable point is that assign_expr_collations never
1545 : * gets run on any generated nodes. For the moment that's not a
1546 : * problem because we force the correct exposed collation below.
1547 : * It would likely be best to make the parser generate the correct
1548 : * output tlist for every set-op to begin with, though.
1549 : */
1550 1644 : expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */
1551 : expr,
1552 : colType,
1553 : "UNION/INTERSECT/EXCEPT");
1554 1644 : *trivial_tlist = false; /* the coercion makes it not trivial */
1555 : }
1556 :
1557 : /*
1558 : * Ensure the tlist entry's exposed collation matches the set-op. This
1559 : * is necessary because plan_set_operations() reports the result
1560 : * ordering as a list of SortGroupClauses, which don't carry collation
1561 : * themselves but just refer to tlist entries. If we don't show the
1562 : * right collation then planner.c might do the wrong thing in
1563 : * higher-level queries.
1564 : *
1565 : * Note we use RelabelType, not CollateExpr, since this expression
1566 : * will reach the executor without any further processing.
1567 : */
1568 49912 : if (exprCollation(expr) != colColl)
1569 : {
1570 13130 : expr = applyRelabelType(expr,
1571 : exprType(expr), exprTypmod(expr), colColl,
1572 : COERCE_IMPLICIT_CAST, -1, false);
1573 13130 : *trivial_tlist = false; /* the relabel makes it not trivial */
1574 : }
1575 :
1576 99824 : tle = makeTargetEntry((Expr *) expr,
1577 49912 : (AttrNumber) resno++,
1578 49912 : pstrdup(reftle->resname),
1579 : false);
1580 :
1581 : /*
1582 : * By convention, all output columns in a setop tree have
1583 : * ressortgroupref equal to their resno. In some cases the ref isn't
1584 : * needed, but this is a cleaner way than modifying the tlist later.
1585 : */
1586 49912 : tle->ressortgroupref = tle->resno;
1587 :
1588 49912 : tlist = lappend(tlist, tle);
1589 : }
1590 :
1591 16786 : return tlist;
1592 : }
1593 :
1594 : /*
1595 : * Generate targetlist for a set-operation Append node
1596 : *
1597 : * colTypes: OID list of set-op's result column datatypes
1598 : * colCollations: OID list of set-op's result column collations
1599 : * input_tlists: list of tlists for sub-plans of the Append
1600 : * refnames_tlist: targetlist to take column names from
1601 : *
1602 : * The entries in the Append's targetlist should always be simple Vars;
1603 : * we just have to make sure they have the right datatypes/typmods/collations.
1604 : * The Vars are always generated with varno 0.
1605 : *
1606 : * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
1607 : * cannot figure out a realistic width for the tlist we make here. But we
1608 : * ought to refactor this code to produce a PathTarget directly, anyway.
1609 : */
1610 : static List *
1611 5576 : generate_append_tlist(List *colTypes, List *colCollations,
1612 : List *input_tlists,
1613 : List *refnames_tlist)
1614 : {
1615 5576 : List *tlist = NIL;
1616 5576 : int resno = 1;
1617 : ListCell *curColType;
1618 : ListCell *curColCollation;
1619 : ListCell *ref_tl_item;
1620 : int colindex;
1621 : TargetEntry *tle;
1622 : Node *expr;
1623 : ListCell *tlistl;
1624 : int32 *colTypmods;
1625 :
1626 : /*
1627 : * First extract typmods to use.
1628 : *
1629 : * If the inputs all agree on type and typmod of a particular column, use
1630 : * that typmod; else use -1.
1631 : */
1632 5576 : colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
1633 :
1634 20418 : foreach(tlistl, input_tlists)
1635 : {
1636 14842 : List *subtlist = (List *) lfirst(tlistl);
1637 : ListCell *subtlistl;
1638 :
1639 14842 : curColType = list_head(colTypes);
1640 14842 : colindex = 0;
1641 58244 : foreach(subtlistl, subtlist)
1642 : {
1643 43402 : TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
1644 :
1645 : Assert(!subtle->resjunk);
1646 : Assert(curColType != NULL);
1647 43402 : if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
1648 : {
1649 : /* If first subplan, copy the typmod; else compare */
1650 43402 : int32 subtypmod = exprTypmod((Node *) subtle->expr);
1651 :
1652 43402 : if (tlistl == list_head(input_tlists))
1653 15366 : colTypmods[colindex] = subtypmod;
1654 28036 : else if (subtypmod != colTypmods[colindex])
1655 12 : colTypmods[colindex] = -1;
1656 : }
1657 : else
1658 : {
1659 : /* types disagree, so force typmod to -1 */
1660 0 : colTypmods[colindex] = -1;
1661 : }
1662 43402 : curColType = lnext(colTypes, curColType);
1663 43402 : colindex++;
1664 : }
1665 : Assert(curColType == NULL);
1666 : }
1667 :
1668 : /*
1669 : * Now we can build the tlist for the Append.
1670 : */
1671 5576 : colindex = 0;
1672 20942 : forthree(curColType, colTypes, curColCollation, colCollations,
1673 : ref_tl_item, refnames_tlist)
1674 : {
1675 15366 : Oid colType = lfirst_oid(curColType);
1676 15366 : int32 colTypmod = colTypmods[colindex++];
1677 15366 : Oid colColl = lfirst_oid(curColCollation);
1678 15366 : TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
1679 :
1680 : Assert(reftle->resno == resno);
1681 : Assert(!reftle->resjunk);
1682 15366 : expr = (Node *) makeVar(0,
1683 : resno,
1684 : colType,
1685 : colTypmod,
1686 : colColl,
1687 : 0);
1688 30732 : tle = makeTargetEntry((Expr *) expr,
1689 15366 : (AttrNumber) resno++,
1690 15366 : pstrdup(reftle->resname),
1691 : false);
1692 :
1693 : /*
1694 : * By convention, all output columns in a setop tree have
1695 : * ressortgroupref equal to their resno. In some cases the ref isn't
1696 : * needed, but this is a cleaner way than modifying the tlist later.
1697 : */
1698 15366 : tle->ressortgroupref = tle->resno;
1699 :
1700 15366 : tlist = lappend(tlist, tle);
1701 : }
1702 :
1703 5576 : pfree(colTypmods);
1704 :
1705 5576 : return tlist;
1706 : }
1707 :
1708 : /*
1709 : * generate_setop_grouplist
1710 : * Build a SortGroupClause list defining the sort/grouping properties
1711 : * of the setop's output columns.
1712 : *
1713 : * Parse analysis already determined the properties and built a suitable
1714 : * list, except that the entries do not have sortgrouprefs set because
1715 : * the parser output representation doesn't include a tlist for each
1716 : * setop. So what we need to do here is copy that list and install
1717 : * proper sortgrouprefs into it (copying those from the targetlist).
1718 : */
1719 : static List *
1720 5018 : generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
1721 : {
1722 5018 : List *grouplist = copyObject(op->groupClauses);
1723 : ListCell *lg;
1724 : ListCell *lt;
1725 :
1726 5018 : lg = list_head(grouplist);
1727 19030 : foreach(lt, targetlist)
1728 : {
1729 14012 : TargetEntry *tle = (TargetEntry *) lfirst(lt);
1730 : SortGroupClause *sgc;
1731 :
1732 : Assert(!tle->resjunk);
1733 :
1734 : /* non-resjunk columns should have sortgroupref = resno */
1735 : Assert(tle->ressortgroupref == tle->resno);
1736 :
1737 : /* non-resjunk columns should have grouping clauses */
1738 : Assert(lg != NULL);
1739 14012 : sgc = (SortGroupClause *) lfirst(lg);
1740 14012 : lg = lnext(grouplist, lg);
1741 : Assert(sgc->tleSortGroupRef == 0);
1742 :
1743 14012 : sgc->tleSortGroupRef = tle->ressortgroupref;
1744 : }
1745 : Assert(lg == NULL);
1746 5018 : return grouplist;
1747 : }
1748 :
1749 : /*
1750 : * create_setop_pathtarget
1751 : * Do the normal create_pathtarget() work, plus set the resulting
1752 : * PathTarget's width to the average width of the Paths in child_pathlist
1753 : * weighted using the estimated row count of each path.
1754 : *
1755 : * Note: This is required because set op target lists use varno==0, which
1756 : * results in a type default width estimate rather than one that's based on
1757 : * statistics of the columns from the set op children.
1758 : */
1759 : static PathTarget *
1760 5340 : create_setop_pathtarget(PlannerInfo *root, List *tlist, List *child_pathlist)
1761 : {
1762 : PathTarget *reltarget;
1763 : ListCell *lc;
1764 5340 : double parent_rows = 0;
1765 5340 : double parent_size = 0;
1766 :
1767 5340 : reltarget = create_pathtarget(root, tlist);
1768 :
1769 : /* Calculate the total rows and total size. */
1770 19206 : foreach(lc, child_pathlist)
1771 : {
1772 13866 : Path *path = (Path *) lfirst(lc);
1773 :
1774 13866 : parent_rows += path->rows;
1775 13866 : parent_size += path->parent->reltarget->width * path->rows;
1776 : }
1777 :
1778 5340 : if (parent_rows > 0)
1779 5322 : reltarget->width = rint(parent_size / parent_rows);
1780 :
1781 5340 : return reltarget;
1782 : }
|