Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * prepunion.c
4 : * Routines to plan set-operation queries. The filename is a leftover
5 : * from a time when only UNIONs were implemented.
6 : *
7 : * There are two code paths in the planner for set-operation queries.
8 : * If a subquery consists entirely of simple UNION ALL operations, it
9 : * is converted into an "append relation". Otherwise, it is handled
10 : * by the general code in this module (plan_set_operations and its
11 : * subroutines). There is some support code here for the append-relation
12 : * case, but most of the heavy lifting for that is done elsewhere,
13 : * notably in prepjointree.c and allpaths.c.
14 : *
15 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/optimizer/prep/prepunion.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include <math.h>
27 :
28 : #include "access/htup_details.h"
29 : #include "catalog/pg_type.h"
30 : #include "miscadmin.h"
31 : #include "nodes/makefuncs.h"
32 : #include "nodes/nodeFuncs.h"
33 : #include "optimizer/cost.h"
34 : #include "optimizer/pathnode.h"
35 : #include "optimizer/paths.h"
36 : #include "optimizer/planner.h"
37 : #include "optimizer/prep.h"
38 : #include "optimizer/tlist.h"
39 : #include "parser/parse_coerce.h"
40 : #include "port/pg_bitutils.h"
41 : #include "utils/selfuncs.h"
42 :
43 :
44 : static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
45 : SetOperationStmt *parentOp,
46 : List *colTypes, List *colCollations,
47 : List *refnames_tlist,
48 : List **pTargetList,
49 : bool *istrivial_tlist);
50 : static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
51 : PlannerInfo *root,
52 : List *refnames_tlist,
53 : List **pTargetList);
54 : static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
55 : bool trivial_tlist, List *child_tlist,
56 : List *interesting_pathkeys,
57 : double *pNumGroups);
58 : static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
59 : List *refnames_tlist,
60 : List **pTargetList);
61 : static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
62 : List *refnames_tlist,
63 : List **pTargetList);
64 : static List *plan_union_children(PlannerInfo *root,
65 : SetOperationStmt *top_union,
66 : List *refnames_tlist,
67 : List **tlist_list,
68 : List **istrivial_tlist);
69 : static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
70 : static List *generate_setop_tlist(List *colTypes, List *colCollations,
71 : Index varno,
72 : bool hack_constants,
73 : List *input_tlist,
74 : List *refnames_tlist,
75 : bool *trivial_tlist);
76 : static List *generate_append_tlist(List *colTypes, List *colCollations,
77 : List *input_tlists,
78 : List *refnames_tlist);
79 : static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
80 : static PathTarget *create_setop_pathtarget(PlannerInfo *root, List *tlist,
81 : List *child_pathlist);
82 :
83 :
84 : /*
85 : * plan_set_operations
86 : *
87 : * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
88 : *
89 : * This routine only deals with the setOperations tree of the given query.
90 : * Any top-level ORDER BY requested in root->parse->sortClause will be handled
91 : * when we return to grouping_planner; likewise for LIMIT.
92 : *
93 : * What we return is an "upperrel" RelOptInfo containing at least one Path
94 : * that implements the set-operation tree. In addition, root->processed_tlist
95 : * receives a targetlist representing the output of the topmost setop node.
96 : */
97 : RelOptInfo *
98 4836 : plan_set_operations(PlannerInfo *root)
99 : {
100 4836 : Query *parse = root->parse;
101 4836 : SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
102 : Node *node;
103 : RangeTblEntry *leftmostRTE;
104 : Query *leftmostQuery;
105 : RelOptInfo *setop_rel;
106 : List *top_tlist;
107 :
108 : Assert(topop);
109 :
110 : /* check for unsupported stuff */
111 : Assert(parse->jointree->fromlist == NIL);
112 : Assert(parse->jointree->quals == NULL);
113 : Assert(parse->groupClause == NIL);
114 : Assert(parse->havingQual == NULL);
115 : Assert(parse->windowClause == NIL);
116 : Assert(parse->distinctClause == NIL);
117 :
118 : /*
119 : * In the outer query level, equivalence classes are limited to classes
120 : * which define that the top-level target entry is equivalent to the
121 : * corresponding child target entry. There won't be any equivalence class
122 : * merging. Mark that merging is complete to allow us to make pathkeys.
123 : */
124 : Assert(root->eq_classes == NIL);
125 4836 : root->ec_merging_done = true;
126 :
127 : /*
128 : * We'll need to build RelOptInfos for each of the leaf subqueries, which
129 : * are RTE_SUBQUERY rangetable entries in this Query. Prepare the index
130 : * arrays for those, and for AppendRelInfos in case they're needed.
131 : */
132 4836 : setup_simple_rel_arrays(root);
133 :
134 : /*
135 : * Find the leftmost component Query. We need to use its column names for
136 : * all generated tlists (else SELECT INTO won't work right).
137 : */
138 4836 : node = topop->larg;
139 8066 : while (node && IsA(node, SetOperationStmt))
140 3230 : node = ((SetOperationStmt *) node)->larg;
141 : Assert(node && IsA(node, RangeTblRef));
142 4836 : leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
143 4836 : leftmostQuery = leftmostRTE->subquery;
144 : Assert(leftmostQuery != NULL);
145 :
146 : /*
147 : * If the topmost node is a recursive union, it needs special processing.
148 : */
149 4836 : if (root->hasRecursion)
150 : {
151 637 : setop_rel = generate_recursion_path(topop, root,
152 : leftmostQuery->targetList,
153 : &top_tlist);
154 : }
155 : else
156 : {
157 : bool trivial_tlist;
158 :
159 : /*
160 : * Recurse on setOperations tree to generate paths for set ops. The
161 : * final output paths should have just the column types shown as the
162 : * output from the top-level node.
163 : */
164 4199 : setop_rel = recurse_set_operations((Node *) topop, root,
165 : NULL, /* no parent */
166 : topop->colTypes, topop->colCollations,
167 : leftmostQuery->targetList,
168 : &top_tlist,
169 : &trivial_tlist);
170 : }
171 :
172 : /* Must return the built tlist into root->processed_tlist. */
173 4832 : root->processed_tlist = top_tlist;
174 :
175 4832 : return setop_rel;
176 : }
177 :
178 : /*
179 : * recurse_set_operations
180 : * Recursively handle one step in a tree of set operations
181 : *
182 : * setOp: current step (could be a SetOperationStmt or a leaf RangeTblRef)
183 : * parentOp: parent step, or NULL if none (but see below)
184 : * colTypes: OID list of set-op's result column datatypes
185 : * colCollations: OID list of set-op's result column collations
186 : * refnames_tlist: targetlist to take column names from
187 : *
188 : * parentOp should be passed as NULL unless that step is interested in
189 : * getting sorted output from this step. ("Sorted" means "sorted according
190 : * to the default btree opclasses of the result column datatypes".)
191 : *
192 : * Returns a RelOptInfo for the subtree, as well as these output parameters:
193 : * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
194 : * *istrivial_tlist: true if, and only if, datatypes between parent and child
195 : * match.
196 : *
197 : * If setOp is a leaf node, this function plans the sub-query but does
198 : * not populate the pathlist of the returned RelOptInfo. The caller will
199 : * generate SubqueryScan paths using useful path(s) of the subquery (see
200 : * build_setop_child_paths). But this function does build the paths for
201 : * set-operation nodes.
202 : *
203 : * The pTargetList output parameter is mostly redundant with the pathtarget
204 : * of the returned RelOptInfo, but for the moment we need it because much of
205 : * the logic in this file depends on flag columns being marked resjunk.
206 : * XXX Now that there are no flag columns and hence no resjunk columns, we
207 : * could probably refactor this file to deal only in pathtargets.
208 : *
209 : * We don't have to care about typmods here: the only allowed difference
210 : * between set-op input and output typmods is input is a specific typmod
211 : * and output is -1, and that does not require a coercion.
212 : */
213 : static RelOptInfo *
214 17306 : recurse_set_operations(Node *setOp, PlannerInfo *root,
215 : SetOperationStmt *parentOp,
216 : List *colTypes, List *colCollations,
217 : List *refnames_tlist,
218 : List **pTargetList,
219 : bool *istrivial_tlist)
220 : {
221 : RelOptInfo *rel;
222 :
223 17306 : *istrivial_tlist = true; /* for now */
224 :
225 : /* Guard against stack overflow due to overly complex setop nests */
226 17306 : check_stack_depth();
227 :
228 17306 : if (IsA(setOp, RangeTblRef))
229 : {
230 12962 : RangeTblRef *rtr = (RangeTblRef *) setOp;
231 12962 : RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
232 12962 : Query *subquery = rte->subquery;
233 : PlannerInfo *subroot;
234 : List *tlist;
235 : bool trivial_tlist;
236 : char *plan_name;
237 :
238 : Assert(subquery != NULL);
239 :
240 : /* Build a RelOptInfo for this leaf subquery. */
241 12962 : rel = build_simple_rel(root, rtr->rtindex, NULL);
242 :
243 : /* plan_params should not be in use in current query level */
244 : Assert(root->plan_params == NIL);
245 :
246 : /*
247 : * Generate a subroot and Paths for the subquery. If we have a
248 : * parentOp, pass that down to encourage subquery_planner to consider
249 : * suitably-sorted Paths.
250 : */
251 12962 : plan_name = choose_plan_name(root->glob, "setop", true);
252 12962 : subroot = rel->subroot = subquery_planner(root->glob, subquery,
253 : plan_name, root, NULL,
254 : false, root->tuple_fraction,
255 : parentOp);
256 :
257 : /*
258 : * It should not be possible for the primitive query to contain any
259 : * cross-references to other primitive queries in the setop tree.
260 : */
261 12962 : if (root->plan_params)
262 0 : elog(ERROR, "unexpected outer reference in set operation subquery");
263 :
264 : /* Figure out the appropriate target list for this subquery. */
265 12962 : tlist = generate_setop_tlist(colTypes, colCollations,
266 12962 : rtr->rtindex,
267 : true,
268 : subroot->processed_tlist,
269 : refnames_tlist,
270 : &trivial_tlist);
271 12962 : rel->reltarget = create_pathtarget(root, tlist);
272 :
273 : /* Return the fully-fledged tlist to caller, too */
274 12962 : *pTargetList = tlist;
275 12962 : *istrivial_tlist = trivial_tlist;
276 : }
277 4344 : else if (IsA(setOp, SetOperationStmt))
278 : {
279 4344 : SetOperationStmt *op = (SetOperationStmt *) setOp;
280 :
281 : /* UNIONs are much different from INTERSECT/EXCEPT */
282 4344 : if (op->op == SETOP_UNION)
283 3720 : rel = generate_union_paths(op, root,
284 : refnames_tlist,
285 : pTargetList);
286 : else
287 624 : rel = generate_nonunion_paths(op, root,
288 : refnames_tlist,
289 : pTargetList);
290 :
291 : /*
292 : * If necessary, add a Result node to project the caller-requested
293 : * output columns.
294 : *
295 : * XXX you don't really want to know about this: setrefs.c will apply
296 : * fix_upper_expr() to the Result node's tlist. This would fail if the
297 : * Vars generated by generate_setop_tlist() were not exactly equal()
298 : * to the corresponding tlist entries of the subplan. However, since
299 : * the subplan was generated by generate_union_paths() or
300 : * generate_nonunion_paths(), and hence its tlist was generated by
301 : * generate_append_tlist() or generate_setop_tlist(), this will work.
302 : * We just tell generate_setop_tlist() to use varno 0.
303 : */
304 4344 : if (!tlist_same_datatypes(*pTargetList, colTypes, false) ||
305 4329 : !tlist_same_collations(*pTargetList, colCollations, false))
306 : {
307 : PathTarget *target;
308 : bool trivial_tlist;
309 : ListCell *lc;
310 :
311 15 : *pTargetList = generate_setop_tlist(colTypes, colCollations,
312 : 0,
313 : false,
314 : *pTargetList,
315 : refnames_tlist,
316 : &trivial_tlist);
317 15 : *istrivial_tlist = trivial_tlist;
318 15 : target = create_pathtarget(root, *pTargetList);
319 :
320 : /* Apply projection to each path */
321 30 : foreach(lc, rel->pathlist)
322 : {
323 15 : Path *subpath = (Path *) lfirst(lc);
324 : Path *path;
325 :
326 : Assert(subpath->param_info == NULL);
327 15 : path = apply_projection_to_path(root, subpath->parent,
328 : subpath, target);
329 : /* If we had to add a Result, path is different from subpath */
330 15 : if (path != subpath)
331 15 : lfirst(lc) = path;
332 : }
333 :
334 : /* Apply projection to each partial path */
335 15 : foreach(lc, rel->partial_pathlist)
336 : {
337 0 : Path *subpath = (Path *) lfirst(lc);
338 : Path *path;
339 :
340 : Assert(subpath->param_info == NULL);
341 :
342 : /* avoid apply_projection_to_path, in case of multiple refs */
343 0 : path = (Path *) create_projection_path(root, subpath->parent,
344 : subpath, target);
345 0 : lfirst(lc) = path;
346 : }
347 : }
348 4344 : postprocess_setop_rel(root, rel);
349 : }
350 : else
351 : {
352 0 : elog(ERROR, "unrecognized node type: %d",
353 : (int) nodeTag(setOp));
354 : *pTargetList = NIL;
355 : rel = NULL; /* keep compiler quiet */
356 : }
357 :
358 17306 : return rel;
359 : }
360 :
361 : /*
362 : * Generate paths for a recursive UNION node
363 : */
364 : static RelOptInfo *
365 637 : generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
366 : List *refnames_tlist,
367 : List **pTargetList)
368 : {
369 : RelOptInfo *result_rel;
370 : Path *path;
371 : RelOptInfo *lrel,
372 : *rrel;
373 : Path *lpath;
374 : Path *rpath;
375 : List *lpath_tlist;
376 : bool lpath_trivial_tlist;
377 : List *rpath_tlist;
378 : bool rpath_trivial_tlist;
379 : List *tlist;
380 : List *groupList;
381 : double dNumGroups;
382 :
383 : /* Parser should have rejected other cases */
384 637 : if (setOp->op != SETOP_UNION)
385 0 : elog(ERROR, "only UNION queries can be recursive");
386 : /* Worktable ID should be assigned */
387 : Assert(root->wt_param_id >= 0);
388 :
389 : /*
390 : * Unlike a regular UNION node, process the left and right inputs
391 : * separately without any intention of combining them into one Append.
392 : */
393 637 : lrel = recurse_set_operations(setOp->larg, root,
394 : NULL, /* no value in sorted results */
395 : setOp->colTypes, setOp->colCollations,
396 : refnames_tlist,
397 : &lpath_tlist,
398 : &lpath_trivial_tlist);
399 637 : if (lrel->rtekind == RTE_SUBQUERY)
400 637 : build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
401 : NIL, NULL);
402 637 : lpath = lrel->cheapest_total_path;
403 : /* The right path will want to look at the left one ... */
404 637 : root->non_recursive_path = lpath;
405 637 : rrel = recurse_set_operations(setOp->rarg, root,
406 : NULL, /* no value in sorted results */
407 : setOp->colTypes, setOp->colCollations,
408 : refnames_tlist,
409 : &rpath_tlist,
410 : &rpath_trivial_tlist);
411 637 : if (rrel->rtekind == RTE_SUBQUERY)
412 632 : build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
413 : NIL, NULL);
414 637 : rpath = rrel->cheapest_total_path;
415 637 : root->non_recursive_path = NULL;
416 :
417 : /*
418 : * Generate tlist for RecursiveUnion path node --- same as in Append cases
419 : */
420 637 : tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations,
421 : list_make2(lpath_tlist, rpath_tlist),
422 : refnames_tlist);
423 :
424 637 : *pTargetList = tlist;
425 :
426 : /* Build result relation. */
427 637 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
428 637 : bms_union(lrel->relids, rrel->relids));
429 637 : result_rel->reltarget = create_pathtarget(root, tlist);
430 :
431 : /*
432 : * If UNION, identify the grouping operators
433 : */
434 637 : if (setOp->all)
435 : {
436 417 : groupList = NIL;
437 417 : dNumGroups = 0;
438 : }
439 : else
440 : {
441 : /* Identify the grouping semantics */
442 220 : groupList = generate_setop_grouplist(setOp, tlist);
443 :
444 : /* We only support hashing here */
445 220 : if (!grouping_is_hashable(groupList))
446 4 : ereport(ERROR,
447 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
448 : errmsg("could not implement recursive UNION"),
449 : errdetail("All column datatypes must be hashable.")));
450 :
451 : /*
452 : * For the moment, take the number of distinct groups as equal to the
453 : * total input size, ie, the worst case.
454 : */
455 216 : dNumGroups = lpath->rows + rpath->rows * 10;
456 : }
457 :
458 : /*
459 : * And make the path node.
460 : */
461 633 : path = (Path *) create_recursiveunion_path(root,
462 : result_rel,
463 : lpath,
464 : rpath,
465 633 : result_rel->reltarget,
466 : groupList,
467 : root->wt_param_id,
468 : dNumGroups);
469 :
470 633 : add_path(result_rel, path);
471 633 : postprocess_setop_rel(root, result_rel);
472 633 : return result_rel;
473 : }
474 :
475 : /*
476 : * build_setop_child_paths
477 : * Build paths for the set op child relation denoted by 'rel'.
478 : *
479 : * 'rel' is an RTE_SUBQUERY relation. We have already generated paths within
480 : * the subquery's subroot; the task here is to create SubqueryScan paths for
481 : * 'rel', representing scans of the useful subquery paths.
482 : *
483 : * interesting_pathkeys: if not NIL, also include paths that suit these
484 : * pathkeys, sorting any unsorted paths as required.
485 : * *pNumGroups: if not NULL, we estimate the number of distinct groups
486 : * in the result, and store it there.
487 : */
488 : static void
489 12962 : build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
490 : bool trivial_tlist, List *child_tlist,
491 : List *interesting_pathkeys, double *pNumGroups)
492 : {
493 : RelOptInfo *final_rel;
494 12962 : List *setop_pathkeys = rel->subroot->setop_pathkeys;
495 : ListCell *lc;
496 :
497 : /* it can't be a set op child rel if it's not a subquery */
498 : Assert(rel->rtekind == RTE_SUBQUERY);
499 :
500 : /* when sorting is needed, add child rel equivalences */
501 12962 : if (interesting_pathkeys != NIL)
502 10213 : add_setop_child_rel_equivalences(root,
503 : rel,
504 : child_tlist,
505 : interesting_pathkeys);
506 :
507 : /*
508 : * Mark rel with estimated output rows, width, etc. Note that we have to
509 : * do this before generating outer-query paths, else cost_subqueryscan is
510 : * not happy.
511 : */
512 12962 : set_subquery_size_estimates(root, rel);
513 :
514 : /*
515 : * Since we may want to add a partial path to this relation, we must set
516 : * its consider_parallel flag correctly.
517 : */
518 12962 : final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
519 12962 : rel->consider_parallel = final_rel->consider_parallel;
520 :
521 : /* Generate subquery scan paths for any interesting path in final_rel */
522 33969 : foreach(lc, final_rel->pathlist)
523 : {
524 21007 : Path *subpath = (Path *) lfirst(lc);
525 : List *pathkeys;
526 21007 : Path *cheapest_input_path = final_rel->cheapest_total_path;
527 : bool is_sorted;
528 : int presorted_keys;
529 :
530 : /* If the input rel is dummy, propagate that to this query level */
531 21007 : if (is_dummy_rel(final_rel))
532 : {
533 68 : mark_dummy_rel(rel);
534 2845 : continue;
535 : }
536 :
537 : /*
538 : * Include the cheapest path as-is so that the set operation can be
539 : * cheaply implemented using a method which does not require the input
540 : * to be sorted.
541 : */
542 20939 : if (subpath == cheapest_input_path)
543 : {
544 : /* Convert subpath's pathkeys to outer representation */
545 12894 : pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
546 : make_tlist_from_pathtarget(subpath->pathtarget));
547 :
548 : /* Generate outer path using this subpath */
549 12894 : add_path(rel, (Path *) create_subqueryscan_path(root,
550 : rel,
551 : subpath,
552 : trivial_tlist,
553 : pathkeys,
554 : NULL));
555 : }
556 :
557 : /* skip dealing with sorted paths if the setop doesn't need them */
558 20939 : if (interesting_pathkeys == NIL)
559 2767 : continue;
560 :
561 : /*
562 : * Create paths to suit final sort order required for setop_pathkeys.
563 : * Here we'll sort the cheapest input path (if not sorted already) and
564 : * incremental sort any paths which are partially sorted.
565 : */
566 18172 : is_sorted = pathkeys_count_contained_in(setop_pathkeys,
567 : subpath->pathkeys,
568 : &presorted_keys);
569 :
570 18172 : if (!is_sorted)
571 : {
572 12055 : double limittuples = rel->subroot->limit_tuples;
573 :
574 : /*
575 : * Try at least sorting the cheapest path and also try
576 : * incrementally sorting any path which is partially sorted
577 : * already (no need to deal with paths which have presorted keys
578 : * when incremental sort is disabled unless it's the cheapest
579 : * input path).
580 : */
581 12055 : if (subpath != cheapest_input_path &&
582 2676 : (presorted_keys == 0 || !enable_incremental_sort))
583 10 : continue;
584 :
585 : /*
586 : * We've no need to consider both a sort and incremental sort.
587 : * We'll just do a sort if there are no presorted keys and an
588 : * incremental sort when there are presorted keys.
589 : */
590 12045 : if (presorted_keys == 0 || !enable_incremental_sort)
591 9379 : subpath = (Path *) create_sort_path(rel->subroot,
592 : final_rel,
593 : subpath,
594 : setop_pathkeys,
595 : limittuples);
596 : else
597 2666 : subpath = (Path *) create_incremental_sort_path(rel->subroot,
598 : final_rel,
599 : subpath,
600 : setop_pathkeys,
601 : presorted_keys,
602 : limittuples);
603 : }
604 :
605 : /*
606 : * subpath is now sorted, so add it to the pathlist. We already added
607 : * the cheapest_input_path above, so don't add it again unless we just
608 : * sorted it.
609 : */
610 18162 : if (subpath != cheapest_input_path)
611 : {
612 : /* Convert subpath's pathkeys to outer representation */
613 17388 : pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
614 : make_tlist_from_pathtarget(subpath->pathtarget));
615 :
616 : /* Generate outer path using this subpath */
617 17388 : add_path(rel, (Path *) create_subqueryscan_path(root,
618 : rel,
619 : subpath,
620 : trivial_tlist,
621 : pathkeys,
622 : NULL));
623 : }
624 : }
625 :
626 : /* if consider_parallel is false, there should be no partial paths */
627 : Assert(final_rel->consider_parallel ||
628 : final_rel->partial_pathlist == NIL);
629 :
630 : /*
631 : * If we have a partial path for the child relation, we can use that to
632 : * build a partial path for this relation. But there's no point in
633 : * considering any path but the cheapest.
634 : */
635 12962 : if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
636 8961 : final_rel->partial_pathlist != NIL)
637 : {
638 : Path *partial_subpath;
639 : Path *partial_path;
640 :
641 15 : partial_subpath = linitial(final_rel->partial_pathlist);
642 : partial_path = (Path *)
643 15 : create_subqueryscan_path(root, rel, partial_subpath,
644 : trivial_tlist,
645 : NIL, NULL);
646 15 : add_partial_path(rel, partial_path);
647 : }
648 :
649 12962 : postprocess_setop_rel(root, rel);
650 :
651 : /*
652 : * Estimate number of groups if caller wants it. If the subquery used
653 : * grouping or aggregation, its output is probably mostly unique anyway;
654 : * otherwise do statistical estimation.
655 : *
656 : * XXX you don't really want to know about this: we do the estimation
657 : * using the subroot->parse's original targetlist expressions, not the
658 : * subroot->processed_tlist which might seem more appropriate. The reason
659 : * is that if the subquery is itself a setop, it may return a
660 : * processed_tlist containing "varno 0" Vars generated by
661 : * generate_append_tlist, and those would confuse estimate_num_groups
662 : * mightily. We ought to get rid of the "varno 0" hack, but that requires
663 : * a redesign of the parsetree representation of setops, so that there can
664 : * be an RTE corresponding to each setop's output. Note, we use this not
665 : * subquery's targetlist but subroot->parse's targetlist, because it was
666 : * revised by self-join removal. subquery's targetlist might contain the
667 : * references to the removed relids.
668 : */
669 12962 : if (pNumGroups)
670 : {
671 10503 : PlannerInfo *subroot = rel->subroot;
672 10503 : Query *subquery = subroot->parse;
673 :
674 10503 : if (subquery->groupClause || subquery->groupingSets ||
675 10503 : subquery->distinctClause || subroot->hasHavingQual ||
676 10493 : subquery->hasAggs)
677 10 : *pNumGroups = rel->cheapest_total_path->rows;
678 : else
679 10493 : *pNumGroups = estimate_num_groups(subroot,
680 10493 : get_tlist_exprs(subroot->parse->targetList, false),
681 10493 : rel->cheapest_total_path->rows,
682 : NULL,
683 : NULL);
684 : }
685 12962 : }
686 :
687 : /*
688 : * Generate paths for a UNION or UNION ALL node
689 : */
690 : static RelOptInfo *
691 3720 : generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
692 : List *refnames_tlist,
693 : List **pTargetList)
694 : {
695 3720 : Relids relids = NULL;
696 : RelOptInfo *result_rel;
697 : ListCell *lc;
698 : ListCell *lc2;
699 : ListCell *lc3;
700 3720 : AppendPathInput cheapest = {0};
701 3720 : AppendPathInput ordered = {0};
702 3720 : AppendPathInput partial = {0};
703 3720 : bool partial_paths_valid = true;
704 3720 : bool consider_parallel = true;
705 : List *rellist;
706 : List *tlist_list;
707 : List *trivial_tlist_list;
708 : List *tlist;
709 3720 : List *groupList = NIL;
710 : Path *apath;
711 3720 : Path *gpath = NULL;
712 3720 : bool try_sorted = false;
713 3720 : List *union_pathkeys = NIL;
714 3720 : double dNumChildGroups = 0;
715 :
716 : /*
717 : * If any of my children are identical UNION nodes (same op, all-flag, and
718 : * colTypes/colCollations) then they can be merged into this node so that
719 : * we generate only one Append/MergeAppend and unique-ification for the
720 : * lot. Recurse to find such nodes.
721 : */
722 3720 : rellist = plan_union_children(root,
723 : op,
724 : refnames_tlist,
725 : &tlist_list,
726 : &trivial_tlist_list);
727 :
728 : /*
729 : * Generate tlist for Append/MergeAppend plan node.
730 : *
731 : * The tlist for an Append plan isn't important as far as the Append is
732 : * concerned, but we must make it look real anyway for the benefit of the
733 : * next plan level up.
734 : */
735 3720 : tlist = generate_append_tlist(op->colTypes, op->colCollations,
736 : tlist_list, refnames_tlist);
737 3720 : *pTargetList = tlist;
738 :
739 : /* For UNIONs (not UNION ALL), try sorting, if sorting is possible */
740 3720 : if (!op->all)
741 : {
742 : /* Identify the grouping semantics */
743 3275 : groupList = generate_setop_grouplist(op, tlist);
744 :
745 3275 : if (grouping_is_sortable(op->groupClauses))
746 : {
747 3215 : try_sorted = true;
748 : /* Determine the pathkeys for sorting by the whole target list */
749 3215 : union_pathkeys = make_pathkeys_for_sortclauses(root, groupList,
750 : tlist);
751 :
752 3215 : root->query_pathkeys = union_pathkeys;
753 : }
754 : }
755 :
756 : /*
757 : * Now that we've got the append target list, we can build the union child
758 : * paths.
759 : */
760 14305 : forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
761 : {
762 10585 : RelOptInfo *rel = lfirst(lc);
763 10585 : bool trivial_tlist = lfirst_int(lc2);
764 10585 : List *child_tlist = lfirst_node(List, lc3);
765 10585 : double childGroups = 0;
766 :
767 : /* only build paths for the union children */
768 10585 : if (rel->rtekind == RTE_SUBQUERY)
769 10475 : build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
770 : union_pathkeys,
771 10475 : op->all ? NULL : &childGroups);
772 : else
773 110 : childGroups = rel->rows;
774 :
775 : /*
776 : * For UNION (not UNION ALL), accumulate the per-child distinct-group
777 : * estimates. This sum is the basis for the UNION's output estimate
778 : * below: since distinct(A union B) <= distinct(A) + distinct(B), the
779 : * union cannot have more distinct rows than its children do in total.
780 : * Children that are known to be empty contribute nothing, so skip
781 : * them.
782 : */
783 10585 : if (!op->all && !is_dummy_rel(rel))
784 9265 : dNumChildGroups += childGroups;
785 : }
786 :
787 : /* Build path lists and relid set. */
788 14305 : foreach(lc, rellist)
789 : {
790 10585 : RelOptInfo *rel = lfirst(lc);
791 : Path *ordered_path;
792 :
793 : /*
794 : * Record the relids so that we can identify the correct
795 : * UPPERREL_SETOP RelOptInfo below.
796 : */
797 10585 : relids = bms_add_members(relids, rel->relids);
798 :
799 : /* Skip any UNION children that are proven not to yield any rows */
800 10585 : if (is_dummy_rel(rel))
801 38 : continue;
802 :
803 21094 : cheapest.subpaths = lappend(cheapest.subpaths,
804 10547 : rel->cheapest_total_path);
805 :
806 10547 : if (try_sorted)
807 : {
808 3477 : ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
809 : union_pathkeys,
810 : NULL,
811 : TOTAL_COST,
812 : false);
813 :
814 3477 : if (ordered_path != NULL)
815 519 : ordered.subpaths = lappend(ordered.subpaths, ordered_path);
816 : else
817 : {
818 : /*
819 : * If we can't find a sorted path, just give up trying to
820 : * generate a list of correctly sorted child paths. This can
821 : * happen when type coercion was added to the targetlist due
822 : * to mismatching types from the union children.
823 : */
824 2958 : try_sorted = false;
825 : }
826 : }
827 :
828 10547 : if (consider_parallel)
829 : {
830 7603 : if (!rel->consider_parallel)
831 : {
832 2826 : consider_parallel = false;
833 2826 : partial_paths_valid = false;
834 : }
835 4777 : else if (rel->partial_pathlist == NIL)
836 4767 : partial_paths_valid = false;
837 : else
838 10 : partial.partial_subpaths = lappend(partial.partial_subpaths,
839 10 : linitial(rel->partial_pathlist));
840 : }
841 : }
842 :
843 : /* Build result relation. */
844 3720 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
845 3720 : result_rel->reltarget = create_setop_pathtarget(root, tlist,
846 : cheapest.subpaths);
847 3720 : result_rel->consider_parallel = consider_parallel;
848 3720 : result_rel->consider_startup = (root->tuple_fraction > 0);
849 :
850 : /* If all UNION children were dummy rels, make the resulting rel dummy */
851 3720 : if (cheapest.subpaths == NIL)
852 : {
853 5 : mark_dummy_rel(result_rel);
854 :
855 5 : return result_rel;
856 : }
857 :
858 : /*
859 : * Append the child results together using the cheapest paths from each
860 : * union child.
861 : */
862 3715 : apath = (Path *) create_append_path(root, result_rel, cheapest,
863 : NIL, NULL, 0, false, -1);
864 :
865 : /*
866 : * Initialize the result row estimate to the total input size. This is
867 : * correct for UNION ALL; for the UNION case it is overwritten below with
868 : * the estimated number of distinct groups.
869 : */
870 3715 : result_rel->rows = apath->rows;
871 :
872 : /*
873 : * Now consider doing the same thing using the partial paths plus Append
874 : * plus Gather.
875 : */
876 3715 : if (partial_paths_valid)
877 : {
878 : Path *papath;
879 5 : int parallel_workers = 0;
880 :
881 : /* Find the highest number of workers requested for any subpath. */
882 15 : foreach(lc, partial.partial_subpaths)
883 : {
884 10 : Path *subpath = lfirst(lc);
885 :
886 10 : parallel_workers = Max(parallel_workers,
887 : subpath->parallel_workers);
888 : }
889 : Assert(parallel_workers > 0);
890 :
891 : /*
892 : * If the use of parallel append is permitted, always request at least
893 : * log2(# of children) paths. We assume it can be useful to have
894 : * extra workers in this case because they will be spread out across
895 : * the children. The precise formula is just a guess; see
896 : * add_paths_to_append_rel.
897 : */
898 5 : if (enable_parallel_append)
899 : {
900 5 : parallel_workers = Max(parallel_workers,
901 : pg_leftmost_one_pos32(list_length(partial.partial_subpaths)) + 1);
902 5 : parallel_workers = Min(parallel_workers,
903 : max_parallel_workers_per_gather);
904 : }
905 : Assert(parallel_workers > 0);
906 :
907 : papath = (Path *)
908 5 : create_append_path(root, result_rel, partial,
909 : NIL, NULL, parallel_workers,
910 : enable_parallel_append, -1);
911 : gpath = (Path *)
912 5 : create_gather_path(root, result_rel, papath,
913 5 : result_rel->reltarget, NULL, NULL);
914 : }
915 :
916 3715 : if (!op->all)
917 : {
918 3270 : bool can_sort = grouping_is_sortable(groupList);
919 3270 : bool can_hash = grouping_is_hashable(groupList);
920 :
921 : /*
922 : * result_rel->rows was initialized to the total input size above,
923 : * which is the correct estimate for UNION ALL. A UNION removes
924 : * duplicates, so override it with the estimated number of distinct
925 : * groups.
926 : */
927 3270 : result_rel->rows = dNumChildGroups;
928 :
929 3270 : if (can_hash)
930 : {
931 : Path *path;
932 :
933 : /*
934 : * Try a hash aggregate plan on 'apath'. This is the cheapest
935 : * available path containing each append child.
936 : */
937 3210 : path = (Path *) create_agg_path(root,
938 : result_rel,
939 : apath,
940 3210 : result_rel->reltarget,
941 : AGG_HASHED,
942 : AGGSPLIT_SIMPLE,
943 : groupList,
944 : NIL,
945 : NULL,
946 : dNumChildGroups);
947 3210 : add_path(result_rel, path);
948 :
949 : /* Try hash aggregate on the Gather path, if valid */
950 3210 : if (gpath != NULL)
951 : {
952 : /* Hashed aggregate plan --- no sort needed */
953 5 : path = (Path *) create_agg_path(root,
954 : result_rel,
955 : gpath,
956 5 : result_rel->reltarget,
957 : AGG_HASHED,
958 : AGGSPLIT_SIMPLE,
959 : groupList,
960 : NIL,
961 : NULL,
962 : dNumChildGroups);
963 5 : add_path(result_rel, path);
964 : }
965 : }
966 :
967 3270 : if (can_sort)
968 : {
969 3210 : Path *path = apath;
970 :
971 : /* Try Sort -> Unique on the Append path */
972 3210 : if (groupList != NIL)
973 3185 : path = (Path *) create_sort_path(root, result_rel, path,
974 : make_pathkeys_for_sortclauses(root, groupList, tlist),
975 : -1.0);
976 :
977 3210 : path = (Path *) create_unique_path(root,
978 : result_rel,
979 : path,
980 3210 : list_length(path->pathkeys),
981 : dNumChildGroups);
982 :
983 3210 : add_path(result_rel, path);
984 :
985 : /* Try Sort -> Unique on the Gather path, if set */
986 3210 : if (gpath != NULL)
987 : {
988 5 : path = gpath;
989 :
990 5 : path = (Path *) create_sort_path(root, result_rel, path,
991 : make_pathkeys_for_sortclauses(root, groupList, tlist),
992 : -1.0);
993 :
994 5 : path = (Path *) create_unique_path(root,
995 : result_rel,
996 : path,
997 5 : list_length(path->pathkeys),
998 : dNumChildGroups);
999 5 : add_path(result_rel, path);
1000 : }
1001 : }
1002 :
1003 : /*
1004 : * Try making a MergeAppend path if we managed to find a path with the
1005 : * correct pathkeys in each union child query.
1006 : */
1007 3270 : if (try_sorted && groupList != NIL)
1008 : {
1009 : Path *path;
1010 :
1011 227 : path = (Path *) create_merge_append_path(root,
1012 : result_rel,
1013 : ordered.subpaths,
1014 : NIL,
1015 : union_pathkeys,
1016 : NULL);
1017 :
1018 : /* and make the MergeAppend unique */
1019 227 : path = (Path *) create_unique_path(root,
1020 : result_rel,
1021 : path,
1022 : list_length(tlist),
1023 : dNumChildGroups);
1024 :
1025 227 : add_path(result_rel, path);
1026 : }
1027 : }
1028 : else
1029 : {
1030 : /* UNION ALL */
1031 445 : add_path(result_rel, apath);
1032 :
1033 445 : if (gpath != NULL)
1034 0 : add_path(result_rel, gpath);
1035 : }
1036 :
1037 3715 : return result_rel;
1038 : }
1039 :
1040 : /*
1041 : * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
1042 : */
1043 : static RelOptInfo *
1044 624 : generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
1045 : List *refnames_tlist,
1046 : List **pTargetList)
1047 : {
1048 : RelOptInfo *result_rel;
1049 : RelOptInfo *lrel,
1050 : *rrel;
1051 624 : double save_fraction = root->tuple_fraction;
1052 : Path *lpath,
1053 : *rpath,
1054 : *path;
1055 : List *lpath_tlist,
1056 : *rpath_tlist,
1057 : *tlist,
1058 : *groupList;
1059 : bool lpath_trivial_tlist,
1060 : rpath_trivial_tlist,
1061 : result_trivial_tlist;
1062 624 : List *nonunion_pathkeys = NIL;
1063 : double dLeftGroups,
1064 : dRightGroups,
1065 : dNumGroups,
1066 : dNumOutputRows;
1067 : bool can_sort;
1068 : bool can_hash;
1069 : SetOpCmd cmd;
1070 :
1071 : /*
1072 : * Tell children to fetch all tuples.
1073 : */
1074 624 : root->tuple_fraction = 0.0;
1075 :
1076 : /* Recurse on children */
1077 624 : lrel = recurse_set_operations(op->larg, root,
1078 : op,
1079 : op->colTypes, op->colCollations,
1080 : refnames_tlist,
1081 : &lpath_tlist,
1082 : &lpath_trivial_tlist);
1083 :
1084 624 : rrel = recurse_set_operations(op->rarg, root,
1085 : op,
1086 : op->colTypes, op->colCollations,
1087 : refnames_tlist,
1088 : &rpath_tlist,
1089 : &rpath_trivial_tlist);
1090 :
1091 : /*
1092 : * Generate tlist for SetOp plan node.
1093 : *
1094 : * The tlist for a SetOp plan isn't important so far as the SetOp is
1095 : * concerned, but we must make it look real anyway for the benefit of the
1096 : * next plan level up.
1097 : */
1098 624 : tlist = generate_setop_tlist(op->colTypes, op->colCollations,
1099 : 0, false, lpath_tlist, refnames_tlist,
1100 : &result_trivial_tlist);
1101 :
1102 : /* We should not have needed any type coercions in the tlist */
1103 : Assert(result_trivial_tlist);
1104 :
1105 624 : *pTargetList = tlist;
1106 :
1107 : /* Identify the grouping semantics */
1108 624 : groupList = generate_setop_grouplist(op, tlist);
1109 :
1110 : /* Check whether the operators support sorting or hashing */
1111 624 : can_sort = grouping_is_sortable(groupList);
1112 624 : can_hash = grouping_is_hashable(groupList);
1113 624 : if (!can_sort && !can_hash)
1114 0 : ereport(ERROR,
1115 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1116 : /* translator: %s is INTERSECT or EXCEPT */
1117 : errmsg("could not implement %s",
1118 : (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT"),
1119 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1120 :
1121 624 : if (can_sort)
1122 : {
1123 : /* Determine the pathkeys for sorting by the whole target list */
1124 624 : nonunion_pathkeys = make_pathkeys_for_sortclauses(root, groupList,
1125 : tlist);
1126 :
1127 624 : root->query_pathkeys = nonunion_pathkeys;
1128 : }
1129 :
1130 : /*
1131 : * Now that we've got all that info, we can build the child paths.
1132 : */
1133 624 : if (lrel->rtekind == RTE_SUBQUERY)
1134 604 : build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
1135 : nonunion_pathkeys, &dLeftGroups);
1136 : else
1137 20 : dLeftGroups = lrel->rows;
1138 624 : if (rrel->rtekind == RTE_SUBQUERY)
1139 614 : build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
1140 : nonunion_pathkeys, &dRightGroups);
1141 : else
1142 10 : dRightGroups = rrel->rows;
1143 :
1144 : /* Undo effects of forcing tuple_fraction to 0 */
1145 624 : root->tuple_fraction = save_fraction;
1146 :
1147 : /*
1148 : * For EXCEPT, we must put the left input first. For INTERSECT, either
1149 : * order should give the same results, and we prefer to put the smaller
1150 : * input first in order to (a) minimize the size of the hash table in the
1151 : * hashing case, and (b) improve our chances of exploiting the executor's
1152 : * fast path for empty left-hand input. "Smaller" means the one with the
1153 : * fewer groups.
1154 : */
1155 624 : if (op->op != SETOP_EXCEPT && dLeftGroups > dRightGroups)
1156 : {
1157 : /* need to swap the two inputs */
1158 : RelOptInfo *tmprel;
1159 : List *tmplist;
1160 : double tmpd;
1161 :
1162 30 : tmprel = lrel;
1163 30 : lrel = rrel;
1164 30 : rrel = tmprel;
1165 30 : tmplist = lpath_tlist;
1166 30 : lpath_tlist = rpath_tlist;
1167 30 : rpath_tlist = tmplist;
1168 30 : tmpd = dLeftGroups;
1169 30 : dLeftGroups = dRightGroups;
1170 30 : dRightGroups = tmpd;
1171 : }
1172 :
1173 624 : lpath = lrel->cheapest_total_path;
1174 624 : rpath = rrel->cheapest_total_path;
1175 :
1176 : /* Build result relation. */
1177 624 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
1178 624 : bms_union(lrel->relids, rrel->relids));
1179 :
1180 : /*
1181 : * Create the PathTarget and set the width accordingly. For EXCEPT, since
1182 : * the set op result won't contain rows from the rpath, we only account
1183 : * for the width of the lpath. For INTERSECT, use both input paths.
1184 : */
1185 624 : if (op->op == SETOP_EXCEPT)
1186 419 : result_rel->reltarget = create_setop_pathtarget(root, tlist,
1187 : list_make1(lpath));
1188 : else
1189 205 : result_rel->reltarget = create_setop_pathtarget(root, tlist,
1190 : list_make2(lpath, rpath));
1191 :
1192 : /* Check for provably empty setop inputs and add short-circuit paths. */
1193 624 : if (op->op == SETOP_EXCEPT)
1194 : {
1195 : /*
1196 : * For EXCEPTs, if the left side is dummy then there's no need to
1197 : * inspect the right-hand side as scanning the right to find tuples to
1198 : * remove won't make the left-hand input any more empty.
1199 : */
1200 419 : if (is_dummy_rel(lrel))
1201 : {
1202 5 : mark_dummy_rel(result_rel);
1203 :
1204 5 : return result_rel;
1205 : }
1206 :
1207 : /* Handle EXCEPTs with dummy right input */
1208 414 : if (is_dummy_rel(rrel))
1209 : {
1210 5 : if (op->all)
1211 : {
1212 : Path *apath;
1213 5 : AppendPathInput append = {0};
1214 :
1215 5 : append.subpaths = list_make1(lpath);
1216 :
1217 : /*
1218 : * EXCEPT ALL: If the right-hand input is dummy then we can
1219 : * simply scan the left-hand input. To keep createplan.c
1220 : * happy, use a single child Append to handle the translation
1221 : * between the set op targetlist and the targetlist of the
1222 : * left input. The Append will be removed in setrefs.c.
1223 : */
1224 5 : apath = (Path *) create_append_path(root, result_rel,
1225 : append, NIL, NULL, 0,
1226 : false, -1);
1227 :
1228 5 : add_path(result_rel, apath);
1229 :
1230 5 : return result_rel;
1231 : }
1232 : else
1233 : {
1234 : /*
1235 : * To make EXCEPT with a dummy RHS work means having to
1236 : * deduplicate the left input. That could be done with
1237 : * AggPaths, but it doesn't seem worth the effort. Let the
1238 : * normal path generation code below handle this one.
1239 : */
1240 : }
1241 : }
1242 : }
1243 : else
1244 : {
1245 : /*
1246 : * For INTERSECT, if either input is a dummy rel then we can mark the
1247 : * result_rel as dummy since intersecting with an empty relation can
1248 : * never yield any results. This is true regardless of INTERSECT or
1249 : * INTERSECT ALL.
1250 : */
1251 205 : if (is_dummy_rel(lrel) || is_dummy_rel(rrel))
1252 : {
1253 15 : mark_dummy_rel(result_rel);
1254 :
1255 15 : return result_rel;
1256 : }
1257 : }
1258 :
1259 : /*
1260 : * Estimate number of distinct groups that we'll need hashtable entries
1261 : * for; this is the size of the left-hand input for EXCEPT, or the smaller
1262 : * input for INTERSECT. Also estimate the number of eventual output rows.
1263 : * In non-ALL cases, we estimate each group produces one output row; in
1264 : * ALL cases use the relevant relation size. These are worst-case
1265 : * estimates, of course, but we need to be conservative.
1266 : */
1267 599 : if (op->op == SETOP_EXCEPT)
1268 : {
1269 409 : dNumGroups = dLeftGroups;
1270 409 : dNumOutputRows = op->all ? lpath->rows : dNumGroups;
1271 : }
1272 : else
1273 : {
1274 190 : dNumGroups = dLeftGroups;
1275 190 : dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
1276 : }
1277 599 : result_rel->rows = dNumOutputRows;
1278 :
1279 : /* Select the SetOpCmd type */
1280 599 : switch (op->op)
1281 : {
1282 190 : case SETOP_INTERSECT:
1283 190 : cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
1284 190 : break;
1285 409 : case SETOP_EXCEPT:
1286 409 : cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
1287 409 : break;
1288 0 : default:
1289 0 : elog(ERROR, "unrecognized set op: %d", (int) op->op);
1290 : cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
1291 : break;
1292 : }
1293 :
1294 : /*
1295 : * If we can hash, that just requires a SetOp atop the cheapest inputs.
1296 : */
1297 599 : if (can_hash)
1298 : {
1299 549 : path = (Path *) create_setop_path(root,
1300 : result_rel,
1301 : lpath,
1302 : rpath,
1303 : cmd,
1304 : SETOP_HASHED,
1305 : groupList,
1306 : dNumGroups,
1307 : dNumOutputRows);
1308 549 : add_path(result_rel, path);
1309 : }
1310 :
1311 : /*
1312 : * If we can sort, generate the cheapest sorted input paths, and add a
1313 : * SetOp atop those.
1314 : */
1315 599 : if (can_sort)
1316 : {
1317 : List *pathkeys;
1318 : Path *slpath,
1319 : *srpath;
1320 :
1321 : /* First the left input ... */
1322 599 : pathkeys = make_pathkeys_for_sortclauses(root,
1323 : groupList,
1324 : lpath_tlist);
1325 599 : if (pathkeys_contained_in(pathkeys, lpath->pathkeys))
1326 80 : slpath = lpath; /* cheapest path is already sorted */
1327 : else
1328 : {
1329 519 : slpath = get_cheapest_path_for_pathkeys(lrel->pathlist,
1330 : nonunion_pathkeys,
1331 : NULL,
1332 : TOTAL_COST,
1333 : false);
1334 : /* Subquery failed to produce any presorted paths? */
1335 519 : if (slpath == NULL)
1336 165 : slpath = (Path *) create_sort_path(root,
1337 : lpath->parent,
1338 : lpath,
1339 : pathkeys,
1340 : -1.0);
1341 : }
1342 :
1343 : /* and now the same for the right. */
1344 599 : pathkeys = make_pathkeys_for_sortclauses(root,
1345 : groupList,
1346 : rpath_tlist);
1347 599 : if (pathkeys_contained_in(pathkeys, rpath->pathkeys))
1348 90 : srpath = rpath; /* cheapest path is already sorted */
1349 : else
1350 : {
1351 509 : srpath = get_cheapest_path_for_pathkeys(rrel->pathlist,
1352 : nonunion_pathkeys,
1353 : NULL,
1354 : TOTAL_COST,
1355 : false);
1356 : /* Subquery failed to produce any presorted paths? */
1357 509 : if (srpath == NULL)
1358 165 : srpath = (Path *) create_sort_path(root,
1359 : rpath->parent,
1360 : rpath,
1361 : pathkeys,
1362 : -1.0);
1363 : }
1364 :
1365 599 : path = (Path *) create_setop_path(root,
1366 : result_rel,
1367 : slpath,
1368 : srpath,
1369 : cmd,
1370 : SETOP_SORTED,
1371 : groupList,
1372 : dNumGroups,
1373 : dNumOutputRows);
1374 599 : add_path(result_rel, path);
1375 : }
1376 :
1377 599 : return result_rel;
1378 : }
1379 :
1380 : /*
1381 : * Pull up children of a UNION node that are identically-propertied UNIONs,
1382 : * and perform planning of the queries underneath the N-way UNION.
1383 : *
1384 : * The result is a list of RelOptInfos containing Paths for sub-nodes, with
1385 : * one entry for each descendant that is a leaf query or non-identical setop.
1386 : * We also return parallel lists of the childrens' targetlists and
1387 : * is-trivial-tlist flags.
1388 : *
1389 : * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
1390 : * output rows will be lost anyway.
1391 : */
1392 : static List *
1393 3720 : plan_union_children(PlannerInfo *root,
1394 : SetOperationStmt *top_union,
1395 : List *refnames_tlist,
1396 : List **tlist_list,
1397 : List **istrivial_tlist)
1398 : {
1399 3720 : List *pending_rels = list_make1(top_union);
1400 3720 : List *result = NIL;
1401 : List *child_tlist;
1402 : bool trivial_tlist;
1403 :
1404 3720 : *tlist_list = NIL;
1405 3720 : *istrivial_tlist = NIL;
1406 :
1407 21170 : while (pending_rels != NIL)
1408 : {
1409 17450 : Node *setOp = linitial(pending_rels);
1410 :
1411 17450 : pending_rels = list_delete_first(pending_rels);
1412 :
1413 17450 : if (IsA(setOp, SetOperationStmt))
1414 : {
1415 6975 : SetOperationStmt *op = (SetOperationStmt *) setOp;
1416 :
1417 6975 : if (op->op == top_union->op &&
1418 13775 : (op->all == top_union->all || op->all) &&
1419 13745 : equal(op->colTypes, top_union->colTypes) &&
1420 6865 : equal(op->colCollations, top_union->colCollations))
1421 : {
1422 : /* Same UNION, so fold children into parent */
1423 6865 : pending_rels = lcons(op->rarg, pending_rels);
1424 6865 : pending_rels = lcons(op->larg, pending_rels);
1425 6865 : continue;
1426 : }
1427 : }
1428 :
1429 : /*
1430 : * Not same, so plan this child separately.
1431 : *
1432 : * If top_union isn't a UNION ALL, then we are interested in sorted
1433 : * output from the child, so pass top_union as parentOp. Note that
1434 : * this isn't necessarily the child node's immediate SetOperationStmt
1435 : * parent, but that's fine: it's the effective parent.
1436 : */
1437 10585 : result = lappend(result, recurse_set_operations(setOp, root,
1438 10585 : top_union->all ? NULL : top_union,
1439 : top_union->colTypes,
1440 : top_union->colCollations,
1441 : refnames_tlist,
1442 : &child_tlist,
1443 : &trivial_tlist));
1444 10585 : *tlist_list = lappend(*tlist_list, child_tlist);
1445 10585 : *istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
1446 : }
1447 :
1448 3720 : return result;
1449 : }
1450 :
1451 : /*
1452 : * postprocess_setop_rel - perform steps required after adding paths
1453 : */
1454 : static void
1455 17939 : postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
1456 : {
1457 : /*
1458 : * We don't currently worry about allowing FDWs to contribute paths to
1459 : * this relation, but give extensions a chance.
1460 : */
1461 17939 : if (create_upper_paths_hook)
1462 0 : (*create_upper_paths_hook) (root, UPPERREL_SETOP,
1463 : NULL, rel, NULL);
1464 :
1465 : /* Select cheapest path */
1466 17939 : set_cheapest(rel);
1467 17939 : }
1468 :
1469 : /*
1470 : * Generate targetlist for a set-operation plan node
1471 : *
1472 : * colTypes: OID list of set-op's result column datatypes
1473 : * colCollations: OID list of set-op's result column collations
1474 : * varno: varno to use in generated Vars
1475 : * hack_constants: true to copy up constants (see comments in code)
1476 : * input_tlist: targetlist of this node's input node
1477 : * refnames_tlist: targetlist to take column names from
1478 : * trivial_tlist: output parameter, set to true if targetlist is trivial
1479 : */
1480 : static List *
1481 13601 : generate_setop_tlist(List *colTypes, List *colCollations,
1482 : Index varno,
1483 : bool hack_constants,
1484 : List *input_tlist,
1485 : List *refnames_tlist,
1486 : bool *trivial_tlist)
1487 : {
1488 13601 : List *tlist = NIL;
1489 13601 : int resno = 1;
1490 : ListCell *ctlc,
1491 : *cclc,
1492 : *itlc,
1493 : *rtlc;
1494 : TargetEntry *tle;
1495 : Node *expr;
1496 :
1497 13601 : *trivial_tlist = true; /* until proven differently */
1498 :
1499 54419 : forfour(ctlc, colTypes, cclc, colCollations,
1500 : itlc, input_tlist, rtlc, refnames_tlist)
1501 : {
1502 40818 : Oid colType = lfirst_oid(ctlc);
1503 40818 : Oid colColl = lfirst_oid(cclc);
1504 40818 : TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
1505 40818 : TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
1506 :
1507 : Assert(inputtle->resno == resno);
1508 : Assert(reftle->resno == resno);
1509 : Assert(!inputtle->resjunk);
1510 : Assert(!reftle->resjunk);
1511 :
1512 : /*
1513 : * Generate columns referencing input columns and having appropriate
1514 : * data types and column names. Insert datatype coercions where
1515 : * necessary.
1516 : *
1517 : * HACK: constants in the input's targetlist are copied up as-is
1518 : * rather than being referenced as subquery outputs. This is mainly
1519 : * to ensure that when we try to coerce them to the output column's
1520 : * datatype, the right things happen for UNKNOWN constants. But do
1521 : * this only at the first level of subquery-scan plans; we don't want
1522 : * phony constants appearing in the output tlists of upper-level
1523 : * nodes!
1524 : *
1525 : * Note that copying a constant doesn't in itself require us to mark
1526 : * the tlist nontrivial; see trivial_subqueryscan() in setrefs.c.
1527 : */
1528 40818 : if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
1529 12976 : expr = (Node *) inputtle->expr;
1530 : else
1531 111368 : expr = (Node *) makeVar(varno,
1532 27842 : inputtle->resno,
1533 27842 : exprType((Node *) inputtle->expr),
1534 27842 : exprTypmod((Node *) inputtle->expr),
1535 27842 : exprCollation((Node *) inputtle->expr),
1536 : 0);
1537 :
1538 40818 : if (exprType(expr) != colType)
1539 : {
1540 : /*
1541 : * Note: it's not really cool to be applying coerce_to_common_type
1542 : * here; one notable point is that assign_expr_collations never
1543 : * gets run on any generated nodes. For the moment that's not a
1544 : * problem because we force the correct exposed collation below.
1545 : * It would likely be best to make the parser generate the correct
1546 : * output tlist for every set-op to begin with, though.
1547 : */
1548 1130 : expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */
1549 : expr,
1550 : colType,
1551 : "UNION/INTERSECT/EXCEPT");
1552 1130 : *trivial_tlist = false; /* the coercion makes it not trivial */
1553 : }
1554 :
1555 : /*
1556 : * Ensure the tlist entry's exposed collation matches the set-op. This
1557 : * is necessary because plan_set_operations() reports the result
1558 : * ordering as a list of SortGroupClauses, which don't carry collation
1559 : * themselves but just refer to tlist entries. If we don't show the
1560 : * right collation then planner.c might do the wrong thing in
1561 : * higher-level queries.
1562 : *
1563 : * Note we use RelabelType, not CollateExpr, since this expression
1564 : * will reach the executor without any further processing.
1565 : */
1566 40818 : if (exprCollation(expr) != colColl)
1567 : {
1568 10925 : expr = applyRelabelType(expr,
1569 : exprType(expr), exprTypmod(expr), colColl,
1570 : COERCE_IMPLICIT_CAST, -1, false);
1571 10925 : *trivial_tlist = false; /* the relabel makes it not trivial */
1572 : }
1573 :
1574 81636 : tle = makeTargetEntry((Expr *) expr,
1575 40818 : (AttrNumber) resno++,
1576 40818 : pstrdup(reftle->resname),
1577 : false);
1578 :
1579 : /*
1580 : * By convention, all output columns in a setop tree have
1581 : * ressortgroupref equal to their resno. In some cases the ref isn't
1582 : * needed, but this is a cleaner way than modifying the tlist later.
1583 : */
1584 40818 : tle->ressortgroupref = tle->resno;
1585 :
1586 40818 : tlist = lappend(tlist, tle);
1587 : }
1588 :
1589 13601 : return tlist;
1590 : }
1591 :
1592 : /*
1593 : * Generate targetlist for a set-operation Append node
1594 : *
1595 : * colTypes: OID list of set-op's result column datatypes
1596 : * colCollations: OID list of set-op's result column collations
1597 : * input_tlists: list of tlists for sub-plans of the Append
1598 : * refnames_tlist: targetlist to take column names from
1599 : *
1600 : * The entries in the Append's targetlist should always be simple Vars;
1601 : * we just have to make sure they have the right datatypes/typmods/collations.
1602 : * The Vars are always generated with varno 0.
1603 : *
1604 : * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
1605 : * cannot figure out a realistic width for the tlist we make here. But we
1606 : * ought to refactor this code to produce a PathTarget directly, anyway.
1607 : */
1608 : static List *
1609 4357 : generate_append_tlist(List *colTypes, List *colCollations,
1610 : List *input_tlists,
1611 : List *refnames_tlist)
1612 : {
1613 4357 : List *tlist = NIL;
1614 4357 : int resno = 1;
1615 : ListCell *curColType;
1616 : ListCell *curColCollation;
1617 : ListCell *ref_tl_item;
1618 : int colindex;
1619 : TargetEntry *tle;
1620 : Node *expr;
1621 : ListCell *tlistl;
1622 : int32 *colTypmods;
1623 :
1624 : /*
1625 : * First extract typmods to use.
1626 : *
1627 : * If the inputs all agree on type and typmod of a particular column, use
1628 : * that typmod; else use -1.
1629 : */
1630 4357 : colTypmods = palloc_array(int32, list_length(colTypes));
1631 :
1632 16216 : foreach(tlistl, input_tlists)
1633 : {
1634 11859 : List *subtlist = (List *) lfirst(tlistl);
1635 : ListCell *subtlistl;
1636 :
1637 11859 : curColType = list_head(colTypes);
1638 11859 : colindex = 0;
1639 46166 : foreach(subtlistl, subtlist)
1640 : {
1641 34307 : TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
1642 :
1643 : Assert(!subtle->resjunk);
1644 : Assert(curColType != NULL);
1645 34307 : if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
1646 : {
1647 : /* If first subplan, copy the typmod; else compare */
1648 34307 : int32 subtypmod = exprTypmod((Node *) subtle->expr);
1649 :
1650 34307 : if (tlistl == list_head(input_tlists))
1651 11847 : colTypmods[colindex] = subtypmod;
1652 22460 : else if (subtypmod != colTypmods[colindex])
1653 10 : colTypmods[colindex] = -1;
1654 : }
1655 : else
1656 : {
1657 : /* types disagree, so force typmod to -1 */
1658 0 : colTypmods[colindex] = -1;
1659 : }
1660 34307 : curColType = lnext(colTypes, curColType);
1661 34307 : colindex++;
1662 : }
1663 : Assert(curColType == NULL);
1664 : }
1665 :
1666 : /*
1667 : * Now we can build the tlist for the Append.
1668 : */
1669 4357 : colindex = 0;
1670 16204 : forthree(curColType, colTypes, curColCollation, colCollations,
1671 : ref_tl_item, refnames_tlist)
1672 : {
1673 11847 : Oid colType = lfirst_oid(curColType);
1674 11847 : int32 colTypmod = colTypmods[colindex++];
1675 11847 : Oid colColl = lfirst_oid(curColCollation);
1676 11847 : TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
1677 :
1678 : Assert(reftle->resno == resno);
1679 : Assert(!reftle->resjunk);
1680 11847 : expr = (Node *) makeVar(0,
1681 : resno,
1682 : colType,
1683 : colTypmod,
1684 : colColl,
1685 : 0);
1686 23694 : tle = makeTargetEntry((Expr *) expr,
1687 11847 : (AttrNumber) resno++,
1688 11847 : pstrdup(reftle->resname),
1689 : false);
1690 :
1691 : /*
1692 : * By convention, all output columns in a setop tree have
1693 : * ressortgroupref equal to their resno. In some cases the ref isn't
1694 : * needed, but this is a cleaner way than modifying the tlist later.
1695 : */
1696 11847 : tle->ressortgroupref = tle->resno;
1697 :
1698 11847 : tlist = lappend(tlist, tle);
1699 : }
1700 :
1701 4357 : pfree(colTypmods);
1702 :
1703 4357 : return tlist;
1704 : }
1705 :
1706 : /*
1707 : * generate_setop_grouplist
1708 : * Build a SortGroupClause list defining the sort/grouping properties
1709 : * of the setop's output columns.
1710 : *
1711 : * Parse analysis already determined the properties and built a suitable
1712 : * list, except that the entries do not have sortgrouprefs set because
1713 : * the parser output representation doesn't include a tlist for each
1714 : * setop. So what we need to do here is copy that list and install
1715 : * proper sortgrouprefs into it (copying those from the targetlist).
1716 : */
1717 : static List *
1718 4119 : generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
1719 : {
1720 4119 : List *grouplist = copyObject(op->groupClauses);
1721 : ListCell *lg;
1722 : ListCell *lt;
1723 :
1724 4119 : lg = list_head(grouplist);
1725 15833 : foreach(lt, targetlist)
1726 : {
1727 11714 : TargetEntry *tle = (TargetEntry *) lfirst(lt);
1728 : SortGroupClause *sgc;
1729 :
1730 : Assert(!tle->resjunk);
1731 :
1732 : /* non-resjunk columns should have sortgroupref = resno */
1733 : Assert(tle->ressortgroupref == tle->resno);
1734 :
1735 : /* non-resjunk columns should have grouping clauses */
1736 : Assert(lg != NULL);
1737 11714 : sgc = (SortGroupClause *) lfirst(lg);
1738 11714 : lg = lnext(grouplist, lg);
1739 : Assert(sgc->tleSortGroupRef == 0);
1740 :
1741 11714 : sgc->tleSortGroupRef = tle->ressortgroupref;
1742 : }
1743 : Assert(lg == NULL);
1744 4119 : return grouplist;
1745 : }
1746 :
1747 : /*
1748 : * create_setop_pathtarget
1749 : * Do the normal create_pathtarget() work, plus set the resulting
1750 : * PathTarget's width to the average width of the Paths in child_pathlist
1751 : * weighted using the estimated row count of each path.
1752 : *
1753 : * Note: This is required because set op target lists use varno==0, which
1754 : * results in a type default width estimate rather than one that's based on
1755 : * statistics of the columns from the set op children.
1756 : */
1757 : static PathTarget *
1758 4344 : create_setop_pathtarget(PlannerInfo *root, List *tlist, List *child_pathlist)
1759 : {
1760 : PathTarget *reltarget;
1761 : ListCell *lc;
1762 4344 : double parent_rows = 0;
1763 4344 : double parent_size = 0;
1764 :
1765 4344 : reltarget = create_pathtarget(root, tlist);
1766 :
1767 : /* Calculate the total rows and total size. */
1768 15720 : foreach(lc, child_pathlist)
1769 : {
1770 11376 : Path *path = (Path *) lfirst(lc);
1771 :
1772 11376 : parent_rows += path->rows;
1773 11376 : parent_size += path->parent->reltarget->width * path->rows;
1774 : }
1775 :
1776 4344 : if (parent_rows > 0)
1777 4329 : reltarget->width = rint(parent_size / parent_rows);
1778 :
1779 4344 : return reltarget;
1780 : }
|