Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * prepunion.c
4 : * Routines to plan set-operation queries. The filename is a leftover
5 : * from a time when only UNIONs were implemented.
6 : *
7 : * There are two code paths in the planner for set-operation queries.
8 : * If a subquery consists entirely of simple UNION ALL operations, it
9 : * is converted into an "append relation". Otherwise, it is handled
10 : * by the general code in this module (plan_set_operations and its
11 : * subroutines). There is some support code here for the append-relation
12 : * case, but most of the heavy lifting for that is done elsewhere,
13 : * notably in prepjointree.c and allpaths.c.
14 : *
15 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/optimizer/prep/prepunion.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include "access/htup_details.h"
27 : #include "catalog/pg_type.h"
28 : #include "miscadmin.h"
29 : #include "nodes/makefuncs.h"
30 : #include "nodes/nodeFuncs.h"
31 : #include "optimizer/cost.h"
32 : #include "optimizer/pathnode.h"
33 : #include "optimizer/paths.h"
34 : #include "optimizer/planner.h"
35 : #include "optimizer/prep.h"
36 : #include "optimizer/tlist.h"
37 : #include "parser/parse_coerce.h"
38 : #include "utils/selfuncs.h"
39 :
40 :
41 : static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
42 : List *colTypes, List *colCollations,
43 : bool junkOK,
44 : int flag, List *refnames_tlist,
45 : List **pTargetList,
46 : bool *istrivial_tlist);
47 : static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
48 : PlannerInfo *root,
49 : List *refnames_tlist,
50 : List **pTargetList);
51 : static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
52 : bool trivial_tlist, List *child_tlist,
53 : List *interesting_pathkeys,
54 : double *pNumGroups);
55 : static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
56 : List *refnames_tlist,
57 : List **pTargetList);
58 : static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
59 : List *refnames_tlist,
60 : List **pTargetList);
61 : static List *plan_union_children(PlannerInfo *root,
62 : SetOperationStmt *top_union,
63 : List *refnames_tlist,
64 : List **tlist_list,
65 : List **istrivial_tlist);
66 : static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
67 : static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
68 : Path *input_path,
69 : double dNumGroups, double dNumOutputRows,
70 : const char *construct);
71 : static List *generate_setop_tlist(List *colTypes, List *colCollations,
72 : int flag,
73 : Index varno,
74 : bool hack_constants,
75 : List *input_tlist,
76 : List *refnames_tlist,
77 : bool *trivial_tlist);
78 : static List *generate_append_tlist(List *colTypes, List *colCollations,
79 : bool flag,
80 : List *input_tlists,
81 : List *refnames_tlist);
82 : static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
83 :
84 :
85 : /*
86 : * plan_set_operations
87 : *
88 : * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
89 : *
90 : * This routine only deals with the setOperations tree of the given query.
91 : * Any top-level ORDER BY requested in root->parse->sortClause will be handled
92 : * when we return to grouping_planner; likewise for LIMIT.
93 : *
94 : * What we return is an "upperrel" RelOptInfo containing at least one Path
95 : * that implements the set-operation tree. In addition, root->processed_tlist
96 : * receives a targetlist representing the output of the topmost setop node.
97 : */
98 : RelOptInfo *
99 5252 : plan_set_operations(PlannerInfo *root)
100 : {
101 5252 : Query *parse = root->parse;
102 5252 : SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
103 : Node *node;
104 : RangeTblEntry *leftmostRTE;
105 : Query *leftmostQuery;
106 : RelOptInfo *setop_rel;
107 : List *top_tlist;
108 :
109 : Assert(topop);
110 :
111 : /* check for unsupported stuff */
112 : Assert(parse->jointree->fromlist == NIL);
113 : Assert(parse->jointree->quals == NULL);
114 : Assert(parse->groupClause == NIL);
115 : Assert(parse->havingQual == NULL);
116 : Assert(parse->windowClause == NIL);
117 : Assert(parse->distinctClause == NIL);
118 :
119 : /*
120 : * In the outer query level, equivalence classes are limited to classes
121 : * which define that the top-level target entry is equivalent to the
122 : * corresponding child target entry. There won't be any equivalence class
123 : * merging. Mark that merging is complete to allow us to make pathkeys.
124 : */
125 : Assert(root->eq_classes == NIL);
126 5252 : root->ec_merging_done = true;
127 :
128 : /*
129 : * We'll need to build RelOptInfos for each of the leaf subqueries, which
130 : * are RTE_SUBQUERY rangetable entries in this Query. Prepare the index
131 : * arrays for those, and for AppendRelInfos in case they're needed.
132 : */
133 5252 : setup_simple_rel_arrays(root);
134 :
135 : /*
136 : * Find the leftmost component Query. We need to use its column names for
137 : * all generated tlists (else SELECT INTO won't work right).
138 : */
139 5252 : node = topop->larg;
140 8644 : while (node && IsA(node, SetOperationStmt))
141 3392 : node = ((SetOperationStmt *) node)->larg;
142 : Assert(node && IsA(node, RangeTblRef));
143 5252 : leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
144 5252 : leftmostQuery = leftmostRTE->subquery;
145 : Assert(leftmostQuery != NULL);
146 :
147 : /*
148 : * If the topmost node is a recursive union, it needs special processing.
149 : */
150 5252 : if (root->hasRecursion)
151 : {
152 806 : setop_rel = generate_recursion_path(topop, root,
153 : leftmostQuery->targetList,
154 : &top_tlist);
155 : }
156 : else
157 : {
158 : bool trivial_tlist;
159 :
160 : /*
161 : * Recurse on setOperations tree to generate paths for set ops. The
162 : * final output paths should have just the column types shown as the
163 : * output from the top-level node, plus possibly resjunk working
164 : * columns (we can rely on upper-level nodes to deal with that).
165 : */
166 4446 : setop_rel = recurse_set_operations((Node *) topop, root,
167 : topop->colTypes, topop->colCollations,
168 : true, -1,
169 : leftmostQuery->targetList,
170 : &top_tlist,
171 : &trivial_tlist);
172 : }
173 :
174 : /* Must return the built tlist into root->processed_tlist. */
175 5246 : root->processed_tlist = top_tlist;
176 :
177 5246 : return setop_rel;
178 : }
179 :
180 : /*
181 : * set_operation_ordered_results_useful
182 : * Return true if the given SetOperationStmt can be executed by utilizing
183 : * paths that provide sorted input according to the setop's targetlist.
184 : * Returns false when sorted paths are not any more useful then unsorted
185 : * ones.
186 : */
187 : bool
188 13926 : set_operation_ordered_results_useful(SetOperationStmt *setop)
189 : {
190 : /*
191 : * Paths sorted by the targetlist are useful for UNION as we can opt to
192 : * MergeAppend the sorted paths then Unique them. Ordered paths are no
193 : * more useful than unordered ones for UNION ALL.
194 : */
195 13926 : if (!setop->all && setop->op == SETOP_UNION)
196 10484 : return true;
197 :
198 : /*
199 : * EXCEPT / EXCEPT ALL / INTERSECT / INTERSECT ALL cannot yet utilize
200 : * correctly sorted input paths.
201 : */
202 3442 : return false;
203 : }
204 :
205 : /*
206 : * recurse_set_operations
207 : * Recursively handle one step in a tree of set operations
208 : *
209 : * colTypes: OID list of set-op's result column datatypes
210 : * colCollations: OID list of set-op's result column collations
211 : * junkOK: if true, child resjunk columns may be left in the result
212 : * flag: if >= 0, add a resjunk output column indicating value of flag
213 : * refnames_tlist: targetlist to take column names from
214 : *
215 : * Returns a RelOptInfo for the subtree, as well as these output parameters:
216 : * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
217 : * *istrivial_tlist: true if, and only if, datatypes between parent and child
218 : * match.
219 : *
220 : * The pTargetList output parameter is mostly redundant with the pathtarget
221 : * of the returned RelOptInfo, but for the moment we need it because much of
222 : * the logic in this file depends on flag columns being marked resjunk.
223 : * Pending a redesign of how that works, this is the easy way out.
224 : *
225 : * We don't have to care about typmods here: the only allowed difference
226 : * between set-op input and output typmods is input is a specific typmod
227 : * and output is -1, and that does not require a coercion.
228 : */
229 : static RelOptInfo *
230 18552 : recurse_set_operations(Node *setOp, PlannerInfo *root,
231 : List *colTypes, List *colCollations,
232 : bool junkOK,
233 : int flag, List *refnames_tlist,
234 : List **pTargetList,
235 : bool *istrivial_tlist)
236 : {
237 : RelOptInfo *rel;
238 :
239 18552 : *istrivial_tlist = true; /* for now */
240 :
241 : /* Guard against stack overflow due to overly complex setop nests */
242 18552 : check_stack_depth();
243 :
244 18552 : if (IsA(setOp, RangeTblRef))
245 : {
246 13956 : RangeTblRef *rtr = (RangeTblRef *) setOp;
247 13956 : RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
248 : SetOperationStmt *setops;
249 13956 : Query *subquery = rte->subquery;
250 : PlannerInfo *subroot;
251 : List *tlist;
252 : bool trivial_tlist;
253 :
254 : Assert(subquery != NULL);
255 :
256 : /* Build a RelOptInfo for this leaf subquery. */
257 13956 : rel = build_simple_rel(root, rtr->rtindex, NULL);
258 :
259 : /* plan_params should not be in use in current query level */
260 : Assert(root->plan_params == NIL);
261 :
262 : /*
263 : * Pass the set operation details to the subquery_planner to have it
264 : * consider generating Paths correctly ordered for the set operation.
265 : */
266 13956 : setops = castNode(SetOperationStmt, root->parse->setOperations);
267 :
268 : /* Generate a subroot and Paths for the subquery */
269 13956 : subroot = rel->subroot = subquery_planner(root->glob, subquery, root,
270 : false, root->tuple_fraction,
271 : setops);
272 :
273 : /*
274 : * It should not be possible for the primitive query to contain any
275 : * cross-references to other primitive queries in the setop tree.
276 : */
277 13956 : if (root->plan_params)
278 0 : elog(ERROR, "unexpected outer reference in set operation subquery");
279 :
280 : /* Figure out the appropriate target list for this subquery. */
281 13956 : tlist = generate_setop_tlist(colTypes, colCollations,
282 : flag,
283 13956 : rtr->rtindex,
284 : true,
285 : subroot->processed_tlist,
286 : refnames_tlist,
287 : &trivial_tlist);
288 13956 : rel->reltarget = create_pathtarget(root, tlist);
289 :
290 : /* Return the fully-fledged tlist to caller, too */
291 13956 : *pTargetList = tlist;
292 13956 : *istrivial_tlist = trivial_tlist;
293 : }
294 4596 : else if (IsA(setOp, SetOperationStmt))
295 : {
296 4596 : SetOperationStmt *op = (SetOperationStmt *) setOp;
297 :
298 : /* UNIONs are much different from INTERSECT/EXCEPT */
299 4596 : if (op->op == SETOP_UNION)
300 3988 : rel = generate_union_paths(op, root,
301 : refnames_tlist,
302 : pTargetList);
303 : else
304 608 : rel = generate_nonunion_paths(op, root,
305 : refnames_tlist,
306 : pTargetList);
307 :
308 : /*
309 : * If necessary, add a Result node to project the caller-requested
310 : * output columns.
311 : *
312 : * XXX you don't really want to know about this: setrefs.c will apply
313 : * fix_upper_expr() to the Result node's tlist. This would fail if the
314 : * Vars generated by generate_setop_tlist() were not exactly equal()
315 : * to the corresponding tlist entries of the subplan. However, since
316 : * the subplan was generated by generate_union_paths() or
317 : * generate_nonunion_paths(), and hence its tlist was generated by
318 : * generate_append_tlist(), this will work. We just tell
319 : * generate_setop_tlist() to use varno 0.
320 : */
321 4596 : if (flag >= 0 ||
322 4566 : !tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
323 4464 : !tlist_same_collations(*pTargetList, colCollations, junkOK))
324 : {
325 : PathTarget *target;
326 : bool trivial_tlist;
327 : ListCell *lc;
328 :
329 132 : *pTargetList = generate_setop_tlist(colTypes, colCollations,
330 : flag,
331 : 0,
332 : false,
333 : *pTargetList,
334 : refnames_tlist,
335 : &trivial_tlist);
336 132 : *istrivial_tlist = trivial_tlist;
337 132 : target = create_pathtarget(root, *pTargetList);
338 :
339 : /* Apply projection to each path */
340 264 : foreach(lc, rel->pathlist)
341 : {
342 132 : Path *subpath = (Path *) lfirst(lc);
343 : Path *path;
344 :
345 : Assert(subpath->param_info == NULL);
346 132 : path = apply_projection_to_path(root, subpath->parent,
347 : subpath, target);
348 : /* If we had to add a Result, path is different from subpath */
349 132 : if (path != subpath)
350 132 : lfirst(lc) = path;
351 : }
352 :
353 : /* Apply projection to each partial path */
354 132 : foreach(lc, rel->partial_pathlist)
355 : {
356 0 : Path *subpath = (Path *) lfirst(lc);
357 : Path *path;
358 :
359 : Assert(subpath->param_info == NULL);
360 :
361 : /* avoid apply_projection_to_path, in case of multiple refs */
362 0 : path = (Path *) create_projection_path(root, subpath->parent,
363 : subpath, target);
364 0 : lfirst(lc) = path;
365 : }
366 : }
367 4596 : postprocess_setop_rel(root, rel);
368 : }
369 : else
370 : {
371 0 : elog(ERROR, "unrecognized node type: %d",
372 : (int) nodeTag(setOp));
373 : *pTargetList = NIL;
374 : rel = NULL; /* keep compiler quiet */
375 : }
376 :
377 18552 : return rel;
378 : }
379 :
380 : /*
381 : * Generate paths for a recursive UNION node
382 : */
383 : static RelOptInfo *
384 806 : generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
385 : List *refnames_tlist,
386 : List **pTargetList)
387 : {
388 : RelOptInfo *result_rel;
389 : Path *path;
390 : RelOptInfo *lrel,
391 : *rrel;
392 : Path *lpath;
393 : Path *rpath;
394 : List *lpath_tlist;
395 : bool lpath_trivial_tlist;
396 : List *rpath_tlist;
397 : bool rpath_trivial_tlist;
398 : List *tlist;
399 : List *groupList;
400 : double dNumGroups;
401 :
402 : /* Parser should have rejected other cases */
403 806 : if (setOp->op != SETOP_UNION)
404 0 : elog(ERROR, "only UNION queries can be recursive");
405 : /* Worktable ID should be assigned */
406 : Assert(root->wt_param_id >= 0);
407 :
408 : /*
409 : * Unlike a regular UNION node, process the left and right inputs
410 : * separately without any intention of combining them into one Append.
411 : */
412 806 : lrel = recurse_set_operations(setOp->larg, root,
413 : setOp->colTypes, setOp->colCollations,
414 : false, -1,
415 : refnames_tlist,
416 : &lpath_tlist,
417 : &lpath_trivial_tlist);
418 806 : if (lrel->rtekind == RTE_SUBQUERY)
419 806 : build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
420 : NIL, NULL);
421 806 : lpath = lrel->cheapest_total_path;
422 : /* The right path will want to look at the left one ... */
423 806 : root->non_recursive_path = lpath;
424 806 : rrel = recurse_set_operations(setOp->rarg, root,
425 : setOp->colTypes, setOp->colCollations,
426 : false, -1,
427 : refnames_tlist,
428 : &rpath_tlist,
429 : &rpath_trivial_tlist);
430 806 : if (rrel->rtekind == RTE_SUBQUERY)
431 800 : build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
432 : NIL, NULL);
433 806 : rpath = rrel->cheapest_total_path;
434 806 : root->non_recursive_path = NULL;
435 :
436 : /*
437 : * Generate tlist for RecursiveUnion path node --- same as in Append cases
438 : */
439 806 : tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations, false,
440 806 : list_make2(lpath_tlist, rpath_tlist),
441 : refnames_tlist);
442 :
443 806 : *pTargetList = tlist;
444 :
445 : /* Build result relation. */
446 806 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
447 806 : bms_union(lrel->relids, rrel->relids));
448 806 : result_rel->reltarget = create_pathtarget(root, tlist);
449 :
450 : /*
451 : * If UNION, identify the grouping operators
452 : */
453 806 : if (setOp->all)
454 : {
455 452 : groupList = NIL;
456 452 : dNumGroups = 0;
457 : }
458 : else
459 : {
460 : /* Identify the grouping semantics */
461 354 : groupList = generate_setop_grouplist(setOp, tlist);
462 :
463 : /* We only support hashing here */
464 354 : if (!grouping_is_hashable(groupList))
465 6 : ereport(ERROR,
466 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
467 : errmsg("could not implement recursive UNION"),
468 : errdetail("All column datatypes must be hashable.")));
469 :
470 : /*
471 : * For the moment, take the number of distinct groups as equal to the
472 : * total input size, ie, the worst case.
473 : */
474 348 : dNumGroups = lpath->rows + rpath->rows * 10;
475 : }
476 :
477 : /*
478 : * And make the path node.
479 : */
480 800 : path = (Path *) create_recursiveunion_path(root,
481 : result_rel,
482 : lpath,
483 : rpath,
484 800 : result_rel->reltarget,
485 : groupList,
486 : root->wt_param_id,
487 : dNumGroups);
488 :
489 800 : add_path(result_rel, path);
490 800 : postprocess_setop_rel(root, result_rel);
491 800 : return result_rel;
492 : }
493 :
494 : /*
495 : * build_setop_child_paths
496 : * Build paths for the set op child relation denoted by 'rel'.
497 : *
498 : * interesting_pathkeys: if not NIL, also include paths that suit these
499 : * pathkeys, sorting any unsorted paths as required.
500 : * *pNumGroups: if not NULL, we estimate the number of distinct groups
501 : * in the result, and store it there
502 : */
503 : static void
504 13956 : build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
505 : bool trivial_tlist, List *child_tlist,
506 : List *interesting_pathkeys, double *pNumGroups)
507 : {
508 : RelOptInfo *final_rel;
509 13956 : List *setop_pathkeys = rel->subroot->setop_pathkeys;
510 : ListCell *lc;
511 :
512 : /* it can't be a set op child rel if it's not a subquery */
513 : Assert(rel->rtekind == RTE_SUBQUERY);
514 :
515 : /* when sorting is needed, add child rel equivalences */
516 13956 : if (interesting_pathkeys != NIL)
517 9618 : add_setop_child_rel_equivalences(root,
518 : rel,
519 : child_tlist,
520 : interesting_pathkeys);
521 :
522 : /*
523 : * Mark rel with estimated output rows, width, etc. Note that we have to
524 : * do this before generating outer-query paths, else cost_subqueryscan is
525 : * not happy.
526 : */
527 13956 : set_subquery_size_estimates(root, rel);
528 :
529 : /*
530 : * Since we may want to add a partial path to this relation, we must set
531 : * its consider_parallel flag correctly.
532 : */
533 13956 : final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
534 13956 : rel->consider_parallel = final_rel->consider_parallel;
535 :
536 : /* Generate subquery scan paths for any interesting path in final_rel */
537 36570 : foreach(lc, final_rel->pathlist)
538 : {
539 22614 : Path *subpath = (Path *) lfirst(lc);
540 : List *pathkeys;
541 22614 : Path *cheapest_input_path = final_rel->cheapest_total_path;
542 : bool is_sorted;
543 : int presorted_keys;
544 :
545 : /*
546 : * Include the cheapest path as-is so that the set operation can be
547 : * cheaply implemented using a method which does not require the input
548 : * to be sorted.
549 : */
550 22614 : if (subpath == cheapest_input_path)
551 : {
552 : /* Convert subpath's pathkeys to outer representation */
553 13956 : pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
554 : make_tlist_from_pathtarget(subpath->pathtarget));
555 :
556 : /* Generate outer path using this subpath */
557 13956 : add_path(rel, (Path *) create_subqueryscan_path(root,
558 : rel,
559 : subpath,
560 : trivial_tlist,
561 : pathkeys,
562 : NULL));
563 : }
564 :
565 : /* skip dealing with sorted paths if the setop doesn't need them */
566 22614 : if (interesting_pathkeys == NIL)
567 4700 : continue;
568 :
569 : /*
570 : * Create paths to suit final sort order required for setop_pathkeys.
571 : * Here we'll sort the cheapest input path (if not sorted already) and
572 : * incremental sort any paths which are partially sorted.
573 : */
574 17926 : is_sorted = pathkeys_count_contained_in(setop_pathkeys,
575 : subpath->pathkeys,
576 : &presorted_keys);
577 :
578 17926 : if (!is_sorted)
579 : {
580 11972 : double limittuples = rel->subroot->limit_tuples;
581 :
582 : /*
583 : * Try at least sorting the cheapest path and also try
584 : * incrementally sorting any path which is partially sorted
585 : * already (no need to deal with paths which have presorted keys
586 : * when incremental sort is disabled unless it's the cheapest
587 : * input path).
588 : */
589 11972 : if (subpath != cheapest_input_path &&
590 2896 : (presorted_keys == 0 || !enable_incremental_sort))
591 12 : continue;
592 :
593 : /*
594 : * We've no need to consider both a sort and incremental sort.
595 : * We'll just do a sort if there are no presorted keys and an
596 : * incremental sort when there are presorted keys.
597 : */
598 11960 : if (presorted_keys == 0 || !enable_incremental_sort)
599 9076 : subpath = (Path *) create_sort_path(rel->subroot,
600 : final_rel,
601 : subpath,
602 : setop_pathkeys,
603 : limittuples);
604 : else
605 2884 : subpath = (Path *) create_incremental_sort_path(rel->subroot,
606 : final_rel,
607 : subpath,
608 : setop_pathkeys,
609 : presorted_keys,
610 : limittuples);
611 : }
612 :
613 : /*
614 : * subpath is now sorted, so add it to the pathlist. We already added
615 : * the cheapest_input_path above, so don't add it again unless we just
616 : * sorted it.
617 : */
618 17914 : if (subpath != cheapest_input_path)
619 : {
620 : /* Convert subpath's pathkeys to outer representation */
621 17372 : pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
622 : make_tlist_from_pathtarget(subpath->pathtarget));
623 :
624 : /* Generate outer path using this subpath */
625 17372 : add_path(rel, (Path *) create_subqueryscan_path(root,
626 : rel,
627 : subpath,
628 : trivial_tlist,
629 : pathkeys,
630 : NULL));
631 : }
632 : }
633 :
634 : /* if consider_parallel is false, there should be no partial paths */
635 : Assert(final_rel->consider_parallel ||
636 : final_rel->partial_pathlist == NIL);
637 :
638 : /*
639 : * If we have a partial path for the child relation, we can use that to
640 : * build a partial path for this relation. But there's no point in
641 : * considering any path but the cheapest.
642 : */
643 13956 : if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
644 9498 : final_rel->partial_pathlist != NIL)
645 : {
646 : Path *partial_subpath;
647 : Path *partial_path;
648 :
649 12 : partial_subpath = linitial(final_rel->partial_pathlist);
650 : partial_path = (Path *)
651 12 : create_subqueryscan_path(root, rel, partial_subpath,
652 : trivial_tlist,
653 : NIL, NULL);
654 12 : add_partial_path(rel, partial_path);
655 : }
656 :
657 13956 : postprocess_setop_rel(root, rel);
658 :
659 : /*
660 : * Estimate number of groups if caller wants it. If the subquery used
661 : * grouping or aggregation, its output is probably mostly unique anyway;
662 : * otherwise do statistical estimation.
663 : *
664 : * XXX you don't really want to know about this: we do the estimation
665 : * using the subroot->parse's original targetlist expressions, not the
666 : * subroot->processed_tlist which might seem more appropriate. The reason
667 : * is that if the subquery is itself a setop, it may return a
668 : * processed_tlist containing "varno 0" Vars generated by
669 : * generate_append_tlist, and those would confuse estimate_num_groups
670 : * mightily. We ought to get rid of the "varno 0" hack, but that requires
671 : * a redesign of the parsetree representation of setops, so that there can
672 : * be an RTE corresponding to each setop's output. Note, we use this not
673 : * subquery's targetlist but subroot->parse's targetlist, because it was
674 : * revised by self-join removal. subquery's targetlist might contain the
675 : * references to the removed relids.
676 : */
677 13956 : if (pNumGroups)
678 : {
679 1186 : PlannerInfo *subroot = rel->subroot;
680 1186 : Query *subquery = subroot->parse;
681 :
682 1186 : if (subquery->groupClause || subquery->groupingSets ||
683 1186 : subquery->distinctClause || subroot->hasHavingQual ||
684 1174 : subquery->hasAggs)
685 12 : *pNumGroups = rel->cheapest_total_path->rows;
686 : else
687 1174 : *pNumGroups = estimate_num_groups(subroot,
688 1174 : get_tlist_exprs(subroot->parse->targetList, false),
689 1174 : rel->cheapest_total_path->rows,
690 : NULL,
691 : NULL);
692 : }
693 13956 : }
694 :
695 : /*
696 : * Generate paths for a UNION or UNION ALL node
697 : */
698 : static RelOptInfo *
699 3988 : generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
700 : List *refnames_tlist,
701 : List **pTargetList)
702 : {
703 3988 : Relids relids = NULL;
704 : RelOptInfo *result_rel;
705 : ListCell *lc;
706 : ListCell *lc2;
707 : ListCell *lc3;
708 3988 : List *cheapest_pathlist = NIL;
709 3988 : List *ordered_pathlist = NIL;
710 3988 : List *partial_pathlist = NIL;
711 3988 : bool partial_paths_valid = true;
712 3988 : bool consider_parallel = true;
713 : List *rellist;
714 : List *tlist_list;
715 : List *trivial_tlist_list;
716 : List *tlist;
717 3988 : List *groupList = NIL;
718 : Path *apath;
719 3988 : Path *gpath = NULL;
720 : bool try_sorted;
721 3988 : List *union_pathkeys = NIL;
722 :
723 : /*
724 : * If any of my children are identical UNION nodes (same op, all-flag, and
725 : * colTypes) then they can be merged into this node so that we generate
726 : * only one Append/MergeAppend and unique-ification for the lot. Recurse
727 : * to find such nodes.
728 : */
729 3988 : rellist = plan_union_children(root,
730 : op,
731 : refnames_tlist,
732 : &tlist_list,
733 : &trivial_tlist_list);
734 :
735 : /*
736 : * Generate tlist for Append/MergeAppend plan node.
737 : *
738 : * The tlist for an Append plan isn't important as far as the Append is
739 : * concerned, but we must make it look real anyway for the benefit of the
740 : * next plan level up.
741 : */
742 3988 : tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
743 : tlist_list, refnames_tlist);
744 3988 : *pTargetList = tlist;
745 :
746 : /* For for UNIONs (not UNION ALL), try sorting, if sorting is possible */
747 3988 : try_sorted = !op->all && grouping_is_sortable(op->groupClauses);
748 :
749 3988 : if (try_sorted)
750 : {
751 : /* Identify the grouping semantics */
752 3354 : groupList = generate_setop_grouplist(op, tlist);
753 :
754 : /* Determine the pathkeys for sorting by the whole target list */
755 3354 : union_pathkeys = make_pathkeys_for_sortclauses(root, groupList, tlist);
756 :
757 3354 : root->query_pathkeys = union_pathkeys;
758 : }
759 :
760 : /*
761 : * Now that we've got the append target list, we can build the union child
762 : * paths.
763 : */
764 15266 : forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
765 : {
766 11278 : RelOptInfo *rel = lfirst(lc);
767 11278 : bool trivial_tlist = lfirst_int(lc2);
768 11278 : List *child_tlist = lfirst_node(List, lc3);
769 :
770 : /* only build paths for the union children */
771 11278 : if (rel->rtekind == RTE_SUBQUERY)
772 11164 : build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
773 : union_pathkeys, NULL);
774 : }
775 :
776 : /* Build path lists and relid set. */
777 15266 : foreach(lc, rellist)
778 : {
779 11278 : RelOptInfo *rel = lfirst(lc);
780 : Path *ordered_path;
781 :
782 11278 : cheapest_pathlist = lappend(cheapest_pathlist,
783 11278 : rel->cheapest_total_path);
784 :
785 11278 : if (try_sorted)
786 : {
787 3604 : ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
788 : union_pathkeys,
789 : NULL,
790 : TOTAL_COST,
791 : false);
792 :
793 3604 : if (ordered_path != NULL)
794 464 : ordered_pathlist = lappend(ordered_pathlist, ordered_path);
795 : else
796 : {
797 : /*
798 : * If we can't find a sorted path, just give up trying to
799 : * generate a list of correctly sorted child paths. This can
800 : * happen when type coercion was added to the targetlist due
801 : * to mismatching types from the union children.
802 : */
803 3140 : try_sorted = false;
804 : }
805 : }
806 :
807 11278 : if (consider_parallel)
808 : {
809 8024 : if (!rel->consider_parallel)
810 : {
811 3002 : consider_parallel = false;
812 3002 : partial_paths_valid = false;
813 : }
814 5022 : else if (rel->partial_pathlist == NIL)
815 5010 : partial_paths_valid = false;
816 : else
817 12 : partial_pathlist = lappend(partial_pathlist,
818 12 : linitial(rel->partial_pathlist));
819 : }
820 :
821 11278 : relids = bms_union(relids, rel->relids);
822 : }
823 :
824 : /* Build result relation. */
825 3988 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
826 3988 : result_rel->reltarget = create_pathtarget(root, tlist);
827 3988 : result_rel->consider_parallel = consider_parallel;
828 3988 : result_rel->consider_startup = (root->tuple_fraction > 0);
829 :
830 : /*
831 : * Append the child results together using the cheapest paths from each
832 : * union child.
833 : */
834 3988 : apath = (Path *) create_append_path(root, result_rel, cheapest_pathlist,
835 : NIL, NIL, NULL, 0, false, -1);
836 :
837 : /*
838 : * Estimate number of groups. For now we just assume the output is unique
839 : * --- this is certainly true for the UNION case, and we want worst-case
840 : * estimates anyway.
841 : */
842 3988 : result_rel->rows = apath->rows;
843 :
844 : /*
845 : * Now consider doing the same thing using the partial paths plus Append
846 : * plus Gather.
847 : */
848 3988 : if (partial_paths_valid)
849 : {
850 : Path *papath;
851 6 : int parallel_workers = 0;
852 :
853 : /* Find the highest number of workers requested for any subpath. */
854 18 : foreach(lc, partial_pathlist)
855 : {
856 12 : Path *subpath = lfirst(lc);
857 :
858 12 : parallel_workers = Max(parallel_workers,
859 : subpath->parallel_workers);
860 : }
861 : Assert(parallel_workers > 0);
862 :
863 : /*
864 : * If the use of parallel append is permitted, always request at least
865 : * log2(# of children) paths. We assume it can be useful to have
866 : * extra workers in this case because they will be spread out across
867 : * the children. The precise formula is just a guess; see
868 : * add_paths_to_append_rel.
869 : */
870 6 : if (enable_parallel_append)
871 : {
872 6 : parallel_workers = Max(parallel_workers,
873 : pg_leftmost_one_pos32(list_length(partial_pathlist)) + 1);
874 6 : parallel_workers = Min(parallel_workers,
875 : max_parallel_workers_per_gather);
876 : }
877 : Assert(parallel_workers > 0);
878 :
879 : papath = (Path *)
880 6 : create_append_path(root, result_rel, NIL, partial_pathlist,
881 : NIL, NULL, parallel_workers,
882 : enable_parallel_append, -1);
883 : gpath = (Path *)
884 6 : create_gather_path(root, result_rel, papath,
885 6 : result_rel->reltarget, NULL, NULL);
886 : }
887 :
888 3988 : if (!op->all)
889 : {
890 : double dNumGroups;
891 3430 : bool can_sort = grouping_is_sortable(groupList);
892 3430 : bool can_hash = grouping_is_hashable(groupList);
893 :
894 : /*
895 : * XXX for the moment, take the number of distinct groups as equal to
896 : * the total input size, i.e., the worst case. This is too
897 : * conservative, but it's not clear how to get a decent estimate of
898 : * the true size. One should note as well the propensity of novices
899 : * to write UNION rather than UNION ALL even when they don't expect
900 : * any duplicates...
901 : */
902 3430 : dNumGroups = apath->rows;
903 :
904 3430 : if (can_hash)
905 : {
906 : Path *path;
907 :
908 : /*
909 : * Try a hash aggregate plan on 'apath'. This is the cheapest
910 : * available path containing each append child.
911 : */
912 3358 : path = (Path *) create_agg_path(root,
913 : result_rel,
914 : apath,
915 : create_pathtarget(root, tlist),
916 : AGG_HASHED,
917 : AGGSPLIT_SIMPLE,
918 : groupList,
919 : NIL,
920 : NULL,
921 : dNumGroups);
922 3358 : add_path(result_rel, path);
923 :
924 : /* Try hash aggregate on the Gather path, if valid */
925 3358 : if (gpath != NULL)
926 : {
927 : /* Hashed aggregate plan --- no sort needed */
928 6 : path = (Path *) create_agg_path(root,
929 : result_rel,
930 : gpath,
931 : create_pathtarget(root, tlist),
932 : AGG_HASHED,
933 : AGGSPLIT_SIMPLE,
934 : groupList,
935 : NIL,
936 : NULL,
937 : dNumGroups);
938 6 : add_path(result_rel, path);
939 : }
940 : }
941 :
942 3430 : if (can_sort)
943 : {
944 3430 : Path *path = apath;
945 :
946 : /* Try Sort -> Unique on the Append path */
947 3430 : if (groupList != NIL)
948 3324 : path = (Path *) create_sort_path(root, result_rel, path,
949 : make_pathkeys_for_sortclauses(root, groupList, tlist),
950 : -1.0);
951 :
952 3430 : path = (Path *) create_upper_unique_path(root,
953 : result_rel,
954 : path,
955 3430 : list_length(path->pathkeys),
956 : dNumGroups);
957 :
958 3430 : add_path(result_rel, path);
959 :
960 : /* Try Sort -> Unique on the Gather path, if set */
961 3430 : if (gpath != NULL)
962 : {
963 6 : path = gpath;
964 :
965 6 : path = (Path *) create_sort_path(root, result_rel, path,
966 : make_pathkeys_for_sortclauses(root, groupList, tlist),
967 : -1.0);
968 :
969 6 : path = (Path *) create_upper_unique_path(root,
970 : result_rel,
971 : path,
972 6 : list_length(path->pathkeys),
973 : dNumGroups);
974 6 : add_path(result_rel, path);
975 : }
976 : }
977 :
978 : /*
979 : * Try making a MergeAppend path if we managed to find a path with the
980 : * correct pathkeys in each union child query.
981 : */
982 3430 : if (try_sorted && groupList != NIL)
983 : {
984 : Path *path;
985 :
986 184 : path = (Path *) create_merge_append_path(root,
987 : result_rel,
988 : ordered_pathlist,
989 : union_pathkeys,
990 : NULL);
991 :
992 : /* and make the MergeAppend unique */
993 184 : path = (Path *) create_upper_unique_path(root,
994 : result_rel,
995 : path,
996 : list_length(tlist),
997 : dNumGroups);
998 :
999 184 : add_path(result_rel, path);
1000 : }
1001 : }
1002 : else
1003 : {
1004 : /* UNION ALL */
1005 558 : add_path(result_rel, apath);
1006 :
1007 558 : if (gpath != NULL)
1008 0 : add_path(result_rel, gpath);
1009 : }
1010 :
1011 3988 : return result_rel;
1012 : }
1013 :
1014 : /*
1015 : * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
1016 : */
1017 : static RelOptInfo *
1018 608 : generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
1019 : List *refnames_tlist,
1020 : List **pTargetList)
1021 : {
1022 : RelOptInfo *result_rel;
1023 : RelOptInfo *lrel,
1024 : *rrel;
1025 608 : double save_fraction = root->tuple_fraction;
1026 : Path *lpath,
1027 : *rpath,
1028 : *path;
1029 : List *lpath_tlist,
1030 : *rpath_tlist,
1031 : *tlist_list,
1032 : *tlist,
1033 : *groupList,
1034 : *pathlist;
1035 : bool lpath_trivial_tlist,
1036 : rpath_trivial_tlist;
1037 : double dLeftGroups,
1038 : dRightGroups,
1039 : dNumGroups,
1040 : dNumOutputRows;
1041 : bool use_hash;
1042 : SetOpCmd cmd;
1043 : int firstFlag;
1044 :
1045 : /*
1046 : * Tell children to fetch all tuples.
1047 : */
1048 608 : root->tuple_fraction = 0.0;
1049 :
1050 : /* Recurse on children, ensuring their outputs are marked */
1051 608 : lrel = recurse_set_operations(op->larg, root,
1052 : op->colTypes, op->colCollations,
1053 : false, 0,
1054 : refnames_tlist,
1055 : &lpath_tlist,
1056 : &lpath_trivial_tlist);
1057 608 : if (lrel->rtekind == RTE_SUBQUERY)
1058 584 : build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
1059 : NIL, &dLeftGroups);
1060 : else
1061 24 : dLeftGroups = lrel->rows;
1062 :
1063 608 : lpath = lrel->cheapest_total_path;
1064 608 : rrel = recurse_set_operations(op->rarg, root,
1065 : op->colTypes, op->colCollations,
1066 : false, 1,
1067 : refnames_tlist,
1068 : &rpath_tlist,
1069 : &rpath_trivial_tlist);
1070 608 : if (rrel->rtekind == RTE_SUBQUERY)
1071 602 : build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
1072 : NIL, &dRightGroups);
1073 : else
1074 6 : dRightGroups = rrel->rows;
1075 :
1076 608 : rpath = rrel->cheapest_total_path;
1077 :
1078 : /* Undo effects of forcing tuple_fraction to 0 */
1079 608 : root->tuple_fraction = save_fraction;
1080 :
1081 : /*
1082 : * For EXCEPT, we must put the left input first. For INTERSECT, either
1083 : * order should give the same results, and we prefer to put the smaller
1084 : * input first in order to minimize the size of the hash table in the
1085 : * hashing case. "Smaller" means the one with the fewer groups.
1086 : */
1087 608 : if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
1088 578 : {
1089 578 : pathlist = list_make2(lpath, rpath);
1090 578 : tlist_list = list_make2(lpath_tlist, rpath_tlist);
1091 578 : firstFlag = 0;
1092 : }
1093 : else
1094 : {
1095 30 : pathlist = list_make2(rpath, lpath);
1096 30 : tlist_list = list_make2(rpath_tlist, lpath_tlist);
1097 30 : firstFlag = 1;
1098 : }
1099 :
1100 : /*
1101 : * Generate tlist for Append plan node.
1102 : *
1103 : * The tlist for an Append plan isn't important as far as the Append is
1104 : * concerned, but we must make it look real anyway for the benefit of the
1105 : * next plan level up. In fact, it has to be real enough that the flag
1106 : * column is shown as a variable not a constant, else setrefs.c will get
1107 : * confused.
1108 : */
1109 608 : tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
1110 : tlist_list, refnames_tlist);
1111 :
1112 608 : *pTargetList = tlist;
1113 :
1114 : /* Build result relation. */
1115 608 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
1116 608 : bms_union(lrel->relids, rrel->relids));
1117 608 : result_rel->reltarget = create_pathtarget(root, tlist);
1118 :
1119 : /*
1120 : * Append the child results together.
1121 : */
1122 608 : path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
1123 : NIL, NULL, 0, false, -1);
1124 :
1125 : /* Identify the grouping semantics */
1126 608 : groupList = generate_setop_grouplist(op, tlist);
1127 :
1128 : /*
1129 : * Estimate number of distinct groups that we'll need hashtable entries
1130 : * for; this is the size of the left-hand input for EXCEPT, or the smaller
1131 : * input for INTERSECT. Also estimate the number of eventual output rows.
1132 : * In non-ALL cases, we estimate each group produces one output row; in
1133 : * ALL cases use the relevant relation size. These are worst-case
1134 : * estimates, of course, but we need to be conservative.
1135 : */
1136 608 : if (op->op == SETOP_EXCEPT)
1137 : {
1138 398 : dNumGroups = dLeftGroups;
1139 398 : dNumOutputRows = op->all ? lpath->rows : dNumGroups;
1140 : }
1141 : else
1142 : {
1143 210 : dNumGroups = Min(dLeftGroups, dRightGroups);
1144 210 : dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
1145 : }
1146 :
1147 : /*
1148 : * Decide whether to hash or sort, and add a sort node if needed.
1149 : */
1150 608 : use_hash = choose_hashed_setop(root, groupList, path,
1151 : dNumGroups, dNumOutputRows,
1152 608 : (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
1153 :
1154 608 : if (groupList && !use_hash)
1155 132 : path = (Path *) create_sort_path(root,
1156 : result_rel,
1157 : path,
1158 : make_pathkeys_for_sortclauses(root,
1159 : groupList,
1160 : tlist),
1161 : -1.0);
1162 :
1163 : /*
1164 : * Finally, add a SetOp path node to generate the correct output.
1165 : */
1166 608 : switch (op->op)
1167 : {
1168 210 : case SETOP_INTERSECT:
1169 210 : cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
1170 210 : break;
1171 398 : case SETOP_EXCEPT:
1172 398 : cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
1173 398 : break;
1174 0 : default:
1175 0 : elog(ERROR, "unrecognized set op: %d", (int) op->op);
1176 : cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
1177 : break;
1178 : }
1179 608 : path = (Path *) create_setop_path(root,
1180 : result_rel,
1181 : path,
1182 : cmd,
1183 : use_hash ? SETOP_HASHED : SETOP_SORTED,
1184 : groupList,
1185 608 : list_length(op->colTypes) + 1,
1186 : use_hash ? firstFlag : -1,
1187 : dNumGroups,
1188 : dNumOutputRows);
1189 :
1190 608 : result_rel->rows = path->rows;
1191 608 : add_path(result_rel, path);
1192 608 : return result_rel;
1193 : }
1194 :
1195 : /*
1196 : * Pull up children of a UNION node that are identically-propertied UNIONs.
1197 : *
1198 : * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
1199 : * output rows will be lost anyway.
1200 : *
1201 : * NOTE: currently, we ignore collations while determining if a child has
1202 : * the same properties. This is semantically sound only so long as all
1203 : * collations have the same notion of equality. It is valid from an
1204 : * implementation standpoint because we don't care about the ordering of
1205 : * a UNION child's result: UNION ALL results are always unordered, and
1206 : * generate_union_paths will force a fresh sort if the top level is a UNION.
1207 : */
1208 : static List *
1209 3988 : plan_union_children(PlannerInfo *root,
1210 : SetOperationStmt *top_union,
1211 : List *refnames_tlist,
1212 : List **tlist_list,
1213 : List **istrivial_tlist)
1214 : {
1215 3988 : List *pending_rels = list_make1(top_union);
1216 3988 : List *result = NIL;
1217 : List *child_tlist;
1218 : bool trivial_tlist;
1219 :
1220 3988 : *tlist_list = NIL;
1221 3988 : *istrivial_tlist = NIL;
1222 :
1223 22556 : while (pending_rels != NIL)
1224 : {
1225 18568 : Node *setOp = linitial(pending_rels);
1226 :
1227 18568 : pending_rels = list_delete_first(pending_rels);
1228 :
1229 18568 : if (IsA(setOp, SetOperationStmt))
1230 : {
1231 7404 : SetOperationStmt *op = (SetOperationStmt *) setOp;
1232 :
1233 7404 : if (op->op == top_union->op &&
1234 14616 : (op->all == top_union->all || op->all) &&
1235 7302 : equal(op->colTypes, top_union->colTypes))
1236 : {
1237 : /* Same UNION, so fold children into parent */
1238 7290 : pending_rels = lcons(op->rarg, pending_rels);
1239 7290 : pending_rels = lcons(op->larg, pending_rels);
1240 7290 : continue;
1241 : }
1242 : }
1243 :
1244 : /*
1245 : * Not same, so plan this child separately.
1246 : *
1247 : * Note we disallow any resjunk columns in child results. This is
1248 : * necessary since the Append node that implements the union won't do
1249 : * any projection, and upper levels will get confused if some of our
1250 : * output tuples have junk and some don't. This case only arises when
1251 : * we have an EXCEPT or INTERSECT as child, else there won't be
1252 : * resjunk anyway.
1253 : */
1254 11278 : result = lappend(result, recurse_set_operations(setOp, root,
1255 : top_union->colTypes,
1256 : top_union->colCollations,
1257 : false, -1,
1258 : refnames_tlist,
1259 : &child_tlist,
1260 : &trivial_tlist));
1261 11278 : *tlist_list = lappend(*tlist_list, child_tlist);
1262 11278 : *istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
1263 : }
1264 :
1265 3988 : return result;
1266 : }
1267 :
1268 : /*
1269 : * postprocess_setop_rel - perform steps required after adding paths
1270 : */
1271 : static void
1272 19352 : postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
1273 : {
1274 : /*
1275 : * We don't currently worry about allowing FDWs to contribute paths to
1276 : * this relation, but give extensions a chance.
1277 : */
1278 19352 : if (create_upper_paths_hook)
1279 0 : (*create_upper_paths_hook) (root, UPPERREL_SETOP,
1280 : NULL, rel, NULL);
1281 :
1282 : /* Select cheapest path */
1283 19352 : set_cheapest(rel);
1284 19352 : }
1285 :
1286 : /*
1287 : * choose_hashed_setop - should we use hashing for a set operation?
1288 : */
1289 : static bool
1290 608 : choose_hashed_setop(PlannerInfo *root, List *groupClauses,
1291 : Path *input_path,
1292 : double dNumGroups, double dNumOutputRows,
1293 : const char *construct)
1294 : {
1295 608 : int numGroupCols = list_length(groupClauses);
1296 608 : Size hash_mem_limit = get_hash_memory_limit();
1297 : bool can_sort;
1298 : bool can_hash;
1299 : Size hashentrysize;
1300 : Path hashed_p;
1301 : Path sorted_p;
1302 : double tuple_fraction;
1303 :
1304 : /* Check whether the operators support sorting or hashing */
1305 608 : can_sort = grouping_is_sortable(groupClauses);
1306 608 : can_hash = grouping_is_hashable(groupClauses);
1307 608 : if (can_hash && can_sort)
1308 : {
1309 : /* we have a meaningful choice to make, continue ... */
1310 : }
1311 60 : else if (can_hash)
1312 0 : return true;
1313 60 : else if (can_sort)
1314 60 : return false;
1315 : else
1316 0 : ereport(ERROR,
1317 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1318 : /* translator: %s is UNION, INTERSECT, or EXCEPT */
1319 : errmsg("could not implement %s", construct),
1320 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1321 :
1322 : /* Prefer sorting when enable_hashagg is off */
1323 548 : if (!enable_hashagg)
1324 102 : return false;
1325 :
1326 : /*
1327 : * Don't do it if it doesn't look like the hashtable will fit into
1328 : * hash_mem.
1329 : */
1330 446 : hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
1331 :
1332 446 : if (hashentrysize * dNumGroups > hash_mem_limit)
1333 0 : return false;
1334 :
1335 : /*
1336 : * See if the estimated cost is no more than doing it the other way.
1337 : *
1338 : * We need to consider input_plan + hashagg versus input_plan + sort +
1339 : * group. Note that the actual result plan might involve a SetOp or
1340 : * Unique node, not Agg or Group, but the cost estimates for Agg and Group
1341 : * should be close enough for our purposes here.
1342 : *
1343 : * These path variables are dummies that just hold cost fields; we don't
1344 : * make actual Paths for these steps.
1345 : */
1346 446 : cost_agg(&hashed_p, root, AGG_HASHED, NULL,
1347 : numGroupCols, dNumGroups,
1348 : NIL,
1349 : input_path->startup_cost, input_path->total_cost,
1350 446 : input_path->rows, input_path->pathtarget->width);
1351 :
1352 : /*
1353 : * Now for the sorted case. Note that the input is *always* unsorted,
1354 : * since it was made by appending unrelated sub-relations together.
1355 : */
1356 446 : sorted_p.startup_cost = input_path->startup_cost;
1357 446 : sorted_p.total_cost = input_path->total_cost;
1358 : /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
1359 446 : cost_sort(&sorted_p, root, NIL, sorted_p.total_cost,
1360 446 : input_path->rows, input_path->pathtarget->width,
1361 : 0.0, work_mem, -1.0);
1362 446 : cost_group(&sorted_p, root, numGroupCols, dNumGroups,
1363 : NIL,
1364 : sorted_p.startup_cost, sorted_p.total_cost,
1365 : input_path->rows);
1366 :
1367 : /*
1368 : * Now make the decision using the top-level tuple fraction. First we
1369 : * have to convert an absolute count (LIMIT) into fractional form.
1370 : */
1371 446 : tuple_fraction = root->tuple_fraction;
1372 446 : if (tuple_fraction >= 1.0)
1373 0 : tuple_fraction /= dNumOutputRows;
1374 :
1375 446 : if (compare_fractional_path_costs(&hashed_p, &sorted_p,
1376 : tuple_fraction) < 0)
1377 : {
1378 : /* Hashed is cheaper, so use it */
1379 446 : return true;
1380 : }
1381 0 : return false;
1382 : }
1383 :
1384 : /*
1385 : * Generate targetlist for a set-operation plan node
1386 : *
1387 : * colTypes: OID list of set-op's result column datatypes
1388 : * colCollations: OID list of set-op's result column collations
1389 : * flag: -1 if no flag column needed, 0 or 1 to create a const flag column
1390 : * varno: varno to use in generated Vars
1391 : * hack_constants: true to copy up constants (see comments in code)
1392 : * input_tlist: targetlist of this node's input node
1393 : * refnames_tlist: targetlist to take column names from
1394 : * trivial_tlist: output parameter, set to true if targetlist is trivial
1395 : */
1396 : static List *
1397 14088 : generate_setop_tlist(List *colTypes, List *colCollations,
1398 : int flag,
1399 : Index varno,
1400 : bool hack_constants,
1401 : List *input_tlist,
1402 : List *refnames_tlist,
1403 : bool *trivial_tlist)
1404 : {
1405 14088 : List *tlist = NIL;
1406 14088 : int resno = 1;
1407 : ListCell *ctlc,
1408 : *cclc,
1409 : *itlc,
1410 : *rtlc;
1411 : TargetEntry *tle;
1412 : Node *expr;
1413 :
1414 14088 : *trivial_tlist = true; /* until proven differently */
1415 :
1416 55152 : forfour(ctlc, colTypes, cclc, colCollations,
1417 : itlc, input_tlist, rtlc, refnames_tlist)
1418 : {
1419 41064 : Oid colType = lfirst_oid(ctlc);
1420 41064 : Oid colColl = lfirst_oid(cclc);
1421 41064 : TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
1422 41064 : TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
1423 :
1424 : Assert(inputtle->resno == resno);
1425 : Assert(reftle->resno == resno);
1426 : Assert(!inputtle->resjunk);
1427 : Assert(!reftle->resjunk);
1428 :
1429 : /*
1430 : * Generate columns referencing input columns and having appropriate
1431 : * data types and column names. Insert datatype coercions where
1432 : * necessary.
1433 : *
1434 : * HACK: constants in the input's targetlist are copied up as-is
1435 : * rather than being referenced as subquery outputs. This is mainly
1436 : * to ensure that when we try to coerce them to the output column's
1437 : * datatype, the right things happen for UNKNOWN constants. But do
1438 : * this only at the first level of subquery-scan plans; we don't want
1439 : * phony constants appearing in the output tlists of upper-level
1440 : * nodes!
1441 : *
1442 : * Note that copying a constant doesn't in itself require us to mark
1443 : * the tlist nontrivial; see trivial_subqueryscan() in setrefs.c.
1444 : */
1445 41064 : if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
1446 14248 : expr = (Node *) inputtle->expr;
1447 : else
1448 107264 : expr = (Node *) makeVar(varno,
1449 26816 : inputtle->resno,
1450 26816 : exprType((Node *) inputtle->expr),
1451 26816 : exprTypmod((Node *) inputtle->expr),
1452 26816 : exprCollation((Node *) inputtle->expr),
1453 : 0);
1454 :
1455 41064 : if (exprType(expr) != colType)
1456 : {
1457 : /*
1458 : * Note: it's not really cool to be applying coerce_to_common_type
1459 : * here; one notable point is that assign_expr_collations never
1460 : * gets run on any generated nodes. For the moment that's not a
1461 : * problem because we force the correct exposed collation below.
1462 : * It would likely be best to make the parser generate the correct
1463 : * output tlist for every set-op to begin with, though.
1464 : */
1465 1462 : expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */
1466 : expr,
1467 : colType,
1468 : "UNION/INTERSECT/EXCEPT");
1469 1462 : *trivial_tlist = false; /* the coercion makes it not trivial */
1470 : }
1471 :
1472 : /*
1473 : * Ensure the tlist entry's exposed collation matches the set-op. This
1474 : * is necessary because plan_set_operations() reports the result
1475 : * ordering as a list of SortGroupClauses, which don't carry collation
1476 : * themselves but just refer to tlist entries. If we don't show the
1477 : * right collation then planner.c might do the wrong thing in
1478 : * higher-level queries.
1479 : *
1480 : * Note we use RelabelType, not CollateExpr, since this expression
1481 : * will reach the executor without any further processing.
1482 : */
1483 41064 : if (exprCollation(expr) != colColl)
1484 : {
1485 11870 : expr = applyRelabelType(expr,
1486 : exprType(expr), exprTypmod(expr), colColl,
1487 : COERCE_IMPLICIT_CAST, -1, false);
1488 11870 : *trivial_tlist = false; /* the relabel makes it not trivial */
1489 : }
1490 :
1491 82128 : tle = makeTargetEntry((Expr *) expr,
1492 41064 : (AttrNumber) resno++,
1493 41064 : pstrdup(reftle->resname),
1494 : false);
1495 :
1496 : /*
1497 : * By convention, all non-resjunk columns in a setop tree have
1498 : * ressortgroupref equal to their resno. In some cases the ref isn't
1499 : * needed, but this is a cleaner way than modifying the tlist later.
1500 : */
1501 41064 : tle->ressortgroupref = tle->resno;
1502 :
1503 41064 : tlist = lappend(tlist, tle);
1504 : }
1505 :
1506 14088 : if (flag >= 0)
1507 : {
1508 : /* Add a resjunk flag column */
1509 : /* flag value is the given constant */
1510 1216 : expr = (Node *) makeConst(INT4OID,
1511 : -1,
1512 : InvalidOid,
1513 : sizeof(int32),
1514 : Int32GetDatum(flag),
1515 : false,
1516 : true);
1517 1216 : tle = makeTargetEntry((Expr *) expr,
1518 1216 : (AttrNumber) resno++,
1519 : pstrdup("flag"),
1520 : true);
1521 1216 : tlist = lappend(tlist, tle);
1522 1216 : *trivial_tlist = false; /* the extra entry makes it not trivial */
1523 : }
1524 :
1525 14088 : return tlist;
1526 : }
1527 :
1528 : /*
1529 : * Generate targetlist for a set-operation Append node
1530 : *
1531 : * colTypes: OID list of set-op's result column datatypes
1532 : * colCollations: OID list of set-op's result column collations
1533 : * flag: true to create a flag column copied up from subplans
1534 : * input_tlists: list of tlists for sub-plans of the Append
1535 : * refnames_tlist: targetlist to take column names from
1536 : *
1537 : * The entries in the Append's targetlist should always be simple Vars;
1538 : * we just have to make sure they have the right datatypes/typmods/collations.
1539 : * The Vars are always generated with varno 0.
1540 : *
1541 : * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
1542 : * cannot figure out a realistic width for the tlist we make here. But we
1543 : * ought to refactor this code to produce a PathTarget directly, anyway.
1544 : */
1545 : static List *
1546 5402 : generate_append_tlist(List *colTypes, List *colCollations,
1547 : bool flag,
1548 : List *input_tlists,
1549 : List *refnames_tlist)
1550 : {
1551 5402 : List *tlist = NIL;
1552 5402 : int resno = 1;
1553 : ListCell *curColType;
1554 : ListCell *curColCollation;
1555 : ListCell *ref_tl_item;
1556 : int colindex;
1557 : TargetEntry *tle;
1558 : Node *expr;
1559 : ListCell *tlistl;
1560 : int32 *colTypmods;
1561 :
1562 : /*
1563 : * First extract typmods to use.
1564 : *
1565 : * If the inputs all agree on type and typmod of a particular column, use
1566 : * that typmod; else use -1.
1567 : */
1568 5402 : colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
1569 :
1570 19508 : foreach(tlistl, input_tlists)
1571 : {
1572 14106 : List *subtlist = (List *) lfirst(tlistl);
1573 : ListCell *subtlistl;
1574 :
1575 14106 : curColType = list_head(colTypes);
1576 14106 : colindex = 0;
1577 56416 : foreach(subtlistl, subtlist)
1578 : {
1579 42310 : TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
1580 :
1581 42310 : if (subtle->resjunk)
1582 1216 : continue;
1583 : Assert(curColType != NULL);
1584 41094 : if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
1585 : {
1586 : /* If first subplan, copy the typmod; else compare */
1587 41094 : int32 subtypmod = exprTypmod((Node *) subtle->expr);
1588 :
1589 41094 : if (tlistl == list_head(input_tlists))
1590 14828 : colTypmods[colindex] = subtypmod;
1591 26266 : else if (subtypmod != colTypmods[colindex])
1592 12 : colTypmods[colindex] = -1;
1593 : }
1594 : else
1595 : {
1596 : /* types disagree, so force typmod to -1 */
1597 0 : colTypmods[colindex] = -1;
1598 : }
1599 41094 : curColType = lnext(colTypes, curColType);
1600 41094 : colindex++;
1601 : }
1602 : Assert(curColType == NULL);
1603 : }
1604 :
1605 : /*
1606 : * Now we can build the tlist for the Append.
1607 : */
1608 5402 : colindex = 0;
1609 20230 : forthree(curColType, colTypes, curColCollation, colCollations,
1610 : ref_tl_item, refnames_tlist)
1611 : {
1612 14828 : Oid colType = lfirst_oid(curColType);
1613 14828 : int32 colTypmod = colTypmods[colindex++];
1614 14828 : Oid colColl = lfirst_oid(curColCollation);
1615 14828 : TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
1616 :
1617 : Assert(reftle->resno == resno);
1618 : Assert(!reftle->resjunk);
1619 14828 : expr = (Node *) makeVar(0,
1620 : resno,
1621 : colType,
1622 : colTypmod,
1623 : colColl,
1624 : 0);
1625 29656 : tle = makeTargetEntry((Expr *) expr,
1626 14828 : (AttrNumber) resno++,
1627 14828 : pstrdup(reftle->resname),
1628 : false);
1629 :
1630 : /*
1631 : * By convention, all non-resjunk columns in a setop tree have
1632 : * ressortgroupref equal to their resno. In some cases the ref isn't
1633 : * needed, but this is a cleaner way than modifying the tlist later.
1634 : */
1635 14828 : tle->ressortgroupref = tle->resno;
1636 :
1637 14828 : tlist = lappend(tlist, tle);
1638 : }
1639 :
1640 5402 : if (flag)
1641 : {
1642 : /* Add a resjunk flag column */
1643 : /* flag value is shown as copied up from subplan */
1644 608 : expr = (Node *) makeVar(0,
1645 : resno,
1646 : INT4OID,
1647 : -1,
1648 : InvalidOid,
1649 : 0);
1650 608 : tle = makeTargetEntry((Expr *) expr,
1651 608 : (AttrNumber) resno++,
1652 : pstrdup("flag"),
1653 : true);
1654 608 : tlist = lappend(tlist, tle);
1655 : }
1656 :
1657 5402 : pfree(colTypmods);
1658 :
1659 5402 : return tlist;
1660 : }
1661 :
1662 : /*
1663 : * generate_setop_grouplist
1664 : * Build a SortGroupClause list defining the sort/grouping properties
1665 : * of the setop's output columns.
1666 : *
1667 : * Parse analysis already determined the properties and built a suitable
1668 : * list, except that the entries do not have sortgrouprefs set because
1669 : * the parser output representation doesn't include a tlist for each
1670 : * setop. So what we need to do here is copy that list and install
1671 : * proper sortgrouprefs into it (copying those from the targetlist).
1672 : */
1673 : static List *
1674 4316 : generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
1675 : {
1676 4316 : List *grouplist = copyObject(op->groupClauses);
1677 : ListCell *lg;
1678 : ListCell *lt;
1679 :
1680 4316 : lg = list_head(grouplist);
1681 16632 : foreach(lt, targetlist)
1682 : {
1683 12316 : TargetEntry *tle = (TargetEntry *) lfirst(lt);
1684 : SortGroupClause *sgc;
1685 :
1686 12316 : if (tle->resjunk)
1687 : {
1688 : /* resjunk columns should not have sortgrouprefs */
1689 : Assert(tle->ressortgroupref == 0);
1690 608 : continue; /* ignore resjunk columns */
1691 : }
1692 :
1693 : /* non-resjunk columns should have sortgroupref = resno */
1694 : Assert(tle->ressortgroupref == tle->resno);
1695 :
1696 : /* non-resjunk columns should have grouping clauses */
1697 : Assert(lg != NULL);
1698 11708 : sgc = (SortGroupClause *) lfirst(lg);
1699 11708 : lg = lnext(grouplist, lg);
1700 : Assert(sgc->tleSortGroupRef == 0);
1701 :
1702 11708 : sgc->tleSortGroupRef = tle->ressortgroupref;
1703 : }
1704 : Assert(lg == NULL);
1705 4316 : return grouplist;
1706 : }
|