Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * prepunion.c
4 : * Routines to plan set-operation queries. The filename is a leftover
5 : * from a time when only UNIONs were implemented.
6 : *
7 : * There are two code paths in the planner for set-operation queries.
8 : * If a subquery consists entirely of simple UNION ALL operations, it
9 : * is converted into an "append relation". Otherwise, it is handled
10 : * by the general code in this module (plan_set_operations and its
11 : * subroutines). There is some support code here for the append-relation
12 : * case, but most of the heavy lifting for that is done elsewhere,
13 : * notably in prepjointree.c and allpaths.c.
14 : *
15 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/optimizer/prep/prepunion.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include "access/htup_details.h"
27 : #include "catalog/pg_type.h"
28 : #include "miscadmin.h"
29 : #include "nodes/makefuncs.h"
30 : #include "nodes/nodeFuncs.h"
31 : #include "optimizer/cost.h"
32 : #include "optimizer/pathnode.h"
33 : #include "optimizer/paths.h"
34 : #include "optimizer/planner.h"
35 : #include "optimizer/prep.h"
36 : #include "optimizer/tlist.h"
37 : #include "parser/parse_coerce.h"
38 : #include "utils/selfuncs.h"
39 :
40 :
41 : static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
42 : List *colTypes, List *colCollations,
43 : bool junkOK,
44 : int flag, List *refnames_tlist,
45 : List **pTargetList,
46 : bool *istrivial_tlist);
47 : static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
48 : PlannerInfo *root,
49 : List *refnames_tlist,
50 : List **pTargetList);
51 : static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
52 : bool trivial_tlist, List *child_tlist,
53 : List *interesting_pathkeys,
54 : double *pNumGroups);
55 : static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
56 : List *refnames_tlist,
57 : List **pTargetList);
58 : static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
59 : List *refnames_tlist,
60 : List **pTargetList);
61 : static List *plan_union_children(PlannerInfo *root,
62 : SetOperationStmt *top_union,
63 : List *refnames_tlist,
64 : List **tlist_list,
65 : List **istrivial_tlist);
66 : static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
67 : static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
68 : Path *input_path,
69 : double dNumGroups, double dNumOutputRows,
70 : const char *construct);
71 : static List *generate_setop_tlist(List *colTypes, List *colCollations,
72 : int flag,
73 : Index varno,
74 : bool hack_constants,
75 : List *input_tlist,
76 : List *refnames_tlist,
77 : bool *trivial_tlist);
78 : static List *generate_append_tlist(List *colTypes, List *colCollations,
79 : bool flag,
80 : List *input_tlists,
81 : List *refnames_tlist);
82 : static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
83 :
84 :
85 : /*
86 : * plan_set_operations
87 : *
88 : * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
89 : *
90 : * This routine only deals with the setOperations tree of the given query.
91 : * Any top-level ORDER BY requested in root->parse->sortClause will be handled
92 : * when we return to grouping_planner; likewise for LIMIT.
93 : *
94 : * What we return is an "upperrel" RelOptInfo containing at least one Path
95 : * that implements the set-operation tree. In addition, root->processed_tlist
96 : * receives a targetlist representing the output of the topmost setop node.
97 : */
98 : RelOptInfo *
99 5250 : plan_set_operations(PlannerInfo *root)
100 : {
101 5250 : Query *parse = root->parse;
102 5250 : SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
103 : Node *node;
104 : RangeTblEntry *leftmostRTE;
105 : Query *leftmostQuery;
106 : RelOptInfo *setop_rel;
107 : List *top_tlist;
108 :
109 : Assert(topop);
110 :
111 : /* check for unsupported stuff */
112 : Assert(parse->jointree->fromlist == NIL);
113 : Assert(parse->jointree->quals == NULL);
114 : Assert(parse->groupClause == NIL);
115 : Assert(parse->havingQual == NULL);
116 : Assert(parse->windowClause == NIL);
117 : Assert(parse->distinctClause == NIL);
118 :
119 : /*
120 : * In the outer query level, equivalence classes are limited to classes
121 : * which define that the top-level target entry is equivalent to the
122 : * corresponding child target entry. There won't be any equivalence class
123 : * merging. Mark that merging is complete to allow us to make pathkeys.
124 : */
125 : Assert(root->eq_classes == NIL);
126 5250 : root->ec_merging_done = true;
127 :
128 : /*
129 : * We'll need to build RelOptInfos for each of the leaf subqueries, which
130 : * are RTE_SUBQUERY rangetable entries in this Query. Prepare the index
131 : * arrays for those, and for AppendRelInfos in case they're needed.
132 : */
133 5250 : setup_simple_rel_arrays(root);
134 :
135 : /*
136 : * Find the leftmost component Query. We need to use its column names for
137 : * all generated tlists (else SELECT INTO won't work right).
138 : */
139 5250 : node = topop->larg;
140 8644 : while (node && IsA(node, SetOperationStmt))
141 3394 : node = ((SetOperationStmt *) node)->larg;
142 : Assert(node && IsA(node, RangeTblRef));
143 5250 : leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
144 5250 : leftmostQuery = leftmostRTE->subquery;
145 : Assert(leftmostQuery != NULL);
146 :
147 : /*
148 : * If the topmost node is a recursive union, it needs special processing.
149 : */
150 5250 : if (root->hasRecursion)
151 : {
152 808 : setop_rel = generate_recursion_path(topop, root,
153 : leftmostQuery->targetList,
154 : &top_tlist);
155 : }
156 : else
157 : {
158 : bool trivial_tlist;
159 :
160 : /*
161 : * Recurse on setOperations tree to generate paths for set ops. The
162 : * final output paths should have just the column types shown as the
163 : * output from the top-level node, plus possibly resjunk working
164 : * columns (we can rely on upper-level nodes to deal with that).
165 : */
166 4442 : setop_rel = recurse_set_operations((Node *) topop, root,
167 : topop->colTypes, topop->colCollations,
168 : true, -1,
169 : leftmostQuery->targetList,
170 : &top_tlist,
171 : &trivial_tlist);
172 : }
173 :
174 : /* Must return the built tlist into root->processed_tlist. */
175 5244 : root->processed_tlist = top_tlist;
176 :
177 5244 : return setop_rel;
178 : }
179 :
180 : /*
181 : * set_operation_ordered_results_useful
182 : * Return true if the given SetOperationStmt can be executed by utilizing
183 : * paths that provide sorted input according to the setop's targetlist.
184 : * Returns false when sorted paths are not any more useful then unsorted
185 : * ones.
186 : */
187 : bool
188 13924 : set_operation_ordered_results_useful(SetOperationStmt *setop)
189 : {
190 : /*
191 : * Paths sorted by the targetlist are useful for UNION as we can opt to
192 : * MergeAppend the sorted paths then Unique them. Ordered paths are no
193 : * more useful than unordered ones for UNION ALL.
194 : */
195 13924 : if (!setop->all && setop->op == SETOP_UNION)
196 10488 : return true;
197 :
198 : /*
199 : * EXCEPT / EXCEPT ALL / INTERSECT / INTERSECT ALL cannot yet utilize
200 : * correctly sorted input paths.
201 : */
202 3436 : return false;
203 : }
204 :
205 : /*
206 : * recurse_set_operations
207 : * Recursively handle one step in a tree of set operations
208 : *
209 : * colTypes: OID list of set-op's result column datatypes
210 : * colCollations: OID list of set-op's result column collations
211 : * junkOK: if true, child resjunk columns may be left in the result
212 : * flag: if >= 0, add a resjunk output column indicating value of flag
213 : * refnames_tlist: targetlist to take column names from
214 : *
215 : * Returns a RelOptInfo for the subtree, as well as these output parameters:
216 : * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
217 : * *istrivial_tlist: true if, and only if, datatypes between parent and child
218 : * match.
219 : *
220 : * The pTargetList output parameter is mostly redundant with the pathtarget
221 : * of the returned RelOptInfo, but for the moment we need it because much of
222 : * the logic in this file depends on flag columns being marked resjunk.
223 : * Pending a redesign of how that works, this is the easy way out.
224 : *
225 : * We don't have to care about typmods here: the only allowed difference
226 : * between set-op input and output typmods is input is a specific typmod
227 : * and output is -1, and that does not require a coercion.
228 : */
229 : static RelOptInfo *
230 18546 : recurse_set_operations(Node *setOp, PlannerInfo *root,
231 : List *colTypes, List *colCollations,
232 : bool junkOK,
233 : int flag, List *refnames_tlist,
234 : List **pTargetList,
235 : bool *istrivial_tlist)
236 : {
237 : RelOptInfo *rel;
238 :
239 18546 : *istrivial_tlist = true; /* for now */
240 :
241 : /* Guard against stack overflow due to overly complex setop nests */
242 18546 : check_stack_depth();
243 :
244 18546 : if (IsA(setOp, RangeTblRef))
245 : {
246 13954 : RangeTblRef *rtr = (RangeTblRef *) setOp;
247 13954 : RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
248 : SetOperationStmt *setops;
249 13954 : Query *subquery = rte->subquery;
250 : PlannerInfo *subroot;
251 : List *tlist;
252 : bool trivial_tlist;
253 :
254 : Assert(subquery != NULL);
255 :
256 : /* Build a RelOptInfo for this leaf subquery. */
257 13954 : rel = build_simple_rel(root, rtr->rtindex, NULL);
258 :
259 : /* plan_params should not be in use in current query level */
260 : Assert(root->plan_params == NIL);
261 :
262 : /*
263 : * Pass the set operation details to the subquery_planner to have it
264 : * consider generating Paths correctly ordered for the set operation.
265 : */
266 13954 : setops = castNode(SetOperationStmt, root->parse->setOperations);
267 :
268 : /* Generate a subroot and Paths for the subquery */
269 13954 : subroot = rel->subroot = subquery_planner(root->glob, subquery, root,
270 : false, root->tuple_fraction,
271 : setops);
272 :
273 : /*
274 : * It should not be possible for the primitive query to contain any
275 : * cross-references to other primitive queries in the setop tree.
276 : */
277 13954 : if (root->plan_params)
278 0 : elog(ERROR, "unexpected outer reference in set operation subquery");
279 :
280 : /* Figure out the appropriate target list for this subquery. */
281 13954 : tlist = generate_setop_tlist(colTypes, colCollations,
282 : flag,
283 13954 : rtr->rtindex,
284 : true,
285 : subroot->processed_tlist,
286 : refnames_tlist,
287 : &trivial_tlist);
288 13954 : rel->reltarget = create_pathtarget(root, tlist);
289 :
290 : /* Return the fully-fledged tlist to caller, too */
291 13954 : *pTargetList = tlist;
292 13954 : *istrivial_tlist = trivial_tlist;
293 : }
294 4592 : else if (IsA(setOp, SetOperationStmt))
295 : {
296 4592 : SetOperationStmt *op = (SetOperationStmt *) setOp;
297 :
298 : /* UNIONs are much different from INTERSECT/EXCEPT */
299 4592 : if (op->op == SETOP_UNION)
300 3990 : rel = generate_union_paths(op, root,
301 : refnames_tlist,
302 : pTargetList);
303 : else
304 602 : rel = generate_nonunion_paths(op, root,
305 : refnames_tlist,
306 : pTargetList);
307 :
308 : /*
309 : * If necessary, add a Result node to project the caller-requested
310 : * output columns.
311 : *
312 : * XXX you don't really want to know about this: setrefs.c will apply
313 : * fix_upper_expr() to the Result node's tlist. This would fail if the
314 : * Vars generated by generate_setop_tlist() were not exactly equal()
315 : * to the corresponding tlist entries of the subplan. However, since
316 : * the subplan was generated by generate_union_paths() or
317 : * generate_nonunion_paths(), and hence its tlist was generated by
318 : * generate_append_tlist(), this will work. We just tell
319 : * generate_setop_tlist() to use varno 0.
320 : */
321 4592 : if (flag >= 0 ||
322 4562 : !tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
323 4460 : !tlist_same_collations(*pTargetList, colCollations, junkOK))
324 : {
325 : PathTarget *target;
326 : bool trivial_tlist;
327 : ListCell *lc;
328 :
329 132 : *pTargetList = generate_setop_tlist(colTypes, colCollations,
330 : flag,
331 : 0,
332 : false,
333 : *pTargetList,
334 : refnames_tlist,
335 : &trivial_tlist);
336 132 : *istrivial_tlist = trivial_tlist;
337 132 : target = create_pathtarget(root, *pTargetList);
338 :
339 : /* Apply projection to each path */
340 264 : foreach(lc, rel->pathlist)
341 : {
342 132 : Path *subpath = (Path *) lfirst(lc);
343 : Path *path;
344 :
345 : Assert(subpath->param_info == NULL);
346 132 : path = apply_projection_to_path(root, subpath->parent,
347 : subpath, target);
348 : /* If we had to add a Result, path is different from subpath */
349 132 : if (path != subpath)
350 132 : lfirst(lc) = path;
351 : }
352 :
353 : /* Apply projection to each partial path */
354 132 : foreach(lc, rel->partial_pathlist)
355 : {
356 0 : Path *subpath = (Path *) lfirst(lc);
357 : Path *path;
358 :
359 : Assert(subpath->param_info == NULL);
360 :
361 : /* avoid apply_projection_to_path, in case of multiple refs */
362 0 : path = (Path *) create_projection_path(root, subpath->parent,
363 : subpath, target);
364 0 : lfirst(lc) = path;
365 : }
366 : }
367 4592 : postprocess_setop_rel(root, rel);
368 : }
369 : else
370 : {
371 0 : elog(ERROR, "unrecognized node type: %d",
372 : (int) nodeTag(setOp));
373 : *pTargetList = NIL;
374 : rel = NULL; /* keep compiler quiet */
375 : }
376 :
377 18546 : return rel;
378 : }
379 :
380 : /*
381 : * Generate paths for a recursive UNION node
382 : */
383 : static RelOptInfo *
384 808 : generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
385 : List *refnames_tlist,
386 : List **pTargetList)
387 : {
388 : RelOptInfo *result_rel;
389 : Path *path;
390 : RelOptInfo *lrel,
391 : *rrel;
392 : Path *lpath;
393 : Path *rpath;
394 : List *lpath_tlist;
395 : bool lpath_trivial_tlist;
396 : List *rpath_tlist;
397 : bool rpath_trivial_tlist;
398 : List *tlist;
399 : List *groupList;
400 : double dNumGroups;
401 :
402 : /* Parser should have rejected other cases */
403 808 : if (setOp->op != SETOP_UNION)
404 0 : elog(ERROR, "only UNION queries can be recursive");
405 : /* Worktable ID should be assigned */
406 : Assert(root->wt_param_id >= 0);
407 :
408 : /*
409 : * Unlike a regular UNION node, process the left and right inputs
410 : * separately without any intention of combining them into one Append.
411 : */
412 808 : lrel = recurse_set_operations(setOp->larg, root,
413 : setOp->colTypes, setOp->colCollations,
414 : false, -1,
415 : refnames_tlist,
416 : &lpath_tlist,
417 : &lpath_trivial_tlist);
418 808 : if (lrel->rtekind == RTE_SUBQUERY)
419 808 : build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
420 : NIL, NULL);
421 808 : lpath = lrel->cheapest_total_path;
422 : /* The right path will want to look at the left one ... */
423 808 : root->non_recursive_path = lpath;
424 808 : rrel = recurse_set_operations(setOp->rarg, root,
425 : setOp->colTypes, setOp->colCollations,
426 : false, -1,
427 : refnames_tlist,
428 : &rpath_tlist,
429 : &rpath_trivial_tlist);
430 808 : if (rrel->rtekind == RTE_SUBQUERY)
431 802 : build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
432 : NIL, NULL);
433 808 : rpath = rrel->cheapest_total_path;
434 808 : root->non_recursive_path = NULL;
435 :
436 : /*
437 : * Generate tlist for RecursiveUnion path node --- same as in Append cases
438 : */
439 808 : tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations, false,
440 808 : list_make2(lpath_tlist, rpath_tlist),
441 : refnames_tlist);
442 :
443 808 : *pTargetList = tlist;
444 :
445 : /* Build result relation. */
446 808 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
447 808 : bms_union(lrel->relids, rrel->relids));
448 808 : result_rel->reltarget = create_pathtarget(root, tlist);
449 :
450 : /*
451 : * If UNION, identify the grouping operators
452 : */
453 808 : if (setOp->all)
454 : {
455 452 : groupList = NIL;
456 452 : dNumGroups = 0;
457 : }
458 : else
459 : {
460 : /* Identify the grouping semantics */
461 356 : groupList = generate_setop_grouplist(setOp, tlist);
462 :
463 : /* We only support hashing here */
464 356 : if (!grouping_is_hashable(groupList))
465 6 : ereport(ERROR,
466 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
467 : errmsg("could not implement recursive UNION"),
468 : errdetail("All column datatypes must be hashable.")));
469 :
470 : /*
471 : * For the moment, take the number of distinct groups as equal to the
472 : * total input size, ie, the worst case.
473 : */
474 350 : dNumGroups = lpath->rows + rpath->rows * 10;
475 : }
476 :
477 : /*
478 : * And make the path node.
479 : */
480 802 : path = (Path *) create_recursiveunion_path(root,
481 : result_rel,
482 : lpath,
483 : rpath,
484 802 : result_rel->reltarget,
485 : groupList,
486 : root->wt_param_id,
487 : dNumGroups);
488 :
489 802 : add_path(result_rel, path);
490 802 : postprocess_setop_rel(root, result_rel);
491 802 : return result_rel;
492 : }
493 :
494 : /*
495 : * build_setop_child_paths
496 : * Build paths for the set op child relation denoted by 'rel'.
497 : *
498 : * interesting_pathkeys: if not NIL, also include paths that suit these
499 : * pathkeys, sorting any unsorted paths as required.
500 : * *pNumGroups: if not NULL, we estimate the number of distinct groups
501 : * in the result, and store it there
502 : */
503 : static void
504 13954 : build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
505 : bool trivial_tlist, List *child_tlist,
506 : List *interesting_pathkeys, double *pNumGroups)
507 : {
508 : RelOptInfo *final_rel;
509 13954 : List *setop_pathkeys = rel->subroot->setop_pathkeys;
510 : ListCell *lc;
511 :
512 : /* it can't be a set op child rel if it's not a subquery */
513 : Assert(rel->rtekind == RTE_SUBQUERY);
514 :
515 : /* when sorting is needed, add child rel equivalences */
516 13954 : if (interesting_pathkeys != NIL)
517 9618 : add_setop_child_rel_equivalences(root,
518 : rel,
519 : child_tlist,
520 : interesting_pathkeys);
521 :
522 : /*
523 : * Mark rel with estimated output rows, width, etc. Note that we have to
524 : * do this before generating outer-query paths, else cost_subqueryscan is
525 : * not happy.
526 : */
527 13954 : set_subquery_size_estimates(root, rel);
528 :
529 : /*
530 : * Since we may want to add a partial path to this relation, we must set
531 : * its consider_parallel flag correctly.
532 : */
533 13954 : final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
534 13954 : rel->consider_parallel = final_rel->consider_parallel;
535 :
536 : /* Generate subquery scan paths for any interesting path in final_rel */
537 36568 : foreach(lc, final_rel->pathlist)
538 : {
539 22614 : Path *subpath = (Path *) lfirst(lc);
540 : List *pathkeys;
541 22614 : Path *cheapest_input_path = final_rel->cheapest_total_path;
542 : bool is_sorted;
543 : int presorted_keys;
544 :
545 : /*
546 : * Include the cheapest path as-is so that the set operation can be
547 : * cheaply implemented using a method which does not require the input
548 : * to be sorted.
549 : */
550 22614 : if (subpath == cheapest_input_path)
551 : {
552 : /* Convert subpath's pathkeys to outer representation */
553 13954 : pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
554 : make_tlist_from_pathtarget(subpath->pathtarget));
555 :
556 : /* Generate outer path using this subpath */
557 13954 : add_path(rel, (Path *) create_subqueryscan_path(root,
558 : rel,
559 : subpath,
560 : trivial_tlist,
561 : pathkeys,
562 : NULL));
563 : }
564 :
565 : /* skip dealing with sorted paths if the setop doesn't need them */
566 22614 : if (interesting_pathkeys == NIL)
567 4700 : continue;
568 :
569 : /*
570 : * Create paths to suit final sort order required for setop_pathkeys.
571 : * Here we'll sort the cheapest input path (if not sorted already) and
572 : * incremental sort any paths which are partially sorted.
573 : */
574 17926 : is_sorted = pathkeys_count_contained_in(setop_pathkeys,
575 : subpath->pathkeys,
576 : &presorted_keys);
577 :
578 17926 : if (!is_sorted)
579 : {
580 11972 : double limittuples = rel->subroot->limit_tuples;
581 :
582 : /*
583 : * Try at least sorting the cheapest path and also try
584 : * incrementally sorting any path which is partially sorted
585 : * already (no need to deal with paths which have presorted keys
586 : * when incremental sort is disabled unless it's the cheapest
587 : * input path).
588 : */
589 11972 : if (subpath != cheapest_input_path &&
590 2896 : (presorted_keys == 0 || !enable_incremental_sort))
591 12 : continue;
592 :
593 : /*
594 : * We've no need to consider both a sort and incremental sort.
595 : * We'll just do a sort if there are no presorted keys and an
596 : * incremental sort when there are presorted keys.
597 : */
598 11960 : if (presorted_keys == 0 || !enable_incremental_sort)
599 9076 : subpath = (Path *) create_sort_path(rel->subroot,
600 : final_rel,
601 : subpath,
602 : setop_pathkeys,
603 : limittuples);
604 : else
605 2884 : subpath = (Path *) create_incremental_sort_path(rel->subroot,
606 : final_rel,
607 : subpath,
608 : setop_pathkeys,
609 : presorted_keys,
610 : limittuples);
611 : }
612 :
613 : /*
614 : * subpath is now sorted, so add it to the pathlist. We already added
615 : * the cheapest_input_path above, so don't add it again unless we just
616 : * sorted it.
617 : */
618 17914 : if (subpath != cheapest_input_path)
619 : {
620 : /* Convert subpath's pathkeys to outer representation */
621 17372 : pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
622 : make_tlist_from_pathtarget(subpath->pathtarget));
623 :
624 : /* Generate outer path using this subpath */
625 17372 : add_path(rel, (Path *) create_subqueryscan_path(root,
626 : rel,
627 : subpath,
628 : trivial_tlist,
629 : pathkeys,
630 : NULL));
631 : }
632 : }
633 :
634 : /* if consider_parallel is false, there should be no partial paths */
635 : Assert(final_rel->consider_parallel ||
636 : final_rel->partial_pathlist == NIL);
637 :
638 : /*
639 : * If we have a partial path for the child relation, we can use that to
640 : * build a partial path for this relation. But there's no point in
641 : * considering any path but the cheapest.
642 : */
643 13954 : if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
644 9494 : final_rel->partial_pathlist != NIL)
645 : {
646 : Path *partial_subpath;
647 : Path *partial_path;
648 :
649 12 : partial_subpath = linitial(final_rel->partial_pathlist);
650 : partial_path = (Path *)
651 12 : create_subqueryscan_path(root, rel, partial_subpath,
652 : trivial_tlist,
653 : NIL, NULL);
654 12 : add_partial_path(rel, partial_path);
655 : }
656 :
657 13954 : postprocess_setop_rel(root, rel);
658 :
659 : /*
660 : * Estimate number of groups if caller wants it. If the subquery used
661 : * grouping or aggregation, its output is probably mostly unique anyway;
662 : * otherwise do statistical estimation.
663 : *
664 : * XXX you don't really want to know about this: we do the estimation
665 : * using the subquery's original targetlist expressions, not the
666 : * subroot->processed_tlist which might seem more appropriate. The reason
667 : * is that if the subquery is itself a setop, it may return a
668 : * processed_tlist containing "varno 0" Vars generated by
669 : * generate_append_tlist, and those would confuse estimate_num_groups
670 : * mightily. We ought to get rid of the "varno 0" hack, but that requires
671 : * a redesign of the parsetree representation of setops, so that there can
672 : * be an RTE corresponding to each setop's output.
673 : */
674 13954 : if (pNumGroups)
675 : {
676 1174 : PlannerInfo *subroot = rel->subroot;
677 1174 : Query *subquery = subroot->parse;
678 :
679 1174 : if (subquery->groupClause || subquery->groupingSets ||
680 1174 : subquery->distinctClause || subroot->hasHavingQual ||
681 1162 : subquery->hasAggs)
682 12 : *pNumGroups = rel->cheapest_total_path->rows;
683 : else
684 1162 : *pNumGroups = estimate_num_groups(subroot,
685 : get_tlist_exprs(subquery->targetList, false),
686 1162 : rel->cheapest_total_path->rows,
687 : NULL,
688 : NULL);
689 : }
690 13954 : }
691 :
692 : /*
693 : * Generate paths for a UNION or UNION ALL node
694 : */
695 : static RelOptInfo *
696 3990 : generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
697 : List *refnames_tlist,
698 : List **pTargetList)
699 : {
700 3990 : Relids relids = NULL;
701 : RelOptInfo *result_rel;
702 : ListCell *lc;
703 : ListCell *lc2;
704 : ListCell *lc3;
705 3990 : List *cheapest_pathlist = NIL;
706 3990 : List *ordered_pathlist = NIL;
707 3990 : List *partial_pathlist = NIL;
708 3990 : bool partial_paths_valid = true;
709 3990 : bool consider_parallel = true;
710 : List *rellist;
711 : List *tlist_list;
712 : List *trivial_tlist_list;
713 : List *tlist;
714 3990 : List *groupList = NIL;
715 : Path *apath;
716 3990 : Path *gpath = NULL;
717 : bool try_sorted;
718 3990 : List *union_pathkeys = NIL;
719 :
720 : /*
721 : * If any of my children are identical UNION nodes (same op, all-flag, and
722 : * colTypes) then they can be merged into this node so that we generate
723 : * only one Append/MergeAppend and unique-ification for the lot. Recurse
724 : * to find such nodes.
725 : */
726 3990 : rellist = plan_union_children(root,
727 : op,
728 : refnames_tlist,
729 : &tlist_list,
730 : &trivial_tlist_list);
731 :
732 : /*
733 : * Generate tlist for Append/MergeAppend plan node.
734 : *
735 : * The tlist for an Append plan isn't important as far as the Append is
736 : * concerned, but we must make it look real anyway for the benefit of the
737 : * next plan level up.
738 : */
739 3990 : tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
740 : tlist_list, refnames_tlist);
741 3990 : *pTargetList = tlist;
742 :
743 : /* For for UNIONs (not UNION ALL), try sorting, if sorting is possible */
744 3990 : try_sorted = !op->all && grouping_is_sortable(op->groupClauses);
745 :
746 3990 : if (try_sorted)
747 : {
748 : /* Identify the grouping semantics */
749 3354 : groupList = generate_setop_grouplist(op, tlist);
750 :
751 : /* Determine the pathkeys for sorting by the whole target list */
752 3354 : union_pathkeys = make_pathkeys_for_sortclauses(root, groupList, tlist);
753 :
754 3354 : root->query_pathkeys = union_pathkeys;
755 : }
756 :
757 : /*
758 : * Now that we've got the append target list, we can build the union child
759 : * paths.
760 : */
761 15274 : forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
762 : {
763 11284 : RelOptInfo *rel = lfirst(lc);
764 11284 : bool trivial_tlist = lfirst_int(lc2);
765 11284 : List *child_tlist = lfirst_node(List, lc3);
766 :
767 : /* only build paths for the union children */
768 11284 : if (rel->rtekind == RTE_SUBQUERY)
769 11170 : build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
770 : union_pathkeys, NULL);
771 : }
772 :
773 : /* Build path lists and relid set. */
774 15274 : foreach(lc, rellist)
775 : {
776 11284 : RelOptInfo *rel = lfirst(lc);
777 : Path *ordered_path;
778 :
779 11284 : cheapest_pathlist = lappend(cheapest_pathlist,
780 11284 : rel->cheapest_total_path);
781 :
782 11284 : if (try_sorted)
783 : {
784 3604 : ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
785 : union_pathkeys,
786 : NULL,
787 : TOTAL_COST,
788 : false);
789 :
790 3604 : if (ordered_path != NULL)
791 464 : ordered_pathlist = lappend(ordered_pathlist, ordered_path);
792 : else
793 : {
794 : /*
795 : * If we can't find a sorted path, just give up trying to
796 : * generate a list of correctly sorted child paths. This can
797 : * happen when type coercion was added to the targetlist due
798 : * to mismatching types from the union children.
799 : */
800 3140 : try_sorted = false;
801 : }
802 : }
803 :
804 11284 : if (consider_parallel)
805 : {
806 8030 : if (!rel->consider_parallel)
807 : {
808 3002 : consider_parallel = false;
809 3002 : partial_paths_valid = false;
810 : }
811 5028 : else if (rel->partial_pathlist == NIL)
812 5016 : partial_paths_valid = false;
813 : else
814 12 : partial_pathlist = lappend(partial_pathlist,
815 12 : linitial(rel->partial_pathlist));
816 : }
817 :
818 11284 : relids = bms_union(relids, rel->relids);
819 : }
820 :
821 : /* Build result relation. */
822 3990 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
823 3990 : result_rel->reltarget = create_pathtarget(root, tlist);
824 3990 : result_rel->consider_parallel = consider_parallel;
825 3990 : result_rel->consider_startup = (root->tuple_fraction > 0);
826 :
827 : /*
828 : * Append the child results together using the cheapest paths from each
829 : * union child.
830 : */
831 3990 : apath = (Path *) create_append_path(root, result_rel, cheapest_pathlist,
832 : NIL, NIL, NULL, 0, false, -1);
833 :
834 : /*
835 : * Estimate number of groups. For now we just assume the output is unique
836 : * --- this is certainly true for the UNION case, and we want worst-case
837 : * estimates anyway.
838 : */
839 3990 : result_rel->rows = apath->rows;
840 :
841 : /*
842 : * Now consider doing the same thing using the partial paths plus Append
843 : * plus Gather.
844 : */
845 3990 : if (partial_paths_valid)
846 : {
847 : Path *papath;
848 6 : int parallel_workers = 0;
849 :
850 : /* Find the highest number of workers requested for any subpath. */
851 18 : foreach(lc, partial_pathlist)
852 : {
853 12 : Path *subpath = lfirst(lc);
854 :
855 12 : parallel_workers = Max(parallel_workers,
856 : subpath->parallel_workers);
857 : }
858 : Assert(parallel_workers > 0);
859 :
860 : /*
861 : * If the use of parallel append is permitted, always request at least
862 : * log2(# of children) paths. We assume it can be useful to have
863 : * extra workers in this case because they will be spread out across
864 : * the children. The precise formula is just a guess; see
865 : * add_paths_to_append_rel.
866 : */
867 6 : if (enable_parallel_append)
868 : {
869 6 : parallel_workers = Max(parallel_workers,
870 : pg_leftmost_one_pos32(list_length(partial_pathlist)) + 1);
871 6 : parallel_workers = Min(parallel_workers,
872 : max_parallel_workers_per_gather);
873 : }
874 : Assert(parallel_workers > 0);
875 :
876 : papath = (Path *)
877 6 : create_append_path(root, result_rel, NIL, partial_pathlist,
878 : NIL, NULL, parallel_workers,
879 : enable_parallel_append, -1);
880 : gpath = (Path *)
881 6 : create_gather_path(root, result_rel, papath,
882 6 : result_rel->reltarget, NULL, NULL);
883 : }
884 :
885 3990 : if (!op->all)
886 : {
887 : double dNumGroups;
888 3430 : bool can_sort = grouping_is_sortable(groupList);
889 3430 : bool can_hash = grouping_is_hashable(groupList);
890 :
891 : /*
892 : * XXX for the moment, take the number of distinct groups as equal to
893 : * the total input size, i.e., the worst case. This is too
894 : * conservative, but it's not clear how to get a decent estimate of
895 : * the true size. One should note as well the propensity of novices
896 : * to write UNION rather than UNION ALL even when they don't expect
897 : * any duplicates...
898 : */
899 3430 : dNumGroups = apath->rows;
900 :
901 3430 : if (can_hash)
902 : {
903 : Path *path;
904 :
905 : /*
906 : * Try a hash aggregate plan on 'apath'. This is the cheapest
907 : * available path containing each append child.
908 : */
909 3358 : path = (Path *) create_agg_path(root,
910 : result_rel,
911 : apath,
912 : create_pathtarget(root, tlist),
913 : AGG_HASHED,
914 : AGGSPLIT_SIMPLE,
915 : groupList,
916 : NIL,
917 : NULL,
918 : dNumGroups);
919 3358 : add_path(result_rel, path);
920 :
921 : /* Try hash aggregate on the Gather path, if valid */
922 3358 : if (gpath != NULL)
923 : {
924 : /* Hashed aggregate plan --- no sort needed */
925 6 : path = (Path *) create_agg_path(root,
926 : result_rel,
927 : gpath,
928 : create_pathtarget(root, tlist),
929 : AGG_HASHED,
930 : AGGSPLIT_SIMPLE,
931 : groupList,
932 : NIL,
933 : NULL,
934 : dNumGroups);
935 6 : add_path(result_rel, path);
936 : }
937 : }
938 :
939 3430 : if (can_sort)
940 : {
941 3430 : Path *path = apath;
942 :
943 : /* Try Sort -> Unique on the Append path */
944 3430 : if (groupList != NIL)
945 3324 : path = (Path *) create_sort_path(root, result_rel, path,
946 : make_pathkeys_for_sortclauses(root, groupList, tlist),
947 : -1.0);
948 :
949 3430 : path = (Path *) create_upper_unique_path(root,
950 : result_rel,
951 : path,
952 3430 : list_length(path->pathkeys),
953 : dNumGroups);
954 :
955 3430 : add_path(result_rel, path);
956 :
957 : /* Try Sort -> Unique on the Gather path, if set */
958 3430 : if (gpath != NULL)
959 : {
960 6 : path = gpath;
961 :
962 6 : path = (Path *) create_sort_path(root, result_rel, path,
963 : make_pathkeys_for_sortclauses(root, groupList, tlist),
964 : -1.0);
965 :
966 6 : path = (Path *) create_upper_unique_path(root,
967 : result_rel,
968 : path,
969 6 : list_length(path->pathkeys),
970 : dNumGroups);
971 6 : add_path(result_rel, path);
972 : }
973 : }
974 :
975 : /*
976 : * Try making a MergeAppend path if we managed to find a path with the
977 : * correct pathkeys in each union child query.
978 : */
979 3430 : if (try_sorted && groupList != NIL)
980 : {
981 : Path *path;
982 :
983 184 : path = (Path *) create_merge_append_path(root,
984 : result_rel,
985 : ordered_pathlist,
986 : union_pathkeys,
987 : NULL);
988 :
989 : /* and make the MergeAppend unique */
990 184 : path = (Path *) create_upper_unique_path(root,
991 : result_rel,
992 : path,
993 : list_length(tlist),
994 : dNumGroups);
995 :
996 184 : add_path(result_rel, path);
997 : }
998 : }
999 : else
1000 : {
1001 : /* UNION ALL */
1002 560 : add_path(result_rel, apath);
1003 :
1004 560 : if (gpath != NULL)
1005 0 : add_path(result_rel, gpath);
1006 : }
1007 :
1008 3990 : return result_rel;
1009 : }
1010 :
1011 : /*
1012 : * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
1013 : */
1014 : static RelOptInfo *
1015 602 : generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
1016 : List *refnames_tlist,
1017 : List **pTargetList)
1018 : {
1019 : RelOptInfo *result_rel;
1020 : RelOptInfo *lrel,
1021 : *rrel;
1022 602 : double save_fraction = root->tuple_fraction;
1023 : Path *lpath,
1024 : *rpath,
1025 : *path;
1026 : List *lpath_tlist,
1027 : *rpath_tlist,
1028 : *tlist_list,
1029 : *tlist,
1030 : *groupList,
1031 : *pathlist;
1032 : bool lpath_trivial_tlist,
1033 : rpath_trivial_tlist;
1034 : double dLeftGroups,
1035 : dRightGroups,
1036 : dNumGroups,
1037 : dNumOutputRows;
1038 : bool use_hash;
1039 : SetOpCmd cmd;
1040 : int firstFlag;
1041 :
1042 : /*
1043 : * Tell children to fetch all tuples.
1044 : */
1045 602 : root->tuple_fraction = 0.0;
1046 :
1047 : /* Recurse on children, ensuring their outputs are marked */
1048 602 : lrel = recurse_set_operations(op->larg, root,
1049 : op->colTypes, op->colCollations,
1050 : false, 0,
1051 : refnames_tlist,
1052 : &lpath_tlist,
1053 : &lpath_trivial_tlist);
1054 602 : if (lrel->rtekind == RTE_SUBQUERY)
1055 578 : build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
1056 : NIL, &dLeftGroups);
1057 : else
1058 24 : dLeftGroups = lrel->rows;
1059 :
1060 602 : lpath = lrel->cheapest_total_path;
1061 602 : rrel = recurse_set_operations(op->rarg, root,
1062 : op->colTypes, op->colCollations,
1063 : false, 1,
1064 : refnames_tlist,
1065 : &rpath_tlist,
1066 : &rpath_trivial_tlist);
1067 602 : if (rrel->rtekind == RTE_SUBQUERY)
1068 596 : build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
1069 : NIL, &dRightGroups);
1070 : else
1071 6 : dRightGroups = rrel->rows;
1072 :
1073 602 : rpath = rrel->cheapest_total_path;
1074 :
1075 : /* Undo effects of forcing tuple_fraction to 0 */
1076 602 : root->tuple_fraction = save_fraction;
1077 :
1078 : /*
1079 : * For EXCEPT, we must put the left input first. For INTERSECT, either
1080 : * order should give the same results, and we prefer to put the smaller
1081 : * input first in order to minimize the size of the hash table in the
1082 : * hashing case. "Smaller" means the one with the fewer groups.
1083 : */
1084 602 : if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
1085 572 : {
1086 572 : pathlist = list_make2(lpath, rpath);
1087 572 : tlist_list = list_make2(lpath_tlist, rpath_tlist);
1088 572 : firstFlag = 0;
1089 : }
1090 : else
1091 : {
1092 30 : pathlist = list_make2(rpath, lpath);
1093 30 : tlist_list = list_make2(rpath_tlist, lpath_tlist);
1094 30 : firstFlag = 1;
1095 : }
1096 :
1097 : /*
1098 : * Generate tlist for Append plan node.
1099 : *
1100 : * The tlist for an Append plan isn't important as far as the Append is
1101 : * concerned, but we must make it look real anyway for the benefit of the
1102 : * next plan level up. In fact, it has to be real enough that the flag
1103 : * column is shown as a variable not a constant, else setrefs.c will get
1104 : * confused.
1105 : */
1106 602 : tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
1107 : tlist_list, refnames_tlist);
1108 :
1109 602 : *pTargetList = tlist;
1110 :
1111 : /* Build result relation. */
1112 602 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
1113 602 : bms_union(lrel->relids, rrel->relids));
1114 602 : result_rel->reltarget = create_pathtarget(root, tlist);
1115 :
1116 : /*
1117 : * Append the child results together.
1118 : */
1119 602 : path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
1120 : NIL, NULL, 0, false, -1);
1121 :
1122 : /* Identify the grouping semantics */
1123 602 : groupList = generate_setop_grouplist(op, tlist);
1124 :
1125 : /*
1126 : * Estimate number of distinct groups that we'll need hashtable entries
1127 : * for; this is the size of the left-hand input for EXCEPT, or the smaller
1128 : * input for INTERSECT. Also estimate the number of eventual output rows.
1129 : * In non-ALL cases, we estimate each group produces one output row; in
1130 : * ALL cases use the relevant relation size. These are worst-case
1131 : * estimates, of course, but we need to be conservative.
1132 : */
1133 602 : if (op->op == SETOP_EXCEPT)
1134 : {
1135 392 : dNumGroups = dLeftGroups;
1136 392 : dNumOutputRows = op->all ? lpath->rows : dNumGroups;
1137 : }
1138 : else
1139 : {
1140 210 : dNumGroups = Min(dLeftGroups, dRightGroups);
1141 210 : dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
1142 : }
1143 :
1144 : /*
1145 : * Decide whether to hash or sort, and add a sort node if needed.
1146 : */
1147 602 : use_hash = choose_hashed_setop(root, groupList, path,
1148 : dNumGroups, dNumOutputRows,
1149 602 : (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
1150 :
1151 602 : if (groupList && !use_hash)
1152 132 : path = (Path *) create_sort_path(root,
1153 : result_rel,
1154 : path,
1155 : make_pathkeys_for_sortclauses(root,
1156 : groupList,
1157 : tlist),
1158 : -1.0);
1159 :
1160 : /*
1161 : * Finally, add a SetOp path node to generate the correct output.
1162 : */
1163 602 : switch (op->op)
1164 : {
1165 210 : case SETOP_INTERSECT:
1166 210 : cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
1167 210 : break;
1168 392 : case SETOP_EXCEPT:
1169 392 : cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
1170 392 : break;
1171 0 : default:
1172 0 : elog(ERROR, "unrecognized set op: %d", (int) op->op);
1173 : cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
1174 : break;
1175 : }
1176 602 : path = (Path *) create_setop_path(root,
1177 : result_rel,
1178 : path,
1179 : cmd,
1180 : use_hash ? SETOP_HASHED : SETOP_SORTED,
1181 : groupList,
1182 602 : list_length(op->colTypes) + 1,
1183 : use_hash ? firstFlag : -1,
1184 : dNumGroups,
1185 : dNumOutputRows);
1186 :
1187 602 : result_rel->rows = path->rows;
1188 602 : add_path(result_rel, path);
1189 602 : return result_rel;
1190 : }
1191 :
1192 : /*
1193 : * Pull up children of a UNION node that are identically-propertied UNIONs.
1194 : *
1195 : * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
1196 : * output rows will be lost anyway.
1197 : *
1198 : * NOTE: currently, we ignore collations while determining if a child has
1199 : * the same properties. This is semantically sound only so long as all
1200 : * collations have the same notion of equality. It is valid from an
1201 : * implementation standpoint because we don't care about the ordering of
1202 : * a UNION child's result: UNION ALL results are always unordered, and
1203 : * generate_union_paths will force a fresh sort if the top level is a UNION.
1204 : */
1205 : static List *
1206 3990 : plan_union_children(PlannerInfo *root,
1207 : SetOperationStmt *top_union,
1208 : List *refnames_tlist,
1209 : List **tlist_list,
1210 : List **istrivial_tlist)
1211 : {
1212 3990 : List *pending_rels = list_make1(top_union);
1213 3990 : List *result = NIL;
1214 : List *child_tlist;
1215 : bool trivial_tlist;
1216 :
1217 3990 : *tlist_list = NIL;
1218 3990 : *istrivial_tlist = NIL;
1219 :
1220 22568 : while (pending_rels != NIL)
1221 : {
1222 18578 : Node *setOp = linitial(pending_rels);
1223 :
1224 18578 : pending_rels = list_delete_first(pending_rels);
1225 :
1226 18578 : if (IsA(setOp, SetOperationStmt))
1227 : {
1228 7408 : SetOperationStmt *op = (SetOperationStmt *) setOp;
1229 :
1230 7408 : if (op->op == top_union->op &&
1231 14624 : (op->all == top_union->all || op->all) &&
1232 7306 : equal(op->colTypes, top_union->colTypes))
1233 : {
1234 : /* Same UNION, so fold children into parent */
1235 7294 : pending_rels = lcons(op->rarg, pending_rels);
1236 7294 : pending_rels = lcons(op->larg, pending_rels);
1237 7294 : continue;
1238 : }
1239 : }
1240 :
1241 : /*
1242 : * Not same, so plan this child separately.
1243 : *
1244 : * Note we disallow any resjunk columns in child results. This is
1245 : * necessary since the Append node that implements the union won't do
1246 : * any projection, and upper levels will get confused if some of our
1247 : * output tuples have junk and some don't. This case only arises when
1248 : * we have an EXCEPT or INTERSECT as child, else there won't be
1249 : * resjunk anyway.
1250 : */
1251 11284 : result = lappend(result, recurse_set_operations(setOp, root,
1252 : top_union->colTypes,
1253 : top_union->colCollations,
1254 : false, -1,
1255 : refnames_tlist,
1256 : &child_tlist,
1257 : &trivial_tlist));
1258 11284 : *tlist_list = lappend(*tlist_list, child_tlist);
1259 11284 : *istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
1260 : }
1261 :
1262 3990 : return result;
1263 : }
1264 :
1265 : /*
1266 : * postprocess_setop_rel - perform steps required after adding paths
1267 : */
1268 : static void
1269 19348 : postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
1270 : {
1271 : /*
1272 : * We don't currently worry about allowing FDWs to contribute paths to
1273 : * this relation, but give extensions a chance.
1274 : */
1275 19348 : if (create_upper_paths_hook)
1276 0 : (*create_upper_paths_hook) (root, UPPERREL_SETOP,
1277 : NULL, rel, NULL);
1278 :
1279 : /* Select cheapest path */
1280 19348 : set_cheapest(rel);
1281 19348 : }
1282 :
1283 : /*
1284 : * choose_hashed_setop - should we use hashing for a set operation?
1285 : */
1286 : static bool
1287 602 : choose_hashed_setop(PlannerInfo *root, List *groupClauses,
1288 : Path *input_path,
1289 : double dNumGroups, double dNumOutputRows,
1290 : const char *construct)
1291 : {
1292 602 : int numGroupCols = list_length(groupClauses);
1293 602 : Size hash_mem_limit = get_hash_memory_limit();
1294 : bool can_sort;
1295 : bool can_hash;
1296 : Size hashentrysize;
1297 : Path hashed_p;
1298 : Path sorted_p;
1299 : double tuple_fraction;
1300 :
1301 : /* Check whether the operators support sorting or hashing */
1302 602 : can_sort = grouping_is_sortable(groupClauses);
1303 602 : can_hash = grouping_is_hashable(groupClauses);
1304 602 : if (can_hash && can_sort)
1305 : {
1306 : /* we have a meaningful choice to make, continue ... */
1307 : }
1308 60 : else if (can_hash)
1309 0 : return true;
1310 60 : else if (can_sort)
1311 60 : return false;
1312 : else
1313 0 : ereport(ERROR,
1314 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1315 : /* translator: %s is UNION, INTERSECT, or EXCEPT */
1316 : errmsg("could not implement %s", construct),
1317 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1318 :
1319 : /* Prefer sorting when enable_hashagg is off */
1320 542 : if (!enable_hashagg)
1321 102 : return false;
1322 :
1323 : /*
1324 : * Don't do it if it doesn't look like the hashtable will fit into
1325 : * hash_mem.
1326 : */
1327 440 : hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
1328 :
1329 440 : if (hashentrysize * dNumGroups > hash_mem_limit)
1330 0 : return false;
1331 :
1332 : /*
1333 : * See if the estimated cost is no more than doing it the other way.
1334 : *
1335 : * We need to consider input_plan + hashagg versus input_plan + sort +
1336 : * group. Note that the actual result plan might involve a SetOp or
1337 : * Unique node, not Agg or Group, but the cost estimates for Agg and Group
1338 : * should be close enough for our purposes here.
1339 : *
1340 : * These path variables are dummies that just hold cost fields; we don't
1341 : * make actual Paths for these steps.
1342 : */
1343 440 : cost_agg(&hashed_p, root, AGG_HASHED, NULL,
1344 : numGroupCols, dNumGroups,
1345 : NIL,
1346 : input_path->startup_cost, input_path->total_cost,
1347 440 : input_path->rows, input_path->pathtarget->width);
1348 :
1349 : /*
1350 : * Now for the sorted case. Note that the input is *always* unsorted,
1351 : * since it was made by appending unrelated sub-relations together.
1352 : */
1353 440 : sorted_p.startup_cost = input_path->startup_cost;
1354 440 : sorted_p.total_cost = input_path->total_cost;
1355 : /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
1356 440 : cost_sort(&sorted_p, root, NIL, sorted_p.total_cost,
1357 440 : input_path->rows, input_path->pathtarget->width,
1358 : 0.0, work_mem, -1.0);
1359 440 : cost_group(&sorted_p, root, numGroupCols, dNumGroups,
1360 : NIL,
1361 : sorted_p.startup_cost, sorted_p.total_cost,
1362 : input_path->rows);
1363 :
1364 : /*
1365 : * Now make the decision using the top-level tuple fraction. First we
1366 : * have to convert an absolute count (LIMIT) into fractional form.
1367 : */
1368 440 : tuple_fraction = root->tuple_fraction;
1369 440 : if (tuple_fraction >= 1.0)
1370 0 : tuple_fraction /= dNumOutputRows;
1371 :
1372 440 : if (compare_fractional_path_costs(&hashed_p, &sorted_p,
1373 : tuple_fraction) < 0)
1374 : {
1375 : /* Hashed is cheaper, so use it */
1376 440 : return true;
1377 : }
1378 0 : return false;
1379 : }
1380 :
1381 : /*
1382 : * Generate targetlist for a set-operation plan node
1383 : *
1384 : * colTypes: OID list of set-op's result column datatypes
1385 : * colCollations: OID list of set-op's result column collations
1386 : * flag: -1 if no flag column needed, 0 or 1 to create a const flag column
1387 : * varno: varno to use in generated Vars
1388 : * hack_constants: true to copy up constants (see comments in code)
1389 : * input_tlist: targetlist of this node's input node
1390 : * refnames_tlist: targetlist to take column names from
1391 : * trivial_tlist: output parameter, set to true if targetlist is trivial
1392 : */
1393 : static List *
1394 14086 : generate_setop_tlist(List *colTypes, List *colCollations,
1395 : int flag,
1396 : Index varno,
1397 : bool hack_constants,
1398 : List *input_tlist,
1399 : List *refnames_tlist,
1400 : bool *trivial_tlist)
1401 : {
1402 14086 : List *tlist = NIL;
1403 14086 : int resno = 1;
1404 : ListCell *ctlc,
1405 : *cclc,
1406 : *itlc,
1407 : *rtlc;
1408 : TargetEntry *tle;
1409 : Node *expr;
1410 :
1411 14086 : *trivial_tlist = true; /* until proven differently */
1412 :
1413 55180 : forfour(ctlc, colTypes, cclc, colCollations,
1414 : itlc, input_tlist, rtlc, refnames_tlist)
1415 : {
1416 41094 : Oid colType = lfirst_oid(ctlc);
1417 41094 : Oid colColl = lfirst_oid(cclc);
1418 41094 : TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
1419 41094 : TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
1420 :
1421 : Assert(inputtle->resno == resno);
1422 : Assert(reftle->resno == resno);
1423 : Assert(!inputtle->resjunk);
1424 : Assert(!reftle->resjunk);
1425 :
1426 : /*
1427 : * Generate columns referencing input columns and having appropriate
1428 : * data types and column names. Insert datatype coercions where
1429 : * necessary.
1430 : *
1431 : * HACK: constants in the input's targetlist are copied up as-is
1432 : * rather than being referenced as subquery outputs. This is mainly
1433 : * to ensure that when we try to coerce them to the output column's
1434 : * datatype, the right things happen for UNKNOWN constants. But do
1435 : * this only at the first level of subquery-scan plans; we don't want
1436 : * phony constants appearing in the output tlists of upper-level
1437 : * nodes!
1438 : *
1439 : * Note that copying a constant doesn't in itself require us to mark
1440 : * the tlist nontrivial; see trivial_subqueryscan() in setrefs.c.
1441 : */
1442 41094 : if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
1443 14252 : expr = (Node *) inputtle->expr;
1444 : else
1445 107368 : expr = (Node *) makeVar(varno,
1446 26842 : inputtle->resno,
1447 26842 : exprType((Node *) inputtle->expr),
1448 26842 : exprTypmod((Node *) inputtle->expr),
1449 26842 : exprCollation((Node *) inputtle->expr),
1450 : 0);
1451 :
1452 41094 : if (exprType(expr) != colType)
1453 : {
1454 : /*
1455 : * Note: it's not really cool to be applying coerce_to_common_type
1456 : * here; one notable point is that assign_expr_collations never
1457 : * gets run on any generated nodes. For the moment that's not a
1458 : * problem because we force the correct exposed collation below.
1459 : * It would likely be best to make the parser generate the correct
1460 : * output tlist for every set-op to begin with, though.
1461 : */
1462 1466 : expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */
1463 : expr,
1464 : colType,
1465 : "UNION/INTERSECT/EXCEPT");
1466 1466 : *trivial_tlist = false; /* the coercion makes it not trivial */
1467 : }
1468 :
1469 : /*
1470 : * Ensure the tlist entry's exposed collation matches the set-op. This
1471 : * is necessary because plan_set_operations() reports the result
1472 : * ordering as a list of SortGroupClauses, which don't carry collation
1473 : * themselves but just refer to tlist entries. If we don't show the
1474 : * right collation then planner.c might do the wrong thing in
1475 : * higher-level queries.
1476 : *
1477 : * Note we use RelabelType, not CollateExpr, since this expression
1478 : * will reach the executor without any further processing.
1479 : */
1480 41094 : if (exprCollation(expr) != colColl)
1481 : {
1482 11870 : expr = applyRelabelType(expr,
1483 : exprType(expr), exprTypmod(expr), colColl,
1484 : COERCE_IMPLICIT_CAST, -1, false);
1485 11870 : *trivial_tlist = false; /* the relabel makes it not trivial */
1486 : }
1487 :
1488 82188 : tle = makeTargetEntry((Expr *) expr,
1489 41094 : (AttrNumber) resno++,
1490 41094 : pstrdup(reftle->resname),
1491 : false);
1492 :
1493 : /*
1494 : * By convention, all non-resjunk columns in a setop tree have
1495 : * ressortgroupref equal to their resno. In some cases the ref isn't
1496 : * needed, but this is a cleaner way than modifying the tlist later.
1497 : */
1498 41094 : tle->ressortgroupref = tle->resno;
1499 :
1500 41094 : tlist = lappend(tlist, tle);
1501 : }
1502 :
1503 14086 : if (flag >= 0)
1504 : {
1505 : /* Add a resjunk flag column */
1506 : /* flag value is the given constant */
1507 1204 : expr = (Node *) makeConst(INT4OID,
1508 : -1,
1509 : InvalidOid,
1510 : sizeof(int32),
1511 : Int32GetDatum(flag),
1512 : false,
1513 : true);
1514 1204 : tle = makeTargetEntry((Expr *) expr,
1515 1204 : (AttrNumber) resno++,
1516 : pstrdup("flag"),
1517 : true);
1518 1204 : tlist = lappend(tlist, tle);
1519 1204 : *trivial_tlist = false; /* the extra entry makes it not trivial */
1520 : }
1521 :
1522 14086 : return tlist;
1523 : }
1524 :
1525 : /*
1526 : * Generate targetlist for a set-operation Append node
1527 : *
1528 : * colTypes: OID list of set-op's result column datatypes
1529 : * colCollations: OID list of set-op's result column collations
1530 : * flag: true to create a flag column copied up from subplans
1531 : * input_tlists: list of tlists for sub-plans of the Append
1532 : * refnames_tlist: targetlist to take column names from
1533 : *
1534 : * The entries in the Append's targetlist should always be simple Vars;
1535 : * we just have to make sure they have the right datatypes/typmods/collations.
1536 : * The Vars are always generated with varno 0.
1537 : *
1538 : * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
1539 : * cannot figure out a realistic width for the tlist we make here. But we
1540 : * ought to refactor this code to produce a PathTarget directly, anyway.
1541 : */
1542 : static List *
1543 5400 : generate_append_tlist(List *colTypes, List *colCollations,
1544 : bool flag,
1545 : List *input_tlists,
1546 : List *refnames_tlist)
1547 : {
1548 5400 : List *tlist = NIL;
1549 5400 : int resno = 1;
1550 : ListCell *curColType;
1551 : ListCell *curColCollation;
1552 : ListCell *ref_tl_item;
1553 : int colindex;
1554 : TargetEntry *tle;
1555 : Node *expr;
1556 : ListCell *tlistl;
1557 : int32 *colTypmods;
1558 :
1559 : /*
1560 : * First extract typmods to use.
1561 : *
1562 : * If the inputs all agree on type and typmod of a particular column, use
1563 : * that typmod; else use -1.
1564 : */
1565 5400 : colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
1566 :
1567 19504 : foreach(tlistl, input_tlists)
1568 : {
1569 14104 : List *subtlist = (List *) lfirst(tlistl);
1570 : ListCell *subtlistl;
1571 :
1572 14104 : curColType = list_head(colTypes);
1573 14104 : colindex = 0;
1574 56432 : foreach(subtlistl, subtlist)
1575 : {
1576 42328 : TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
1577 :
1578 42328 : if (subtle->resjunk)
1579 1204 : continue;
1580 : Assert(curColType != NULL);
1581 41124 : if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
1582 : {
1583 : /* If first subplan, copy the typmod; else compare */
1584 41124 : int32 subtypmod = exprTypmod((Node *) subtle->expr);
1585 :
1586 41124 : if (tlistl == list_head(input_tlists))
1587 14838 : colTypmods[colindex] = subtypmod;
1588 26286 : else if (subtypmod != colTypmods[colindex])
1589 12 : colTypmods[colindex] = -1;
1590 : }
1591 : else
1592 : {
1593 : /* types disagree, so force typmod to -1 */
1594 0 : colTypmods[colindex] = -1;
1595 : }
1596 41124 : curColType = lnext(colTypes, curColType);
1597 41124 : colindex++;
1598 : }
1599 : Assert(curColType == NULL);
1600 : }
1601 :
1602 : /*
1603 : * Now we can build the tlist for the Append.
1604 : */
1605 5400 : colindex = 0;
1606 20238 : forthree(curColType, colTypes, curColCollation, colCollations,
1607 : ref_tl_item, refnames_tlist)
1608 : {
1609 14838 : Oid colType = lfirst_oid(curColType);
1610 14838 : int32 colTypmod = colTypmods[colindex++];
1611 14838 : Oid colColl = lfirst_oid(curColCollation);
1612 14838 : TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
1613 :
1614 : Assert(reftle->resno == resno);
1615 : Assert(!reftle->resjunk);
1616 14838 : expr = (Node *) makeVar(0,
1617 : resno,
1618 : colType,
1619 : colTypmod,
1620 : colColl,
1621 : 0);
1622 29676 : tle = makeTargetEntry((Expr *) expr,
1623 14838 : (AttrNumber) resno++,
1624 14838 : pstrdup(reftle->resname),
1625 : false);
1626 :
1627 : /*
1628 : * By convention, all non-resjunk columns in a setop tree have
1629 : * ressortgroupref equal to their resno. In some cases the ref isn't
1630 : * needed, but this is a cleaner way than modifying the tlist later.
1631 : */
1632 14838 : tle->ressortgroupref = tle->resno;
1633 :
1634 14838 : tlist = lappend(tlist, tle);
1635 : }
1636 :
1637 5400 : if (flag)
1638 : {
1639 : /* Add a resjunk flag column */
1640 : /* flag value is shown as copied up from subplan */
1641 602 : expr = (Node *) makeVar(0,
1642 : resno,
1643 : INT4OID,
1644 : -1,
1645 : InvalidOid,
1646 : 0);
1647 602 : tle = makeTargetEntry((Expr *) expr,
1648 602 : (AttrNumber) resno++,
1649 : pstrdup("flag"),
1650 : true);
1651 602 : tlist = lappend(tlist, tle);
1652 : }
1653 :
1654 5400 : pfree(colTypmods);
1655 :
1656 5400 : return tlist;
1657 : }
1658 :
1659 : /*
1660 : * generate_setop_grouplist
1661 : * Build a SortGroupClause list defining the sort/grouping properties
1662 : * of the setop's output columns.
1663 : *
1664 : * Parse analysis already determined the properties and built a suitable
1665 : * list, except that the entries do not have sortgrouprefs set because
1666 : * the parser output representation doesn't include a tlist for each
1667 : * setop. So what we need to do here is copy that list and install
1668 : * proper sortgrouprefs into it (copying those from the targetlist).
1669 : */
1670 : static List *
1671 4312 : generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
1672 : {
1673 4312 : List *grouplist = copyObject(op->groupClauses);
1674 : ListCell *lg;
1675 : ListCell *lt;
1676 :
1677 4312 : lg = list_head(grouplist);
1678 16622 : foreach(lt, targetlist)
1679 : {
1680 12310 : TargetEntry *tle = (TargetEntry *) lfirst(lt);
1681 : SortGroupClause *sgc;
1682 :
1683 12310 : if (tle->resjunk)
1684 : {
1685 : /* resjunk columns should not have sortgrouprefs */
1686 : Assert(tle->ressortgroupref == 0);
1687 602 : continue; /* ignore resjunk columns */
1688 : }
1689 :
1690 : /* non-resjunk columns should have sortgroupref = resno */
1691 : Assert(tle->ressortgroupref == tle->resno);
1692 :
1693 : /* non-resjunk columns should have grouping clauses */
1694 : Assert(lg != NULL);
1695 11708 : sgc = (SortGroupClause *) lfirst(lg);
1696 11708 : lg = lnext(grouplist, lg);
1697 : Assert(sgc->tleSortGroupRef == 0);
1698 :
1699 11708 : sgc->tleSortGroupRef = tle->ressortgroupref;
1700 : }
1701 : Assert(lg == NULL);
1702 4312 : return grouplist;
1703 : }
|