Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * prepunion.c
4 : * Routines to plan set-operation queries. The filename is a leftover
5 : * from a time when only UNIONs were implemented.
6 : *
7 : * There are two code paths in the planner for set-operation queries.
8 : * If a subquery consists entirely of simple UNION ALL operations, it
9 : * is converted into an "append relation". Otherwise, it is handled
10 : * by the general code in this module (plan_set_operations and its
11 : * subroutines). There is some support code here for the append-relation
12 : * case, but most of the heavy lifting for that is done elsewhere,
13 : * notably in prepjointree.c and allpaths.c.
14 : *
15 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/optimizer/prep/prepunion.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include "access/htup_details.h"
27 : #include "catalog/pg_type.h"
28 : #include "miscadmin.h"
29 : #include "nodes/makefuncs.h"
30 : #include "nodes/nodeFuncs.h"
31 : #include "optimizer/cost.h"
32 : #include "optimizer/pathnode.h"
33 : #include "optimizer/paths.h"
34 : #include "optimizer/planner.h"
35 : #include "optimizer/prep.h"
36 : #include "optimizer/tlist.h"
37 : #include "parser/parse_coerce.h"
38 : #include "utils/selfuncs.h"
39 :
40 :
41 : static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
42 : List *colTypes, List *colCollations,
43 : bool junkOK,
44 : int flag, List *refnames_tlist,
45 : List **pTargetList,
46 : bool *istrivial_tlist);
47 : static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
48 : PlannerInfo *root,
49 : List *refnames_tlist,
50 : List **pTargetList);
51 : static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
52 : bool trivial_tlist, List *child_tlist,
53 : List *interesting_pathkeys,
54 : double *pNumGroups);
55 : static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
56 : List *refnames_tlist,
57 : List **pTargetList);
58 : static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
59 : List *refnames_tlist,
60 : List **pTargetList);
61 : static List *plan_union_children(PlannerInfo *root,
62 : SetOperationStmt *top_union,
63 : List *refnames_tlist,
64 : List **tlist_list,
65 : List **istrivial_tlist);
66 : static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
67 : static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
68 : Path *input_path,
69 : double dNumGroups, double dNumOutputRows,
70 : const char *construct);
71 : static List *generate_setop_tlist(List *colTypes, List *colCollations,
72 : int flag,
73 : Index varno,
74 : bool hack_constants,
75 : List *input_tlist,
76 : List *refnames_tlist,
77 : bool *trivial_tlist);
78 : static List *generate_append_tlist(List *colTypes, List *colCollations,
79 : bool flag,
80 : List *input_tlists,
81 : List *refnames_tlist);
82 : static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
83 :
84 :
85 : /*
86 : * plan_set_operations
87 : *
88 : * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
89 : *
90 : * This routine only deals with the setOperations tree of the given query.
91 : * Any top-level ORDER BY requested in root->parse->sortClause will be handled
92 : * when we return to grouping_planner; likewise for LIMIT.
93 : *
94 : * What we return is an "upperrel" RelOptInfo containing at least one Path
95 : * that implements the set-operation tree. In addition, root->processed_tlist
96 : * receives a targetlist representing the output of the topmost setop node.
97 : */
98 : RelOptInfo *
99 5444 : plan_set_operations(PlannerInfo *root)
100 : {
101 5444 : Query *parse = root->parse;
102 5444 : SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
103 : Node *node;
104 : RangeTblEntry *leftmostRTE;
105 : Query *leftmostQuery;
106 : RelOptInfo *setop_rel;
107 : List *top_tlist;
108 :
109 : Assert(topop);
110 :
111 : /* check for unsupported stuff */
112 : Assert(parse->jointree->fromlist == NIL);
113 : Assert(parse->jointree->quals == NULL);
114 : Assert(parse->groupClause == NIL);
115 : Assert(parse->havingQual == NULL);
116 : Assert(parse->windowClause == NIL);
117 : Assert(parse->distinctClause == NIL);
118 :
119 : /*
120 : * In the outer query level, equivalence classes are limited to classes
121 : * which define that the top-level target entry is equivalent to the
122 : * corresponding child target entry. There won't be any equivalence class
123 : * merging. Mark that merging is complete to allow us to make pathkeys.
124 : */
125 : Assert(root->eq_classes == NIL);
126 5444 : root->ec_merging_done = true;
127 :
128 : /*
129 : * We'll need to build RelOptInfos for each of the leaf subqueries, which
130 : * are RTE_SUBQUERY rangetable entries in this Query. Prepare the index
131 : * arrays for those, and for AppendRelInfos in case they're needed.
132 : */
133 5444 : setup_simple_rel_arrays(root);
134 :
135 : /*
136 : * Find the leftmost component Query. We need to use its column names for
137 : * all generated tlists (else SELECT INTO won't work right).
138 : */
139 5444 : node = topop->larg;
140 8930 : while (node && IsA(node, SetOperationStmt))
141 3486 : node = ((SetOperationStmt *) node)->larg;
142 : Assert(node && IsA(node, RangeTblRef));
143 5444 : leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
144 5444 : leftmostQuery = leftmostRTE->subquery;
145 : Assert(leftmostQuery != NULL);
146 :
147 : /*
148 : * If the topmost node is a recursive union, it needs special processing.
149 : */
150 5444 : if (root->hasRecursion)
151 : {
152 812 : setop_rel = generate_recursion_path(topop, root,
153 : leftmostQuery->targetList,
154 : &top_tlist);
155 : }
156 : else
157 : {
158 : bool trivial_tlist;
159 :
160 : /*
161 : * Recurse on setOperations tree to generate paths for set ops. The
162 : * final output paths should have just the column types shown as the
163 : * output from the top-level node, plus possibly resjunk working
164 : * columns (we can rely on upper-level nodes to deal with that).
165 : */
166 4632 : setop_rel = recurse_set_operations((Node *) topop, root,
167 : topop->colTypes, topop->colCollations,
168 : true, -1,
169 : leftmostQuery->targetList,
170 : &top_tlist,
171 : &trivial_tlist);
172 : }
173 :
174 : /* Must return the built tlist into root->processed_tlist. */
175 5438 : root->processed_tlist = top_tlist;
176 :
177 5438 : return setop_rel;
178 : }
179 :
180 : /*
181 : * set_operation_ordered_results_useful
182 : * Return true if the given SetOperationStmt can be executed by utilizing
183 : * paths that provide sorted input according to the setop's targetlist.
184 : * Returns false when sorted paths are not any more useful then unsorted
185 : * ones.
186 : */
187 : bool
188 14404 : set_operation_ordered_results_useful(SetOperationStmt *setop)
189 : {
190 : /*
191 : * Paths sorted by the targetlist are useful for UNION as we can opt to
192 : * MergeAppend the sorted paths then Unique them. Ordered paths are no
193 : * more useful than unordered ones for UNION ALL.
194 : */
195 14404 : if (!setop->all && setop->op == SETOP_UNION)
196 10836 : return true;
197 :
198 : /*
199 : * EXCEPT / EXCEPT ALL / INTERSECT / INTERSECT ALL cannot yet utilize
200 : * correctly sorted input paths.
201 : */
202 3568 : return false;
203 : }
204 :
205 : /*
206 : * recurse_set_operations
207 : * Recursively handle one step in a tree of set operations
208 : *
209 : * colTypes: OID list of set-op's result column datatypes
210 : * colCollations: OID list of set-op's result column collations
211 : * junkOK: if true, child resjunk columns may be left in the result
212 : * flag: if >= 0, add a resjunk output column indicating value of flag
213 : * refnames_tlist: targetlist to take column names from
214 : *
215 : * Returns a RelOptInfo for the subtree, as well as these output parameters:
216 : * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
217 : * *istrivial_tlist: true if, and only if, datatypes between parent and child
218 : * match.
219 : *
220 : * The pTargetList output parameter is mostly redundant with the pathtarget
221 : * of the returned RelOptInfo, but for the moment we need it because much of
222 : * the logic in this file depends on flag columns being marked resjunk.
223 : * Pending a redesign of how that works, this is the easy way out.
224 : *
225 : * We don't have to care about typmods here: the only allowed difference
226 : * between set-op input and output typmods is input is a specific typmod
227 : * and output is -1, and that does not require a coercion.
228 : */
229 : static RelOptInfo *
230 19216 : recurse_set_operations(Node *setOp, PlannerInfo *root,
231 : List *colTypes, List *colCollations,
232 : bool junkOK,
233 : int flag, List *refnames_tlist,
234 : List **pTargetList,
235 : bool *istrivial_tlist)
236 : {
237 : RelOptInfo *rel;
238 :
239 19216 : *istrivial_tlist = true; /* for now */
240 :
241 : /* Guard against stack overflow due to overly complex setop nests */
242 19216 : check_stack_depth();
243 :
244 19216 : if (IsA(setOp, RangeTblRef))
245 : {
246 14434 : RangeTblRef *rtr = (RangeTblRef *) setOp;
247 14434 : RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
248 : SetOperationStmt *setops;
249 14434 : Query *subquery = rte->subquery;
250 : PlannerInfo *subroot;
251 : List *tlist;
252 : bool trivial_tlist;
253 :
254 : Assert(subquery != NULL);
255 :
256 : /* Build a RelOptInfo for this leaf subquery. */
257 14434 : rel = build_simple_rel(root, rtr->rtindex, NULL);
258 :
259 : /* plan_params should not be in use in current query level */
260 : Assert(root->plan_params == NIL);
261 :
262 : /*
263 : * Pass the set operation details to the subquery_planner to have it
264 : * consider generating Paths correctly ordered for the set operation.
265 : */
266 14434 : setops = castNode(SetOperationStmt, root->parse->setOperations);
267 :
268 : /* Generate a subroot and Paths for the subquery */
269 14434 : subroot = rel->subroot = subquery_planner(root->glob, subquery, root,
270 : false, root->tuple_fraction,
271 : setops);
272 :
273 : /*
274 : * It should not be possible for the primitive query to contain any
275 : * cross-references to other primitive queries in the setop tree.
276 : */
277 14434 : if (root->plan_params)
278 0 : elog(ERROR, "unexpected outer reference in set operation subquery");
279 :
280 : /* Figure out the appropriate target list for this subquery. */
281 14434 : tlist = generate_setop_tlist(colTypes, colCollations,
282 : flag,
283 14434 : rtr->rtindex,
284 : true,
285 : subroot->processed_tlist,
286 : refnames_tlist,
287 : &trivial_tlist);
288 14434 : rel->reltarget = create_pathtarget(root, tlist);
289 :
290 : /* Return the fully-fledged tlist to caller, too */
291 14434 : *pTargetList = tlist;
292 14434 : *istrivial_tlist = trivial_tlist;
293 : }
294 4782 : else if (IsA(setOp, SetOperationStmt))
295 : {
296 4782 : SetOperationStmt *op = (SetOperationStmt *) setOp;
297 :
298 : /* UNIONs are much different from INTERSECT/EXCEPT */
299 4782 : if (op->op == SETOP_UNION)
300 4132 : rel = generate_union_paths(op, root,
301 : refnames_tlist,
302 : pTargetList);
303 : else
304 650 : rel = generate_nonunion_paths(op, root,
305 : refnames_tlist,
306 : pTargetList);
307 :
308 : /*
309 : * If necessary, add a Result node to project the caller-requested
310 : * output columns.
311 : *
312 : * XXX you don't really want to know about this: setrefs.c will apply
313 : * fix_upper_expr() to the Result node's tlist. This would fail if the
314 : * Vars generated by generate_setop_tlist() were not exactly equal()
315 : * to the corresponding tlist entries of the subplan. However, since
316 : * the subplan was generated by generate_union_paths() or
317 : * generate_nonunion_paths(), and hence its tlist was generated by
318 : * generate_append_tlist(), this will work. We just tell
319 : * generate_setop_tlist() to use varno 0.
320 : */
321 4782 : if (flag >= 0 ||
322 4752 : !tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
323 4650 : !tlist_same_collations(*pTargetList, colCollations, junkOK))
324 : {
325 : PathTarget *target;
326 : bool trivial_tlist;
327 : ListCell *lc;
328 :
329 132 : *pTargetList = generate_setop_tlist(colTypes, colCollations,
330 : flag,
331 : 0,
332 : false,
333 : *pTargetList,
334 : refnames_tlist,
335 : &trivial_tlist);
336 132 : *istrivial_tlist = trivial_tlist;
337 132 : target = create_pathtarget(root, *pTargetList);
338 :
339 : /* Apply projection to each path */
340 264 : foreach(lc, rel->pathlist)
341 : {
342 132 : Path *subpath = (Path *) lfirst(lc);
343 : Path *path;
344 :
345 : Assert(subpath->param_info == NULL);
346 132 : path = apply_projection_to_path(root, subpath->parent,
347 : subpath, target);
348 : /* If we had to add a Result, path is different from subpath */
349 132 : if (path != subpath)
350 132 : lfirst(lc) = path;
351 : }
352 :
353 : /* Apply projection to each partial path */
354 132 : foreach(lc, rel->partial_pathlist)
355 : {
356 0 : Path *subpath = (Path *) lfirst(lc);
357 : Path *path;
358 :
359 : Assert(subpath->param_info == NULL);
360 :
361 : /* avoid apply_projection_to_path, in case of multiple refs */
362 0 : path = (Path *) create_projection_path(root, subpath->parent,
363 : subpath, target);
364 0 : lfirst(lc) = path;
365 : }
366 : }
367 4782 : postprocess_setop_rel(root, rel);
368 : }
369 : else
370 : {
371 0 : elog(ERROR, "unrecognized node type: %d",
372 : (int) nodeTag(setOp));
373 : *pTargetList = NIL;
374 : rel = NULL; /* keep compiler quiet */
375 : }
376 :
377 19216 : return rel;
378 : }
379 :
380 : /*
381 : * Generate paths for a recursive UNION node
382 : */
383 : static RelOptInfo *
384 812 : generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
385 : List *refnames_tlist,
386 : List **pTargetList)
387 : {
388 : RelOptInfo *result_rel;
389 : Path *path;
390 : RelOptInfo *lrel,
391 : *rrel;
392 : Path *lpath;
393 : Path *rpath;
394 : List *lpath_tlist;
395 : bool lpath_trivial_tlist;
396 : List *rpath_tlist;
397 : bool rpath_trivial_tlist;
398 : List *tlist;
399 : List *groupList;
400 : double dNumGroups;
401 :
402 : /* Parser should have rejected other cases */
403 812 : if (setOp->op != SETOP_UNION)
404 0 : elog(ERROR, "only UNION queries can be recursive");
405 : /* Worktable ID should be assigned */
406 : Assert(root->wt_param_id >= 0);
407 :
408 : /*
409 : * Unlike a regular UNION node, process the left and right inputs
410 : * separately without any intention of combining them into one Append.
411 : */
412 812 : lrel = recurse_set_operations(setOp->larg, root,
413 : setOp->colTypes, setOp->colCollations,
414 : false, -1,
415 : refnames_tlist,
416 : &lpath_tlist,
417 : &lpath_trivial_tlist);
418 812 : if (lrel->rtekind == RTE_SUBQUERY)
419 812 : build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
420 : NIL, NULL);
421 812 : lpath = lrel->cheapest_total_path;
422 : /* The right path will want to look at the left one ... */
423 812 : root->non_recursive_path = lpath;
424 812 : rrel = recurse_set_operations(setOp->rarg, root,
425 : setOp->colTypes, setOp->colCollations,
426 : false, -1,
427 : refnames_tlist,
428 : &rpath_tlist,
429 : &rpath_trivial_tlist);
430 812 : if (rrel->rtekind == RTE_SUBQUERY)
431 806 : build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
432 : NIL, NULL);
433 812 : rpath = rrel->cheapest_total_path;
434 812 : root->non_recursive_path = NULL;
435 :
436 : /*
437 : * Generate tlist for RecursiveUnion path node --- same as in Append cases
438 : */
439 812 : tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations, false,
440 812 : list_make2(lpath_tlist, rpath_tlist),
441 : refnames_tlist);
442 :
443 812 : *pTargetList = tlist;
444 :
445 : /* Build result relation. */
446 812 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
447 812 : bms_union(lrel->relids, rrel->relids));
448 812 : result_rel->reltarget = create_pathtarget(root, tlist);
449 :
450 : /*
451 : * If UNION, identify the grouping operators
452 : */
453 812 : if (setOp->all)
454 : {
455 452 : groupList = NIL;
456 452 : dNumGroups = 0;
457 : }
458 : else
459 : {
460 : /* Identify the grouping semantics */
461 360 : groupList = generate_setop_grouplist(setOp, tlist);
462 :
463 : /* We only support hashing here */
464 360 : if (!grouping_is_hashable(groupList))
465 6 : ereport(ERROR,
466 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
467 : errmsg("could not implement recursive UNION"),
468 : errdetail("All column datatypes must be hashable.")));
469 :
470 : /*
471 : * For the moment, take the number of distinct groups as equal to the
472 : * total input size, ie, the worst case.
473 : */
474 354 : dNumGroups = lpath->rows + rpath->rows * 10;
475 : }
476 :
477 : /*
478 : * And make the path node.
479 : */
480 806 : path = (Path *) create_recursiveunion_path(root,
481 : result_rel,
482 : lpath,
483 : rpath,
484 806 : result_rel->reltarget,
485 : groupList,
486 : root->wt_param_id,
487 : dNumGroups);
488 :
489 806 : add_path(result_rel, path);
490 806 : postprocess_setop_rel(root, result_rel);
491 806 : return result_rel;
492 : }
493 :
494 : /*
495 : * build_setop_child_paths
496 : * Build paths for the set op child relation denoted by 'rel'.
497 : *
498 : * interesting_pathkeys: if not NIL, also include paths that suit these
499 : * pathkeys, sorting any unsorted paths as required.
500 : * *pNumGroups: if not NULL, we estimate the number of distinct groups
501 : * in the result, and store it there.
502 : */
503 : static void
504 14434 : build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
505 : bool trivial_tlist, List *child_tlist,
506 : List *interesting_pathkeys, double *pNumGroups)
507 : {
508 : RelOptInfo *final_rel;
509 14434 : List *setop_pathkeys = rel->subroot->setop_pathkeys;
510 : ListCell *lc;
511 :
512 : /* it can't be a set op child rel if it's not a subquery */
513 : Assert(rel->rtekind == RTE_SUBQUERY);
514 :
515 : /* when sorting is needed, add child rel equivalences */
516 14434 : if (interesting_pathkeys != NIL)
517 9926 : add_setop_child_rel_equivalences(root,
518 : rel,
519 : child_tlist,
520 : interesting_pathkeys);
521 :
522 : /*
523 : * Mark rel with estimated output rows, width, etc. Note that we have to
524 : * do this before generating outer-query paths, else cost_subqueryscan is
525 : * not happy.
526 : */
527 14434 : set_subquery_size_estimates(root, rel);
528 :
529 : /*
530 : * Since we may want to add a partial path to this relation, we must set
531 : * its consider_parallel flag correctly.
532 : */
533 14434 : final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
534 14434 : rel->consider_parallel = final_rel->consider_parallel;
535 :
536 : /* Generate subquery scan paths for any interesting path in final_rel */
537 37804 : foreach(lc, final_rel->pathlist)
538 : {
539 23370 : Path *subpath = (Path *) lfirst(lc);
540 : List *pathkeys;
541 23370 : Path *cheapest_input_path = final_rel->cheapest_total_path;
542 : bool is_sorted;
543 : int presorted_keys;
544 :
545 : /*
546 : * Include the cheapest path as-is so that the set operation can be
547 : * cheaply implemented using a method which does not require the input
548 : * to be sorted.
549 : */
550 23370 : if (subpath == cheapest_input_path)
551 : {
552 : /* Convert subpath's pathkeys to outer representation */
553 14434 : pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
554 : make_tlist_from_pathtarget(subpath->pathtarget));
555 :
556 : /* Generate outer path using this subpath */
557 14434 : add_path(rel, (Path *) create_subqueryscan_path(root,
558 : rel,
559 : subpath,
560 : trivial_tlist,
561 : pathkeys,
562 : NULL));
563 : }
564 :
565 : /* skip dealing with sorted paths if the setop doesn't need them */
566 23370 : if (interesting_pathkeys == NIL)
567 4886 : continue;
568 :
569 : /*
570 : * Create paths to suit final sort order required for setop_pathkeys.
571 : * Here we'll sort the cheapest input path (if not sorted already) and
572 : * incremental sort any paths which are partially sorted.
573 : */
574 18496 : is_sorted = pathkeys_count_contained_in(setop_pathkeys,
575 : subpath->pathkeys,
576 : &presorted_keys);
577 :
578 18496 : if (!is_sorted)
579 : {
580 12326 : double limittuples = rel->subroot->limit_tuples;
581 :
582 : /*
583 : * Try at least sorting the cheapest path and also try
584 : * incrementally sorting any path which is partially sorted
585 : * already (no need to deal with paths which have presorted keys
586 : * when incremental sort is disabled unless it's the cheapest
587 : * input path).
588 : */
589 12326 : if (subpath != cheapest_input_path &&
590 2982 : (presorted_keys == 0 || !enable_incremental_sort))
591 12 : continue;
592 :
593 : /*
594 : * We've no need to consider both a sort and incremental sort.
595 : * We'll just do a sort if there are no presorted keys and an
596 : * incremental sort when there are presorted keys.
597 : */
598 12314 : if (presorted_keys == 0 || !enable_incremental_sort)
599 9344 : subpath = (Path *) create_sort_path(rel->subroot,
600 : final_rel,
601 : subpath,
602 : setop_pathkeys,
603 : limittuples);
604 : else
605 2970 : subpath = (Path *) create_incremental_sort_path(rel->subroot,
606 : final_rel,
607 : subpath,
608 : setop_pathkeys,
609 : presorted_keys,
610 : limittuples);
611 : }
612 :
613 : /*
614 : * subpath is now sorted, so add it to the pathlist. We already added
615 : * the cheapest_input_path above, so don't add it again unless we just
616 : * sorted it.
617 : */
618 18484 : if (subpath != cheapest_input_path)
619 : {
620 : /* Convert subpath's pathkeys to outer representation */
621 17902 : pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
622 : make_tlist_from_pathtarget(subpath->pathtarget));
623 :
624 : /* Generate outer path using this subpath */
625 17902 : add_path(rel, (Path *) create_subqueryscan_path(root,
626 : rel,
627 : subpath,
628 : trivial_tlist,
629 : pathkeys,
630 : NULL));
631 : }
632 : }
633 :
634 : /* if consider_parallel is false, there should be no partial paths */
635 : Assert(final_rel->consider_parallel ||
636 : final_rel->partial_pathlist == NIL);
637 :
638 : /*
639 : * If we have a partial path for the child relation, we can use that to
640 : * build a partial path for this relation. But there's no point in
641 : * considering any path but the cheapest.
642 : */
643 14434 : if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
644 9858 : final_rel->partial_pathlist != NIL)
645 : {
646 : Path *partial_subpath;
647 : Path *partial_path;
648 :
649 12 : partial_subpath = linitial(final_rel->partial_pathlist);
650 : partial_path = (Path *)
651 12 : create_subqueryscan_path(root, rel, partial_subpath,
652 : trivial_tlist,
653 : NIL, NULL);
654 12 : add_partial_path(rel, partial_path);
655 : }
656 :
657 14434 : postprocess_setop_rel(root, rel);
658 :
659 : /*
660 : * Estimate number of groups if caller wants it. If the subquery used
661 : * grouping or aggregation, its output is probably mostly unique anyway;
662 : * otherwise do statistical estimation.
663 : *
664 : * XXX you don't really want to know about this: we do the estimation
665 : * using the subquery's original targetlist expressions, not the
666 : * subroot->processed_tlist which might seem more appropriate. The reason
667 : * is that if the subquery is itself a setop, it may return a
668 : * processed_tlist containing "varno 0" Vars generated by
669 : * generate_append_tlist, and those would confuse estimate_num_groups
670 : * mightily. We ought to get rid of the "varno 0" hack, but that requires
671 : * a redesign of the parsetree representation of setops, so that there can
672 : * be an RTE corresponding to each setop's output.
673 : */
674 14434 : if (pNumGroups)
675 : {
676 1270 : PlannerInfo *subroot = rel->subroot;
677 1270 : Query *subquery = subroot->parse;
678 :
679 1270 : if (subquery->groupClause || subquery->groupingSets ||
680 1270 : subquery->distinctClause || subroot->hasHavingQual ||
681 1258 : subquery->hasAggs)
682 12 : *pNumGroups = rel->cheapest_total_path->rows;
683 : else
684 1258 : *pNumGroups = estimate_num_groups(subroot,
685 : get_tlist_exprs(subquery->targetList, false),
686 1258 : rel->cheapest_total_path->rows,
687 : NULL,
688 : NULL);
689 : }
690 14434 : }
691 :
692 : /*
693 : * Generate paths for a UNION or UNION ALL node
694 : */
695 : static RelOptInfo *
696 4132 : generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
697 : List *refnames_tlist,
698 : List **pTargetList)
699 : {
700 4132 : Relids relids = NULL;
701 : RelOptInfo *result_rel;
702 : ListCell *lc;
703 : ListCell *lc2;
704 : ListCell *lc3;
705 4132 : List *cheapest_pathlist = NIL;
706 4132 : List *ordered_pathlist = NIL;
707 4132 : List *partial_pathlist = NIL;
708 4132 : bool partial_paths_valid = true;
709 4132 : bool consider_parallel = true;
710 : List *rellist;
711 : List *tlist_list;
712 : List *trivial_tlist_list;
713 : List *tlist;
714 4132 : List *groupList = NIL;
715 : Path *apath;
716 4132 : Path *gpath = NULL;
717 4132 : bool try_sorted = false;
718 4132 : List *union_pathkeys = NIL;
719 :
720 : /*
721 : * If any of my children are identical UNION nodes (same op, all-flag, and
722 : * colTypes/colCollations) then they can be merged into this node so that
723 : * we generate only one Append/MergeAppend and unique-ification for the
724 : * lot. Recurse to find such nodes.
725 : */
726 4132 : rellist = plan_union_children(root,
727 : op,
728 : refnames_tlist,
729 : &tlist_list,
730 : &trivial_tlist_list);
731 :
732 : /*
733 : * Generate tlist for Append/MergeAppend plan node.
734 : *
735 : * The tlist for an Append plan isn't important as far as the Append is
736 : * concerned, but we must make it look real anyway for the benefit of the
737 : * next plan level up.
738 : */
739 4132 : tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
740 : tlist_list, refnames_tlist);
741 4132 : *pTargetList = tlist;
742 :
743 : /* For UNIONs (not UNION ALL), try sorting, if sorting is possible */
744 4132 : if (!op->all)
745 : {
746 : /* Identify the grouping semantics */
747 3556 : groupList = generate_setop_grouplist(op, tlist);
748 :
749 3556 : if (grouping_is_sortable(op->groupClauses))
750 : {
751 3464 : try_sorted = true;
752 : /* Determine the pathkeys for sorting by the whole target list */
753 3464 : union_pathkeys = make_pathkeys_for_sortclauses(root, groupList,
754 : tlist);
755 :
756 3464 : root->query_pathkeys = union_pathkeys;
757 : }
758 : }
759 :
760 : /*
761 : * Now that we've got the append target list, we can build the union child
762 : * paths.
763 : */
764 15792 : forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
765 : {
766 11660 : RelOptInfo *rel = lfirst(lc);
767 11660 : bool trivial_tlist = lfirst_int(lc2);
768 11660 : List *child_tlist = lfirst_node(List, lc3);
769 :
770 : /* only build paths for the union children */
771 11660 : if (rel->rtekind == RTE_SUBQUERY)
772 11546 : build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
773 : union_pathkeys, NULL);
774 : }
775 :
776 : /* Build path lists and relid set. */
777 15792 : foreach(lc, rellist)
778 : {
779 11660 : RelOptInfo *rel = lfirst(lc);
780 : Path *ordered_path;
781 :
782 11660 : cheapest_pathlist = lappend(cheapest_pathlist,
783 11660 : rel->cheapest_total_path);
784 :
785 11660 : if (try_sorted)
786 : {
787 3714 : ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
788 : union_pathkeys,
789 : NULL,
790 : TOTAL_COST,
791 : false);
792 :
793 3714 : if (ordered_path != NULL)
794 464 : ordered_pathlist = lappend(ordered_pathlist, ordered_path);
795 : else
796 : {
797 : /*
798 : * If we can't find a sorted path, just give up trying to
799 : * generate a list of correctly sorted child paths. This can
800 : * happen when type coercion was added to the targetlist due
801 : * to mismatching types from the union children.
802 : */
803 3250 : try_sorted = false;
804 : }
805 : }
806 :
807 11660 : if (consider_parallel)
808 : {
809 8306 : if (!rel->consider_parallel)
810 : {
811 3096 : consider_parallel = false;
812 3096 : partial_paths_valid = false;
813 : }
814 5210 : else if (rel->partial_pathlist == NIL)
815 5198 : partial_paths_valid = false;
816 : else
817 12 : partial_pathlist = lappend(partial_pathlist,
818 12 : linitial(rel->partial_pathlist));
819 : }
820 :
821 11660 : relids = bms_union(relids, rel->relids);
822 : }
823 :
824 : /* Build result relation. */
825 4132 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
826 4132 : result_rel->reltarget = create_pathtarget(root, tlist);
827 4132 : result_rel->consider_parallel = consider_parallel;
828 4132 : result_rel->consider_startup = (root->tuple_fraction > 0);
829 :
830 : /*
831 : * Append the child results together using the cheapest paths from each
832 : * union child.
833 : */
834 4132 : apath = (Path *) create_append_path(root, result_rel, cheapest_pathlist,
835 : NIL, NIL, NULL, 0, false, -1);
836 :
837 : /*
838 : * Estimate number of groups. For now we just assume the output is unique
839 : * --- this is certainly true for the UNION case, and we want worst-case
840 : * estimates anyway.
841 : */
842 4132 : result_rel->rows = apath->rows;
843 :
844 : /*
845 : * Now consider doing the same thing using the partial paths plus Append
846 : * plus Gather.
847 : */
848 4132 : if (partial_paths_valid)
849 : {
850 : Path *papath;
851 6 : int parallel_workers = 0;
852 :
853 : /* Find the highest number of workers requested for any subpath. */
854 18 : foreach(lc, partial_pathlist)
855 : {
856 12 : Path *subpath = lfirst(lc);
857 :
858 12 : parallel_workers = Max(parallel_workers,
859 : subpath->parallel_workers);
860 : }
861 : Assert(parallel_workers > 0);
862 :
863 : /*
864 : * If the use of parallel append is permitted, always request at least
865 : * log2(# of children) paths. We assume it can be useful to have
866 : * extra workers in this case because they will be spread out across
867 : * the children. The precise formula is just a guess; see
868 : * add_paths_to_append_rel.
869 : */
870 6 : if (enable_parallel_append)
871 : {
872 6 : parallel_workers = Max(parallel_workers,
873 : pg_leftmost_one_pos32(list_length(partial_pathlist)) + 1);
874 6 : parallel_workers = Min(parallel_workers,
875 : max_parallel_workers_per_gather);
876 : }
877 : Assert(parallel_workers > 0);
878 :
879 : papath = (Path *)
880 6 : create_append_path(root, result_rel, NIL, partial_pathlist,
881 : NIL, NULL, parallel_workers,
882 : enable_parallel_append, -1);
883 : gpath = (Path *)
884 6 : create_gather_path(root, result_rel, papath,
885 6 : result_rel->reltarget, NULL, NULL);
886 : }
887 :
888 4132 : if (!op->all)
889 : {
890 : double dNumGroups;
891 3556 : bool can_sort = grouping_is_sortable(groupList);
892 3556 : bool can_hash = grouping_is_hashable(groupList);
893 :
894 : /*
895 : * XXX for the moment, take the number of distinct groups as equal to
896 : * the total input size, i.e., the worst case. This is too
897 : * conservative, but it's not clear how to get a decent estimate of
898 : * the true size. One should note as well the propensity of novices
899 : * to write UNION rather than UNION ALL even when they don't expect
900 : * any duplicates...
901 : */
902 3556 : dNumGroups = apath->rows;
903 :
904 3556 : if (can_hash)
905 : {
906 : Path *path;
907 :
908 : /*
909 : * Try a hash aggregate plan on 'apath'. This is the cheapest
910 : * available path containing each append child.
911 : */
912 3484 : path = (Path *) create_agg_path(root,
913 : result_rel,
914 : apath,
915 : create_pathtarget(root, tlist),
916 : AGG_HASHED,
917 : AGGSPLIT_SIMPLE,
918 : groupList,
919 : NIL,
920 : NULL,
921 : dNumGroups);
922 3484 : add_path(result_rel, path);
923 :
924 : /* Try hash aggregate on the Gather path, if valid */
925 3484 : if (gpath != NULL)
926 : {
927 : /* Hashed aggregate plan --- no sort needed */
928 6 : path = (Path *) create_agg_path(root,
929 : result_rel,
930 : gpath,
931 : create_pathtarget(root, tlist),
932 : AGG_HASHED,
933 : AGGSPLIT_SIMPLE,
934 : groupList,
935 : NIL,
936 : NULL,
937 : dNumGroups);
938 6 : add_path(result_rel, path);
939 : }
940 : }
941 :
942 3556 : if (can_sort)
943 : {
944 3464 : Path *path = apath;
945 :
946 : /* Try Sort -> Unique on the Append path */
947 3464 : if (groupList != NIL)
948 3434 : path = (Path *) create_sort_path(root, result_rel, path,
949 : make_pathkeys_for_sortclauses(root, groupList, tlist),
950 : -1.0);
951 :
952 3464 : path = (Path *) create_upper_unique_path(root,
953 : result_rel,
954 : path,
955 3464 : list_length(path->pathkeys),
956 : dNumGroups);
957 :
958 3464 : add_path(result_rel, path);
959 :
960 : /* Try Sort -> Unique on the Gather path, if set */
961 3464 : if (gpath != NULL)
962 : {
963 6 : path = gpath;
964 :
965 6 : path = (Path *) create_sort_path(root, result_rel, path,
966 : make_pathkeys_for_sortclauses(root, groupList, tlist),
967 : -1.0);
968 :
969 6 : path = (Path *) create_upper_unique_path(root,
970 : result_rel,
971 : path,
972 6 : list_length(path->pathkeys),
973 : dNumGroups);
974 6 : add_path(result_rel, path);
975 : }
976 : }
977 :
978 : /*
979 : * Try making a MergeAppend path if we managed to find a path with the
980 : * correct pathkeys in each union child query.
981 : */
982 3556 : if (try_sorted && groupList != NIL)
983 : {
984 : Path *path;
985 :
986 184 : path = (Path *) create_merge_append_path(root,
987 : result_rel,
988 : ordered_pathlist,
989 : union_pathkeys,
990 : NULL);
991 :
992 : /* and make the MergeAppend unique */
993 184 : path = (Path *) create_upper_unique_path(root,
994 : result_rel,
995 : path,
996 : list_length(tlist),
997 : dNumGroups);
998 :
999 184 : add_path(result_rel, path);
1000 : }
1001 : }
1002 : else
1003 : {
1004 : /* UNION ALL */
1005 576 : add_path(result_rel, apath);
1006 :
1007 576 : if (gpath != NULL)
1008 0 : add_path(result_rel, gpath);
1009 : }
1010 :
1011 4132 : return result_rel;
1012 : }
1013 :
1014 : /*
1015 : * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
1016 : */
1017 : static RelOptInfo *
1018 650 : generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
1019 : List *refnames_tlist,
1020 : List **pTargetList)
1021 : {
1022 : RelOptInfo *result_rel;
1023 : RelOptInfo *lrel,
1024 : *rrel;
1025 650 : double save_fraction = root->tuple_fraction;
1026 : Path *lpath,
1027 : *rpath,
1028 : *path;
1029 : List *lpath_tlist,
1030 : *rpath_tlist,
1031 : *tlist_list,
1032 : *tlist,
1033 : *groupList,
1034 : *pathlist;
1035 : bool lpath_trivial_tlist,
1036 : rpath_trivial_tlist;
1037 : double dLeftGroups,
1038 : dRightGroups,
1039 : dNumGroups,
1040 : dNumOutputRows;
1041 : bool use_hash;
1042 : SetOpCmd cmd;
1043 : int firstFlag;
1044 :
1045 : /*
1046 : * Tell children to fetch all tuples.
1047 : */
1048 650 : root->tuple_fraction = 0.0;
1049 :
1050 : /* Recurse on children, ensuring their outputs are marked */
1051 650 : lrel = recurse_set_operations(op->larg, root,
1052 : op->colTypes, op->colCollations,
1053 : false, 0,
1054 : refnames_tlist,
1055 : &lpath_tlist,
1056 : &lpath_trivial_tlist);
1057 650 : if (lrel->rtekind == RTE_SUBQUERY)
1058 626 : build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
1059 : NIL, &dLeftGroups);
1060 : else
1061 24 : dLeftGroups = lrel->rows;
1062 :
1063 650 : lpath = lrel->cheapest_total_path;
1064 650 : rrel = recurse_set_operations(op->rarg, root,
1065 : op->colTypes, op->colCollations,
1066 : false, 1,
1067 : refnames_tlist,
1068 : &rpath_tlist,
1069 : &rpath_trivial_tlist);
1070 650 : if (rrel->rtekind == RTE_SUBQUERY)
1071 644 : build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
1072 : NIL, &dRightGroups);
1073 : else
1074 6 : dRightGroups = rrel->rows;
1075 :
1076 650 : rpath = rrel->cheapest_total_path;
1077 :
1078 : /* Undo effects of forcing tuple_fraction to 0 */
1079 650 : root->tuple_fraction = save_fraction;
1080 :
1081 : /*
1082 : * For EXCEPT, we must put the left input first. For INTERSECT, either
1083 : * order should give the same results, and we prefer to put the smaller
1084 : * input first in order to minimize the size of the hash table in the
1085 : * hashing case. "Smaller" means the one with the fewer groups.
1086 : */
1087 650 : if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
1088 620 : {
1089 620 : pathlist = list_make2(lpath, rpath);
1090 620 : tlist_list = list_make2(lpath_tlist, rpath_tlist);
1091 620 : firstFlag = 0;
1092 : }
1093 : else
1094 : {
1095 30 : pathlist = list_make2(rpath, lpath);
1096 30 : tlist_list = list_make2(rpath_tlist, lpath_tlist);
1097 30 : firstFlag = 1;
1098 : }
1099 :
1100 : /*
1101 : * Generate tlist for Append plan node.
1102 : *
1103 : * The tlist for an Append plan isn't important as far as the Append is
1104 : * concerned, but we must make it look real anyway for the benefit of the
1105 : * next plan level up. In fact, it has to be real enough that the flag
1106 : * column is shown as a variable not a constant, else setrefs.c will get
1107 : * confused.
1108 : */
1109 650 : tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
1110 : tlist_list, refnames_tlist);
1111 :
1112 650 : *pTargetList = tlist;
1113 :
1114 : /* Build result relation. */
1115 650 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
1116 650 : bms_union(lrel->relids, rrel->relids));
1117 650 : result_rel->reltarget = create_pathtarget(root, tlist);
1118 :
1119 : /*
1120 : * Append the child results together.
1121 : */
1122 650 : path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
1123 : NIL, NULL, 0, false, -1);
1124 :
1125 : /* Identify the grouping semantics */
1126 650 : groupList = generate_setop_grouplist(op, tlist);
1127 :
1128 : /*
1129 : * Estimate number of distinct groups that we'll need hashtable entries
1130 : * for; this is the size of the left-hand input for EXCEPT, or the smaller
1131 : * input for INTERSECT. Also estimate the number of eventual output rows.
1132 : * In non-ALL cases, we estimate each group produces one output row; in
1133 : * ALL cases use the relevant relation size. These are worst-case
1134 : * estimates, of course, but we need to be conservative.
1135 : */
1136 650 : if (op->op == SETOP_EXCEPT)
1137 : {
1138 440 : dNumGroups = dLeftGroups;
1139 440 : dNumOutputRows = op->all ? lpath->rows : dNumGroups;
1140 : }
1141 : else
1142 : {
1143 210 : dNumGroups = Min(dLeftGroups, dRightGroups);
1144 210 : dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
1145 : }
1146 :
1147 : /*
1148 : * Decide whether to hash or sort, and add a sort node if needed.
1149 : */
1150 650 : use_hash = choose_hashed_setop(root, groupList, path,
1151 : dNumGroups, dNumOutputRows,
1152 650 : (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
1153 :
1154 650 : if (groupList && !use_hash)
1155 132 : path = (Path *) create_sort_path(root,
1156 : result_rel,
1157 : path,
1158 : make_pathkeys_for_sortclauses(root,
1159 : groupList,
1160 : tlist),
1161 : -1.0);
1162 :
1163 : /*
1164 : * Finally, add a SetOp path node to generate the correct output.
1165 : */
1166 650 : switch (op->op)
1167 : {
1168 210 : case SETOP_INTERSECT:
1169 210 : cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
1170 210 : break;
1171 440 : case SETOP_EXCEPT:
1172 440 : cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
1173 440 : break;
1174 0 : default:
1175 0 : elog(ERROR, "unrecognized set op: %d", (int) op->op);
1176 : cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
1177 : break;
1178 : }
1179 650 : path = (Path *) create_setop_path(root,
1180 : result_rel,
1181 : path,
1182 : cmd,
1183 : use_hash ? SETOP_HASHED : SETOP_SORTED,
1184 : groupList,
1185 650 : list_length(op->colTypes) + 1,
1186 : use_hash ? firstFlag : -1,
1187 : dNumGroups,
1188 : dNumOutputRows);
1189 :
1190 650 : result_rel->rows = path->rows;
1191 650 : add_path(result_rel, path);
1192 650 : return result_rel;
1193 : }
1194 :
1195 : /*
1196 : * Pull up children of a UNION node that are identically-propertied UNIONs,
1197 : * and perform planning of the queries underneath the N-way UNION.
1198 : *
1199 : * The result is a list of RelOptInfos containing Paths for sub-nodes, with
1200 : * one entry for each descendant that is a leaf query or non-identical setop.
1201 : * We also return parallel lists of the childrens' targetlists and
1202 : * is-trivial-tlist flags.
1203 : *
1204 : * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
1205 : * output rows will be lost anyway.
1206 : */
1207 : static List *
1208 4132 : plan_union_children(PlannerInfo *root,
1209 : SetOperationStmt *top_union,
1210 : List *refnames_tlist,
1211 : List **tlist_list,
1212 : List **istrivial_tlist)
1213 : {
1214 4132 : List *pending_rels = list_make1(top_union);
1215 4132 : List *result = NIL;
1216 : List *child_tlist;
1217 : bool trivial_tlist;
1218 :
1219 4132 : *tlist_list = NIL;
1220 4132 : *istrivial_tlist = NIL;
1221 :
1222 23320 : while (pending_rels != NIL)
1223 : {
1224 19188 : Node *setOp = linitial(pending_rels);
1225 :
1226 19188 : pending_rels = list_delete_first(pending_rels);
1227 :
1228 19188 : if (IsA(setOp, SetOperationStmt))
1229 : {
1230 7642 : SetOperationStmt *op = (SetOperationStmt *) setOp;
1231 :
1232 7642 : if (op->op == top_union->op &&
1233 15092 : (op->all == top_union->all || op->all) &&
1234 15068 : equal(op->colTypes, top_union->colTypes) &&
1235 7528 : equal(op->colCollations, top_union->colCollations))
1236 : {
1237 : /* Same UNION, so fold children into parent */
1238 7528 : pending_rels = lcons(op->rarg, pending_rels);
1239 7528 : pending_rels = lcons(op->larg, pending_rels);
1240 7528 : continue;
1241 : }
1242 : }
1243 :
1244 : /*
1245 : * Not same, so plan this child separately.
1246 : *
1247 : * Note we disallow any resjunk columns in child results. This is
1248 : * necessary since the Append node that implements the union won't do
1249 : * any projection, and upper levels will get confused if some of our
1250 : * output tuples have junk and some don't. This case only arises when
1251 : * we have an EXCEPT or INTERSECT as child, else there won't be
1252 : * resjunk anyway.
1253 : */
1254 11660 : result = lappend(result, recurse_set_operations(setOp, root,
1255 : top_union->colTypes,
1256 : top_union->colCollations,
1257 : false, -1,
1258 : refnames_tlist,
1259 : &child_tlist,
1260 : &trivial_tlist));
1261 11660 : *tlist_list = lappend(*tlist_list, child_tlist);
1262 11660 : *istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
1263 : }
1264 :
1265 4132 : return result;
1266 : }
1267 :
1268 : /*
1269 : * postprocess_setop_rel - perform steps required after adding paths
1270 : */
1271 : static void
1272 20022 : postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
1273 : {
1274 : /*
1275 : * We don't currently worry about allowing FDWs to contribute paths to
1276 : * this relation, but give extensions a chance.
1277 : */
1278 20022 : if (create_upper_paths_hook)
1279 0 : (*create_upper_paths_hook) (root, UPPERREL_SETOP,
1280 : NULL, rel, NULL);
1281 :
1282 : /* Select cheapest path */
1283 20022 : set_cheapest(rel);
1284 20022 : }
1285 :
1286 : /*
1287 : * choose_hashed_setop - should we use hashing for a set operation?
1288 : */
1289 : static bool
1290 650 : choose_hashed_setop(PlannerInfo *root, List *groupClauses,
1291 : Path *input_path,
1292 : double dNumGroups, double dNumOutputRows,
1293 : const char *construct)
1294 : {
1295 650 : int numGroupCols = list_length(groupClauses);
1296 650 : Size hash_mem_limit = get_hash_memory_limit();
1297 : bool can_sort;
1298 : bool can_hash;
1299 : Size hashentrysize;
1300 : Path hashed_p;
1301 : Path sorted_p;
1302 : double tuple_fraction;
1303 :
1304 : /* Check whether the operators support sorting or hashing */
1305 650 : can_sort = grouping_is_sortable(groupClauses);
1306 650 : can_hash = grouping_is_hashable(groupClauses);
1307 650 : if (can_hash && can_sort)
1308 : {
1309 : /* we have a meaningful choice to make, continue ... */
1310 : }
1311 60 : else if (can_hash)
1312 0 : return true;
1313 60 : else if (can_sort)
1314 60 : return false;
1315 : else
1316 0 : ereport(ERROR,
1317 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1318 : /* translator: %s is UNION, INTERSECT, or EXCEPT */
1319 : errmsg("could not implement %s", construct),
1320 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1321 :
1322 : /* Prefer sorting when enable_hashagg is off */
1323 590 : if (!enable_hashagg)
1324 102 : return false;
1325 :
1326 : /*
1327 : * Don't do it if it doesn't look like the hashtable will fit into
1328 : * hash_mem.
1329 : */
1330 488 : hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
1331 :
1332 488 : if (hashentrysize * dNumGroups > hash_mem_limit)
1333 0 : return false;
1334 :
1335 : /*
1336 : * See if the estimated cost is no more than doing it the other way.
1337 : *
1338 : * We need to consider input_plan + hashagg versus input_plan + sort +
1339 : * group. Note that the actual result plan might involve a SetOp or
1340 : * Unique node, not Agg or Group, but the cost estimates for Agg and Group
1341 : * should be close enough for our purposes here.
1342 : *
1343 : * These path variables are dummies that just hold cost fields; we don't
1344 : * make actual Paths for these steps.
1345 : */
1346 488 : cost_agg(&hashed_p, root, AGG_HASHED, NULL,
1347 : numGroupCols, dNumGroups,
1348 : NIL,
1349 : input_path->disabled_nodes,
1350 : input_path->startup_cost, input_path->total_cost,
1351 488 : input_path->rows, input_path->pathtarget->width);
1352 :
1353 : /*
1354 : * Now for the sorted case. Note that the input is *always* unsorted,
1355 : * since it was made by appending unrelated sub-relations together.
1356 : */
1357 488 : sorted_p.disabled_nodes = input_path->disabled_nodes;
1358 488 : sorted_p.startup_cost = input_path->startup_cost;
1359 488 : sorted_p.total_cost = input_path->total_cost;
1360 : /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
1361 488 : cost_sort(&sorted_p, root, NIL, sorted_p.disabled_nodes,
1362 : sorted_p.total_cost,
1363 488 : input_path->rows, input_path->pathtarget->width,
1364 : 0.0, work_mem, -1.0);
1365 488 : cost_group(&sorted_p, root, numGroupCols, dNumGroups,
1366 : NIL,
1367 : sorted_p.disabled_nodes,
1368 : sorted_p.startup_cost, sorted_p.total_cost,
1369 : input_path->rows);
1370 :
1371 : /*
1372 : * Now make the decision using the top-level tuple fraction. First we
1373 : * have to convert an absolute count (LIMIT) into fractional form.
1374 : */
1375 488 : tuple_fraction = root->tuple_fraction;
1376 488 : if (tuple_fraction >= 1.0)
1377 0 : tuple_fraction /= dNumOutputRows;
1378 :
1379 488 : if (compare_fractional_path_costs(&hashed_p, &sorted_p,
1380 : tuple_fraction) < 0)
1381 : {
1382 : /* Hashed is cheaper, so use it */
1383 488 : return true;
1384 : }
1385 0 : return false;
1386 : }
1387 :
1388 : /*
1389 : * Generate targetlist for a set-operation plan node
1390 : *
1391 : * colTypes: OID list of set-op's result column datatypes
1392 : * colCollations: OID list of set-op's result column collations
1393 : * flag: -1 if no flag column needed, 0 or 1 to create a const flag column
1394 : * varno: varno to use in generated Vars
1395 : * hack_constants: true to copy up constants (see comments in code)
1396 : * input_tlist: targetlist of this node's input node
1397 : * refnames_tlist: targetlist to take column names from
1398 : * trivial_tlist: output parameter, set to true if targetlist is trivial
1399 : */
1400 : static List *
1401 14566 : generate_setop_tlist(List *colTypes, List *colCollations,
1402 : int flag,
1403 : Index varno,
1404 : bool hack_constants,
1405 : List *input_tlist,
1406 : List *refnames_tlist,
1407 : bool *trivial_tlist)
1408 : {
1409 14566 : List *tlist = NIL;
1410 14566 : int resno = 1;
1411 : ListCell *ctlc,
1412 : *cclc,
1413 : *itlc,
1414 : *rtlc;
1415 : TargetEntry *tle;
1416 : Node *expr;
1417 :
1418 14566 : *trivial_tlist = true; /* until proven differently */
1419 :
1420 59684 : forfour(ctlc, colTypes, cclc, colCollations,
1421 : itlc, input_tlist, rtlc, refnames_tlist)
1422 : {
1423 45118 : Oid colType = lfirst_oid(ctlc);
1424 45118 : Oid colColl = lfirst_oid(cclc);
1425 45118 : TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
1426 45118 : TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
1427 :
1428 : Assert(inputtle->resno == resno);
1429 : Assert(reftle->resno == resno);
1430 : Assert(!inputtle->resjunk);
1431 : Assert(!reftle->resjunk);
1432 :
1433 : /*
1434 : * Generate columns referencing input columns and having appropriate
1435 : * data types and column names. Insert datatype coercions where
1436 : * necessary.
1437 : *
1438 : * HACK: constants in the input's targetlist are copied up as-is
1439 : * rather than being referenced as subquery outputs. This is mainly
1440 : * to ensure that when we try to coerce them to the output column's
1441 : * datatype, the right things happen for UNKNOWN constants. But do
1442 : * this only at the first level of subquery-scan plans; we don't want
1443 : * phony constants appearing in the output tlists of upper-level
1444 : * nodes!
1445 : *
1446 : * Note that copying a constant doesn't in itself require us to mark
1447 : * the tlist nontrivial; see trivial_subqueryscan() in setrefs.c.
1448 : */
1449 45118 : if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
1450 14836 : expr = (Node *) inputtle->expr;
1451 : else
1452 121128 : expr = (Node *) makeVar(varno,
1453 30282 : inputtle->resno,
1454 30282 : exprType((Node *) inputtle->expr),
1455 30282 : exprTypmod((Node *) inputtle->expr),
1456 30282 : exprCollation((Node *) inputtle->expr),
1457 : 0);
1458 :
1459 45118 : if (exprType(expr) != colType)
1460 : {
1461 : /*
1462 : * Note: it's not really cool to be applying coerce_to_common_type
1463 : * here; one notable point is that assign_expr_collations never
1464 : * gets run on any generated nodes. For the moment that's not a
1465 : * problem because we force the correct exposed collation below.
1466 : * It would likely be best to make the parser generate the correct
1467 : * output tlist for every set-op to begin with, though.
1468 : */
1469 1498 : expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */
1470 : expr,
1471 : colType,
1472 : "UNION/INTERSECT/EXCEPT");
1473 1498 : *trivial_tlist = false; /* the coercion makes it not trivial */
1474 : }
1475 :
1476 : /*
1477 : * Ensure the tlist entry's exposed collation matches the set-op. This
1478 : * is necessary because plan_set_operations() reports the result
1479 : * ordering as a list of SortGroupClauses, which don't carry collation
1480 : * themselves but just refer to tlist entries. If we don't show the
1481 : * right collation then planner.c might do the wrong thing in
1482 : * higher-level queries.
1483 : *
1484 : * Note we use RelabelType, not CollateExpr, since this expression
1485 : * will reach the executor without any further processing.
1486 : */
1487 45118 : if (exprCollation(expr) != colColl)
1488 : {
1489 12218 : expr = applyRelabelType(expr,
1490 : exprType(expr), exprTypmod(expr), colColl,
1491 : COERCE_IMPLICIT_CAST, -1, false);
1492 12218 : *trivial_tlist = false; /* the relabel makes it not trivial */
1493 : }
1494 :
1495 90236 : tle = makeTargetEntry((Expr *) expr,
1496 45118 : (AttrNumber) resno++,
1497 45118 : pstrdup(reftle->resname),
1498 : false);
1499 :
1500 : /*
1501 : * By convention, all non-resjunk columns in a setop tree have
1502 : * ressortgroupref equal to their resno. In some cases the ref isn't
1503 : * needed, but this is a cleaner way than modifying the tlist later.
1504 : */
1505 45118 : tle->ressortgroupref = tle->resno;
1506 :
1507 45118 : tlist = lappend(tlist, tle);
1508 : }
1509 :
1510 14566 : if (flag >= 0)
1511 : {
1512 : /* Add a resjunk flag column */
1513 : /* flag value is the given constant */
1514 1300 : expr = (Node *) makeConst(INT4OID,
1515 : -1,
1516 : InvalidOid,
1517 : sizeof(int32),
1518 : Int32GetDatum(flag),
1519 : false,
1520 : true);
1521 1300 : tle = makeTargetEntry((Expr *) expr,
1522 1300 : (AttrNumber) resno++,
1523 : pstrdup("flag"),
1524 : true);
1525 1300 : tlist = lappend(tlist, tle);
1526 1300 : *trivial_tlist = false; /* the extra entry makes it not trivial */
1527 : }
1528 :
1529 14566 : return tlist;
1530 : }
1531 :
1532 : /*
1533 : * Generate targetlist for a set-operation Append node
1534 : *
1535 : * colTypes: OID list of set-op's result column datatypes
1536 : * colCollations: OID list of set-op's result column collations
1537 : * flag: true to create a flag column copied up from subplans
1538 : * input_tlists: list of tlists for sub-plans of the Append
1539 : * refnames_tlist: targetlist to take column names from
1540 : *
1541 : * The entries in the Append's targetlist should always be simple Vars;
1542 : * we just have to make sure they have the right datatypes/typmods/collations.
1543 : * The Vars are always generated with varno 0.
1544 : *
1545 : * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
1546 : * cannot figure out a realistic width for the tlist we make here. But we
1547 : * ought to refactor this code to produce a PathTarget directly, anyway.
1548 : */
1549 : static List *
1550 5594 : generate_append_tlist(List *colTypes, List *colCollations,
1551 : bool flag,
1552 : List *input_tlists,
1553 : List *refnames_tlist)
1554 : {
1555 5594 : List *tlist = NIL;
1556 5594 : int resno = 1;
1557 : ListCell *curColType;
1558 : ListCell *curColCollation;
1559 : ListCell *ref_tl_item;
1560 : int colindex;
1561 : TargetEntry *tle;
1562 : Node *expr;
1563 : ListCell *tlistl;
1564 : int32 *colTypmods;
1565 :
1566 : /*
1567 : * First extract typmods to use.
1568 : *
1569 : * If the inputs all agree on type and typmod of a particular column, use
1570 : * that typmod; else use -1.
1571 : */
1572 5594 : colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
1573 :
1574 20178 : foreach(tlistl, input_tlists)
1575 : {
1576 14584 : List *subtlist = (List *) lfirst(tlistl);
1577 : ListCell *subtlistl;
1578 :
1579 14584 : curColType = list_head(colTypes);
1580 14584 : colindex = 0;
1581 61032 : foreach(subtlistl, subtlist)
1582 : {
1583 46448 : TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
1584 :
1585 46448 : if (subtle->resjunk)
1586 1300 : continue;
1587 : Assert(curColType != NULL);
1588 45148 : if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
1589 : {
1590 : /* If first subplan, copy the typmod; else compare */
1591 45148 : int32 subtypmod = exprTypmod((Node *) subtle->expr);
1592 :
1593 45148 : if (tlistl == list_head(input_tlists))
1594 16696 : colTypmods[colindex] = subtypmod;
1595 28452 : else if (subtypmod != colTypmods[colindex])
1596 12 : colTypmods[colindex] = -1;
1597 : }
1598 : else
1599 : {
1600 : /* types disagree, so force typmod to -1 */
1601 0 : colTypmods[colindex] = -1;
1602 : }
1603 45148 : curColType = lnext(colTypes, curColType);
1604 45148 : colindex++;
1605 : }
1606 : Assert(curColType == NULL);
1607 : }
1608 :
1609 : /*
1610 : * Now we can build the tlist for the Append.
1611 : */
1612 5594 : colindex = 0;
1613 22290 : forthree(curColType, colTypes, curColCollation, colCollations,
1614 : ref_tl_item, refnames_tlist)
1615 : {
1616 16696 : Oid colType = lfirst_oid(curColType);
1617 16696 : int32 colTypmod = colTypmods[colindex++];
1618 16696 : Oid colColl = lfirst_oid(curColCollation);
1619 16696 : TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
1620 :
1621 : Assert(reftle->resno == resno);
1622 : Assert(!reftle->resjunk);
1623 16696 : expr = (Node *) makeVar(0,
1624 : resno,
1625 : colType,
1626 : colTypmod,
1627 : colColl,
1628 : 0);
1629 33392 : tle = makeTargetEntry((Expr *) expr,
1630 16696 : (AttrNumber) resno++,
1631 16696 : pstrdup(reftle->resname),
1632 : false);
1633 :
1634 : /*
1635 : * By convention, all non-resjunk columns in a setop tree have
1636 : * ressortgroupref equal to their resno. In some cases the ref isn't
1637 : * needed, but this is a cleaner way than modifying the tlist later.
1638 : */
1639 16696 : tle->ressortgroupref = tle->resno;
1640 :
1641 16696 : tlist = lappend(tlist, tle);
1642 : }
1643 :
1644 5594 : if (flag)
1645 : {
1646 : /* Add a resjunk flag column */
1647 : /* flag value is shown as copied up from subplan */
1648 650 : expr = (Node *) makeVar(0,
1649 : resno,
1650 : INT4OID,
1651 : -1,
1652 : InvalidOid,
1653 : 0);
1654 650 : tle = makeTargetEntry((Expr *) expr,
1655 650 : (AttrNumber) resno++,
1656 : pstrdup("flag"),
1657 : true);
1658 650 : tlist = lappend(tlist, tle);
1659 : }
1660 :
1661 5594 : pfree(colTypmods);
1662 :
1663 5594 : return tlist;
1664 : }
1665 :
1666 : /*
1667 : * generate_setop_grouplist
1668 : * Build a SortGroupClause list defining the sort/grouping properties
1669 : * of the setop's output columns.
1670 : *
1671 : * Parse analysis already determined the properties and built a suitable
1672 : * list, except that the entries do not have sortgrouprefs set because
1673 : * the parser output representation doesn't include a tlist for each
1674 : * setop. So what we need to do here is copy that list and install
1675 : * proper sortgrouprefs into it (copying those from the targetlist).
1676 : */
1677 : static List *
1678 4566 : generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
1679 : {
1680 4566 : List *grouplist = copyObject(op->groupClauses);
1681 : ListCell *lg;
1682 : ListCell *lt;
1683 :
1684 4566 : lg = list_head(grouplist);
1685 18814 : foreach(lt, targetlist)
1686 : {
1687 14248 : TargetEntry *tle = (TargetEntry *) lfirst(lt);
1688 : SortGroupClause *sgc;
1689 :
1690 14248 : if (tle->resjunk)
1691 : {
1692 : /* resjunk columns should not have sortgrouprefs */
1693 : Assert(tle->ressortgroupref == 0);
1694 650 : continue; /* ignore resjunk columns */
1695 : }
1696 :
1697 : /* non-resjunk columns should have sortgroupref = resno */
1698 : Assert(tle->ressortgroupref == tle->resno);
1699 :
1700 : /* non-resjunk columns should have grouping clauses */
1701 : Assert(lg != NULL);
1702 13598 : sgc = (SortGroupClause *) lfirst(lg);
1703 13598 : lg = lnext(grouplist, lg);
1704 : Assert(sgc->tleSortGroupRef == 0);
1705 :
1706 13598 : sgc->tleSortGroupRef = tle->ressortgroupref;
1707 : }
1708 : Assert(lg == NULL);
1709 4566 : return grouplist;
1710 : }
|