LCOV - PostgreSQL 19devel - src/backend/optimizer/prep/prepunion.c

LCOV - code coverage report

Current view:	top level - src/backend/optimizer/prep - prepunion.c (source / functions)		Hit	Total	Coverage
Test:	PostgreSQL 19devel	Lines:	377	389	96.9 %
Date:	2025-07-05 19:17:50	Functions:	11	11	100.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * prepunion.c
       4             :  *    Routines to plan set-operation queries.  The filename is a leftover
       5             :  *    from a time when only UNIONs were implemented.
       6             :  *
       7             :  * There are two code paths in the planner for set-operation queries.
       8             :  * If a subquery consists entirely of simple UNION ALL operations, it
       9             :  * is converted into an "append relation".  Otherwise, it is handled
      10             :  * by the general code in this module (plan_set_operations and its
      11             :  * subroutines).  There is some support code here for the append-relation
      12             :  * case, but most of the heavy lifting for that is done elsewhere,
      13             :  * notably in prepjointree.c and allpaths.c.
      14             :  *
      15             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      16             :  * Portions Copyright (c) 1994, Regents of the University of California
      17             :  *
      18             :  *
      19             :  * IDENTIFICATION
      20             :  *    src/backend/optimizer/prep/prepunion.c
      21             :  *
      22             :  *-------------------------------------------------------------------------
      23             :  */
      24             : #include "postgres.h"
      25             : 
      26             : #include "access/htup_details.h"
      27             : #include "catalog/pg_type.h"
      28             : #include "miscadmin.h"
      29             : #include "nodes/makefuncs.h"
      30             : #include "nodes/nodeFuncs.h"
      31             : #include "optimizer/cost.h"
      32             : #include "optimizer/pathnode.h"
      33             : #include "optimizer/paths.h"
      34             : #include "optimizer/planner.h"
      35             : #include "optimizer/prep.h"
      36             : #include "optimizer/tlist.h"
      37             : #include "parser/parse_coerce.h"
      38             : #include "utils/selfuncs.h"
      39             : 
      40             : 
      41             : static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
      42             :                                           SetOperationStmt *parentOp,
      43             :                                           List *colTypes, List *colCollations,
      44             :                                           List *refnames_tlist,
      45             :                                           List **pTargetList,
      46             :                                           bool *istrivial_tlist);
      47             : static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
      48             :                                            PlannerInfo *root,
      49             :                                            List *refnames_tlist,
      50             :                                            List **pTargetList);
      51             : static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
      52             :                                     bool trivial_tlist, List *child_tlist,
      53             :                                     List *interesting_pathkeys,
      54             :                                     double *pNumGroups);
      55             : static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
      56             :                                         List *refnames_tlist,
      57             :                                         List **pTargetList);
      58             : static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
      59             :                                            List *refnames_tlist,
      60             :                                            List **pTargetList);
      61             : static List *plan_union_children(PlannerInfo *root,
      62             :                                  SetOperationStmt *top_union,
      63             :                                  List *refnames_tlist,
      64             :                                  List **tlist_list,
      65             :                                  List **istrivial_tlist);
      66             : static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
      67             : static List *generate_setop_tlist(List *colTypes, List *colCollations,
      68             :                                   Index varno,
      69             :                                   bool hack_constants,
      70             :                                   List *input_tlist,
      71             :                                   List *refnames_tlist,
      72             :                                   bool *trivial_tlist);
      73             : static List *generate_append_tlist(List *colTypes, List *colCollations,
      74             :                                    List *input_tlists,
      75             :                                    List *refnames_tlist);
      76             : static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
      77             : 
      78             : 
      79             : /*
      80             :  * plan_set_operations
      81             :  *
      82             :  *    Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
      83             :  *
      84             :  * This routine only deals with the setOperations tree of the given query.
      85             :  * Any top-level ORDER BY requested in root->parse->sortClause will be handled
      86             :  * when we return to grouping_planner; likewise for LIMIT.
      87             :  *
      88             :  * What we return is an "upperrel" RelOptInfo containing at least one Path
      89             :  * that implements the set-operation tree.  In addition, root->processed_tlist
      90             :  * receives a targetlist representing the output of the topmost setop node.
      91             :  */
      92             : RelOptInfo *
      93        6168 : plan_set_operations(PlannerInfo *root)
      94             : {
      95        6168 :     Query      *parse = root->parse;
      96        6168 :     SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
      97             :     Node       *node;
      98             :     RangeTblEntry *leftmostRTE;
      99             :     Query      *leftmostQuery;
     100             :     RelOptInfo *setop_rel;
     101             :     List       *top_tlist;
     102             : 
     103             :     Assert(topop);
     104             : 
     105             :     /* check for unsupported stuff */
     106             :     Assert(parse->jointree->fromlist == NIL);
     107             :     Assert(parse->jointree->quals == NULL);
     108             :     Assert(parse->groupClause == NIL);
     109             :     Assert(parse->havingQual == NULL);
     110             :     Assert(parse->windowClause == NIL);
     111             :     Assert(parse->distinctClause == NIL);
     112             : 
     113             :     /*
     114             :      * In the outer query level, equivalence classes are limited to classes
     115             :      * which define that the top-level target entry is equivalent to the
     116             :      * corresponding child target entry.  There won't be any equivalence class
     117             :      * merging.  Mark that merging is complete to allow us to make pathkeys.
     118             :      */
     119             :     Assert(root->eq_classes == NIL);
     120        6168 :     root->ec_merging_done = true;
     121             : 
     122             :     /*
     123             :      * We'll need to build RelOptInfos for each of the leaf subqueries, which
     124             :      * are RTE_SUBQUERY rangetable entries in this Query.  Prepare the index
     125             :      * arrays for those, and for AppendRelInfos in case they're needed.
     126             :      */
     127        6168 :     setup_simple_rel_arrays(root);
     128             : 
     129             :     /*
     130             :      * Find the leftmost component Query.  We need to use its column names for
     131             :      * all generated tlists (else SELECT INTO won't work right).
     132             :      */
     133        6168 :     node = topop->larg;
     134       10058 :     while (node && IsA(node, SetOperationStmt))
     135        3890 :         node = ((SetOperationStmt *) node)->larg;
     136             :     Assert(node && IsA(node, RangeTblRef));
     137        6168 :     leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
     138        6168 :     leftmostQuery = leftmostRTE->subquery;
     139             :     Assert(leftmostQuery != NULL);
     140             : 
     141             :     /*
     142             :      * If the topmost node is a recursive union, it needs special processing.
     143             :      */
     144        6168 :     if (root->hasRecursion)
     145             :     {
     146        1010 :         setop_rel = generate_recursion_path(topop, root,
     147             :                                             leftmostQuery->targetList,
     148             :                                             &top_tlist);
     149             :     }
     150             :     else
     151             :     {
     152             :         bool        trivial_tlist;
     153             : 
     154             :         /*
     155             :          * Recurse on setOperations tree to generate paths for set ops. The
     156             :          * final output paths should have just the column types shown as the
     157             :          * output from the top-level node.
     158             :          */
     159        5158 :         setop_rel = recurse_set_operations((Node *) topop, root,
     160             :                                            NULL,    /* no parent */
     161             :                                            topop->colTypes, topop->colCollations,
     162             :                                            leftmostQuery->targetList,
     163             :                                            &top_tlist,
     164             :                                            &trivial_tlist);
     165             :     }
     166             : 
     167             :     /* Must return the built tlist into root->processed_tlist. */
     168        6162 :     root->processed_tlist = top_tlist;
     169             : 
     170        6162 :     return setop_rel;
     171             : }
     172             : 
     173             : /*
     174             :  * recurse_set_operations
     175             :  *    Recursively handle one step in a tree of set operations
     176             :  *
     177             :  * setOp: current step (could be a SetOperationStmt or a leaf RangeTblRef)
     178             :  * parentOp: parent step, or NULL if none (but see below)
     179             :  * colTypes: OID list of set-op's result column datatypes
     180             :  * colCollations: OID list of set-op's result column collations
     181             :  * refnames_tlist: targetlist to take column names from
     182             :  *
     183             :  * parentOp should be passed as NULL unless that step is interested in
     184             :  * getting sorted output from this step.  ("Sorted" means "sorted according
     185             :  * to the default btree opclasses of the result column datatypes".)
     186             :  *
     187             :  * Returns a RelOptInfo for the subtree, as well as these output parameters:
     188             :  * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
     189             :  * *istrivial_tlist: true if, and only if, datatypes between parent and child
     190             :  * match.
     191             :  *
     192             :  * If setOp is a leaf node, this function plans the sub-query but does
     193             :  * not populate the pathlist of the returned RelOptInfo.  The caller will
     194             :  * generate SubqueryScan paths using useful path(s) of the subquery (see
     195             :  * build_setop_child_paths).  But this function does build the paths for
     196             :  * set-operation nodes.
     197             :  *
     198             :  * The pTargetList output parameter is mostly redundant with the pathtarget
     199             :  * of the returned RelOptInfo, but for the moment we need it because much of
     200             :  * the logic in this file depends on flag columns being marked resjunk.
     201             :  * XXX Now that there are no flag columns and hence no resjunk columns, we
     202             :  * could probably refactor this file to deal only in pathtargets.
     203             :  *
     204             :  * We don't have to care about typmods here: the only allowed difference
     205             :  * between set-op input and output typmods is input is a specific typmod
     206             :  * and output is -1, and that does not require a coercion.
     207             :  */
     208             : static RelOptInfo *
     209       21606 : recurse_set_operations(Node *setOp, PlannerInfo *root,
     210             :                        SetOperationStmt *parentOp,
     211             :                        List *colTypes, List *colCollations,
     212             :                        List *refnames_tlist,
     213             :                        List **pTargetList,
     214             :                        bool *istrivial_tlist)
     215             : {
     216             :     RelOptInfo *rel;
     217             : 
     218       21606 :     *istrivial_tlist = true;    /* for now */
     219             : 
     220             :     /* Guard against stack overflow due to overly complex setop nests */
     221       21606 :     check_stack_depth();
     222             : 
     223       21606 :     if (IsA(setOp, RangeTblRef))
     224             :     {
     225       16292 :         RangeTblRef *rtr = (RangeTblRef *) setOp;
     226       16292 :         RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
     227       16292 :         Query      *subquery = rte->subquery;
     228             :         PlannerInfo *subroot;
     229             :         List       *tlist;
     230             :         bool        trivial_tlist;
     231             : 
     232             :         Assert(subquery != NULL);
     233             : 
     234             :         /* Build a RelOptInfo for this leaf subquery. */
     235       16292 :         rel = build_simple_rel(root, rtr->rtindex, NULL);
     236             : 
     237             :         /* plan_params should not be in use in current query level */
     238             :         Assert(root->plan_params == NIL);
     239             : 
     240             :         /*
     241             :          * Generate a subroot and Paths for the subquery.  If we have a
     242             :          * parentOp, pass that down to encourage subquery_planner to consider
     243             :          * suitably-sorted Paths.
     244             :          */
     245       16292 :         subroot = rel->subroot = subquery_planner(root->glob, subquery, root,
     246             :                                                   false, root->tuple_fraction,
     247             :                                                   parentOp);
     248             : 
     249             :         /*
     250             :          * It should not be possible for the primitive query to contain any
     251             :          * cross-references to other primitive queries in the setop tree.
     252             :          */
     253       16292 :         if (root->plan_params)
     254           0 :             elog(ERROR, "unexpected outer reference in set operation subquery");
     255             : 
     256             :         /* Figure out the appropriate target list for this subquery. */
     257       16292 :         tlist = generate_setop_tlist(colTypes, colCollations,
     258       16292 :                                      rtr->rtindex,
     259             :                                      true,
     260             :                                      subroot->processed_tlist,
     261             :                                      refnames_tlist,
     262             :                                      &trivial_tlist);
     263       16292 :         rel->reltarget = create_pathtarget(root, tlist);
     264             : 
     265             :         /* Return the fully-fledged tlist to caller, too */
     266       16292 :         *pTargetList = tlist;
     267       16292 :         *istrivial_tlist = trivial_tlist;
     268             :     }
     269        5314 :     else if (IsA(setOp, SetOperationStmt))
     270             :     {
     271        5314 :         SetOperationStmt *op = (SetOperationStmt *) setOp;
     272             : 
     273             :         /* UNIONs are much different from INTERSECT/EXCEPT */
     274        5314 :         if (op->op == SETOP_UNION)
     275        4652 :             rel = generate_union_paths(op, root,
     276             :                                        refnames_tlist,
     277             :                                        pTargetList);
     278             :         else
     279         662 :             rel = generate_nonunion_paths(op, root,
     280             :                                           refnames_tlist,
     281             :                                           pTargetList);
     282             : 
     283             :         /*
     284             :          * If necessary, add a Result node to project the caller-requested
     285             :          * output columns.
     286             :          *
     287             :          * XXX you don't really want to know about this: setrefs.c will apply
     288             :          * fix_upper_expr() to the Result node's tlist. This would fail if the
     289             :          * Vars generated by generate_setop_tlist() were not exactly equal()
     290             :          * to the corresponding tlist entries of the subplan. However, since
     291             :          * the subplan was generated by generate_union_paths() or
     292             :          * generate_nonunion_paths(), and hence its tlist was generated by
     293             :          * generate_append_tlist() or generate_setop_tlist(), this will work.
     294             :          * We just tell generate_setop_tlist() to use varno 0.
     295             :          */
     296        5314 :         if (!tlist_same_datatypes(*pTargetList, colTypes, false) ||
     297        5302 :             !tlist_same_collations(*pTargetList, colCollations, false))
     298             :         {
     299             :             PathTarget *target;
     300             :             bool        trivial_tlist;
     301             :             ListCell   *lc;
     302             : 
     303          12 :             *pTargetList = generate_setop_tlist(colTypes, colCollations,
     304             :                                                 0,
     305             :                                                 false,
     306             :                                                 *pTargetList,
     307             :                                                 refnames_tlist,
     308             :                                                 &trivial_tlist);
     309          12 :             *istrivial_tlist = trivial_tlist;
     310          12 :             target = create_pathtarget(root, *pTargetList);
     311             : 
     312             :             /* Apply projection to each path */
     313          24 :             foreach(lc, rel->pathlist)
     314             :             {
     315          12 :                 Path       *subpath = (Path *) lfirst(lc);
     316             :                 Path       *path;
     317             : 
     318             :                 Assert(subpath->param_info == NULL);
     319          12 :                 path = apply_projection_to_path(root, subpath->parent,
     320             :                                                 subpath, target);
     321             :                 /* If we had to add a Result, path is different from subpath */
     322          12 :                 if (path != subpath)
     323          12 :                     lfirst(lc) = path;
     324             :             }
     325             : 
     326             :             /* Apply projection to each partial path */
     327          12 :             foreach(lc, rel->partial_pathlist)
     328             :             {
     329           0 :                 Path       *subpath = (Path *) lfirst(lc);
     330             :                 Path       *path;
     331             : 
     332             :                 Assert(subpath->param_info == NULL);
     333             : 
     334             :                 /* avoid apply_projection_to_path, in case of multiple refs */
     335           0 :                 path = (Path *) create_projection_path(root, subpath->parent,
     336             :                                                        subpath, target);
     337           0 :                 lfirst(lc) = path;
     338             :             }
     339             :         }
     340        5314 :         postprocess_setop_rel(root, rel);
     341             :     }
     342             :     else
     343             :     {
     344           0 :         elog(ERROR, "unrecognized node type: %d",
     345             :              (int) nodeTag(setOp));
     346             :         *pTargetList = NIL;
     347             :         rel = NULL;             /* keep compiler quiet */
     348             :     }
     349             : 
     350       21606 :     return rel;
     351             : }
     352             : 
     353             : /*
     354             :  * Generate paths for a recursive UNION node
     355             :  */
     356             : static RelOptInfo *
     357        1010 : generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
     358             :                         List *refnames_tlist,
     359             :                         List **pTargetList)
     360             : {
     361             :     RelOptInfo *result_rel;
     362             :     Path       *path;
     363             :     RelOptInfo *lrel,
     364             :                *rrel;
     365             :     Path       *lpath;
     366             :     Path       *rpath;
     367             :     List       *lpath_tlist;
     368             :     bool        lpath_trivial_tlist;
     369             :     List       *rpath_tlist;
     370             :     bool        rpath_trivial_tlist;
     371             :     List       *tlist;
     372             :     List       *groupList;
     373             :     double      dNumGroups;
     374             : 
     375             :     /* Parser should have rejected other cases */
     376        1010 :     if (setOp->op != SETOP_UNION)
     377           0 :         elog(ERROR, "only UNION queries can be recursive");
     378             :     /* Worktable ID should be assigned */
     379             :     Assert(root->wt_param_id >= 0);
     380             : 
     381             :     /*
     382             :      * Unlike a regular UNION node, process the left and right inputs
     383             :      * separately without any intention of combining them into one Append.
     384             :      */
     385        1010 :     lrel = recurse_set_operations(setOp->larg, root,
     386             :                                   NULL, /* no value in sorted results */
     387             :                                   setOp->colTypes, setOp->colCollations,
     388             :                                   refnames_tlist,
     389             :                                   &lpath_tlist,
     390             :                                   &lpath_trivial_tlist);
     391        1010 :     if (lrel->rtekind == RTE_SUBQUERY)
     392        1010 :         build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
     393             :                                 NIL, NULL);
     394        1010 :     lpath = lrel->cheapest_total_path;
     395             :     /* The right path will want to look at the left one ... */
     396        1010 :     root->non_recursive_path = lpath;
     397        1010 :     rrel = recurse_set_operations(setOp->rarg, root,
     398             :                                   NULL, /* no value in sorted results */
     399             :                                   setOp->colTypes, setOp->colCollations,
     400             :                                   refnames_tlist,
     401             :                                   &rpath_tlist,
     402             :                                   &rpath_trivial_tlist);
     403        1010 :     if (rrel->rtekind == RTE_SUBQUERY)
     404        1004 :         build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
     405             :                                 NIL, NULL);
     406        1010 :     rpath = rrel->cheapest_total_path;
     407        1010 :     root->non_recursive_path = NULL;
     408             : 
     409             :     /*
     410             :      * Generate tlist for RecursiveUnion path node --- same as in Append cases
     411             :      */
     412        1010 :     tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations,
     413        1010 :                                   list_make2(lpath_tlist, rpath_tlist),
     414             :                                   refnames_tlist);
     415             : 
     416        1010 :     *pTargetList = tlist;
     417             : 
     418             :     /* Build result relation. */
     419        1010 :     result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
     420        1010 :                                  bms_union(lrel->relids, rrel->relids));
     421        1010 :     result_rel->reltarget = create_pathtarget(root, tlist);
     422             : 
     423             :     /*
     424             :      * If UNION, identify the grouping operators
     425             :      */
     426        1010 :     if (setOp->all)
     427             :     {
     428         520 :         groupList = NIL;
     429         520 :         dNumGroups = 0;
     430             :     }
     431             :     else
     432             :     {
     433             :         /* Identify the grouping semantics */
     434         490 :         groupList = generate_setop_grouplist(setOp, tlist);
     435             : 
     436             :         /* We only support hashing here */
     437         490 :         if (!grouping_is_hashable(groupList))
     438           6 :             ereport(ERROR,
     439             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     440             :                      errmsg("could not implement recursive UNION"),
     441             :                      errdetail("All column datatypes must be hashable.")));
     442             : 
     443             :         /*
     444             :          * For the moment, take the number of distinct groups as equal to the
     445             :          * total input size, ie, the worst case.
     446             :          */
     447         484 :         dNumGroups = lpath->rows + rpath->rows * 10;
     448             :     }
     449             : 
     450             :     /*
     451             :      * And make the path node.
     452             :      */
     453        1004 :     path = (Path *) create_recursiveunion_path(root,
     454             :                                                result_rel,
     455             :                                                lpath,
     456             :                                                rpath,
     457        1004 :                                                result_rel->reltarget,
     458             :                                                groupList,
     459             :                                                root->wt_param_id,
     460             :                                                dNumGroups);
     461             : 
     462        1004 :     add_path(result_rel, path);
     463        1004 :     postprocess_setop_rel(root, result_rel);
     464        1004 :     return result_rel;
     465             : }
     466             : 
     467             : /*
     468             :  * build_setop_child_paths
     469             :  *      Build paths for the set op child relation denoted by 'rel'.
     470             :  *
     471             :  * 'rel' is an RTE_SUBQUERY relation.  We have already generated paths within
     472             :  * the subquery's subroot; the task here is to create SubqueryScan paths for
     473             :  * 'rel', representing scans of the useful subquery paths.
     474             :  *
     475             :  * interesting_pathkeys: if not NIL, also include paths that suit these
     476             :  * pathkeys, sorting any unsorted paths as required.
     477             :  * *pNumGroups: if not NULL, we estimate the number of distinct groups
     478             :  * in the result, and store it there.
     479             :  */
     480             : static void
     481       16292 : build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
     482             :                         bool trivial_tlist, List *child_tlist,
     483             :                         List *interesting_pathkeys, double *pNumGroups)
     484             : {
     485             :     RelOptInfo *final_rel;
     486       16292 :     List       *setop_pathkeys = rel->subroot->setop_pathkeys;
     487             :     ListCell   *lc;
     488             : 
     489             :     /* it can't be a set op child rel if it's not a subquery */
     490             :     Assert(rel->rtekind == RTE_SUBQUERY);
     491             : 
     492             :     /* when sorting is needed, add child rel equivalences */
     493       16292 :     if (interesting_pathkeys != NIL)
     494       11882 :         add_setop_child_rel_equivalences(root,
     495             :                                          rel,
     496             :                                          child_tlist,
     497             :                                          interesting_pathkeys);
     498             : 
     499             :     /*
     500             :      * Mark rel with estimated output rows, width, etc.  Note that we have to
     501             :      * do this before generating outer-query paths, else cost_subqueryscan is
     502             :      * not happy.
     503             :      */
     504       16292 :     set_subquery_size_estimates(root, rel);
     505             : 
     506             :     /*
     507             :      * Since we may want to add a partial path to this relation, we must set
     508             :      * its consider_parallel flag correctly.
     509             :      */
     510       16292 :     final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
     511       16292 :     rel->consider_parallel = final_rel->consider_parallel;
     512             : 
     513             :     /* Generate subquery scan paths for any interesting path in final_rel */
     514       41924 :     foreach(lc, final_rel->pathlist)
     515             :     {
     516       25632 :         Path       *subpath = (Path *) lfirst(lc);
     517             :         List       *pathkeys;
     518       25632 :         Path       *cheapest_input_path = final_rel->cheapest_total_path;
     519             :         bool        is_sorted;
     520             :         int         presorted_keys;
     521             : 
     522             :         /*
     523             :          * Include the cheapest path as-is so that the set operation can be
     524             :          * cheaply implemented using a method which does not require the input
     525             :          * to be sorted.
     526             :          */
     527       25632 :         if (subpath == cheapest_input_path)
     528             :         {
     529             :             /* Convert subpath's pathkeys to outer representation */
     530       16292 :             pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
     531             :                                                  make_tlist_from_pathtarget(subpath->pathtarget));
     532             : 
     533             :             /* Generate outer path using this subpath */
     534       16292 :             add_path(rel, (Path *) create_subqueryscan_path(root,
     535             :                                                             rel,
     536             :                                                             subpath,
     537             :                                                             trivial_tlist,
     538             :                                                             pathkeys,
     539             :                                                             NULL));
     540             :         }
     541             : 
     542             :         /* skip dealing with sorted paths if the setop doesn't need them */
     543       25632 :         if (interesting_pathkeys == NIL)
     544        4466 :             continue;
     545             : 
     546             :         /*
     547             :          * Create paths to suit final sort order required for setop_pathkeys.
     548             :          * Here we'll sort the cheapest input path (if not sorted already) and
     549             :          * incremental sort any paths which are partially sorted.
     550             :          */
     551       21178 :         is_sorted = pathkeys_count_contained_in(setop_pathkeys,
     552             :                                                 subpath->pathkeys,
     553             :                                                 &presorted_keys);
     554             : 
     555       21178 :         if (!is_sorted)
     556             :         {
     557       13946 :             double      limittuples = rel->subroot->limit_tuples;
     558             : 
     559             :             /*
     560             :              * Try at least sorting the cheapest path and also try
     561             :              * incrementally sorting any path which is partially sorted
     562             :              * already (no need to deal with paths which have presorted keys
     563             :              * when incremental sort is disabled unless it's the cheapest
     564             :              * input path).
     565             :              */
     566       13946 :             if (subpath != cheapest_input_path &&
     567        3144 :                 (presorted_keys == 0 || !enable_incremental_sort))
     568          12 :                 continue;
     569             : 
     570             :             /*
     571             :              * We've no need to consider both a sort and incremental sort.
     572             :              * We'll just do a sort if there are no presorted keys and an
     573             :              * incremental sort when there are presorted keys.
     574             :              */
     575       13934 :             if (presorted_keys == 0 || !enable_incremental_sort)
     576       10754 :                 subpath = (Path *) create_sort_path(rel->subroot,
     577             :                                                     final_rel,
     578             :                                                     subpath,
     579             :                                                     setop_pathkeys,
     580             :                                                     limittuples);
     581             :             else
     582        3180 :                 subpath = (Path *) create_incremental_sort_path(rel->subroot,
     583             :                                                                 final_rel,
     584             :                                                                 subpath,
     585             :                                                                 setop_pathkeys,
     586             :                                                                 presorted_keys,
     587             :                                                                 limittuples);
     588             :         }
     589             : 
     590             :         /*
     591             :          * subpath is now sorted, so add it to the pathlist.  We already added
     592             :          * the cheapest_input_path above, so don't add it again unless we just
     593             :          * sorted it.
     594             :          */
     595       21166 :         if (subpath != cheapest_input_path)
     596             :         {
     597             :             /* Convert subpath's pathkeys to outer representation */
     598       20086 :             pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
     599             :                                                  make_tlist_from_pathtarget(subpath->pathtarget));
     600             : 
     601             :             /* Generate outer path using this subpath */
     602       20086 :             add_path(rel, (Path *) create_subqueryscan_path(root,
     603             :                                                             rel,
     604             :                                                             subpath,
     605             :                                                             trivial_tlist,
     606             :                                                             pathkeys,
     607             :                                                             NULL));
     608             :         }
     609             :     }
     610             : 
     611             :     /* if consider_parallel is false, there should be no partial paths */
     612             :     Assert(final_rel->consider_parallel ||
     613             :            final_rel->partial_pathlist == NIL);
     614             : 
     615             :     /*
     616             :      * If we have a partial path for the child relation, we can use that to
     617             :      * build a partial path for this relation.  But there's no point in
     618             :      * considering any path but the cheapest.
     619             :      */
     620       16292 :     if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
     621       11260 :         final_rel->partial_pathlist != NIL)
     622             :     {
     623             :         Path       *partial_subpath;
     624             :         Path       *partial_path;
     625             : 
     626          12 :         partial_subpath = linitial(final_rel->partial_pathlist);
     627             :         partial_path = (Path *)
     628          12 :             create_subqueryscan_path(root, rel, partial_subpath,
     629             :                                      trivial_tlist,
     630             :                                      NIL, NULL);
     631          12 :         add_partial_path(rel, partial_path);
     632             :     }
     633             : 
     634       16292 :     postprocess_setop_rel(root, rel);
     635             : 
     636             :     /*
     637             :      * Estimate number of groups if caller wants it.  If the subquery used
     638             :      * grouping or aggregation, its output is probably mostly unique anyway;
     639             :      * otherwise do statistical estimation.
     640             :      *
     641             :      * XXX you don't really want to know about this: we do the estimation
     642             :      * using the subroot->parse's original targetlist expressions, not the
     643             :      * subroot->processed_tlist which might seem more appropriate.  The reason
     644             :      * is that if the subquery is itself a setop, it may return a
     645             :      * processed_tlist containing "varno 0" Vars generated by
     646             :      * generate_append_tlist, and those would confuse estimate_num_groups
     647             :      * mightily.  We ought to get rid of the "varno 0" hack, but that requires
     648             :      * a redesign of the parsetree representation of setops, so that there can
     649             :      * be an RTE corresponding to each setop's output. Note, we use this not
     650             :      * subquery's targetlist but subroot->parse's targetlist, because it was
     651             :      * revised by self-join removal.  subquery's targetlist might contain the
     652             :      * references to the removed relids.
     653             :      */
     654       16292 :     if (pNumGroups)
     655             :     {
     656        1294 :         PlannerInfo *subroot = rel->subroot;
     657        1294 :         Query      *subquery = subroot->parse;
     658             : 
     659        1294 :         if (subquery->groupClause || subquery->groupingSets ||
     660        1294 :             subquery->distinctClause || subroot->hasHavingQual ||
     661        1282 :             subquery->hasAggs)
     662          12 :             *pNumGroups = rel->cheapest_total_path->rows;
     663             :         else
     664        1282 :             *pNumGroups = estimate_num_groups(subroot,
     665        1282 :                                               get_tlist_exprs(subroot->parse->targetList, false),
     666        1282 :                                               rel->cheapest_total_path->rows,
     667             :                                               NULL,
     668             :                                               NULL);
     669             :     }
     670       16292 : }
     671             : 
     672             : /*
     673             :  * Generate paths for a UNION or UNION ALL node
     674             :  */
     675             : static RelOptInfo *
     676        4652 : generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
     677             :                      List *refnames_tlist,
     678             :                      List **pTargetList)
     679             : {
     680        4652 :     Relids      relids = NULL;
     681             :     RelOptInfo *result_rel;
     682             :     ListCell   *lc;
     683             :     ListCell   *lc2;
     684             :     ListCell   *lc3;
     685        4652 :     List       *cheapest_pathlist = NIL;
     686        4652 :     List       *ordered_pathlist = NIL;
     687        4652 :     List       *partial_pathlist = NIL;
     688        4652 :     bool        partial_paths_valid = true;
     689        4652 :     bool        consider_parallel = true;
     690             :     List       *rellist;
     691             :     List       *tlist_list;
     692             :     List       *trivial_tlist_list;
     693             :     List       *tlist;
     694        4652 :     List       *groupList = NIL;
     695             :     Path       *apath;
     696        4652 :     Path       *gpath = NULL;
     697        4652 :     bool        try_sorted = false;
     698        4652 :     List       *union_pathkeys = NIL;
     699             : 
     700             :     /*
     701             :      * If any of my children are identical UNION nodes (same op, all-flag, and
     702             :      * colTypes/colCollations) then they can be merged into this node so that
     703             :      * we generate only one Append/MergeAppend and unique-ification for the
     704             :      * lot.  Recurse to find such nodes.
     705             :      */
     706        4652 :     rellist = plan_union_children(root,
     707             :                                   op,
     708             :                                   refnames_tlist,
     709             :                                   &tlist_list,
     710             :                                   &trivial_tlist_list);
     711             : 
     712             :     /*
     713             :      * Generate tlist for Append/MergeAppend plan node.
     714             :      *
     715             :      * The tlist for an Append plan isn't important as far as the Append is
     716             :      * concerned, but we must make it look real anyway for the benefit of the
     717             :      * next plan level up.
     718             :      */
     719        4652 :     tlist = generate_append_tlist(op->colTypes, op->colCollations,
     720             :                                   tlist_list, refnames_tlist);
     721        4652 :     *pTargetList = tlist;
     722             : 
     723             :     /* For UNIONs (not UNION ALL), try sorting, if sorting is possible */
     724        4652 :     if (!op->all)
     725             :     {
     726             :         /* Identify the grouping semantics */
     727        3848 :         groupList = generate_setop_grouplist(op, tlist);
     728             : 
     729        3848 :         if (grouping_is_sortable(op->groupClauses))
     730             :         {
     731        3744 :             try_sorted = true;
     732             :             /* Determine the pathkeys for sorting by the whole target list */
     733        3744 :             union_pathkeys = make_pathkeys_for_sortclauses(root, groupList,
     734             :                                                            tlist);
     735             : 
     736        3744 :             root->query_pathkeys = union_pathkeys;
     737             :         }
     738             :     }
     739             : 
     740             :     /*
     741             :      * Now that we've got the append target list, we can build the union child
     742             :      * paths.
     743             :      */
     744       17756 :     forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
     745             :     {
     746       13104 :         RelOptInfo *rel = lfirst(lc);
     747       13104 :         bool        trivial_tlist = lfirst_int(lc2);
     748       13104 :         List       *child_tlist = lfirst_node(List, lc3);
     749             : 
     750             :         /* only build paths for the union children */
     751       13104 :         if (rel->rtekind == RTE_SUBQUERY)
     752       12984 :             build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
     753             :                                     union_pathkeys, NULL);
     754             :     }
     755             : 
     756             :     /* Build path lists and relid set. */
     757       17756 :     foreach(lc, rellist)
     758             :     {
     759       13104 :         RelOptInfo *rel = lfirst(lc);
     760             :         Path       *ordered_path;
     761             : 
     762       13104 :         cheapest_pathlist = lappend(cheapest_pathlist,
     763       13104 :                                     rel->cheapest_total_path);
     764             : 
     765       13104 :         if (try_sorted)
     766             :         {
     767        4000 :             ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
     768             :                                                           union_pathkeys,
     769             :                                                           NULL,
     770             :                                                           TOTAL_COST,
     771             :                                                           false);
     772             : 
     773        4000 :             if (ordered_path != NULL)
     774         476 :                 ordered_pathlist = lappend(ordered_pathlist, ordered_path);
     775             :             else
     776             :             {
     777             :                 /*
     778             :                  * If we can't find a sorted path, just give up trying to
     779             :                  * generate a list of correctly sorted child paths.  This can
     780             :                  * happen when type coercion was added to the targetlist due
     781             :                  * to mismatching types from the union children.
     782             :                  */
     783        3524 :                 try_sorted = false;
     784             :             }
     785             :         }
     786             : 
     787       13104 :         if (consider_parallel)
     788             :         {
     789        9482 :             if (!rel->consider_parallel)
     790             :             {
     791        3382 :                 consider_parallel = false;
     792        3382 :                 partial_paths_valid = false;
     793             :             }
     794        6100 :             else if (rel->partial_pathlist == NIL)
     795        6088 :                 partial_paths_valid = false;
     796             :             else
     797          12 :                 partial_pathlist = lappend(partial_pathlist,
     798          12 :                                            linitial(rel->partial_pathlist));
     799             :         }
     800             : 
     801       13104 :         relids = bms_union(relids, rel->relids);
     802             :     }
     803             : 
     804             :     /* Build result relation. */
     805        4652 :     result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
     806        4652 :     result_rel->reltarget = create_pathtarget(root, tlist);
     807        4652 :     result_rel->consider_parallel = consider_parallel;
     808        4652 :     result_rel->consider_startup = (root->tuple_fraction > 0);
     809             : 
     810             :     /*
     811             :      * Append the child results together using the cheapest paths from each
     812             :      * union child.
     813             :      */
     814        4652 :     apath = (Path *) create_append_path(root, result_rel, cheapest_pathlist,
     815             :                                         NIL, NIL, NULL, 0, false, -1);
     816             : 
     817             :     /*
     818             :      * Estimate number of groups.  For now we just assume the output is unique
     819             :      * --- this is certainly true for the UNION case, and we want worst-case
     820             :      * estimates anyway.
     821             :      */
     822        4652 :     result_rel->rows = apath->rows;
     823             : 
     824             :     /*
     825             :      * Now consider doing the same thing using the partial paths plus Append
     826             :      * plus Gather.
     827             :      */
     828        4652 :     if (partial_paths_valid)
     829             :     {
     830             :         Path       *papath;
     831           6 :         int         parallel_workers = 0;
     832             : 
     833             :         /* Find the highest number of workers requested for any subpath. */
     834          18 :         foreach(lc, partial_pathlist)
     835             :         {
     836          12 :             Path       *subpath = lfirst(lc);
     837             : 
     838          12 :             parallel_workers = Max(parallel_workers,
     839             :                                    subpath->parallel_workers);
     840             :         }
     841             :         Assert(parallel_workers > 0);
     842             : 
     843             :         /*
     844             :          * If the use of parallel append is permitted, always request at least
     845             :          * log2(# of children) paths.  We assume it can be useful to have
     846             :          * extra workers in this case because they will be spread out across
     847             :          * the children.  The precise formula is just a guess; see
     848             :          * add_paths_to_append_rel.
     849             :          */
     850           6 :         if (enable_parallel_append)
     851             :         {
     852           6 :             parallel_workers = Max(parallel_workers,
     853             :                                    pg_leftmost_one_pos32(list_length(partial_pathlist)) + 1);
     854           6 :             parallel_workers = Min(parallel_workers,
     855             :                                    max_parallel_workers_per_gather);
     856             :         }
     857             :         Assert(parallel_workers > 0);
     858             : 
     859             :         papath = (Path *)
     860           6 :             create_append_path(root, result_rel, NIL, partial_pathlist,
     861             :                                NIL, NULL, parallel_workers,
     862             :                                enable_parallel_append, -1);
     863             :         gpath = (Path *)
     864           6 :             create_gather_path(root, result_rel, papath,
     865           6 :                                result_rel->reltarget, NULL, NULL);
     866             :     }
     867             : 
     868        4652 :     if (!op->all)
     869             :     {
     870             :         double      dNumGroups;
     871        3848 :         bool        can_sort = grouping_is_sortable(groupList);
     872        3848 :         bool        can_hash = grouping_is_hashable(groupList);
     873             : 
     874             :         /*
     875             :          * XXX for the moment, take the number of distinct groups as equal to
     876             :          * the total input size, i.e., the worst case.  This is too
     877             :          * conservative, but it's not clear how to get a decent estimate of
     878             :          * the true size.  One should note as well the propensity of novices
     879             :          * to write UNION rather than UNION ALL even when they don't expect
     880             :          * any duplicates...
     881             :          */
     882        3848 :         dNumGroups = apath->rows;
     883             : 
     884        3848 :         if (can_hash)
     885             :         {
     886             :             Path       *path;
     887             : 
     888             :             /*
     889             :              * Try a hash aggregate plan on 'apath'.  This is the cheapest
     890             :              * available path containing each append child.
     891             :              */
     892        3776 :             path = (Path *) create_agg_path(root,
     893             :                                             result_rel,
     894             :                                             apath,
     895             :                                             create_pathtarget(root, tlist),
     896             :                                             AGG_HASHED,
     897             :                                             AGGSPLIT_SIMPLE,
     898             :                                             groupList,
     899             :                                             NIL,
     900             :                                             NULL,
     901             :                                             dNumGroups);
     902        3776 :             add_path(result_rel, path);
     903             : 
     904             :             /* Try hash aggregate on the Gather path, if valid */
     905        3776 :             if (gpath != NULL)
     906             :             {
     907             :                 /* Hashed aggregate plan --- no sort needed */
     908           6 :                 path = (Path *) create_agg_path(root,
     909             :                                                 result_rel,
     910             :                                                 gpath,
     911             :                                                 create_pathtarget(root, tlist),
     912             :                                                 AGG_HASHED,
     913             :                                                 AGGSPLIT_SIMPLE,
     914             :                                                 groupList,
     915             :                                                 NIL,
     916             :                                                 NULL,
     917             :                                                 dNumGroups);
     918           6 :                 add_path(result_rel, path);
     919             :             }
     920             :         }
     921             : 
     922        3848 :         if (can_sort)
     923             :         {
     924        3744 :             Path       *path = apath;
     925             : 
     926             :             /* Try Sort -> Unique on the Append path */
     927        3744 :             if (groupList != NIL)
     928        3714 :                 path = (Path *) create_sort_path(root, result_rel, path,
     929             :                                                  make_pathkeys_for_sortclauses(root, groupList, tlist),
     930             :                                                  -1.0);
     931             : 
     932        3744 :             path = (Path *) create_upper_unique_path(root,
     933             :                                                      result_rel,
     934             :                                                      path,
     935        3744 :                                                      list_length(path->pathkeys),
     936             :                                                      dNumGroups);
     937             : 
     938        3744 :             add_path(result_rel, path);
     939             : 
     940             :             /* Try Sort -> Unique on the Gather path, if set */
     941        3744 :             if (gpath != NULL)
     942             :             {
     943           6 :                 path = gpath;
     944             : 
     945           6 :                 path = (Path *) create_sort_path(root, result_rel, path,
     946             :                                                  make_pathkeys_for_sortclauses(root, groupList, tlist),
     947             :                                                  -1.0);
     948             : 
     949           6 :                 path = (Path *) create_upper_unique_path(root,
     950             :                                                          result_rel,
     951             :                                                          path,
     952           6 :                                                          list_length(path->pathkeys),
     953             :                                                          dNumGroups);
     954           6 :                 add_path(result_rel, path);
     955             :             }
     956             :         }
     957             : 
     958             :         /*
     959             :          * Try making a MergeAppend path if we managed to find a path with the
     960             :          * correct pathkeys in each union child query.
     961             :          */
     962        3848 :         if (try_sorted && groupList != NIL)
     963             :         {
     964             :             Path       *path;
     965             : 
     966         190 :             path = (Path *) create_merge_append_path(root,
     967             :                                                      result_rel,
     968             :                                                      ordered_pathlist,
     969             :                                                      union_pathkeys,
     970             :                                                      NULL);
     971             : 
     972             :             /* and make the MergeAppend unique */
     973         190 :             path = (Path *) create_upper_unique_path(root,
     974             :                                                      result_rel,
     975             :                                                      path,
     976             :                                                      list_length(tlist),
     977             :                                                      dNumGroups);
     978             : 
     979         190 :             add_path(result_rel, path);
     980             :         }
     981             :     }
     982             :     else
     983             :     {
     984             :         /* UNION ALL */
     985         804 :         add_path(result_rel, apath);
     986             : 
     987         804 :         if (gpath != NULL)
     988           0 :             add_path(result_rel, gpath);
     989             :     }
     990             : 
     991        4652 :     return result_rel;
     992             : }
     993             : 
     994             : /*
     995             :  * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
     996             :  */
     997             : static RelOptInfo *
     998         662 : generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
     999             :                         List *refnames_tlist,
    1000             :                         List **pTargetList)
    1001             : {
    1002             :     RelOptInfo *result_rel;
    1003             :     RelOptInfo *lrel,
    1004             :                *rrel;
    1005         662 :     double      save_fraction = root->tuple_fraction;
    1006             :     Path       *lpath,
    1007             :                *rpath,
    1008             :                *path;
    1009             :     List       *lpath_tlist,
    1010             :                *rpath_tlist,
    1011             :                *tlist,
    1012             :                *groupList;
    1013             :     bool        lpath_trivial_tlist,
    1014             :                 rpath_trivial_tlist,
    1015             :                 result_trivial_tlist;
    1016         662 :     List       *nonunion_pathkeys = NIL;
    1017             :     double      dLeftGroups,
    1018             :                 dRightGroups,
    1019             :                 dNumGroups,
    1020             :                 dNumOutputRows;
    1021             :     bool        can_sort;
    1022             :     bool        can_hash;
    1023             :     SetOpCmd    cmd;
    1024             : 
    1025             :     /*
    1026             :      * Tell children to fetch all tuples.
    1027             :      */
    1028         662 :     root->tuple_fraction = 0.0;
    1029             : 
    1030             :     /* Recurse on children */
    1031         662 :     lrel = recurse_set_operations(op->larg, root,
    1032             :                                   op,
    1033             :                                   op->colTypes, op->colCollations,
    1034             :                                   refnames_tlist,
    1035             :                                   &lpath_tlist,
    1036             :                                   &lpath_trivial_tlist);
    1037             : 
    1038         662 :     rrel = recurse_set_operations(op->rarg, root,
    1039             :                                   op,
    1040             :                                   op->colTypes, op->colCollations,
    1041             :                                   refnames_tlist,
    1042             :                                   &rpath_tlist,
    1043             :                                   &rpath_trivial_tlist);
    1044             : 
    1045             :     /*
    1046             :      * Generate tlist for SetOp plan node.
    1047             :      *
    1048             :      * The tlist for a SetOp plan isn't important so far as the SetOp is
    1049             :      * concerned, but we must make it look real anyway for the benefit of the
    1050             :      * next plan level up.
    1051             :      */
    1052         662 :     tlist = generate_setop_tlist(op->colTypes, op->colCollations,
    1053             :                                  0, false, lpath_tlist, refnames_tlist,
    1054             :                                  &result_trivial_tlist);
    1055             : 
    1056             :     /* We should not have needed any type coercions in the tlist */
    1057             :     Assert(result_trivial_tlist);
    1058             : 
    1059         662 :     *pTargetList = tlist;
    1060             : 
    1061             :     /* Identify the grouping semantics */
    1062         662 :     groupList = generate_setop_grouplist(op, tlist);
    1063             : 
    1064             :     /* Check whether the operators support sorting or hashing */
    1065         662 :     can_sort = grouping_is_sortable(groupList);
    1066         662 :     can_hash = grouping_is_hashable(groupList);
    1067         662 :     if (!can_sort && !can_hash)
    1068           0 :         ereport(ERROR,
    1069             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1070             :         /* translator: %s is INTERSECT or EXCEPT */
    1071             :                  errmsg("could not implement %s",
    1072             :                         (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT"),
    1073             :                  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
    1074             : 
    1075         662 :     if (can_sort)
    1076             :     {
    1077             :         /* Determine the pathkeys for sorting by the whole target list */
    1078         662 :         nonunion_pathkeys = make_pathkeys_for_sortclauses(root, groupList,
    1079             :                                                           tlist);
    1080             : 
    1081         662 :         root->query_pathkeys = nonunion_pathkeys;
    1082             :     }
    1083             : 
    1084             :     /*
    1085             :      * Now that we've got all that info, we can build the child paths.
    1086             :      */
    1087         662 :     if (lrel->rtekind == RTE_SUBQUERY)
    1088         638 :         build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
    1089             :                                 nonunion_pathkeys, &dLeftGroups);
    1090             :     else
    1091          24 :         dLeftGroups = lrel->rows;
    1092         662 :     if (rrel->rtekind == RTE_SUBQUERY)
    1093         656 :         build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
    1094             :                                 nonunion_pathkeys, &dRightGroups);
    1095             :     else
    1096           6 :         dRightGroups = rrel->rows;
    1097             : 
    1098             :     /* Undo effects of forcing tuple_fraction to 0 */
    1099         662 :     root->tuple_fraction = save_fraction;
    1100             : 
    1101             :     /*
    1102             :      * For EXCEPT, we must put the left input first.  For INTERSECT, either
    1103             :      * order should give the same results, and we prefer to put the smaller
    1104             :      * input first in order to (a) minimize the size of the hash table in the
    1105             :      * hashing case, and (b) improve our chances of exploiting the executor's
    1106             :      * fast path for empty left-hand input.  "Smaller" means the one with the
    1107             :      * fewer groups.
    1108             :      */
    1109         662 :     if (op->op != SETOP_EXCEPT && dLeftGroups > dRightGroups)
    1110             :     {
    1111             :         /* need to swap the two inputs */
    1112             :         RelOptInfo *tmprel;
    1113             :         List       *tmplist;
    1114             :         double      tmpd;
    1115             : 
    1116          30 :         tmprel = lrel;
    1117          30 :         lrel = rrel;
    1118          30 :         rrel = tmprel;
    1119          30 :         tmplist = lpath_tlist;
    1120          30 :         lpath_tlist = rpath_tlist;
    1121          30 :         rpath_tlist = tmplist;
    1122          30 :         tmpd = dLeftGroups;
    1123          30 :         dLeftGroups = dRightGroups;
    1124          30 :         dRightGroups = tmpd;
    1125             :     }
    1126             : 
    1127         662 :     lpath = lrel->cheapest_total_path;
    1128         662 :     rpath = rrel->cheapest_total_path;
    1129             : 
    1130             :     /* Build result relation. */
    1131         662 :     result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
    1132         662 :                                  bms_union(lrel->relids, rrel->relids));
    1133         662 :     result_rel->reltarget = create_pathtarget(root, tlist);
    1134             : 
    1135             :     /*
    1136             :      * Estimate number of distinct groups that we'll need hashtable entries
    1137             :      * for; this is the size of the left-hand input for EXCEPT, or the smaller
    1138             :      * input for INTERSECT.  Also estimate the number of eventual output rows.
    1139             :      * In non-ALL cases, we estimate each group produces one output row; in
    1140             :      * ALL cases use the relevant relation size.  These are worst-case
    1141             :      * estimates, of course, but we need to be conservative.
    1142             :      */
    1143         662 :     if (op->op == SETOP_EXCEPT)
    1144             :     {
    1145         434 :         dNumGroups = dLeftGroups;
    1146         434 :         dNumOutputRows = op->all ? lpath->rows : dNumGroups;
    1147             :     }
    1148             :     else
    1149             :     {
    1150         228 :         dNumGroups = dLeftGroups;
    1151         228 :         dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
    1152             :     }
    1153         662 :     result_rel->rows = dNumOutputRows;
    1154             : 
    1155             :     /* Select the SetOpCmd type */
    1156         662 :     switch (op->op)
    1157             :     {
    1158         228 :         case SETOP_INTERSECT:
    1159         228 :             cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
    1160         228 :             break;
    1161         434 :         case SETOP_EXCEPT:
    1162         434 :             cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
    1163         434 :             break;
    1164           0 :         default:
    1165           0 :             elog(ERROR, "unrecognized set op: %d", (int) op->op);
    1166             :             cmd = SETOPCMD_INTERSECT;   /* keep compiler quiet */
    1167             :             break;
    1168             :     }
    1169             : 
    1170             :     /*
    1171             :      * If we can hash, that just requires a SetOp atop the cheapest inputs.
    1172             :      */
    1173         662 :     if (can_hash)
    1174             :     {
    1175         602 :         path = (Path *) create_setop_path(root,
    1176             :                                           result_rel,
    1177             :                                           lpath,
    1178             :                                           rpath,
    1179             :                                           cmd,
    1180             :                                           SETOP_HASHED,
    1181             :                                           groupList,
    1182             :                                           dNumGroups,
    1183             :                                           dNumOutputRows);
    1184         602 :         add_path(result_rel, path);
    1185             :     }
    1186             : 
    1187             :     /*
    1188             :      * If we can sort, generate the cheapest sorted input paths, and add a
    1189             :      * SetOp atop those.
    1190             :      */
    1191         662 :     if (can_sort)
    1192             :     {
    1193             :         List       *pathkeys;
    1194             :         Path       *slpath,
    1195             :                    *srpath;
    1196             : 
    1197             :         /* First the left input ... */
    1198         662 :         pathkeys = make_pathkeys_for_sortclauses(root,
    1199             :                                                  groupList,
    1200             :                                                  lpath_tlist);
    1201         662 :         if (pathkeys_contained_in(pathkeys, lpath->pathkeys))
    1202          96 :             slpath = lpath;     /* cheapest path is already sorted */
    1203             :         else
    1204             :         {
    1205         566 :             slpath = get_cheapest_path_for_pathkeys(lrel->pathlist,
    1206             :                                                     nonunion_pathkeys,
    1207             :                                                     NULL,
    1208             :                                                     TOTAL_COST,
    1209             :                                                     false);
    1210             :             /* Subquery failed to produce any presorted paths? */
    1211         566 :             if (slpath == NULL)
    1212         144 :                 slpath = (Path *) create_sort_path(root,
    1213             :                                                    lpath->parent,
    1214             :                                                    lpath,
    1215             :                                                    pathkeys,
    1216             :                                                    -1.0);
    1217             :         }
    1218             : 
    1219             :         /* and now the same for the right. */
    1220         662 :         pathkeys = make_pathkeys_for_sortclauses(root,
    1221             :                                                  groupList,
    1222             :                                                  rpath_tlist);
    1223         662 :         if (pathkeys_contained_in(pathkeys, rpath->pathkeys))
    1224         108 :             srpath = rpath;     /* cheapest path is already sorted */
    1225             :         else
    1226             :         {
    1227         554 :             srpath = get_cheapest_path_for_pathkeys(rrel->pathlist,
    1228             :                                                     nonunion_pathkeys,
    1229             :                                                     NULL,
    1230             :                                                     TOTAL_COST,
    1231             :                                                     false);
    1232             :             /* Subquery failed to produce any presorted paths? */
    1233         554 :             if (srpath == NULL)
    1234         150 :                 srpath = (Path *) create_sort_path(root,
    1235             :                                                    rpath->parent,
    1236             :                                                    rpath,
    1237             :                                                    pathkeys,
    1238             :                                                    -1.0);
    1239             :         }
    1240             : 
    1241         662 :         path = (Path *) create_setop_path(root,
    1242             :                                           result_rel,
    1243             :                                           slpath,
    1244             :                                           srpath,
    1245             :                                           cmd,
    1246             :                                           SETOP_SORTED,
    1247             :                                           groupList,
    1248             :                                           dNumGroups,
    1249             :                                           dNumOutputRows);
    1250         662 :         add_path(result_rel, path);
    1251             :     }
    1252             : 
    1253         662 :     return result_rel;
    1254             : }
    1255             : 
    1256             : /*
    1257             :  * Pull up children of a UNION node that are identically-propertied UNIONs,
    1258             :  * and perform planning of the queries underneath the N-way UNION.
    1259             :  *
    1260             :  * The result is a list of RelOptInfos containing Paths for sub-nodes, with
    1261             :  * one entry for each descendant that is a leaf query or non-identical setop.
    1262             :  * We also return parallel lists of the childrens' targetlists and
    1263             :  * is-trivial-tlist flags.
    1264             :  *
    1265             :  * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
    1266             :  * output rows will be lost anyway.
    1267             :  */
    1268             : static List *
    1269        4652 : plan_union_children(PlannerInfo *root,
    1270             :                     SetOperationStmt *top_union,
    1271             :                     List *refnames_tlist,
    1272             :                     List **tlist_list,
    1273             :                     List **istrivial_tlist)
    1274             : {
    1275        4652 :     List       *pending_rels = list_make1(top_union);
    1276        4652 :     List       *result = NIL;
    1277             :     List       *child_tlist;
    1278             :     bool        trivial_tlist;
    1279             : 
    1280        4652 :     *tlist_list = NIL;
    1281        4652 :     *istrivial_tlist = NIL;
    1282             : 
    1283       26208 :     while (pending_rels != NIL)
    1284             :     {
    1285       21556 :         Node       *setOp = linitial(pending_rels);
    1286             : 
    1287       21556 :         pending_rels = list_delete_first(pending_rels);
    1288             : 
    1289       21556 :         if (IsA(setOp, SetOperationStmt))
    1290             :         {
    1291        8572 :             SetOperationStmt *op = (SetOperationStmt *) setOp;
    1292             : 
    1293        8572 :             if (op->op == top_union->op &&
    1294       16946 :                 (op->all == top_union->all || op->all) &&
    1295       16916 :                 equal(op->colTypes, top_union->colTypes) &&
    1296        8452 :                 equal(op->colCollations, top_union->colCollations))
    1297             :             {
    1298             :                 /* Same UNION, so fold children into parent */
    1299        8452 :                 pending_rels = lcons(op->rarg, pending_rels);
    1300        8452 :                 pending_rels = lcons(op->larg, pending_rels);
    1301        8452 :                 continue;
    1302             :             }
    1303             :         }
    1304             : 
    1305             :         /*
    1306             :          * Not same, so plan this child separately.
    1307             :          *
    1308             :          * If top_union isn't a UNION ALL, then we are interested in sorted
    1309             :          * output from the child, so pass top_union as parentOp.  Note that
    1310             :          * this isn't necessarily the child node's immediate SetOperationStmt
    1311             :          * parent, but that's fine: it's the effective parent.
    1312             :          */
    1313       13104 :         result = lappend(result, recurse_set_operations(setOp, root,
    1314       13104 :                                                         top_union->all ? NULL : top_union,
    1315             :                                                         top_union->colTypes,
    1316             :                                                         top_union->colCollations,
    1317             :                                                         refnames_tlist,
    1318             :                                                         &child_tlist,
    1319             :                                                         &trivial_tlist));
    1320       13104 :         *tlist_list = lappend(*tlist_list, child_tlist);
    1321       13104 :         *istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
    1322             :     }
    1323             : 
    1324        4652 :     return result;
    1325             : }
    1326             : 
    1327             : /*
    1328             :  * postprocess_setop_rel - perform steps required after adding paths
    1329             :  */
    1330             : static void
    1331       22610 : postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
    1332             : {
    1333             :     /*
    1334             :      * We don't currently worry about allowing FDWs to contribute paths to
    1335             :      * this relation, but give extensions a chance.
    1336             :      */
    1337       22610 :     if (create_upper_paths_hook)
    1338           0 :         (*create_upper_paths_hook) (root, UPPERREL_SETOP,
    1339             :                                     NULL, rel, NULL);
    1340             : 
    1341             :     /* Select cheapest path */
    1342       22610 :     set_cheapest(rel);
    1343       22610 : }
    1344             : 
    1345             : /*
    1346             :  * Generate targetlist for a set-operation plan node
    1347             :  *
    1348             :  * colTypes: OID list of set-op's result column datatypes
    1349             :  * colCollations: OID list of set-op's result column collations
    1350             :  * varno: varno to use in generated Vars
    1351             :  * hack_constants: true to copy up constants (see comments in code)
    1352             :  * input_tlist: targetlist of this node's input node
    1353             :  * refnames_tlist: targetlist to take column names from
    1354             :  * trivial_tlist: output parameter, set to true if targetlist is trivial
    1355             :  */
    1356             : static List *
    1357       16966 : generate_setop_tlist(List *colTypes, List *colCollations,
    1358             :                      Index varno,
    1359             :                      bool hack_constants,
    1360             :                      List *input_tlist,
    1361             :                      List *refnames_tlist,
    1362             :                      bool *trivial_tlist)
    1363             : {
    1364       16966 :     List       *tlist = NIL;
    1365       16966 :     int         resno = 1;
    1366             :     ListCell   *ctlc,
    1367             :                *cclc,
    1368             :                *itlc,
    1369             :                *rtlc;
    1370             :     TargetEntry *tle;
    1371             :     Node       *expr;
    1372             : 
    1373       16966 :     *trivial_tlist = true;      /* until proven differently */
    1374             : 
    1375       68770 :     forfour(ctlc, colTypes, cclc, colCollations,
    1376             :             itlc, input_tlist, rtlc, refnames_tlist)
    1377             :     {
    1378       51804 :         Oid         colType = lfirst_oid(ctlc);
    1379       51804 :         Oid         colColl = lfirst_oid(cclc);
    1380       51804 :         TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
    1381       51804 :         TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
    1382             : 
    1383             :         Assert(inputtle->resno == resno);
    1384             :         Assert(reftle->resno == resno);
    1385             :         Assert(!inputtle->resjunk);
    1386             :         Assert(!reftle->resjunk);
    1387             : 
    1388             :         /*
    1389             :          * Generate columns referencing input columns and having appropriate
    1390             :          * data types and column names.  Insert datatype coercions where
    1391             :          * necessary.
    1392             :          *
    1393             :          * HACK: constants in the input's targetlist are copied up as-is
    1394             :          * rather than being referenced as subquery outputs.  This is mainly
    1395             :          * to ensure that when we try to coerce them to the output column's
    1396             :          * datatype, the right things happen for UNKNOWN constants.  But do
    1397             :          * this only at the first level of subquery-scan plans; we don't want
    1398             :          * phony constants appearing in the output tlists of upper-level
    1399             :          * nodes!
    1400             :          *
    1401             :          * Note that copying a constant doesn't in itself require us to mark
    1402             :          * the tlist nontrivial; see trivial_subqueryscan() in setrefs.c.
    1403             :          */
    1404       51804 :         if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
    1405       16210 :             expr = (Node *) inputtle->expr;
    1406             :         else
    1407      142376 :             expr = (Node *) makeVar(varno,
    1408       35594 :                                     inputtle->resno,
    1409       35594 :                                     exprType((Node *) inputtle->expr),
    1410       35594 :                                     exprTypmod((Node *) inputtle->expr),
    1411       35594 :                                     exprCollation((Node *) inputtle->expr),
    1412             :                                     0);
    1413             : 
    1414       51804 :         if (exprType(expr) != colType)
    1415             :         {
    1416             :             /*
    1417             :              * Note: it's not really cool to be applying coerce_to_common_type
    1418             :              * here; one notable point is that assign_expr_collations never
    1419             :              * gets run on any generated nodes.  For the moment that's not a
    1420             :              * problem because we force the correct exposed collation below.
    1421             :              * It would likely be best to make the parser generate the correct
    1422             :              * output tlist for every set-op to begin with, though.
    1423             :              */
    1424        1842 :             expr = coerce_to_common_type(NULL,  /* no UNKNOWNs here */
    1425             :                                          expr,
    1426             :                                          colType,
    1427             :                                          "UNION/INTERSECT/EXCEPT");
    1428        1842 :             *trivial_tlist = false; /* the coercion makes it not trivial */
    1429             :         }
    1430             : 
    1431             :         /*
    1432             :          * Ensure the tlist entry's exposed collation matches the set-op. This
    1433             :          * is necessary because plan_set_operations() reports the result
    1434             :          * ordering as a list of SortGroupClauses, which don't carry collation
    1435             :          * themselves but just refer to tlist entries.  If we don't show the
    1436             :          * right collation then planner.c might do the wrong thing in
    1437             :          * higher-level queries.
    1438             :          *
    1439             :          * Note we use RelabelType, not CollateExpr, since this expression
    1440             :          * will reach the executor without any further processing.
    1441             :          */
    1442       51804 :         if (exprCollation(expr) != colColl)
    1443             :         {
    1444       13214 :             expr = applyRelabelType(expr,
    1445             :                                     exprType(expr), exprTypmod(expr), colColl,
    1446             :                                     COERCE_IMPLICIT_CAST, -1, false);
    1447       13214 :             *trivial_tlist = false; /* the relabel makes it not trivial */
    1448             :         }
    1449             : 
    1450      103608 :         tle = makeTargetEntry((Expr *) expr,
    1451       51804 :                               (AttrNumber) resno++,
    1452       51804 :                               pstrdup(reftle->resname),
    1453             :                               false);
    1454             : 
    1455             :         /*
    1456             :          * By convention, all output columns in a setop tree have
    1457             :          * ressortgroupref equal to their resno.  In some cases the ref isn't
    1458             :          * needed, but this is a cleaner way than modifying the tlist later.
    1459             :          */
    1460       51804 :         tle->ressortgroupref = tle->resno;
    1461             : 
    1462       51804 :         tlist = lappend(tlist, tle);
    1463             :     }
    1464             : 
    1465       16966 :     return tlist;
    1466             : }
    1467             : 
    1468             : /*
    1469             :  * Generate targetlist for a set-operation Append node
    1470             :  *
    1471             :  * colTypes: OID list of set-op's result column datatypes
    1472             :  * colCollations: OID list of set-op's result column collations
    1473             :  * input_tlists: list of tlists for sub-plans of the Append
    1474             :  * refnames_tlist: targetlist to take column names from
    1475             :  *
    1476             :  * The entries in the Append's targetlist should always be simple Vars;
    1477             :  * we just have to make sure they have the right datatypes/typmods/collations.
    1478             :  * The Vars are always generated with varno 0.
    1479             :  *
    1480             :  * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
    1481             :  * cannot figure out a realistic width for the tlist we make here.  But we
    1482             :  * ought to refactor this code to produce a PathTarget directly, anyway.
    1483             :  */
    1484             : static List *
    1485        5662 : generate_append_tlist(List *colTypes, List *colCollations,
    1486             :                       List *input_tlists,
    1487             :                       List *refnames_tlist)
    1488             : {
    1489        5662 :     List       *tlist = NIL;
    1490        5662 :     int         resno = 1;
    1491             :     ListCell   *curColType;
    1492             :     ListCell   *curColCollation;
    1493             :     ListCell   *ref_tl_item;
    1494             :     int         colindex;
    1495             :     TargetEntry *tle;
    1496             :     Node       *expr;
    1497             :     ListCell   *tlistl;
    1498             :     int32      *colTypmods;
    1499             : 
    1500             :     /*
    1501             :      * First extract typmods to use.
    1502             :      *
    1503             :      * If the inputs all agree on type and typmod of a particular column, use
    1504             :      * that typmod; else use -1.
    1505             :      */
    1506        5662 :     colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
    1507             : 
    1508       20786 :     foreach(tlistl, input_tlists)
    1509             :     {
    1510       15124 :         List       *subtlist = (List *) lfirst(tlistl);
    1511             :         ListCell   *subtlistl;
    1512             : 
    1513       15124 :         curColType = list_head(colTypes);
    1514       15124 :         colindex = 0;
    1515       60520 :         foreach(subtlistl, subtlist)
    1516             :         {
    1517       45396 :             TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
    1518             : 
    1519             :             Assert(!subtle->resjunk);
    1520             :             Assert(curColType != NULL);
    1521       45396 :             if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
    1522             :             {
    1523             :                 /* If first subplan, copy the typmod; else compare */
    1524       45396 :                 int32       subtypmod = exprTypmod((Node *) subtle->expr);
    1525             : 
    1526       45396 :                 if (tlistl == list_head(input_tlists))
    1527       16092 :                     colTypmods[colindex] = subtypmod;
    1528       29304 :                 else if (subtypmod != colTypmods[colindex])
    1529          12 :                     colTypmods[colindex] = -1;
    1530             :             }
    1531             :             else
    1532             :             {
    1533             :                 /* types disagree, so force typmod to -1 */
    1534           0 :                 colTypmods[colindex] = -1;
    1535             :             }
    1536       45396 :             curColType = lnext(colTypes, curColType);
    1537       45396 :             colindex++;
    1538             :         }
    1539             :         Assert(curColType == NULL);
    1540             :     }
    1541             : 
    1542             :     /*
    1543             :      * Now we can build the tlist for the Append.
    1544             :      */
    1545        5662 :     colindex = 0;
    1546       21754 :     forthree(curColType, colTypes, curColCollation, colCollations,
    1547             :              ref_tl_item, refnames_tlist)
    1548             :     {
    1549       16092 :         Oid         colType = lfirst_oid(curColType);
    1550       16092 :         int32       colTypmod = colTypmods[colindex++];
    1551       16092 :         Oid         colColl = lfirst_oid(curColCollation);
    1552       16092 :         TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
    1553             : 
    1554             :         Assert(reftle->resno == resno);
    1555             :         Assert(!reftle->resjunk);
    1556       16092 :         expr = (Node *) makeVar(0,
    1557             :                                 resno,
    1558             :                                 colType,
    1559             :                                 colTypmod,
    1560             :                                 colColl,
    1561             :                                 0);
    1562       32184 :         tle = makeTargetEntry((Expr *) expr,
    1563       16092 :                               (AttrNumber) resno++,
    1564       16092 :                               pstrdup(reftle->resname),
    1565             :                               false);
    1566             : 
    1567             :         /*
    1568             :          * By convention, all output columns in a setop tree have
    1569             :          * ressortgroupref equal to their resno.  In some cases the ref isn't
    1570             :          * needed, but this is a cleaner way than modifying the tlist later.
    1571             :          */
    1572       16092 :         tle->ressortgroupref = tle->resno;
    1573             : 
    1574       16092 :         tlist = lappend(tlist, tle);
    1575             :     }
    1576             : 
    1577        5662 :     pfree(colTypmods);
    1578             : 
    1579        5662 :     return tlist;
    1580             : }
    1581             : 
    1582             : /*
    1583             :  * generate_setop_grouplist
    1584             :  *      Build a SortGroupClause list defining the sort/grouping properties
    1585             :  *      of the setop's output columns.
    1586             :  *
    1587             :  * Parse analysis already determined the properties and built a suitable
    1588             :  * list, except that the entries do not have sortgrouprefs set because
    1589             :  * the parser output representation doesn't include a tlist for each
    1590             :  * setop.  So what we need to do here is copy that list and install
    1591             :  * proper sortgrouprefs into it (copying those from the targetlist).
    1592             :  */
    1593             : static List *
    1594        5000 : generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
    1595             : {
    1596        5000 :     List       *grouplist = copyObject(op->groupClauses);
    1597             :     ListCell   *lg;
    1598             :     ListCell   *lt;
    1599             : 
    1600        5000 :     lg = list_head(grouplist);
    1601       19250 :     foreach(lt, targetlist)
    1602             :     {
    1603       14250 :         TargetEntry *tle = (TargetEntry *) lfirst(lt);
    1604             :         SortGroupClause *sgc;
    1605             : 
    1606             :         Assert(!tle->resjunk);
    1607             : 
    1608             :         /* non-resjunk columns should have sortgroupref = resno */
    1609             :         Assert(tle->ressortgroupref == tle->resno);
    1610             : 
    1611             :         /* non-resjunk columns should have grouping clauses */
    1612             :         Assert(lg != NULL);
    1613       14250 :         sgc = (SortGroupClause *) lfirst(lg);
    1614       14250 :         lg = lnext(grouplist, lg);
    1615             :         Assert(sgc->tleSortGroupRef == 0);
    1616             : 
    1617       14250 :         sgc->tleSortGroupRef = tle->ressortgroupref;
    1618             :     }
    1619             :     Assert(lg == NULL);
    1620        5000 :     return grouplist;
    1621             : }

Generated by: LCOV version 1.16