LCOV - code coverage report
Current view: top level - src/backend/optimizer/path - joinpath.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 576 588 98.0 %
Date: 2024-09-16 11:11:08 Functions: 19 19 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * joinpath.c
       4             :  *    Routines to find all possible paths for processing a set of joins
       5             :  *
       6             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/optimizer/path/joinpath.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include <math.h>
      18             : 
      19             : #include "executor/executor.h"
      20             : #include "foreign/fdwapi.h"
      21             : #include "nodes/nodeFuncs.h"
      22             : #include "optimizer/cost.h"
      23             : #include "optimizer/optimizer.h"
      24             : #include "optimizer/pathnode.h"
      25             : #include "optimizer/paths.h"
      26             : #include "optimizer/placeholder.h"
      27             : #include "optimizer/planmain.h"
      28             : #include "utils/lsyscache.h"
      29             : #include "utils/typcache.h"
      30             : 
      31             : /* Hook for plugins to get control in add_paths_to_joinrel() */
      32             : set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
      33             : 
      34             : /*
      35             :  * Paths parameterized by a parent rel can be considered to be parameterized
      36             :  * by any of its children, when we are performing partitionwise joins.  These
      37             :  * macros simplify checking for such cases.  Beware multiple eval of args.
      38             :  */
      39             : #define PATH_PARAM_BY_PARENT(path, rel) \
      40             :     ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
      41             :                                        (rel)->top_parent_relids))
      42             : #define PATH_PARAM_BY_REL_SELF(path, rel)  \
      43             :     ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
      44             : 
      45             : #define PATH_PARAM_BY_REL(path, rel)    \
      46             :     (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
      47             : 
      48             : static void try_partial_mergejoin_path(PlannerInfo *root,
      49             :                                        RelOptInfo *joinrel,
      50             :                                        Path *outer_path,
      51             :                                        Path *inner_path,
      52             :                                        List *pathkeys,
      53             :                                        List *mergeclauses,
      54             :                                        List *outersortkeys,
      55             :                                        List *innersortkeys,
      56             :                                        JoinType jointype,
      57             :                                        JoinPathExtraData *extra);
      58             : static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
      59             :                                  RelOptInfo *outerrel, RelOptInfo *innerrel,
      60             :                                  JoinType jointype, JoinPathExtraData *extra);
      61             : static inline bool clause_sides_match_join(RestrictInfo *rinfo,
      62             :                                            RelOptInfo *outerrel,
      63             :                                            RelOptInfo *innerrel);
      64             : static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
      65             :                                  RelOptInfo *outerrel, RelOptInfo *innerrel,
      66             :                                  JoinType jointype, JoinPathExtraData *extra);
      67             : static void consider_parallel_nestloop(PlannerInfo *root,
      68             :                                        RelOptInfo *joinrel,
      69             :                                        RelOptInfo *outerrel,
      70             :                                        RelOptInfo *innerrel,
      71             :                                        JoinType jointype,
      72             :                                        JoinPathExtraData *extra);
      73             : static void consider_parallel_mergejoin(PlannerInfo *root,
      74             :                                         RelOptInfo *joinrel,
      75             :                                         RelOptInfo *outerrel,
      76             :                                         RelOptInfo *innerrel,
      77             :                                         JoinType jointype,
      78             :                                         JoinPathExtraData *extra,
      79             :                                         Path *inner_cheapest_total);
      80             : static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
      81             :                                  RelOptInfo *outerrel, RelOptInfo *innerrel,
      82             :                                  JoinType jointype, JoinPathExtraData *extra);
      83             : static List *select_mergejoin_clauses(PlannerInfo *root,
      84             :                                       RelOptInfo *joinrel,
      85             :                                       RelOptInfo *outerrel,
      86             :                                       RelOptInfo *innerrel,
      87             :                                       List *restrictlist,
      88             :                                       JoinType jointype,
      89             :                                       bool *mergejoin_allowed);
      90             : static void generate_mergejoin_paths(PlannerInfo *root,
      91             :                                      RelOptInfo *joinrel,
      92             :                                      RelOptInfo *innerrel,
      93             :                                      Path *outerpath,
      94             :                                      JoinType jointype,
      95             :                                      JoinPathExtraData *extra,
      96             :                                      bool useallclauses,
      97             :                                      Path *inner_cheapest_total,
      98             :                                      List *merge_pathkeys,
      99             :                                      bool is_partial);
     100             : 
     101             : 
     102             : /*
     103             :  * add_paths_to_joinrel
     104             :  *    Given a join relation and two component rels from which it can be made,
     105             :  *    consider all possible paths that use the two component rels as outer
     106             :  *    and inner rel respectively.  Add these paths to the join rel's pathlist
     107             :  *    if they survive comparison with other paths (and remove any existing
     108             :  *    paths that are dominated by these paths).
     109             :  *
     110             :  * Modifies the pathlist field of the joinrel node to contain the best
     111             :  * paths found so far.
     112             :  *
     113             :  * jointype is not necessarily the same as sjinfo->jointype; it might be
     114             :  * "flipped around" if we are considering joining the rels in the opposite
     115             :  * direction from what's indicated in sjinfo.
     116             :  *
     117             :  * Also, this routine and others in this module accept the special JoinTypes
     118             :  * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
     119             :  * unique-ify the outer or inner relation and then apply a regular inner
     120             :  * join.  These values are not allowed to propagate outside this module,
     121             :  * however.  Path cost estimation code may need to recognize that it's
     122             :  * dealing with such a case --- the combination of nominal jointype INNER
     123             :  * with sjinfo->jointype == JOIN_SEMI indicates that.
     124             :  */
     125             : void
     126      525716 : add_paths_to_joinrel(PlannerInfo *root,
     127             :                      RelOptInfo *joinrel,
     128             :                      RelOptInfo *outerrel,
     129             :                      RelOptInfo *innerrel,
     130             :                      JoinType jointype,
     131             :                      SpecialJoinInfo *sjinfo,
     132             :                      List *restrictlist)
     133             : {
     134             :     JoinPathExtraData extra;
     135      525716 :     bool        mergejoin_allowed = true;
     136             :     ListCell   *lc;
     137             :     Relids      joinrelids;
     138             : 
     139             :     /*
     140             :      * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
     141             :      * between child relations, even if there is a SpecialJoinInfo node for
     142             :      * the join between the topmost parents. So, while calculating Relids set
     143             :      * representing the restriction, consider relids of topmost parent of
     144             :      * partitions.
     145             :      */
     146      525716 :     if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
     147       11176 :         joinrelids = joinrel->top_parent_relids;
     148             :     else
     149      514540 :         joinrelids = joinrel->relids;
     150             : 
     151      525716 :     extra.restrictlist = restrictlist;
     152      525716 :     extra.mergeclause_list = NIL;
     153      525716 :     extra.sjinfo = sjinfo;
     154      525716 :     extra.param_source_rels = NULL;
     155             : 
     156             :     /*
     157             :      * See if the inner relation is provably unique for this outer rel.
     158             :      *
     159             :      * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
     160             :      * matter since the executor can make the equivalent optimization anyway;
     161             :      * we need not expend planner cycles on proofs.  For JOIN_UNIQUE_INNER, we
     162             :      * must be considering a semijoin whose inner side is not provably unique
     163             :      * (else reduce_unique_semijoins would've simplified it), so there's no
     164             :      * point in calling innerrel_is_unique.  However, if the LHS covers all of
     165             :      * the semijoin's min_lefthand, then it's appropriate to set inner_unique
     166             :      * because the path produced by create_unique_path will be unique relative
     167             :      * to the LHS.  (If we have an LHS that's only part of the min_lefthand,
     168             :      * that is *not* true.)  For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
     169             :      * letting that value escape this module.
     170             :      */
     171      525716 :     switch (jointype)
     172             :     {
     173        8688 :         case JOIN_SEMI:
     174             :         case JOIN_ANTI:
     175             : 
     176             :             /*
     177             :              * XXX it may be worth proving this to allow a Memoize to be
     178             :              * considered for Nested Loop Semi/Anti Joins.
     179             :              */
     180        8688 :             extra.inner_unique = false; /* well, unproven */
     181        8688 :             break;
     182        4258 :         case JOIN_UNIQUE_INNER:
     183        8516 :             extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
     184        4258 :                                                outerrel->relids);
     185        4258 :             break;
     186        4258 :         case JOIN_UNIQUE_OUTER:
     187        4258 :             extra.inner_unique = innerrel_is_unique(root,
     188             :                                                     joinrel->relids,
     189             :                                                     outerrel->relids,
     190             :                                                     innerrel,
     191             :                                                     JOIN_INNER,
     192             :                                                     restrictlist,
     193             :                                                     false);
     194        4258 :             break;
     195      508512 :         default:
     196      508512 :             extra.inner_unique = innerrel_is_unique(root,
     197             :                                                     joinrel->relids,
     198             :                                                     outerrel->relids,
     199             :                                                     innerrel,
     200             :                                                     jointype,
     201             :                                                     restrictlist,
     202             :                                                     false);
     203      508512 :             break;
     204             :     }
     205             : 
     206             :     /*
     207             :      * Find potential mergejoin clauses.  We can skip this if we are not
     208             :      * interested in doing a mergejoin.  However, mergejoin may be our only
     209             :      * way of implementing a full outer join, so override enable_mergejoin if
     210             :      * it's a full join.
     211             :      */
     212      525716 :     if (enable_mergejoin || jointype == JOIN_FULL)
     213      524188 :         extra.mergeclause_list = select_mergejoin_clauses(root,
     214             :                                                           joinrel,
     215             :                                                           outerrel,
     216             :                                                           innerrel,
     217             :                                                           restrictlist,
     218             :                                                           jointype,
     219             :                                                           &mergejoin_allowed);
     220             : 
     221             :     /*
     222             :      * If it's SEMI, ANTI, or inner_unique join, compute correction factors
     223             :      * for cost estimation.  These will be the same for all paths.
     224             :      */
     225      525716 :     if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
     226      160974 :         compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel,
     227             :                                        jointype, sjinfo, restrictlist,
     228             :                                        &extra.semifactors);
     229             : 
     230             :     /*
     231             :      * Decide whether it's sensible to generate parameterized paths for this
     232             :      * joinrel, and if so, which relations such paths should require.  There
     233             :      * is usually no need to create a parameterized result path unless there
     234             :      * is a join order restriction that prevents joining one of our input rels
     235             :      * directly to the parameter source rel instead of joining to the other
     236             :      * input rel.  (But see allow_star_schema_join().)  This restriction
     237             :      * reduces the number of parameterized paths we have to deal with at
     238             :      * higher join levels, without compromising the quality of the resulting
     239             :      * plan.  We express the restriction as a Relids set that must overlap the
     240             :      * parameterization of any proposed join path.  Note: param_source_rels
     241             :      * should contain only baserels, not OJ relids, so starting from
     242             :      * all_baserels not all_query_rels is correct.
     243             :      */
     244     1085948 :     foreach(lc, root->join_info_list)
     245             :     {
     246      560232 :         SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
     247             : 
     248             :         /*
     249             :          * SJ is relevant to this join if we have some part of its RHS
     250             :          * (possibly not all of it), and haven't yet joined to its LHS.  (This
     251             :          * test is pretty simplistic, but should be sufficient considering the
     252             :          * join has already been proven legal.)  If the SJ is relevant, it
     253             :          * presents constraints for joining to anything not in its RHS.
     254             :          */
     255      560232 :         if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
     256      372788 :             !bms_overlap(joinrelids, sjinfo2->min_lefthand))
     257       19664 :             extra.param_source_rels = bms_join(extra.param_source_rels,
     258       19664 :                                                bms_difference(root->all_baserels,
     259       19664 :                                                               sjinfo2->min_righthand));
     260             : 
     261             :         /* full joins constrain both sides symmetrically */
     262      565088 :         if (sjinfo2->jointype == JOIN_FULL &&
     263        4856 :             bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
     264        4800 :             !bms_overlap(joinrelids, sjinfo2->min_righthand))
     265         672 :             extra.param_source_rels = bms_join(extra.param_source_rels,
     266         672 :                                                bms_difference(root->all_baserels,
     267         672 :                                                               sjinfo2->min_lefthand));
     268             :     }
     269             : 
     270             :     /*
     271             :      * However, when a LATERAL subquery is involved, there will simply not be
     272             :      * any paths for the joinrel that aren't parameterized by whatever the
     273             :      * subquery is parameterized by, unless its parameterization is resolved
     274             :      * within the joinrel.  So we might as well allow additional dependencies
     275             :      * on whatever residual lateral dependencies the joinrel will have.
     276             :      */
     277     1051432 :     extra.param_source_rels = bms_add_members(extra.param_source_rels,
     278      525716 :                                               joinrel->lateral_relids);
     279             : 
     280             :     /*
     281             :      * 1. Consider mergejoin paths where both relations must be explicitly
     282             :      * sorted.  Skip this if we can't mergejoin.
     283             :      */
     284      525716 :     if (mergejoin_allowed)
     285      514284 :         sort_inner_and_outer(root, joinrel, outerrel, innerrel,
     286             :                              jointype, &extra);
     287             : 
     288             :     /*
     289             :      * 2. Consider paths where the outer relation need not be explicitly
     290             :      * sorted. This includes both nestloops and mergejoins where the outer
     291             :      * path is already ordered.  Again, skip this if we can't mergejoin.
     292             :      * (That's okay because we know that nestloop can't handle
     293             :      * right/right-anti/right-semi/full joins at all, so it wouldn't work in
     294             :      * the prohibited cases either.)
     295             :      */
     296      525716 :     if (mergejoin_allowed)
     297      514284 :         match_unsorted_outer(root, joinrel, outerrel, innerrel,
     298             :                              jointype, &extra);
     299             : 
     300             : #ifdef NOT_USED
     301             : 
     302             :     /*
     303             :      * 3. Consider paths where the inner relation need not be explicitly
     304             :      * sorted.  This includes mergejoins only (nestloops were already built in
     305             :      * match_unsorted_outer).
     306             :      *
     307             :      * Diked out as redundant 2/13/2000 -- tgl.  There isn't any really
     308             :      * significant difference between the inner and outer side of a mergejoin,
     309             :      * so match_unsorted_inner creates no paths that aren't equivalent to
     310             :      * those made by match_unsorted_outer when add_paths_to_joinrel() is
     311             :      * invoked with the two rels given in the other order.
     312             :      */
     313             :     if (mergejoin_allowed)
     314             :         match_unsorted_inner(root, joinrel, outerrel, innerrel,
     315             :                              jointype, &extra);
     316             : #endif
     317             : 
     318             :     /*
     319             :      * 4. Consider paths where both outer and inner relations must be hashed
     320             :      * before being joined.  As above, disregard enable_hashjoin for full
     321             :      * joins, because there may be no other alternative.
     322             :      */
     323      525716 :     if (enable_hashjoin || jointype == JOIN_FULL)
     324      522548 :         hash_inner_and_outer(root, joinrel, outerrel, innerrel,
     325             :                              jointype, &extra);
     326             : 
     327             :     /*
     328             :      * 5. If inner and outer relations are foreign tables (or joins) belonging
     329             :      * to the same server and assigned to the same user to check access
     330             :      * permissions as, give the FDW a chance to push down joins.
     331             :      */
     332      525716 :     if (joinrel->fdwroutine &&
     333        2548 :         joinrel->fdwroutine->GetForeignJoinPaths)
     334        2544 :         joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
     335             :                                                  outerrel, innerrel,
     336             :                                                  jointype, &extra);
     337             : 
     338             :     /*
     339             :      * 6. Finally, give extensions a chance to manipulate the path list.  They
     340             :      * could add new paths (such as CustomPaths) by calling add_path(), or
     341             :      * add_partial_path() if parallel aware.  They could also delete or modify
     342             :      * paths added by the core code.
     343             :      */
     344      525716 :     if (set_join_pathlist_hook)
     345           0 :         set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
     346             :                                jointype, &extra);
     347      525716 : }
     348             : 
     349             : /*
     350             :  * We override the param_source_rels heuristic to accept nestloop paths in
     351             :  * which the outer rel satisfies some but not all of the inner path's
     352             :  * parameterization.  This is necessary to get good plans for star-schema
     353             :  * scenarios, in which a parameterized path for a large table may require
     354             :  * parameters from multiple small tables that will not get joined directly to
     355             :  * each other.  We can handle that by stacking nestloops that have the small
     356             :  * tables on the outside; but this breaks the rule the param_source_rels
     357             :  * heuristic is based on, namely that parameters should not be passed down
     358             :  * across joins unless there's a join-order-constraint-based reason to do so.
     359             :  * So we ignore the param_source_rels restriction when this case applies.
     360             :  *
     361             :  * allow_star_schema_join() returns true if the param_source_rels restriction
     362             :  * should be overridden, ie, it's okay to perform this join.
     363             :  */
     364             : static inline bool
     365      217960 : allow_star_schema_join(PlannerInfo *root,
     366             :                        Relids outerrelids,
     367             :                        Relids inner_paramrels)
     368             : {
     369             :     /*
     370             :      * It's a star-schema case if the outer rel provides some but not all of
     371             :      * the inner rel's parameterization.
     372             :      */
     373      254220 :     return (bms_overlap(inner_paramrels, outerrelids) &&
     374       36260 :             bms_nonempty_difference(inner_paramrels, outerrelids));
     375             : }
     376             : 
     377             : /*
     378             :  * If the parameterization is only partly satisfied by the outer rel,
     379             :  * the unsatisfied part can't include any outer-join relids that could
     380             :  * null rels of the satisfied part.  That would imply that we're trying
     381             :  * to use a clause involving a Var with nonempty varnullingrels at
     382             :  * a join level where that value isn't yet computable.
     383             :  *
     384             :  * In practice, this test never finds a problem because earlier join order
     385             :  * restrictions prevent us from attempting a join that would cause a problem.
     386             :  * (That's unsurprising, because the code worked before we ever added
     387             :  * outer-join relids to expression relids.)  It still seems worth checking
     388             :  * as a backstop, but we only do so in assert-enabled builds.
     389             :  */
     390             : #ifdef USE_ASSERT_CHECKING
     391             : static inline bool
     392             : have_unsafe_outer_join_ref(PlannerInfo *root,
     393             :                            Relids outerrelids,
     394             :                            Relids inner_paramrels)
     395             : {
     396             :     bool        result = false;
     397             :     Relids      unsatisfied = bms_difference(inner_paramrels, outerrelids);
     398             :     Relids      satisfied = bms_intersect(inner_paramrels, outerrelids);
     399             : 
     400             :     if (bms_overlap(unsatisfied, root->outer_join_rels))
     401             :     {
     402             :         ListCell   *lc;
     403             : 
     404             :         foreach(lc, root->join_info_list)
     405             :         {
     406             :             SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
     407             : 
     408             :             if (!bms_is_member(sjinfo->ojrelid, unsatisfied))
     409             :                 continue;       /* not relevant */
     410             :             if (bms_overlap(satisfied, sjinfo->min_righthand) ||
     411             :                 (sjinfo->jointype == JOIN_FULL &&
     412             :                  bms_overlap(satisfied, sjinfo->min_lefthand)))
     413             :             {
     414             :                 result = true;  /* doesn't work */
     415             :                 break;
     416             :             }
     417             :         }
     418             :     }
     419             : 
     420             :     /* Waste no memory when we reject a path here */
     421             :     bms_free(unsatisfied);
     422             :     bms_free(satisfied);
     423             : 
     424             :     return result;
     425             : }
     426             : #endif                          /* USE_ASSERT_CHECKING */
     427             : 
     428             : /*
     429             :  * paraminfo_get_equal_hashops
     430             :  *      Determine if the clauses in param_info and innerrel's lateral vars
     431             :  *      can be hashed.
     432             :  *      Returns true if hashing is possible, otherwise false.
     433             :  *
     434             :  * Additionally, on success we collect the outer expressions and the
     435             :  * appropriate equality operators for each hashable parameter to innerrel.
     436             :  * These are returned in parallel lists in *param_exprs and *operators.
     437             :  * We also set *binary_mode to indicate whether strict binary matching is
     438             :  * required.
     439             :  */
     440             : static bool
     441      320430 : paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
     442             :                             RelOptInfo *outerrel, RelOptInfo *innerrel,
     443             :                             List *ph_lateral_vars, List **param_exprs,
     444             :                             List **operators, bool *binary_mode)
     445             : 
     446             : {
     447             :     List       *lateral_vars;
     448             :     ListCell   *lc;
     449             : 
     450      320430 :     *param_exprs = NIL;
     451      320430 :     *operators = NIL;
     452      320430 :     *binary_mode = false;
     453             : 
     454             :     /* Add join clauses from param_info to the hash key */
     455      320430 :     if (param_info != NULL)
     456             :     {
     457      320430 :         List       *clauses = param_info->ppi_clauses;
     458             : 
     459      575960 :         foreach(lc, clauses)
     460             :         {
     461      339658 :             RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
     462             :             OpExpr     *opexpr;
     463             :             Node       *expr;
     464             :             Oid         hasheqoperator;
     465             : 
     466      339658 :             opexpr = (OpExpr *) rinfo->clause;
     467             : 
     468             :             /*
     469             :              * Bail if the rinfo is not compatible.  We need a join OpExpr
     470             :              * with 2 args.
     471             :              */
     472      339658 :             if (!IsA(opexpr, OpExpr) || list_length(opexpr->args) != 2 ||
     473      316824 :                 !clause_sides_match_join(rinfo, outerrel, innerrel))
     474             :             {
     475       83972 :                 list_free(*operators);
     476       83972 :                 list_free(*param_exprs);
     477       84128 :                 return false;
     478             :             }
     479             : 
     480      255686 :             if (rinfo->outer_is_left)
     481             :             {
     482      145740 :                 expr = (Node *) linitial(opexpr->args);
     483      145740 :                 hasheqoperator = rinfo->left_hasheqoperator;
     484             :             }
     485             :             else
     486             :             {
     487      109946 :                 expr = (Node *) lsecond(opexpr->args);
     488      109946 :                 hasheqoperator = rinfo->right_hasheqoperator;
     489             :             }
     490             : 
     491             :             /* can't do memoize if we can't hash the outer type */
     492      255686 :             if (!OidIsValid(hasheqoperator))
     493             :             {
     494         156 :                 list_free(*operators);
     495         156 :                 list_free(*param_exprs);
     496         156 :                 return false;
     497             :             }
     498             : 
     499             :             /*
     500             :              * 'expr' may already exist as a parameter from a previous item in
     501             :              * ppi_clauses.  No need to include it again, however we'd better
     502             :              * ensure we do switch into binary mode if required.  See below.
     503             :              */
     504      255530 :             if (!list_member(*param_exprs, expr))
     505             :             {
     506      255524 :                 *operators = lappend_oid(*operators, hasheqoperator);
     507      255524 :                 *param_exprs = lappend(*param_exprs, expr);
     508             :             }
     509             : 
     510             :             /*
     511             :              * When the join operator is not hashable then it's possible that
     512             :              * the operator will be able to distinguish something that the
     513             :              * hash equality operator could not. For example with floating
     514             :              * point types -0.0 and +0.0 are classed as equal by the hash
     515             :              * function and equality function, but some other operator may be
     516             :              * able to tell those values apart.  This means that we must put
     517             :              * memoize into binary comparison mode so that it does bit-by-bit
     518             :              * comparisons rather than a "logical" comparison as it would
     519             :              * using the hash equality operator.
     520             :              */
     521      255530 :             if (!OidIsValid(rinfo->hashjoinoperator))
     522        1070 :                 *binary_mode = true;
     523             :         }
     524             :     }
     525             : 
     526             :     /* Now add any lateral vars to the cache key too */
     527      236302 :     lateral_vars = list_concat(ph_lateral_vars, innerrel->lateral_vars);
     528      240424 :     foreach(lc, lateral_vars)
     529             :     {
     530        4320 :         Node       *expr = (Node *) lfirst(lc);
     531             :         TypeCacheEntry *typentry;
     532             : 
     533             :         /* Reject if there are any volatile functions in lateral vars */
     534        4320 :         if (contain_volatile_functions(expr))
     535             :         {
     536           0 :             list_free(*operators);
     537           0 :             list_free(*param_exprs);
     538         198 :             return false;
     539             :         }
     540             : 
     541        4320 :         typentry = lookup_type_cache(exprType(expr),
     542             :                                      TYPECACHE_HASH_PROC | TYPECACHE_EQ_OPR);
     543             : 
     544             :         /* can't use memoize without a valid hash proc and equals operator */
     545        4320 :         if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr))
     546             :         {
     547         198 :             list_free(*operators);
     548         198 :             list_free(*param_exprs);
     549         198 :             return false;
     550             :         }
     551             : 
     552             :         /*
     553             :          * 'expr' may already exist as a parameter from the ppi_clauses.  No
     554             :          * need to include it again, however we'd better ensure we do switch
     555             :          * into binary mode.
     556             :          */
     557        4122 :         if (!list_member(*param_exprs, expr))
     558             :         {
     559        3684 :             *operators = lappend_oid(*operators, typentry->eq_opr);
     560        3684 :             *param_exprs = lappend(*param_exprs, expr);
     561             :         }
     562             : 
     563             :         /*
     564             :          * We must go into binary mode as we don't have too much of an idea of
     565             :          * how these lateral Vars are being used.  See comment above when we
     566             :          * set *binary_mode for the non-lateral Var case. This could be
     567             :          * relaxed a bit if we had the RestrictInfos and knew the operators
     568             :          * being used, however for cases like Vars that are arguments to
     569             :          * functions we must operate in binary mode as we don't have
     570             :          * visibility into what the function is doing with the Vars.
     571             :          */
     572        4122 :         *binary_mode = true;
     573             :     }
     574             : 
     575             :     /* We're okay to use memoize */
     576      236104 :     return true;
     577             : }
     578             : 
     579             : /*
     580             :  * extract_lateral_vars_from_PHVs
     581             :  *    Extract lateral references within PlaceHolderVars that are due to be
     582             :  *    evaluated at 'innerrelids'.
     583             :  */
     584             : static List *
     585      953964 : extract_lateral_vars_from_PHVs(PlannerInfo *root, Relids innerrelids)
     586             : {
     587      953964 :     List       *ph_lateral_vars = NIL;
     588             :     ListCell   *lc;
     589             : 
     590             :     /* Nothing would be found if the query contains no LATERAL RTEs */
     591      953964 :     if (!root->hasLateralRTEs)
     592      935138 :         return NIL;
     593             : 
     594             :     /*
     595             :      * No need to consider PHVs that are due to be evaluated at joinrels,
     596             :      * since we do not add Memoize nodes on top of joinrel paths.
     597             :      */
     598       18826 :     if (bms_membership(innerrelids) == BMS_MULTIPLE)
     599        5734 :         return NIL;
     600             : 
     601       15236 :     foreach(lc, root->placeholder_list)
     602             :     {
     603        2144 :         PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
     604             :         List       *vars;
     605             :         ListCell   *cell;
     606             : 
     607             :         /* PHV is uninteresting if no lateral refs */
     608        2144 :         if (phinfo->ph_lateral == NULL)
     609         978 :             continue;
     610             : 
     611             :         /* PHV is uninteresting if not due to be evaluated at innerrelids */
     612        1166 :         if (!bms_equal(phinfo->ph_eval_at, innerrelids))
     613         970 :             continue;
     614             : 
     615             :         /*
     616             :          * If the PHV does not reference any rels in innerrelids, use its
     617             :          * contained expression as a cache key rather than extracting the
     618             :          * Vars/PHVs from it and using those.  This can be beneficial in cases
     619             :          * where the expression results in fewer distinct values to cache
     620             :          * tuples for.
     621             :          */
     622         196 :         if (!bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
     623             :                          innerrelids))
     624             :         {
     625         184 :             ph_lateral_vars = lappend(ph_lateral_vars, phinfo->ph_var->phexpr);
     626         184 :             continue;
     627             :         }
     628             : 
     629             :         /* Fetch Vars and PHVs of lateral references within PlaceHolderVars */
     630          12 :         vars = pull_vars_of_level((Node *) phinfo->ph_var->phexpr, 0);
     631          36 :         foreach(cell, vars)
     632             :         {
     633          24 :             Node       *node = (Node *) lfirst(cell);
     634             : 
     635          24 :             if (IsA(node, Var))
     636             :             {
     637          12 :                 Var        *var = (Var *) node;
     638             : 
     639             :                 Assert(var->varlevelsup == 0);
     640             : 
     641          12 :                 if (bms_is_member(var->varno, phinfo->ph_lateral))
     642           0 :                     ph_lateral_vars = lappend(ph_lateral_vars, node);
     643             :             }
     644          12 :             else if (IsA(node, PlaceHolderVar))
     645             :             {
     646          12 :                 PlaceHolderVar *phv = (PlaceHolderVar *) node;
     647             : 
     648             :                 Assert(phv->phlevelsup == 0);
     649             : 
     650          12 :                 if (bms_is_subset(find_placeholder_info(root, phv)->ph_eval_at,
     651          12 :                                   phinfo->ph_lateral))
     652          12 :                     ph_lateral_vars = lappend(ph_lateral_vars, node);
     653             :             }
     654             :             else
     655             :                 Assert(false);
     656             :         }
     657             : 
     658          12 :         list_free(vars);
     659             :     }
     660             : 
     661       13092 :     return ph_lateral_vars;
     662             : }
     663             : 
     664             : /*
     665             :  * get_memoize_path
     666             :  *      If possible, make and return a Memoize path atop of 'inner_path'.
     667             :  *      Otherwise return NULL.
     668             :  *
     669             :  * Note that currently we do not add Memoize nodes on top of join relation
     670             :  * paths.  This is because the ParamPathInfos for join relation paths do not
     671             :  * maintain ppi_clauses, as the set of relevant clauses varies depending on how
     672             :  * the join is formed.  In addition, joinrels do not maintain lateral_vars.  So
     673             :  * we do not have a way to extract cache keys from joinrels.
     674             :  */
     675             : static Path *
     676     1352230 : get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
     677             :                  RelOptInfo *outerrel, Path *inner_path,
     678             :                  Path *outer_path, JoinType jointype,
     679             :                  JoinPathExtraData *extra)
     680             : {
     681             :     List       *param_exprs;
     682             :     List       *hash_operators;
     683             :     ListCell   *lc;
     684             :     bool        binary_mode;
     685             :     List       *ph_lateral_vars;
     686             : 
     687             :     /* Obviously not if it's disabled */
     688     1352230 :     if (!enable_memoize)
     689         388 :         return NULL;
     690             : 
     691             :     /*
     692             :      * We can safely not bother with all this unless we expect to perform more
     693             :      * than one inner scan.  The first scan is always going to be a cache
     694             :      * miss.  This would likely fail later anyway based on costs, so this is
     695             :      * really just to save some wasted effort.
     696             :      */
     697     1351842 :     if (outer_path->parent->rows < 2)
     698      397878 :         return NULL;
     699             : 
     700             :     /*
     701             :      * Extract lateral Vars/PHVs within PlaceHolderVars that are due to be
     702             :      * evaluated at innerrel.  These lateral Vars/PHVs could be used as
     703             :      * memoize cache keys.
     704             :      */
     705      953964 :     ph_lateral_vars = extract_lateral_vars_from_PHVs(root, innerrel->relids);
     706             : 
     707             :     /*
     708             :      * We can only have a memoize node when there's some kind of cache key,
     709             :      * either parameterized path clauses or lateral Vars.  No cache key sounds
     710             :      * more like something a Materialize node might be more useful for.
     711             :      */
     712      953964 :     if ((inner_path->param_info == NULL ||
     713      395740 :          inner_path->param_info->ppi_clauses == NIL) &&
     714      576656 :         innerrel->lateral_vars == NIL &&
     715             :         ph_lateral_vars == NIL)
     716      573708 :         return NULL;
     717             : 
     718             :     /*
     719             :      * Currently we don't do this for SEMI and ANTI joins unless they're
     720             :      * marked as inner_unique.  This is because nested loop SEMI/ANTI joins
     721             :      * don't scan the inner node to completion, which will mean memoize cannot
     722             :      * mark the cache entry as complete.
     723             :      *
     724             :      * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique
     725             :      * = true.  Should we?  See add_paths_to_joinrel()
     726             :      */
     727      380256 :     if (!extra->inner_unique && (jointype == JOIN_SEMI ||
     728             :                                  jointype == JOIN_ANTI))
     729        8480 :         return NULL;
     730             : 
     731             :     /*
     732             :      * Memoize normally marks cache entries as complete when it runs out of
     733             :      * tuples to read from its subplan.  However, with unique joins, Nested
     734             :      * Loop will skip to the next outer tuple after finding the first matching
     735             :      * inner tuple.  This means that we may not read the inner side of the
     736             :      * join to completion which leaves no opportunity to mark the cache entry
     737             :      * as complete.  To work around that, when the join is unique we
     738             :      * automatically mark cache entries as complete after fetching the first
     739             :      * tuple.  This works when the entire join condition is parameterized.
     740             :      * Otherwise, when the parameterization is only a subset of the join
     741             :      * condition, we can't be sure which part of it causes the join to be
     742             :      * unique.  This means there are no guarantees that only 1 tuple will be
     743             :      * read.  We cannot mark the cache entry as complete after reading the
     744             :      * first tuple without that guarantee.  This means the scope of Memoize
     745             :      * node's usefulness is limited to only outer rows that have no join
     746             :      * partner as this is the only case where Nested Loop would exhaust the
     747             :      * inner scan of a unique join.  Since the scope is limited to that, we
     748             :      * just don't bother making a memoize path in this case.
     749             :      *
     750             :      * Lateral vars needn't be considered here as they're not considered when
     751             :      * determining if the join is unique.
     752             :      *
     753             :      * XXX this could be enabled if the remaining join quals were made part of
     754             :      * the inner scan's filter instead of the join filter.  Maybe it's worth
     755             :      * considering doing that?
     756             :      */
     757      371776 :     if (extra->inner_unique &&
     758      451592 :         (inner_path->param_info == NULL ||
     759      225796 :          bms_num_members(inner_path->param_info->ppi_serials) <
     760      225796 :          list_length(extra->restrictlist)))
     761       51332 :         return NULL;
     762             : 
     763             :     /*
     764             :      * We can't use a memoize node if there are volatile functions in the
     765             :      * inner rel's target list or restrict list.  A cache hit could reduce the
     766             :      * number of calls to these functions.
     767             :      */
     768      320444 :     if (contain_volatile_functions((Node *) innerrel->reltarget))
     769           0 :         return NULL;
     770             : 
     771      530178 :     foreach(lc, innerrel->baserestrictinfo)
     772             :     {
     773      209746 :         RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
     774             : 
     775      209746 :         if (contain_volatile_functions((Node *) rinfo))
     776          12 :             return NULL;
     777             :     }
     778             : 
     779             :     /*
     780             :      * Also check the parameterized path restrictinfos for volatile functions.
     781             :      * Indexed functions must be immutable so shouldn't have any volatile
     782             :      * functions, however, with a lateral join the inner scan may not be an
     783             :      * index scan.
     784             :      */
     785      320432 :     if (inner_path->param_info != NULL)
     786             :     {
     787      687266 :         foreach(lc, inner_path->param_info->ppi_clauses)
     788             :         {
     789      366836 :             RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
     790             : 
     791      366836 :             if (contain_volatile_functions((Node *) rinfo))
     792           2 :                 return NULL;
     793             :         }
     794             :     }
     795             : 
     796             :     /* Check if we have hash ops for each parameter to the path */
     797      320430 :     if (paraminfo_get_equal_hashops(root,
     798             :                                     inner_path->param_info,
     799      320430 :                                     outerrel->top_parent ?
     800             :                                     outerrel->top_parent : outerrel,
     801             :                                     innerrel,
     802             :                                     ph_lateral_vars,
     803             :                                     &param_exprs,
     804             :                                     &hash_operators,
     805             :                                     &binary_mode))
     806             :     {
     807      236104 :         return (Path *) create_memoize_path(root,
     808             :                                             innerrel,
     809             :                                             inner_path,
     810             :                                             param_exprs,
     811             :                                             hash_operators,
     812      236104 :                                             extra->inner_unique,
     813             :                                             binary_mode,
     814             :                                             outer_path->rows);
     815             :     }
     816             : 
     817       84326 :     return NULL;
     818             : }
     819             : 
     820             : /*
     821             :  * try_nestloop_path
     822             :  *    Consider a nestloop join path; if it appears useful, push it into
     823             :  *    the joinrel's pathlist via add_path().
     824             :  */
     825             : static void
     826     2335072 : try_nestloop_path(PlannerInfo *root,
     827             :                   RelOptInfo *joinrel,
     828             :                   Path *outer_path,
     829             :                   Path *inner_path,
     830             :                   List *pathkeys,
     831             :                   JoinType jointype,
     832             :                   JoinPathExtraData *extra)
     833             : {
     834             :     Relids      required_outer;
     835             :     JoinCostWorkspace workspace;
     836     2335072 :     RelOptInfo *innerrel = inner_path->parent;
     837     2335072 :     RelOptInfo *outerrel = outer_path->parent;
     838             :     Relids      innerrelids;
     839             :     Relids      outerrelids;
     840     2335072 :     Relids      inner_paramrels = PATH_REQ_OUTER(inner_path);
     841     2335072 :     Relids      outer_paramrels = PATH_REQ_OUTER(outer_path);
     842             : 
     843             :     /*
     844             :      * If we are forming an outer join at this join, it's nonsensical to use
     845             :      * an input path that uses the outer join as part of its parameterization.
     846             :      * (This can happen despite our join order restrictions, since those apply
     847             :      * to what is in an input relation not what its parameters are.)
     848             :      */
     849     2860794 :     if (extra->sjinfo->ojrelid != 0 &&
     850     1051444 :         (bms_is_member(extra->sjinfo->ojrelid, inner_paramrels) ||
     851      525722 :          bms_is_member(extra->sjinfo->ojrelid, outer_paramrels)))
     852      217748 :         return;
     853             : 
     854             :     /*
     855             :      * Any parameterization of the input paths refers to topmost parents of
     856             :      * the relevant relations, because reparameterize_path_by_child() hasn't
     857             :      * been called yet.  So we must consider topmost parents of the relations
     858             :      * being joined, too, while determining parameterization of the result and
     859             :      * checking for disallowed parameterization cases.
     860             :      */
     861     2334952 :     if (innerrel->top_parent_relids)
     862       33820 :         innerrelids = innerrel->top_parent_relids;
     863             :     else
     864     2301132 :         innerrelids = innerrel->relids;
     865             : 
     866     2334952 :     if (outerrel->top_parent_relids)
     867       33820 :         outerrelids = outerrel->top_parent_relids;
     868             :     else
     869     2301132 :         outerrelids = outerrel->relids;
     870             : 
     871             :     /*
     872             :      * Check to see if proposed path is still parameterized, and reject if the
     873             :      * parameterization wouldn't be sensible --- unless allow_star_schema_join
     874             :      * says to allow it anyway.  Also, we must reject if have_dangerous_phv
     875             :      * doesn't like the look of it, which could only happen if the nestloop is
     876             :      * still parameterized.
     877             :      */
     878     2334952 :     required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
     879             :                                                   innerrelids, inner_paramrels);
     880     2334952 :     if (required_outer &&
     881      248868 :         ((!bms_overlap(required_outer, extra->param_source_rels) &&
     882      249296 :           !allow_star_schema_join(root, outerrelids, inner_paramrels)) ||
     883       31336 :          have_dangerous_phv(root, outerrelids, inner_paramrels)))
     884             :     {
     885             :         /* Waste no memory when we reject a path here */
     886      217628 :         bms_free(required_outer);
     887      217628 :         return;
     888             :     }
     889             : 
     890             :     /* If we got past that, we shouldn't have any unsafe outer-join refs */
     891             :     Assert(!have_unsafe_outer_join_ref(root, outerrelids, inner_paramrels));
     892             : 
     893             :     /*
     894             :      * If the inner path is parameterized, it is parameterized by the topmost
     895             :      * parent of the outer rel, not the outer rel itself.  We will need to
     896             :      * translate the parameterization, if this path is chosen, during
     897             :      * create_plan().  Here we just check whether we will be able to perform
     898             :      * the translation, and if not avoid creating a nestloop path.
     899             :      */
     900     2117324 :     if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
     901       10888 :         !path_is_reparameterizable_by_child(inner_path, outer_path->parent))
     902             :     {
     903           0 :         bms_free(required_outer);
     904           0 :         return;
     905             :     }
     906             : 
     907             :     /*
     908             :      * Do a precheck to quickly eliminate obviously-inferior paths.  We
     909             :      * calculate a cheap lower bound on the path's cost and then use
     910             :      * add_path_precheck() to see if the path is clearly going to be dominated
     911             :      * by some existing path for the joinrel.  If not, do the full pushup with
     912             :      * creating a fully valid path structure and submitting it to add_path().
     913             :      * The latter two steps are expensive enough to make this two-phase
     914             :      * methodology worthwhile.
     915             :      */
     916     2117324 :     initial_cost_nestloop(root, &workspace, jointype,
     917             :                           outer_path, inner_path, extra);
     918             : 
     919     2117324 :     if (add_path_precheck(joinrel, workspace.disabled_nodes,
     920             :                           workspace.startup_cost, workspace.total_cost,
     921             :                           pathkeys, required_outer))
     922             :     {
     923     1038124 :         add_path(joinrel, (Path *)
     924     1038124 :                  create_nestloop_path(root,
     925             :                                       joinrel,
     926             :                                       jointype,
     927             :                                       &workspace,
     928             :                                       extra,
     929             :                                       outer_path,
     930             :                                       inner_path,
     931             :                                       extra->restrictlist,
     932             :                                       pathkeys,
     933             :                                       required_outer));
     934             :     }
     935             :     else
     936             :     {
     937             :         /* Waste no memory when we reject a path here */
     938     1079200 :         bms_free(required_outer);
     939             :     }
     940             : }
     941             : 
     942             : /*
     943             :  * try_partial_nestloop_path
     944             :  *    Consider a partial nestloop join path; if it appears useful, push it into
     945             :  *    the joinrel's partial_pathlist via add_partial_path().
     946             :  */
     947             : static void
     948       39832 : try_partial_nestloop_path(PlannerInfo *root,
     949             :                           RelOptInfo *joinrel,
     950             :                           Path *outer_path,
     951             :                           Path *inner_path,
     952             :                           List *pathkeys,
     953             :                           JoinType jointype,
     954             :                           JoinPathExtraData *extra)
     955             : {
     956             :     JoinCostWorkspace workspace;
     957             : 
     958             :     /*
     959             :      * If the inner path is parameterized, the parameterization must be fully
     960             :      * satisfied by the proposed outer path.  Parameterized partial paths are
     961             :      * not supported.  The caller should already have verified that no lateral
     962             :      * rels are required here.
     963             :      */
     964             :     Assert(bms_is_empty(joinrel->lateral_relids));
     965             :     Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
     966       39832 :     if (inner_path->param_info != NULL)
     967             :     {
     968       13686 :         Relids      inner_paramrels = inner_path->param_info->ppi_req_outer;
     969       13686 :         RelOptInfo *outerrel = outer_path->parent;
     970             :         Relids      outerrelids;
     971             : 
     972             :         /*
     973             :          * The inner and outer paths are parameterized, if at all, by the top
     974             :          * level parents, not the child relations, so we must use those relids
     975             :          * for our parameterization tests.
     976             :          */
     977       13686 :         if (outerrel->top_parent_relids)
     978       10188 :             outerrelids = outerrel->top_parent_relids;
     979             :         else
     980        3498 :             outerrelids = outerrel->relids;
     981             : 
     982       13686 :         if (!bms_is_subset(inner_paramrels, outerrelids))
     983       27586 :             return;
     984             :     }
     985             : 
     986             :     /*
     987             :      * If the inner path is parameterized, it is parameterized by the topmost
     988             :      * parent of the outer rel, not the outer rel itself.  We will need to
     989             :      * translate the parameterization, if this path is chosen, during
     990             :      * create_plan().  Here we just check whether we will be able to perform
     991             :      * the translation, and if not avoid creating a nestloop path.
     992             :      */
     993       38740 :     if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
     994        9396 :         !path_is_reparameterizable_by_child(inner_path, outer_path->parent))
     995           0 :         return;
     996             : 
     997             :     /*
     998             :      * Before creating a path, get a quick lower bound on what it is likely to
     999             :      * cost.  Bail out right away if it looks terrible.
    1000             :      */
    1001       38740 :     initial_cost_nestloop(root, &workspace, jointype,
    1002             :                           outer_path, inner_path, extra);
    1003       38740 :     if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
    1004             :                                    workspace.total_cost, pathkeys))
    1005       26494 :         return;
    1006             : 
    1007             :     /* Might be good enough to be worth trying, so let's try it. */
    1008       12246 :     add_partial_path(joinrel, (Path *)
    1009       12246 :                      create_nestloop_path(root,
    1010             :                                           joinrel,
    1011             :                                           jointype,
    1012             :                                           &workspace,
    1013             :                                           extra,
    1014             :                                           outer_path,
    1015             :                                           inner_path,
    1016             :                                           extra->restrictlist,
    1017             :                                           pathkeys,
    1018             :                                           NULL));
    1019             : }
    1020             : 
    1021             : /*
    1022             :  * try_mergejoin_path
    1023             :  *    Consider a merge join path; if it appears useful, push it into
    1024             :  *    the joinrel's pathlist via add_path().
    1025             :  */
    1026             : static void
    1027      940822 : try_mergejoin_path(PlannerInfo *root,
    1028             :                    RelOptInfo *joinrel,
    1029             :                    Path *outer_path,
    1030             :                    Path *inner_path,
    1031             :                    List *pathkeys,
    1032             :                    List *mergeclauses,
    1033             :                    List *outersortkeys,
    1034             :                    List *innersortkeys,
    1035             :                    JoinType jointype,
    1036             :                    JoinPathExtraData *extra,
    1037             :                    bool is_partial)
    1038             : {
    1039             :     Relids      required_outer;
    1040             :     JoinCostWorkspace workspace;
    1041             : 
    1042      940822 :     if (is_partial)
    1043             :     {
    1044        6314 :         try_partial_mergejoin_path(root,
    1045             :                                    joinrel,
    1046             :                                    outer_path,
    1047             :                                    inner_path,
    1048             :                                    pathkeys,
    1049             :                                    mergeclauses,
    1050             :                                    outersortkeys,
    1051             :                                    innersortkeys,
    1052             :                                    jointype,
    1053             :                                    extra);
    1054       32160 :         return;
    1055             :     }
    1056             : 
    1057             :     /*
    1058             :      * If we are forming an outer join at this join, it's nonsensical to use
    1059             :      * an input path that uses the outer join as part of its parameterization.
    1060             :      * (This can happen despite our join order restrictions, since those apply
    1061             :      * to what is in an input relation not what its parameters are.)
    1062             :      */
    1063     1229866 :     if (extra->sjinfo->ojrelid != 0 &&
    1064      590716 :         (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
    1065      295358 :          bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
    1066          12 :         return;
    1067             : 
    1068             :     /*
    1069             :      * Check to see if proposed path is still parameterized, and reject if the
    1070             :      * parameterization wouldn't be sensible.
    1071             :      */
    1072      934496 :     required_outer = calc_non_nestloop_required_outer(outer_path,
    1073             :                                                       inner_path);
    1074      934496 :     if (required_outer &&
    1075       28318 :         !bms_overlap(required_outer, extra->param_source_rels))
    1076             :     {
    1077             :         /* Waste no memory when we reject a path here */
    1078       25834 :         bms_free(required_outer);
    1079       25834 :         return;
    1080             :     }
    1081             : 
    1082             :     /*
    1083             :      * If the given paths are already well enough ordered, we can skip doing
    1084             :      * an explicit sort.
    1085             :      */
    1086     1356820 :     if (outersortkeys &&
    1087      448158 :         pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
    1088       17350 :         outersortkeys = NIL;
    1089     1649286 :     if (innersortkeys &&
    1090      740624 :         pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
    1091       31868 :         innersortkeys = NIL;
    1092             : 
    1093             :     /*
    1094             :      * See comments in try_nestloop_path().
    1095             :      */
    1096      908662 :     initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
    1097             :                            outer_path, inner_path,
    1098             :                            outersortkeys, innersortkeys,
    1099             :                            extra);
    1100             : 
    1101      908662 :     if (add_path_precheck(joinrel, workspace.disabled_nodes,
    1102             :                           workspace.startup_cost, workspace.total_cost,
    1103             :                           pathkeys, required_outer))
    1104             :     {
    1105      231010 :         add_path(joinrel, (Path *)
    1106      231010 :                  create_mergejoin_path(root,
    1107             :                                        joinrel,
    1108             :                                        jointype,
    1109             :                                        &workspace,
    1110             :                                        extra,
    1111             :                                        outer_path,
    1112             :                                        inner_path,
    1113             :                                        extra->restrictlist,
    1114             :                                        pathkeys,
    1115             :                                        required_outer,
    1116             :                                        mergeclauses,
    1117             :                                        outersortkeys,
    1118             :                                        innersortkeys));
    1119             :     }
    1120             :     else
    1121             :     {
    1122             :         /* Waste no memory when we reject a path here */
    1123      677652 :         bms_free(required_outer);
    1124             :     }
    1125             : }
    1126             : 
    1127             : /*
    1128             :  * try_partial_mergejoin_path
    1129             :  *    Consider a partial merge join path; if it appears useful, push it into
    1130             :  *    the joinrel's pathlist via add_partial_path().
    1131             :  */
    1132             : static void
    1133       19034 : try_partial_mergejoin_path(PlannerInfo *root,
    1134             :                            RelOptInfo *joinrel,
    1135             :                            Path *outer_path,
    1136             :                            Path *inner_path,
    1137             :                            List *pathkeys,
    1138             :                            List *mergeclauses,
    1139             :                            List *outersortkeys,
    1140             :                            List *innersortkeys,
    1141             :                            JoinType jointype,
    1142             :                            JoinPathExtraData *extra)
    1143             : {
    1144             :     JoinCostWorkspace workspace;
    1145             : 
    1146             :     /*
    1147             :      * See comments in try_partial_hashjoin_path().
    1148             :      */
    1149             :     Assert(bms_is_empty(joinrel->lateral_relids));
    1150             :     Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
    1151       19034 :     if (!bms_is_empty(PATH_REQ_OUTER(inner_path)))
    1152        9876 :         return;
    1153             : 
    1154             :     /*
    1155             :      * If the given paths are already well enough ordered, we can skip doing
    1156             :      * an explicit sort.
    1157             :      */
    1158       31754 :     if (outersortkeys &&
    1159       12720 :         pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
    1160         138 :         outersortkeys = NIL;
    1161       35478 :     if (innersortkeys &&
    1162       16444 :         pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
    1163         252 :         innersortkeys = NIL;
    1164             : 
    1165             :     /*
    1166             :      * See comments in try_partial_nestloop_path().
    1167             :      */
    1168       19034 :     initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
    1169             :                            outer_path, inner_path,
    1170             :                            outersortkeys, innersortkeys,
    1171             :                            extra);
    1172             : 
    1173       19034 :     if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
    1174             :                                    workspace.total_cost, pathkeys))
    1175        9876 :         return;
    1176             : 
    1177             :     /* Might be good enough to be worth trying, so let's try it. */
    1178        9158 :     add_partial_path(joinrel, (Path *)
    1179        9158 :                      create_mergejoin_path(root,
    1180             :                                            joinrel,
    1181             :                                            jointype,
    1182             :                                            &workspace,
    1183             :                                            extra,
    1184             :                                            outer_path,
    1185             :                                            inner_path,
    1186             :                                            extra->restrictlist,
    1187             :                                            pathkeys,
    1188             :                                            NULL,
    1189             :                                            mergeclauses,
    1190             :                                            outersortkeys,
    1191             :                                            innersortkeys));
    1192             : }
    1193             : 
    1194             : /*
    1195             :  * try_hashjoin_path
    1196             :  *    Consider a hash join path; if it appears useful, push it into
    1197             :  *    the joinrel's pathlist via add_path().
    1198             :  */
    1199             : static void
    1200      578660 : try_hashjoin_path(PlannerInfo *root,
    1201             :                   RelOptInfo *joinrel,
    1202             :                   Path *outer_path,
    1203             :                   Path *inner_path,
    1204             :                   List *hashclauses,
    1205             :                   JoinType jointype,
    1206             :                   JoinPathExtraData *extra)
    1207             : {
    1208             :     Relids      required_outer;
    1209             :     JoinCostWorkspace workspace;
    1210             : 
    1211             :     /*
    1212             :      * If we are forming an outer join at this join, it's nonsensical to use
    1213             :      * an input path that uses the outer join as part of its parameterization.
    1214             :      * (This can happen despite our join order restrictions, since those apply
    1215             :      * to what is in an input relation not what its parameters are.)
    1216             :      */
    1217      783250 :     if (extra->sjinfo->ojrelid != 0 &&
    1218      409150 :         (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
    1219      204560 :          bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
    1220       84252 :         return;
    1221             : 
    1222             :     /*
    1223             :      * Check to see if proposed path is still parameterized, and reject if the
    1224             :      * parameterization wouldn't be sensible.
    1225             :      */
    1226      578600 :     required_outer = calc_non_nestloop_required_outer(outer_path,
    1227             :                                                       inner_path);
    1228      578600 :     if (required_outer &&
    1229       96650 :         !bms_overlap(required_outer, extra->param_source_rels))
    1230             :     {
    1231             :         /* Waste no memory when we reject a path here */
    1232       84192 :         bms_free(required_outer);
    1233       84192 :         return;
    1234             :     }
    1235             : 
    1236             :     /*
    1237             :      * See comments in try_nestloop_path().  Also note that hashjoin paths
    1238             :      * never have any output pathkeys, per comments in create_hashjoin_path.
    1239             :      */
    1240      494408 :     initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
    1241             :                           outer_path, inner_path, extra, false);
    1242             : 
    1243      494408 :     if (add_path_precheck(joinrel, workspace.disabled_nodes,
    1244             :                           workspace.startup_cost, workspace.total_cost,
    1245             :                           NIL, required_outer))
    1246             :     {
    1247      207420 :         add_path(joinrel, (Path *)
    1248      207420 :                  create_hashjoin_path(root,
    1249             :                                       joinrel,
    1250             :                                       jointype,
    1251             :                                       &workspace,
    1252             :                                       extra,
    1253             :                                       outer_path,
    1254             :                                       inner_path,
    1255             :                                       false,    /* parallel_hash */
    1256             :                                       extra->restrictlist,
    1257             :                                       required_outer,
    1258             :                                       hashclauses));
    1259             :     }
    1260             :     else
    1261             :     {
    1262             :         /* Waste no memory when we reject a path here */
    1263      286988 :         bms_free(required_outer);
    1264             :     }
    1265             : }
    1266             : 
    1267             : /*
    1268             :  * try_partial_hashjoin_path
    1269             :  *    Consider a partial hashjoin join path; if it appears useful, push it into
    1270             :  *    the joinrel's partial_pathlist via add_partial_path().
    1271             :  *    The outer side is partial.  If parallel_hash is true, then the inner path
    1272             :  *    must be partial and will be run in parallel to create one or more shared
    1273             :  *    hash tables; otherwise the inner path must be complete and a copy of it
    1274             :  *    is run in every process to create separate identical private hash tables.
    1275             :  */
    1276             : static void
    1277       21146 : try_partial_hashjoin_path(PlannerInfo *root,
    1278             :                           RelOptInfo *joinrel,
    1279             :                           Path *outer_path,
    1280             :                           Path *inner_path,
    1281             :                           List *hashclauses,
    1282             :                           JoinType jointype,
    1283             :                           JoinPathExtraData *extra,
    1284             :                           bool parallel_hash)
    1285             : {
    1286             :     JoinCostWorkspace workspace;
    1287             : 
    1288             :     /*
    1289             :      * If the inner path is parameterized, we can't use a partial hashjoin.
    1290             :      * Parameterized partial paths are not supported.  The caller should
    1291             :      * already have verified that no lateral rels are required here.
    1292             :      */
    1293             :     Assert(bms_is_empty(joinrel->lateral_relids));
    1294             :     Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
    1295       21146 :     if (!bms_is_empty(PATH_REQ_OUTER(inner_path)))
    1296       10208 :         return;
    1297             : 
    1298             :     /*
    1299             :      * Before creating a path, get a quick lower bound on what it is likely to
    1300             :      * cost.  Bail out right away if it looks terrible.
    1301             :      */
    1302       21146 :     initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
    1303             :                           outer_path, inner_path, extra, parallel_hash);
    1304       21146 :     if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
    1305             :                                    workspace.total_cost, NIL))
    1306       10208 :         return;
    1307             : 
    1308             :     /* Might be good enough to be worth trying, so let's try it. */
    1309       10938 :     add_partial_path(joinrel, (Path *)
    1310       10938 :                      create_hashjoin_path(root,
    1311             :                                           joinrel,
    1312             :                                           jointype,
    1313             :                                           &workspace,
    1314             :                                           extra,
    1315             :                                           outer_path,
    1316             :                                           inner_path,
    1317             :                                           parallel_hash,
    1318             :                                           extra->restrictlist,
    1319             :                                           NULL,
    1320             :                                           hashclauses));
    1321             : }
    1322             : 
    1323             : /*
    1324             :  * clause_sides_match_join
    1325             :  *    Determine whether a join clause is of the right form to use in this join.
    1326             :  *
    1327             :  * We already know that the clause is a binary opclause referencing only the
    1328             :  * rels in the current join.  The point here is to check whether it has the
    1329             :  * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
    1330             :  * rather than mixing outer and inner vars on either side.  If it matches,
    1331             :  * we set the transient flag outer_is_left to identify which side is which.
    1332             :  */
    1333             : static inline bool
    1334     1232236 : clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
    1335             :                         RelOptInfo *innerrel)
    1336             : {
    1337     1836846 :     if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
    1338      604610 :         bms_is_subset(rinfo->right_relids, innerrel->relids))
    1339             :     {
    1340             :         /* lefthand side is outer */
    1341      604262 :         rinfo->outer_is_left = true;
    1342      604262 :         return true;
    1343             :     }
    1344     1238226 :     else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
    1345      610252 :              bms_is_subset(rinfo->right_relids, outerrel->relids))
    1346             :     {
    1347             :         /* righthand side is outer */
    1348      565924 :         rinfo->outer_is_left = false;
    1349      565924 :         return true;
    1350             :     }
    1351       62050 :     return false;               /* no good for these input relations */
    1352             : }
    1353             : 
    1354             : /*
    1355             :  * sort_inner_and_outer
    1356             :  *    Create mergejoin join paths by explicitly sorting both the outer and
    1357             :  *    inner join relations on each available merge ordering.
    1358             :  *
    1359             :  * 'joinrel' is the join relation
    1360             :  * 'outerrel' is the outer join relation
    1361             :  * 'innerrel' is the inner join relation
    1362             :  * 'jointype' is the type of join to do
    1363             :  * 'extra' contains additional input values
    1364             :  */
    1365             : static void
    1366      514284 : sort_inner_and_outer(PlannerInfo *root,
    1367             :                      RelOptInfo *joinrel,
    1368             :                      RelOptInfo *outerrel,
    1369             :                      RelOptInfo *innerrel,
    1370             :                      JoinType jointype,
    1371             :                      JoinPathExtraData *extra)
    1372             : {
    1373      514284 :     JoinType    save_jointype = jointype;
    1374             :     Path       *outer_path;
    1375             :     Path       *inner_path;
    1376      514284 :     Path       *cheapest_partial_outer = NULL;
    1377      514284 :     Path       *cheapest_safe_inner = NULL;
    1378             :     List       *all_pathkeys;
    1379             :     ListCell   *l;
    1380             : 
    1381             :     /* Nothing to do if there are no available mergejoin clauses */
    1382      514284 :     if (extra->mergeclause_list == NIL)
    1383      103750 :         return;
    1384             : 
    1385             :     /*
    1386             :      * We only consider the cheapest-total-cost input paths, since we are
    1387             :      * assuming here that a sort is required.  We will consider
    1388             :      * cheapest-startup-cost input paths later, and only if they don't need a
    1389             :      * sort.
    1390             :      *
    1391             :      * This function intentionally does not consider parameterized input
    1392             :      * paths, except when the cheapest-total is parameterized.  If we did so,
    1393             :      * we'd have a combinatorial explosion of mergejoin paths of dubious
    1394             :      * value.  This interacts with decisions elsewhere that also discriminate
    1395             :      * against mergejoins with parameterized inputs; see comments in
    1396             :      * src/backend/optimizer/README.
    1397             :      */
    1398      410534 :     outer_path = outerrel->cheapest_total_path;
    1399      410534 :     inner_path = innerrel->cheapest_total_path;
    1400             : 
    1401             :     /*
    1402             :      * If either cheapest-total path is parameterized by the other rel, we
    1403             :      * can't use a mergejoin.  (There's no use looking for alternative input
    1404             :      * paths, since these should already be the least-parameterized available
    1405             :      * paths.)
    1406             :      */
    1407      410534 :     if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
    1408      409846 :         PATH_PARAM_BY_REL(inner_path, outerrel))
    1409        1388 :         return;
    1410             : 
    1411             :     /*
    1412             :      * If unique-ification is requested, do it and then handle as a plain
    1413             :      * inner join.
    1414             :      */
    1415      409146 :     if (jointype == JOIN_UNIQUE_OUTER)
    1416             :     {
    1417        2662 :         outer_path = (Path *) create_unique_path(root, outerrel,
    1418             :                                                  outer_path, extra->sjinfo);
    1419             :         Assert(outer_path);
    1420        2662 :         jointype = JOIN_INNER;
    1421             :     }
    1422      406484 :     else if (jointype == JOIN_UNIQUE_INNER)
    1423             :     {
    1424        2662 :         inner_path = (Path *) create_unique_path(root, innerrel,
    1425             :                                                  inner_path, extra->sjinfo);
    1426             :         Assert(inner_path);
    1427        2662 :         jointype = JOIN_INNER;
    1428             :     }
    1429             : 
    1430             :     /*
    1431             :      * If the joinrel is parallel-safe, we may be able to consider a partial
    1432             :      * merge join.  However, we can't handle JOIN_UNIQUE_OUTER, because the
    1433             :      * outer path will be partial, and therefore we won't be able to properly
    1434             :      * guarantee uniqueness.  Similarly, we can't handle JOIN_FULL, JOIN_RIGHT
    1435             :      * and JOIN_RIGHT_ANTI, because they can produce false null extended rows.
    1436             :      * Also, the resulting path must not be parameterized.
    1437             :      */
    1438      409146 :     if (joinrel->consider_parallel &&
    1439      361864 :         save_jointype != JOIN_UNIQUE_OUTER &&
    1440      359520 :         save_jointype != JOIN_FULL &&
    1441      294962 :         save_jointype != JOIN_RIGHT &&
    1442      293664 :         save_jointype != JOIN_RIGHT_ANTI &&
    1443      293664 :         outerrel->partial_pathlist != NIL &&
    1444        9176 :         bms_is_empty(joinrel->lateral_relids))
    1445             :     {
    1446        9176 :         cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
    1447             : 
    1448        9176 :         if (inner_path->parallel_safe)
    1449        9080 :             cheapest_safe_inner = inner_path;
    1450          96 :         else if (save_jointype != JOIN_UNIQUE_INNER)
    1451             :             cheapest_safe_inner =
    1452          90 :                 get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
    1453             :     }
    1454             : 
    1455             :     /*
    1456             :      * Each possible ordering of the available mergejoin clauses will generate
    1457             :      * a differently-sorted result path at essentially the same cost.  We have
    1458             :      * no basis for choosing one over another at this level of joining, but
    1459             :      * some sort orders may be more useful than others for higher-level
    1460             :      * mergejoins, so it's worth considering multiple orderings.
    1461             :      *
    1462             :      * Actually, it's not quite true that every mergeclause ordering will
    1463             :      * generate a different path order, because some of the clauses may be
    1464             :      * partially redundant (refer to the same EquivalenceClasses).  Therefore,
    1465             :      * what we do is convert the mergeclause list to a list of canonical
    1466             :      * pathkeys, and then consider different orderings of the pathkeys.
    1467             :      *
    1468             :      * Generating a path for *every* permutation of the pathkeys doesn't seem
    1469             :      * like a winning strategy; the cost in planning time is too high. For
    1470             :      * now, we generate one path for each pathkey, listing that pathkey first
    1471             :      * and the rest in random order.  This should allow at least a one-clause
    1472             :      * mergejoin without re-sorting against any other possible mergejoin
    1473             :      * partner path.  But if we've not guessed the right ordering of secondary
    1474             :      * keys, we may end up evaluating clauses as qpquals when they could have
    1475             :      * been done as mergeclauses.  (In practice, it's rare that there's more
    1476             :      * than two or three mergeclauses, so expending a huge amount of thought
    1477             :      * on that is probably not worth it.)
    1478             :      *
    1479             :      * The pathkey order returned by select_outer_pathkeys_for_merge() has
    1480             :      * some heuristics behind it (see that function), so be sure to try it
    1481             :      * exactly as-is as well as making variants.
    1482             :      */
    1483      409146 :     all_pathkeys = select_outer_pathkeys_for_merge(root,
    1484             :                                                    extra->mergeclause_list,
    1485             :                                                    joinrel);
    1486             : 
    1487      857304 :     foreach(l, all_pathkeys)
    1488             :     {
    1489      448158 :         PathKey    *front_pathkey = (PathKey *) lfirst(l);
    1490             :         List       *cur_mergeclauses;
    1491             :         List       *outerkeys;
    1492             :         List       *innerkeys;
    1493             :         List       *merge_pathkeys;
    1494             : 
    1495             :         /* Make a pathkey list with this guy first */
    1496      448158 :         if (l != list_head(all_pathkeys))
    1497       39012 :             outerkeys = lcons(front_pathkey,
    1498             :                               list_delete_nth_cell(list_copy(all_pathkeys),
    1499             :                                                    foreach_current_index(l)));
    1500             :         else
    1501      409146 :             outerkeys = all_pathkeys;   /* no work at first one... */
    1502             : 
    1503             :         /* Sort the mergeclauses into the corresponding ordering */
    1504             :         cur_mergeclauses =
    1505      448158 :             find_mergeclauses_for_outer_pathkeys(root,
    1506             :                                                  outerkeys,
    1507             :                                                  extra->mergeclause_list);
    1508             : 
    1509             :         /* Should have used them all... */
    1510             :         Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
    1511             : 
    1512             :         /* Build sort pathkeys for the inner side */
    1513      448158 :         innerkeys = make_inner_pathkeys_for_merge(root,
    1514             :                                                   cur_mergeclauses,
    1515             :                                                   outerkeys);
    1516             : 
    1517             :         /* Build pathkeys representing output sort order */
    1518      448158 :         merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
    1519             :                                              outerkeys);
    1520             : 
    1521             :         /*
    1522             :          * And now we can make the path.
    1523             :          *
    1524             :          * Note: it's possible that the cheapest paths will already be sorted
    1525             :          * properly.  try_mergejoin_path will detect that case and suppress an
    1526             :          * explicit sort step, so we needn't do so here.
    1527             :          */
    1528      448158 :         try_mergejoin_path(root,
    1529             :                            joinrel,
    1530             :                            outer_path,
    1531             :                            inner_path,
    1532             :                            merge_pathkeys,
    1533             :                            cur_mergeclauses,
    1534             :                            outerkeys,
    1535             :                            innerkeys,
    1536             :                            jointype,
    1537             :                            extra,
    1538             :                            false);
    1539             : 
    1540             :         /*
    1541             :          * If we have partial outer and parallel safe inner path then try
    1542             :          * partial mergejoin path.
    1543             :          */
    1544      448158 :         if (cheapest_partial_outer && cheapest_safe_inner)
    1545       12720 :             try_partial_mergejoin_path(root,
    1546             :                                        joinrel,
    1547             :                                        cheapest_partial_outer,
    1548             :                                        cheapest_safe_inner,
    1549             :                                        merge_pathkeys,
    1550             :                                        cur_mergeclauses,
    1551             :                                        outerkeys,
    1552             :                                        innerkeys,
    1553             :                                        jointype,
    1554             :                                        extra);
    1555             :     }
    1556             : }
    1557             : 
    1558             : /*
    1559             :  * generate_mergejoin_paths
    1560             :  *  Creates possible mergejoin paths for input outerpath.
    1561             :  *
    1562             :  * We generate mergejoins if mergejoin clauses are available.  We have
    1563             :  * two ways to generate the inner path for a mergejoin: sort the cheapest
    1564             :  * inner path, or use an inner path that is already suitably ordered for the
    1565             :  * merge.  If we have several mergeclauses, it could be that there is no inner
    1566             :  * path (or only a very expensive one) for the full list of mergeclauses, but
    1567             :  * better paths exist if we truncate the mergeclause list (thereby discarding
    1568             :  * some sort key requirements).  So, we consider truncations of the
    1569             :  * mergeclause list as well as the full list.  (Ideally we'd consider all
    1570             :  * subsets of the mergeclause list, but that seems way too expensive.)
    1571             :  */
    1572             : static void
    1573      966584 : generate_mergejoin_paths(PlannerInfo *root,
    1574             :                          RelOptInfo *joinrel,
    1575             :                          RelOptInfo *innerrel,
    1576             :                          Path *outerpath,
    1577             :                          JoinType jointype,
    1578             :                          JoinPathExtraData *extra,
    1579             :                          bool useallclauses,
    1580             :                          Path *inner_cheapest_total,
    1581             :                          List *merge_pathkeys,
    1582             :                          bool is_partial)
    1583             : {
    1584             :     List       *mergeclauses;
    1585             :     List       *innersortkeys;
    1586             :     List       *trialsortkeys;
    1587             :     Path       *cheapest_startup_inner;
    1588             :     Path       *cheapest_total_inner;
    1589      966584 :     JoinType    save_jointype = jointype;
    1590             :     int         num_sortkeys;
    1591             :     int         sortkeycnt;
    1592             : 
    1593      966584 :     if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
    1594        8748 :         jointype = JOIN_INNER;
    1595             : 
    1596             :     /* Look for useful mergeclauses (if any) */
    1597             :     mergeclauses =
    1598      966584 :         find_mergeclauses_for_outer_pathkeys(root,
    1599             :                                              outerpath->pathkeys,
    1600             :                                              extra->mergeclause_list);
    1601             : 
    1602             :     /*
    1603             :      * Done with this outer path if no chance for a mergejoin.
    1604             :      *
    1605             :      * Special corner case: for "x FULL JOIN y ON true", there will be no join
    1606             :      * clauses at all.  Ordinarily we'd generate a clauseless nestloop path,
    1607             :      * but since mergejoin is our only join type that supports FULL JOIN
    1608             :      * without any join clauses, it's necessary to generate a clauseless
    1609             :      * mergejoin path instead.
    1610             :      */
    1611      966584 :     if (mergeclauses == NIL)
    1612             :     {
    1613      649078 :         if (jointype == JOIN_FULL)
    1614             :              /* okay to try for mergejoin */ ;
    1615             :         else
    1616      645718 :             return;
    1617             :     }
    1618      391604 :     if (useallclauses &&
    1619       70738 :         list_length(mergeclauses) != list_length(extra->mergeclause_list))
    1620        9484 :         return;
    1621             : 
    1622             :     /* Compute the required ordering of the inner path */
    1623      311382 :     innersortkeys = make_inner_pathkeys_for_merge(root,
    1624             :                                                   mergeclauses,
    1625             :                                                   outerpath->pathkeys);
    1626             : 
    1627             :     /*
    1628             :      * Generate a mergejoin on the basis of sorting the cheapest inner. Since
    1629             :      * a sort will be needed, only cheapest total cost matters. (But
    1630             :      * try_mergejoin_path will do the right thing if inner_cheapest_total is
    1631             :      * already correctly sorted.)
    1632             :      */
    1633      311382 :     try_mergejoin_path(root,
    1634             :                        joinrel,
    1635             :                        outerpath,
    1636             :                        inner_cheapest_total,
    1637             :                        merge_pathkeys,
    1638             :                        mergeclauses,
    1639             :                        NIL,
    1640             :                        innersortkeys,
    1641             :                        jointype,
    1642             :                        extra,
    1643             :                        is_partial);
    1644             : 
    1645             :     /* Can't do anything else if inner path needs to be unique'd */
    1646      311382 :     if (save_jointype == JOIN_UNIQUE_INNER)
    1647        1798 :         return;
    1648             : 
    1649             :     /*
    1650             :      * Look for presorted inner paths that satisfy the innersortkey list ---
    1651             :      * or any truncation thereof, if we are allowed to build a mergejoin using
    1652             :      * a subset of the merge clauses.  Here, we consider both cheap startup
    1653             :      * cost and cheap total cost.
    1654             :      *
    1655             :      * Currently we do not consider parameterized inner paths here. This
    1656             :      * interacts with decisions elsewhere that also discriminate against
    1657             :      * mergejoins with parameterized inputs; see comments in
    1658             :      * src/backend/optimizer/README.
    1659             :      *
    1660             :      * As we shorten the sortkey list, we should consider only paths that are
    1661             :      * strictly cheaper than (in particular, not the same as) any path found
    1662             :      * in an earlier iteration.  Otherwise we'd be intentionally using fewer
    1663             :      * merge keys than a given path allows (treating the rest as plain
    1664             :      * joinquals), which is unlikely to be a good idea.  Also, eliminating
    1665             :      * paths here on the basis of compare_path_costs is a lot cheaper than
    1666             :      * building the mergejoin path only to throw it away.
    1667             :      *
    1668             :      * If inner_cheapest_total is well enough sorted to have not required a
    1669             :      * sort in the path made above, we shouldn't make a duplicate path with
    1670             :      * it, either.  We handle that case with the same logic that handles the
    1671             :      * previous consideration, by initializing the variables that track
    1672             :      * cheapest-so-far properly.  Note that we do NOT reject
    1673             :      * inner_cheapest_total if we find it matches some shorter set of
    1674             :      * pathkeys.  That case corresponds to using fewer mergekeys to avoid
    1675             :      * sorting inner_cheapest_total, whereas we did sort it above, so the
    1676             :      * plans being considered are different.
    1677             :      */
    1678      309584 :     if (pathkeys_contained_in(innersortkeys,
    1679             :                               inner_cheapest_total->pathkeys))
    1680             :     {
    1681             :         /* inner_cheapest_total didn't require a sort */
    1682       15512 :         cheapest_startup_inner = inner_cheapest_total;
    1683       15512 :         cheapest_total_inner = inner_cheapest_total;
    1684             :     }
    1685             :     else
    1686             :     {
    1687             :         /* it did require a sort, at least for the full set of keys */
    1688      294072 :         cheapest_startup_inner = NULL;
    1689      294072 :         cheapest_total_inner = NULL;
    1690             :     }
    1691      309584 :     num_sortkeys = list_length(innersortkeys);
    1692      309584 :     if (num_sortkeys > 1 && !useallclauses)
    1693        8952 :         trialsortkeys = list_copy(innersortkeys);   /* need modifiable copy */
    1694             :     else
    1695      300632 :         trialsortkeys = innersortkeys;  /* won't really truncate */
    1696             : 
    1697      567002 :     for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
    1698             :     {
    1699             :         Path       *innerpath;
    1700      318528 :         List       *newclauses = NIL;
    1701             : 
    1702             :         /*
    1703             :          * Look for an inner path ordered well enough for the first
    1704             :          * 'sortkeycnt' innersortkeys.  NB: trialsortkeys list is modified
    1705             :          * destructively, which is why we made a copy...
    1706             :          */
    1707      318528 :         trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
    1708      318528 :         innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
    1709             :                                                    trialsortkeys,
    1710             :                                                    NULL,
    1711             :                                                    TOTAL_COST,
    1712             :                                                    is_partial);
    1713      318528 :         if (innerpath != NULL &&
    1714       20506 :             (cheapest_total_inner == NULL ||
    1715       20506 :              compare_path_costs(innerpath, cheapest_total_inner,
    1716             :                                 TOTAL_COST) < 0))
    1717             :         {
    1718             :             /* Found a cheap (or even-cheaper) sorted path */
    1719             :             /* Select the right mergeclauses, if we didn't already */
    1720      180180 :             if (sortkeycnt < num_sortkeys)
    1721             :             {
    1722             :                 newclauses =
    1723        2612 :                     trim_mergeclauses_for_inner_pathkeys(root,
    1724             :                                                          mergeclauses,
    1725             :                                                          trialsortkeys);
    1726             :                 Assert(newclauses != NIL);
    1727             :             }
    1728             :             else
    1729      177568 :                 newclauses = mergeclauses;
    1730      180180 :             try_mergejoin_path(root,
    1731             :                                joinrel,
    1732             :                                outerpath,
    1733             :                                innerpath,
    1734             :                                merge_pathkeys,
    1735             :                                newclauses,
    1736             :                                NIL,
    1737             :                                NIL,
    1738             :                                jointype,
    1739             :                                extra,
    1740             :                                is_partial);
    1741      180180 :             cheapest_total_inner = innerpath;
    1742             :         }
    1743             :         /* Same on the basis of cheapest startup cost ... */
    1744      318528 :         innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
    1745             :                                                    trialsortkeys,
    1746             :                                                    NULL,
    1747             :                                                    STARTUP_COST,
    1748             :                                                    is_partial);
    1749      318528 :         if (innerpath != NULL &&
    1750       20506 :             (cheapest_startup_inner == NULL ||
    1751       20506 :              compare_path_costs(innerpath, cheapest_startup_inner,
    1752             :                                 STARTUP_COST) < 0))
    1753             :         {
    1754             :             /* Found a cheap (or even-cheaper) sorted path */
    1755      178818 :             if (innerpath != cheapest_total_inner)
    1756             :             {
    1757             :                 /*
    1758             :                  * Avoid rebuilding clause list if we already made one; saves
    1759             :                  * memory in big join trees...
    1760             :                  */
    1761        1102 :                 if (newclauses == NIL)
    1762             :                 {
    1763          18 :                     if (sortkeycnt < num_sortkeys)
    1764             :                     {
    1765             :                         newclauses =
    1766           0 :                             trim_mergeclauses_for_inner_pathkeys(root,
    1767             :                                                                  mergeclauses,
    1768             :                                                                  trialsortkeys);
    1769             :                         Assert(newclauses != NIL);
    1770             :                     }
    1771             :                     else
    1772          18 :                         newclauses = mergeclauses;
    1773             :                 }
    1774        1102 :                 try_mergejoin_path(root,
    1775             :                                    joinrel,
    1776             :                                    outerpath,
    1777             :                                    innerpath,
    1778             :                                    merge_pathkeys,
    1779             :                                    newclauses,
    1780             :                                    NIL,
    1781             :                                    NIL,
    1782             :                                    jointype,
    1783             :                                    extra,
    1784             :                                    is_partial);
    1785             :             }
    1786      178818 :             cheapest_startup_inner = innerpath;
    1787             :         }
    1788             : 
    1789             :         /*
    1790             :          * Don't consider truncated sortkeys if we need all clauses.
    1791             :          */
    1792      318528 :         if (useallclauses)
    1793       61110 :             break;
    1794             :     }
    1795             : }
    1796             : 
    1797             : /*
    1798             :  * match_unsorted_outer
    1799             :  *    Creates possible join paths for processing a single join relation
    1800             :  *    'joinrel' by employing either iterative substitution or
    1801             :  *    mergejoining on each of its possible outer paths (considering
    1802             :  *    only outer paths that are already ordered well enough for merging).
    1803             :  *
    1804             :  * We always generate a nestloop path for each available outer path.
    1805             :  * In fact we may generate as many as five: one on the cheapest-total-cost
    1806             :  * inner path, one on the same with materialization, one on the
    1807             :  * cheapest-startup-cost inner path (if different), one on the
    1808             :  * cheapest-total inner-indexscan path (if any), and one on the
    1809             :  * cheapest-startup inner-indexscan path (if different).
    1810             :  *
    1811             :  * We also consider mergejoins if mergejoin clauses are available.  See
    1812             :  * detailed comments in generate_mergejoin_paths.
    1813             :  *
    1814             :  * 'joinrel' is the join relation
    1815             :  * 'outerrel' is the outer join relation
    1816             :  * 'innerrel' is the inner join relation
    1817             :  * 'jointype' is the type of join to do
    1818             :  * 'extra' contains additional input values
    1819             :  */
    1820             : static void
    1821      514284 : match_unsorted_outer(PlannerInfo *root,
    1822             :                      RelOptInfo *joinrel,
    1823             :                      RelOptInfo *outerrel,
    1824             :                      RelOptInfo *innerrel,
    1825             :                      JoinType jointype,
    1826             :                      JoinPathExtraData *extra)
    1827             : {
    1828      514284 :     JoinType    save_jointype = jointype;
    1829             :     bool        nestjoinOK;
    1830             :     bool        useallclauses;
    1831      514284 :     Path       *inner_cheapest_total = innerrel->cheapest_total_path;
    1832      514284 :     Path       *matpath = NULL;
    1833             :     ListCell   *lc1;
    1834             : 
    1835             :     /*
    1836             :      * For now we do not support RIGHT_SEMI join in mergejoin or nestloop
    1837             :      * join.
    1838             :      */
    1839      514284 :     if (jointype == JOIN_RIGHT_SEMI)
    1840           0 :         return;
    1841             : 
    1842             :     /*
    1843             :      * Nestloop only supports inner, left, semi, and anti joins.  Also, if we
    1844             :      * are doing a right, right-anti or full mergejoin, we must use *all* the
    1845             :      * mergeclauses as join clauses, else we will not have a valid plan.
    1846             :      * (Although these two flags are currently inverses, keep them separate
    1847             :      * for clarity and possible future changes.)
    1848             :      */
    1849      514284 :     switch (jointype)
    1850             :     {
    1851      428686 :         case JOIN_INNER:
    1852             :         case JOIN_LEFT:
    1853             :         case JOIN_SEMI:
    1854             :         case JOIN_ANTI:
    1855      428686 :             nestjoinOK = true;
    1856      428686 :             useallclauses = false;
    1857      428686 :             break;
    1858       77082 :         case JOIN_RIGHT:
    1859             :         case JOIN_RIGHT_ANTI:
    1860             :         case JOIN_FULL:
    1861       77082 :             nestjoinOK = false;
    1862       77082 :             useallclauses = true;
    1863       77082 :             break;
    1864        8516 :         case JOIN_UNIQUE_OUTER:
    1865             :         case JOIN_UNIQUE_INNER:
    1866        8516 :             jointype = JOIN_INNER;
    1867        8516 :             nestjoinOK = true;
    1868        8516 :             useallclauses = false;
    1869        8516 :             break;
    1870           0 :         default:
    1871           0 :             elog(ERROR, "unrecognized join type: %d",
    1872             :                  (int) jointype);
    1873             :             nestjoinOK = false; /* keep compiler quiet */
    1874             :             useallclauses = false;
    1875             :             break;
    1876             :     }
    1877             : 
    1878             :     /*
    1879             :      * If inner_cheapest_total is parameterized by the outer rel, ignore it;
    1880             :      * we will consider it below as a member of cheapest_parameterized_paths,
    1881             :      * but the other possibilities considered in this routine aren't usable.
    1882             :      */
    1883      514284 :     if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
    1884       11402 :         inner_cheapest_total = NULL;
    1885             : 
    1886             :     /*
    1887             :      * If we need to unique-ify the inner path, we will consider only the
    1888             :      * cheapest-total inner.
    1889             :      */
    1890      514284 :     if (save_jointype == JOIN_UNIQUE_INNER)
    1891             :     {
    1892             :         /* No way to do this with an inner path parameterized by outer rel */
    1893        4258 :         if (inner_cheapest_total == NULL)
    1894          12 :             return;
    1895             :         inner_cheapest_total = (Path *)
    1896        4246 :             create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
    1897             :         Assert(inner_cheapest_total);
    1898             :     }
    1899      510026 :     else if (nestjoinOK)
    1900             :     {
    1901             :         /*
    1902             :          * Consider materializing the cheapest inner path, unless
    1903             :          * enable_material is off or the path in question materializes its
    1904             :          * output anyway.
    1905             :          */
    1906      432944 :         if (enable_material && inner_cheapest_total != NULL &&
    1907      421120 :             !ExecMaterializesOutput(inner_cheapest_total->pathtype))
    1908             :             matpath = (Path *)
    1909      401448 :                 create_material_path(innerrel, inner_cheapest_total);
    1910             :     }
    1911             : 
    1912     1681540 :     foreach(lc1, outerrel->pathlist)
    1913             :     {
    1914     1167268 :         Path       *outerpath = (Path *) lfirst(lc1);
    1915             :         List       *merge_pathkeys;
    1916             : 
    1917             :         /*
    1918             :          * We cannot use an outer path that is parameterized by the inner rel.
    1919             :          */
    1920     1167268 :         if (PATH_PARAM_BY_REL(outerpath, innerrel))
    1921      197420 :             continue;
    1922             : 
    1923             :         /*
    1924             :          * If we need to unique-ify the outer path, it's pointless to consider
    1925             :          * any but the cheapest outer.  (XXX we don't consider parameterized
    1926             :          * outers, nor inners, for unique-ified cases.  Should we?)
    1927             :          */
    1928      969848 :         if (save_jointype == JOIN_UNIQUE_OUTER)
    1929             :         {
    1930        4932 :             if (outerpath != outerrel->cheapest_total_path)
    1931         686 :                 continue;
    1932        4246 :             outerpath = (Path *) create_unique_path(root, outerrel,
    1933             :                                                     outerpath, extra->sjinfo);
    1934             :             Assert(outerpath);
    1935             :         }
    1936             : 
    1937             :         /*
    1938             :          * The result will have this sort order (even if it is implemented as
    1939             :          * a nestloop, and even if some of the mergeclauses are implemented by
    1940             :          * qpquals rather than as true mergeclauses):
    1941             :          */
    1942      969162 :         merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
    1943             :                                              outerpath->pathkeys);
    1944             : 
    1945      969162 :         if (save_jointype == JOIN_UNIQUE_INNER)
    1946             :         {
    1947             :             /*
    1948             :              * Consider nestloop join, but only with the unique-ified cheapest
    1949             :              * inner path
    1950             :              */
    1951        7902 :             try_nestloop_path(root,
    1952             :                               joinrel,
    1953             :                               outerpath,
    1954             :                               inner_cheapest_total,
    1955             :                               merge_pathkeys,
    1956             :                               jointype,
    1957             :                               extra);
    1958             :         }
    1959      961260 :         else if (nestjoinOK)
    1960             :         {
    1961             :             /*
    1962             :              * Consider nestloop joins using this outer path and various
    1963             :              * available paths for the inner relation.  We consider the
    1964             :              * cheapest-total paths for each available parameterization of the
    1965             :              * inner relation, including the unparameterized case.
    1966             :              */
    1967             :             ListCell   *lc2;
    1968             : 
    1969     2147316 :             foreach(lc2, innerrel->cheapest_parameterized_paths)
    1970             :             {
    1971     1330402 :                 Path       *innerpath = (Path *) lfirst(lc2);
    1972             :                 Path       *mpath;
    1973             : 
    1974     1330402 :                 try_nestloop_path(root,
    1975             :                                   joinrel,
    1976             :                                   outerpath,
    1977             :                                   innerpath,
    1978             :                                   merge_pathkeys,
    1979             :                                   jointype,
    1980             :                                   extra);
    1981             : 
    1982             :                 /*
    1983             :                  * Try generating a memoize path and see if that makes the
    1984             :                  * nested loop any cheaper.
    1985             :                  */
    1986     1330402 :                 mpath = get_memoize_path(root, innerrel, outerrel,
    1987             :                                          innerpath, outerpath, jointype,
    1988             :                                          extra);
    1989     1330402 :                 if (mpath != NULL)
    1990      230696 :                     try_nestloop_path(root,
    1991             :                                       joinrel,
    1992             :                                       outerpath,
    1993             :                                       mpath,
    1994             :                                       merge_pathkeys,
    1995             :                                       jointype,
    1996             :                                       extra);
    1997             :             }
    1998             : 
    1999             :             /* Also consider materialized form of the cheapest inner path */
    2000      816914 :             if (matpath != NULL)
    2001      766072 :                 try_nestloop_path(root,
    2002             :                                   joinrel,
    2003             :                                   outerpath,
    2004             :                                   matpath,
    2005             :                                   merge_pathkeys,
    2006             :                                   jointype,
    2007             :                                   extra);
    2008             :         }
    2009             : 
    2010             :         /* Can't do anything else if outer path needs to be unique'd */
    2011      969162 :         if (save_jointype == JOIN_UNIQUE_OUTER)
    2012        4246 :             continue;
    2013             : 
    2014             :         /* Can't do anything else if inner rel is parameterized by outer */
    2015      964916 :         if (inner_cheapest_total == NULL)
    2016       12266 :             continue;
    2017             : 
    2018             :         /* Generate merge join paths */
    2019      952650 :         generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
    2020             :                                  save_jointype, extra, useallclauses,
    2021             :                                  inner_cheapest_total, merge_pathkeys,
    2022             :                                  false);
    2023             :     }
    2024             : 
    2025             :     /*
    2026             :      * Consider partial nestloop and mergejoin plan if outerrel has any
    2027             :      * partial path and the joinrel is parallel-safe.  However, we can't
    2028             :      * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
    2029             :      * therefore we won't be able to properly guarantee uniqueness.  Nor can
    2030             :      * we handle joins needing lateral rels, since partial paths must not be
    2031             :      * parameterized. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT and
    2032             :      * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
    2033             :      */
    2034      514272 :     if (joinrel->consider_parallel &&
    2035      428834 :         save_jointype != JOIN_UNIQUE_OUTER &&
    2036      426370 :         save_jointype != JOIN_FULL &&
    2037      360344 :         save_jointype != JOIN_RIGHT &&
    2038      359046 :         save_jointype != JOIN_RIGHT_ANTI &&
    2039      359046 :         outerrel->partial_pathlist != NIL &&
    2040       10796 :         bms_is_empty(joinrel->lateral_relids))
    2041             :     {
    2042       10796 :         if (nestjoinOK)
    2043       10796 :             consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
    2044             :                                        save_jointype, extra);
    2045             : 
    2046             :         /*
    2047             :          * If inner_cheapest_total is NULL or non parallel-safe then find the
    2048             :          * cheapest total parallel safe path.  If doing JOIN_UNIQUE_INNER, we
    2049             :          * can't use any alternative inner path.
    2050             :          */
    2051       10796 :         if (inner_cheapest_total == NULL ||
    2052       10388 :             !inner_cheapest_total->parallel_safe)
    2053             :         {
    2054         780 :             if (save_jointype == JOIN_UNIQUE_INNER)
    2055           6 :                 return;
    2056             : 
    2057         774 :             inner_cheapest_total = get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
    2058             :         }
    2059             : 
    2060       10790 :         if (inner_cheapest_total)
    2061       10376 :             consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
    2062             :                                         save_jointype, extra,
    2063             :                                         inner_cheapest_total);
    2064             :     }
    2065             : }
    2066             : 
    2067             : /*
    2068             :  * consider_parallel_mergejoin
    2069             :  *    Try to build partial paths for a joinrel by joining a partial path
    2070             :  *    for the outer relation to a complete path for the inner relation.
    2071             :  *
    2072             :  * 'joinrel' is the join relation
    2073             :  * 'outerrel' is the outer join relation
    2074             :  * 'innerrel' is the inner join relation
    2075             :  * 'jointype' is the type of join to do
    2076             :  * 'extra' contains additional input values
    2077             :  * 'inner_cheapest_total' cheapest total path for innerrel
    2078             :  */
    2079             : static void
    2080       10376 : consider_parallel_mergejoin(PlannerInfo *root,
    2081             :                             RelOptInfo *joinrel,
    2082             :                             RelOptInfo *outerrel,
    2083             :                             RelOptInfo *innerrel,
    2084             :                             JoinType jointype,
    2085             :                             JoinPathExtraData *extra,
    2086             :                             Path *inner_cheapest_total)
    2087             : {
    2088             :     ListCell   *lc1;
    2089             : 
    2090             :     /* generate merge join path for each partial outer path */
    2091       24310 :     foreach(lc1, outerrel->partial_pathlist)
    2092             :     {
    2093       13934 :         Path       *outerpath = (Path *) lfirst(lc1);
    2094             :         List       *merge_pathkeys;
    2095             : 
    2096             :         /*
    2097             :          * Figure out what useful ordering any paths we create will have.
    2098             :          */
    2099       13934 :         merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
    2100             :                                              outerpath->pathkeys);
    2101             : 
    2102       13934 :         generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
    2103             :                                  extra, false, inner_cheapest_total,
    2104             :                                  merge_pathkeys, true);
    2105             :     }
    2106       10376 : }
    2107             : 
    2108             : /*
    2109             :  * consider_parallel_nestloop
    2110             :  *    Try to build partial paths for a joinrel by joining a partial path for the
    2111             :  *    outer relation to a complete path for the inner relation.
    2112             :  *
    2113             :  * 'joinrel' is the join relation
    2114             :  * 'outerrel' is the outer join relation
    2115             :  * 'innerrel' is the inner join relation
    2116             :  * 'jointype' is the type of join to do
    2117             :  * 'extra' contains additional input values
    2118             :  */
    2119             : static void
    2120       10796 : consider_parallel_nestloop(PlannerInfo *root,
    2121             :                            RelOptInfo *joinrel,
    2122             :                            RelOptInfo *outerrel,
    2123             :                            RelOptInfo *innerrel,
    2124             :                            JoinType jointype,
    2125             :                            JoinPathExtraData *extra)
    2126             : {
    2127       10796 :     JoinType    save_jointype = jointype;
    2128       10796 :     Path       *inner_cheapest_total = innerrel->cheapest_total_path;
    2129       10796 :     Path       *matpath = NULL;
    2130             :     ListCell   *lc1;
    2131             : 
    2132       10796 :     if (jointype == JOIN_UNIQUE_INNER)
    2133         564 :         jointype = JOIN_INNER;
    2134             : 
    2135             :     /*
    2136             :      * Consider materializing the cheapest inner path, unless: 1) we're doing
    2137             :      * JOIN_UNIQUE_INNER, because in this case we have to unique-ify the
    2138             :      * cheapest inner path, 2) enable_material is off, 3) the cheapest inner
    2139             :      * path is not parallel-safe, 4) the cheapest inner path is parameterized
    2140             :      * by the outer rel, or 5) the cheapest inner path materializes its output
    2141             :      * anyway.
    2142             :      */
    2143       10796 :     if (save_jointype != JOIN_UNIQUE_INNER &&
    2144       10028 :         enable_material && inner_cheapest_total->parallel_safe &&
    2145        9836 :         !PATH_PARAM_BY_REL(inner_cheapest_total, outerrel) &&
    2146        9446 :         !ExecMaterializesOutput(inner_cheapest_total->pathtype))
    2147             :     {
    2148             :         matpath = (Path *)
    2149        9350 :             create_material_path(innerrel, inner_cheapest_total);
    2150             :         Assert(matpath->parallel_safe);
    2151             :     }
    2152             : 
    2153       25288 :     foreach(lc1, outerrel->partial_pathlist)
    2154             :     {
    2155       14492 :         Path       *outerpath = (Path *) lfirst(lc1);
    2156             :         List       *pathkeys;
    2157             :         ListCell   *lc2;
    2158             : 
    2159             :         /* Figure out what useful ordering any paths we create will have. */
    2160       14492 :         pathkeys = build_join_pathkeys(root, joinrel, jointype,
    2161             :                                        outerpath->pathkeys);
    2162             : 
    2163             :         /*
    2164             :          * Try the cheapest parameterized paths; only those which will produce
    2165             :          * an unparameterized path when joined to this outerrel will survive
    2166             :          * try_partial_nestloop_path.  The cheapest unparameterized path is
    2167             :          * also in this list.
    2168             :          */
    2169       37622 :         foreach(lc2, innerrel->cheapest_parameterized_paths)
    2170             :         {
    2171       23130 :             Path       *innerpath = (Path *) lfirst(lc2);
    2172             :             Path       *mpath;
    2173             : 
    2174             :             /* Can't join to an inner path that is not parallel-safe */
    2175       23130 :             if (!innerpath->parallel_safe)
    2176         426 :                 continue;
    2177             : 
    2178             :             /*
    2179             :              * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
    2180             :              * cheapest_total_path, and we have to unique-ify it.  (We might
    2181             :              * be able to relax this to allow other safe, unparameterized
    2182             :              * inner paths, but right now create_unique_path is not on board
    2183             :              * with that.)
    2184             :              */
    2185       22704 :             if (save_jointype == JOIN_UNIQUE_INNER)
    2186             :             {
    2187        1722 :                 if (innerpath != innerrel->cheapest_total_path)
    2188         876 :                     continue;
    2189         846 :                 innerpath = (Path *) create_unique_path(root, innerrel,
    2190             :                                                         innerpath,
    2191             :                                                         extra->sjinfo);
    2192             :                 Assert(innerpath);
    2193             :             }
    2194             : 
    2195       21828 :             try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
    2196             :                                       pathkeys, jointype, extra);
    2197             : 
    2198             :             /*
    2199             :              * Try generating a memoize path and see if that makes the nested
    2200             :              * loop any cheaper.
    2201             :              */
    2202       21828 :             mpath = get_memoize_path(root, innerrel, outerrel,
    2203             :                                      innerpath, outerpath, jointype,
    2204             :                                      extra);
    2205       21828 :             if (mpath != NULL)
    2206        5408 :                 try_partial_nestloop_path(root, joinrel, outerpath, mpath,
    2207             :                                           pathkeys, jointype, extra);
    2208             :         }
    2209             : 
    2210             :         /* Also consider materialized form of the cheapest inner path */
    2211       14492 :         if (matpath != NULL)
    2212       12596 :             try_partial_nestloop_path(root, joinrel, outerpath, matpath,
    2213             :                                       pathkeys, jointype, extra);
    2214             :     }
    2215       10796 : }
    2216             : 
    2217             : /*
    2218             :  * hash_inner_and_outer
    2219             :  *    Create hashjoin join paths by explicitly hashing both the outer and
    2220             :  *    inner keys of each available hash clause.
    2221             :  *
    2222             :  * 'joinrel' is the join relation
    2223             :  * 'outerrel' is the outer join relation
    2224             :  * 'innerrel' is the inner join relation
    2225             :  * 'jointype' is the type of join to do
    2226             :  * 'extra' contains additional input values
    2227             :  */
    2228             : static void
    2229      522548 : hash_inner_and_outer(PlannerInfo *root,
    2230             :                      RelOptInfo *joinrel,
    2231             :                      RelOptInfo *outerrel,
    2232             :                      RelOptInfo *innerrel,
    2233             :                      JoinType jointype,
    2234             :                      JoinPathExtraData *extra)
    2235             : {
    2236      522548 :     JoinType    save_jointype = jointype;
    2237      522548 :     bool        isouterjoin = IS_OUTER_JOIN(jointype);
    2238             :     List       *hashclauses;
    2239             :     ListCell   *l;
    2240             : 
    2241             :     /*
    2242             :      * We need to build only one hashclauses list for any given pair of outer
    2243             :      * and inner relations; all of the hashable clauses will be used as keys.
    2244             :      *
    2245             :      * Scan the join's restrictinfo list to find hashjoinable clauses that are
    2246             :      * usable with this pair of sub-relations.
    2247             :      */
    2248      522548 :     hashclauses = NIL;
    2249     1064268 :     foreach(l, extra->restrictlist)
    2250             :     {
    2251      541720 :         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
    2252             : 
    2253             :         /*
    2254             :          * If processing an outer join, only use its own join clauses for
    2255             :          * hashing.  For inner joins we need not be so picky.
    2256             :          */
    2257      541720 :         if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
    2258        9092 :             continue;
    2259             : 
    2260      532628 :         if (!restrictinfo->can_join ||
    2261      474276 :             restrictinfo->hashjoinoperator == InvalidOid)
    2262       74952 :             continue;           /* not hashjoinable */
    2263             : 
    2264             :         /*
    2265             :          * Check if clause has the form "outer op inner" or "inner op outer".
    2266             :          */
    2267      457676 :         if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
    2268         144 :             continue;           /* no good for these input relations */
    2269             : 
    2270             :         /*
    2271             :          * If clause has the form "inner op outer", check if its operator has
    2272             :          * valid commutator.  This is necessary because hashclauses in this
    2273             :          * form will get commuted in createplan.c to put the outer var on the
    2274             :          * left (see get_switched_clauses).  This probably shouldn't ever
    2275             :          * fail, since hashable operators ought to have commutators, but be
    2276             :          * paranoid.
    2277             :          *
    2278             :          * The clause being hashjoinable indicates that it's an OpExpr.
    2279             :          */
    2280      686298 :         if (!restrictinfo->outer_is_left &&
    2281      228766 :             !OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
    2282           6 :             continue;
    2283             : 
    2284      457526 :         hashclauses = lappend(hashclauses, restrictinfo);
    2285             :     }
    2286             : 
    2287             :     /* If we found any usable hashclauses, make paths */
    2288      522548 :     if (hashclauses)
    2289             :     {
    2290             :         /*
    2291             :          * We consider both the cheapest-total-cost and cheapest-startup-cost
    2292             :          * outer paths.  There's no need to consider any but the
    2293             :          * cheapest-total-cost inner path, however.
    2294             :          */
    2295      417738 :         Path       *cheapest_startup_outer = outerrel->cheapest_startup_path;
    2296      417738 :         Path       *cheapest_total_outer = outerrel->cheapest_total_path;
    2297      417738 :         Path       *cheapest_total_inner = innerrel->cheapest_total_path;
    2298             : 
    2299             :         /*
    2300             :          * If either cheapest-total path is parameterized by the other rel, we
    2301             :          * can't use a hashjoin.  (There's no use looking for alternative
    2302             :          * input paths, since these should already be the least-parameterized
    2303             :          * available paths.)
    2304             :          */
    2305      417738 :         if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
    2306      417062 :             PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
    2307        1352 :             return;
    2308             : 
    2309             :         /* Unique-ify if need be; we ignore parameterized possibilities */
    2310      416386 :         if (jointype == JOIN_UNIQUE_OUTER)
    2311             :         {
    2312             :             cheapest_total_outer = (Path *)
    2313        2560 :                 create_unique_path(root, outerrel,
    2314             :                                    cheapest_total_outer, extra->sjinfo);
    2315             :             Assert(cheapest_total_outer);
    2316        2560 :             jointype = JOIN_INNER;
    2317        2560 :             try_hashjoin_path(root,
    2318             :                               joinrel,
    2319             :                               cheapest_total_outer,
    2320             :                               cheapest_total_inner,
    2321             :                               hashclauses,
    2322             :                               jointype,
    2323             :                               extra);
    2324             :             /* no possibility of cheap startup here */
    2325             :         }
    2326      413826 :         else if (jointype == JOIN_UNIQUE_INNER)
    2327             :         {
    2328             :             cheapest_total_inner = (Path *)
    2329        2560 :                 create_unique_path(root, innerrel,
    2330             :                                    cheapest_total_inner, extra->sjinfo);
    2331             :             Assert(cheapest_total_inner);
    2332        2560 :             jointype = JOIN_INNER;
    2333        2560 :             try_hashjoin_path(root,
    2334             :                               joinrel,
    2335             :                               cheapest_total_outer,
    2336             :                               cheapest_total_inner,
    2337             :                               hashclauses,
    2338             :                               jointype,
    2339             :                               extra);
    2340        2560 :             if (cheapest_startup_outer != NULL &&
    2341             :                 cheapest_startup_outer != cheapest_total_outer)
    2342         324 :                 try_hashjoin_path(root,
    2343             :                                   joinrel,
    2344             :                                   cheapest_startup_outer,
    2345             :                                   cheapest_total_inner,
    2346             :                                   hashclauses,
    2347             :                                   jointype,
    2348             :                                   extra);
    2349             :         }
    2350             :         else
    2351             :         {
    2352             :             /*
    2353             :              * For other jointypes, we consider the cheapest startup outer
    2354             :              * together with the cheapest total inner, and then consider
    2355             :              * pairings of cheapest-total paths including parameterized ones.
    2356             :              * There is no use in generating parameterized paths on the basis
    2357             :              * of possibly cheap startup cost, so this is sufficient.
    2358             :              */
    2359             :             ListCell   *lc1;
    2360             :             ListCell   *lc2;
    2361             : 
    2362      411266 :             if (cheapest_startup_outer != NULL)
    2363      410636 :                 try_hashjoin_path(root,
    2364             :                                   joinrel,
    2365             :                                   cheapest_startup_outer,
    2366             :                                   cheapest_total_inner,
    2367             :                                   hashclauses,
    2368             :                                   jointype,
    2369             :                                   extra);
    2370             : 
    2371     1054532 :             foreach(lc1, outerrel->cheapest_parameterized_paths)
    2372             :             {
    2373      643266 :                 Path       *outerpath = (Path *) lfirst(lc1);
    2374             : 
    2375             :                 /*
    2376             :                  * We cannot use an outer path that is parameterized by the
    2377             :                  * inner rel.
    2378             :                  */
    2379      643266 :                 if (PATH_PARAM_BY_REL(outerpath, innerrel))
    2380      189552 :                     continue;
    2381             : 
    2382     1175528 :                 foreach(lc2, innerrel->cheapest_parameterized_paths)
    2383             :                 {
    2384      721814 :                     Path       *innerpath = (Path *) lfirst(lc2);
    2385             : 
    2386             :                     /*
    2387             :                      * We cannot use an inner path that is parameterized by
    2388             :                      * the outer rel, either.
    2389             :                      */
    2390      721814 :                     if (PATH_PARAM_BY_REL(innerpath, outerrel))
    2391      214924 :                         continue;
    2392             : 
    2393      506890 :                     if (outerpath == cheapest_startup_outer &&
    2394             :                         innerpath == cheapest_total_inner)
    2395      344310 :                         continue;   /* already tried it */
    2396             : 
    2397      162580 :                     try_hashjoin_path(root,
    2398             :                                       joinrel,
    2399             :                                       outerpath,
    2400             :                                       innerpath,
    2401             :                                       hashclauses,
    2402             :                                       jointype,
    2403             :                                       extra);
    2404             :                 }
    2405             :             }
    2406             :         }
    2407             : 
    2408             :         /*
    2409             :          * If the joinrel is parallel-safe, we may be able to consider a
    2410             :          * partial hash join.  However, we can't handle JOIN_UNIQUE_OUTER,
    2411             :          * because the outer path will be partial, and therefore we won't be
    2412             :          * able to properly guarantee uniqueness.  Also, the resulting path
    2413             :          * must not be parameterized.
    2414             :          */
    2415      416386 :         if (joinrel->consider_parallel &&
    2416      369058 :             save_jointype != JOIN_UNIQUE_OUTER &&
    2417      369058 :             outerrel->partial_pathlist != NIL &&
    2418       13916 :             bms_is_empty(joinrel->lateral_relids))
    2419             :         {
    2420             :             Path       *cheapest_partial_outer;
    2421       13916 :             Path       *cheapest_partial_inner = NULL;
    2422       13916 :             Path       *cheapest_safe_inner = NULL;
    2423             : 
    2424       13916 :             cheapest_partial_outer =
    2425       13916 :                 (Path *) linitial(outerrel->partial_pathlist);
    2426             : 
    2427             :             /*
    2428             :              * Can we use a partial inner plan too, so that we can build a
    2429             :              * shared hash table in parallel?  We can't handle
    2430             :              * JOIN_UNIQUE_INNER because we can't guarantee uniqueness.
    2431             :              */
    2432       13916 :             if (innerrel->partial_pathlist != NIL &&
    2433       12540 :                 save_jointype != JOIN_UNIQUE_INNER &&
    2434             :                 enable_parallel_hash)
    2435             :             {
    2436       12264 :                 cheapest_partial_inner =
    2437       12264 :                     (Path *) linitial(innerrel->partial_pathlist);
    2438       12264 :                 try_partial_hashjoin_path(root, joinrel,
    2439             :                                           cheapest_partial_outer,
    2440             :                                           cheapest_partial_inner,
    2441             :                                           hashclauses, jointype, extra,
    2442             :                                           true /* parallel_hash */ );
    2443             :             }
    2444             : 
    2445             :             /*
    2446             :              * Normally, given that the joinrel is parallel-safe, the cheapest
    2447             :              * total inner path will also be parallel-safe, but if not, we'll
    2448             :              * have to search for the cheapest safe, unparameterized inner
    2449             :              * path.  If doing JOIN_UNIQUE_INNER, we can't use any alternative
    2450             :              * inner path.  If full, right, right-semi or right-anti join, we
    2451             :              * can't use parallelism (building the hash table in each backend)
    2452             :              * because no one process has all the match bits.
    2453             :              */
    2454       13916 :             if (save_jointype == JOIN_FULL ||
    2455       10100 :                 save_jointype == JOIN_RIGHT ||
    2456        9224 :                 save_jointype == JOIN_RIGHT_SEMI ||
    2457             :                 save_jointype == JOIN_RIGHT_ANTI)
    2458        5022 :                 cheapest_safe_inner = NULL;
    2459        8894 :             else if (cheapest_total_inner->parallel_safe)
    2460        8666 :                 cheapest_safe_inner = cheapest_total_inner;
    2461         228 :             else if (save_jointype != JOIN_UNIQUE_INNER)
    2462             :                 cheapest_safe_inner =
    2463         222 :                     get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
    2464             : 
    2465       13916 :             if (cheapest_safe_inner != NULL)
    2466        8882 :                 try_partial_hashjoin_path(root, joinrel,
    2467             :                                           cheapest_partial_outer,
    2468             :                                           cheapest_safe_inner,
    2469             :                                           hashclauses, jointype, extra,
    2470             :                                           false /* parallel_hash */ );
    2471             :         }
    2472             :     }
    2473             : }
    2474             : 
    2475             : /*
    2476             :  * select_mergejoin_clauses
    2477             :  *    Select mergejoin clauses that are usable for a particular join.
    2478             :  *    Returns a list of RestrictInfo nodes for those clauses.
    2479             :  *
    2480             :  * *mergejoin_allowed is normally set to true, but it is set to false if
    2481             :  * this is a right-semi join, or this is a right/right-anti/full join and
    2482             :  * there are nonmergejoinable join clauses.  The executor's mergejoin
    2483             :  * machinery cannot handle such cases, so we have to avoid generating a
    2484             :  * mergejoin plan.  (Note that this flag does NOT consider whether there are
    2485             :  * actually any mergejoinable clauses.  This is correct because in some
    2486             :  * cases we need to build a clauseless mergejoin.  Simply returning NIL is
    2487             :  * therefore not enough to distinguish safe from unsafe cases.)
    2488             :  *
    2489             :  * We also mark each selected RestrictInfo to show which side is currently
    2490             :  * being considered as outer.  These are transient markings that are only
    2491             :  * good for the duration of the current add_paths_to_joinrel() call!
    2492             :  *
    2493             :  * We examine each restrictinfo clause known for the join to see
    2494             :  * if it is mergejoinable and involves vars from the two sub-relations
    2495             :  * currently of interest.
    2496             :  */
    2497             : static List *
    2498      524188 : select_mergejoin_clauses(PlannerInfo *root,
    2499             :                          RelOptInfo *joinrel,
    2500             :                          RelOptInfo *outerrel,
    2501             :                          RelOptInfo *innerrel,
    2502             :                          List *restrictlist,
    2503             :                          JoinType jointype,
    2504             :                          bool *mergejoin_allowed)
    2505             : {
    2506      524188 :     List       *result_list = NIL;
    2507      524188 :     bool        isouterjoin = IS_OUTER_JOIN(jointype);
    2508      524188 :     bool        have_nonmergeable_joinclause = false;
    2509             :     ListCell   *l;
    2510             : 
    2511             :     /*
    2512             :      * For now we do not support RIGHT_SEMI join in mergejoin: the benefit of
    2513             :      * swapping inputs tends to be small here.
    2514             :      */
    2515      524188 :     if (jointype == JOIN_RIGHT_SEMI)
    2516             :     {
    2517        4146 :         *mergejoin_allowed = false;
    2518        4146 :         return NIL;
    2519             :     }
    2520             : 
    2521     1060542 :     foreach(l, restrictlist)
    2522             :     {
    2523      540500 :         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
    2524             : 
    2525             :         /*
    2526             :          * If processing an outer join, only use its own join clauses in the
    2527             :          * merge.  For inner joins we can use pushed-down clauses too. (Note:
    2528             :          * we don't set have_nonmergeable_joinclause here because pushed-down
    2529             :          * clauses will become otherquals not joinquals.)
    2530             :          */
    2531      540500 :         if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
    2532        9116 :             continue;
    2533             : 
    2534             :         /* Check that clause is a mergeable operator clause */
    2535      531384 :         if (!restrictinfo->can_join ||
    2536      473050 :             restrictinfo->mergeopfamilies == NIL)
    2537             :         {
    2538             :             /*
    2539             :              * The executor can handle extra joinquals that are constants, but
    2540             :              * not anything else, when doing right/right-anti/full merge join.
    2541             :              * (The reason to support constants is so we can do FULL JOIN ON
    2542             :              * FALSE.)
    2543             :              */
    2544       73648 :             if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
    2545       65724 :                 have_nonmergeable_joinclause = true;
    2546       73648 :             continue;           /* not mergejoinable */
    2547             :         }
    2548             : 
    2549             :         /*
    2550             :          * Check if clause has the form "outer op inner" or "inner op outer".
    2551             :          */
    2552      457736 :         if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
    2553             :         {
    2554         768 :             have_nonmergeable_joinclause = true;
    2555         768 :             continue;           /* no good for these input relations */
    2556             :         }
    2557             : 
    2558             :         /*
    2559             :          * If clause has the form "inner op outer", check if its operator has
    2560             :          * valid commutator.  This is necessary because mergejoin clauses in
    2561             :          * this form will get commuted in createplan.c to put the outer var on
    2562             :          * the left (see get_switched_clauses).  This probably shouldn't ever
    2563             :          * fail, since mergejoinable operators ought to have commutators, but
    2564             :          * be paranoid.
    2565             :          *
    2566             :          * The clause being mergejoinable indicates that it's an OpExpr.
    2567             :          */
    2568      684180 :         if (!restrictinfo->outer_is_left &&
    2569      227212 :             !OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
    2570             :         {
    2571          36 :             have_nonmergeable_joinclause = true;
    2572          36 :             continue;
    2573             :         }
    2574             : 
    2575             :         /*
    2576             :          * Insist that each side have a non-redundant eclass.  This
    2577             :          * restriction is needed because various bits of the planner expect
    2578             :          * that each clause in a merge be associable with some pathkey in a
    2579             :          * canonical pathkey list, but redundant eclasses can't appear in
    2580             :          * canonical sort orderings.  (XXX it might be worth relaxing this,
    2581             :          * but not enough time to address it for 8.3.)
    2582             :          */
    2583      456932 :         update_mergeclause_eclasses(root, restrictinfo);
    2584             : 
    2585      456932 :         if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
    2586      456884 :             EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
    2587             :         {
    2588         128 :             have_nonmergeable_joinclause = true;
    2589         128 :             continue;           /* can't handle redundant eclasses */
    2590             :         }
    2591             : 
    2592      456804 :         result_list = lappend(result_list, restrictinfo);
    2593             :     }
    2594             : 
    2595             :     /*
    2596             :      * Report whether mergejoin is allowed (see comment at top of function).
    2597             :      */
    2598      520042 :     switch (jointype)
    2599             :     {
    2600       84260 :         case JOIN_RIGHT:
    2601             :         case JOIN_RIGHT_ANTI:
    2602             :         case JOIN_FULL:
    2603       84260 :             *mergejoin_allowed = !have_nonmergeable_joinclause;
    2604       84260 :             break;
    2605      435782 :         default:
    2606      435782 :             *mergejoin_allowed = true;
    2607      435782 :             break;
    2608             :     }
    2609             : 
    2610      520042 :     return result_list;
    2611             : }

Generated by: LCOV version 1.14