LCOV - code coverage report
Current view: top level - src/backend/optimizer/path - joinpath.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 521 537 97.0 %
Date: 2023-11-29 04:11:06 Functions: 18 18 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * joinpath.c
       4             :  *    Routines to find all possible paths for processing a set of joins
       5             :  *
       6             :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/optimizer/path/joinpath.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include <math.h>
      18             : 
      19             : #include "executor/executor.h"
      20             : #include "foreign/fdwapi.h"
      21             : #include "nodes/nodeFuncs.h"
      22             : #include "optimizer/cost.h"
      23             : #include "optimizer/optimizer.h"
      24             : #include "optimizer/pathnode.h"
      25             : #include "optimizer/paths.h"
      26             : #include "optimizer/planmain.h"
      27             : #include "utils/typcache.h"
      28             : 
      29             : /* Hook for plugins to get control in add_paths_to_joinrel() */
      30             : set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
      31             : 
      32             : /*
      33             :  * Paths parameterized by the parent can be considered to be parameterized by
      34             :  * any of its child.
      35             :  */
      36             : #define PATH_PARAM_BY_PARENT(path, rel) \
      37             :     ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
      38             :                                        (rel)->top_parent_relids))
      39             : #define PATH_PARAM_BY_REL_SELF(path, rel)  \
      40             :     ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
      41             : 
      42             : #define PATH_PARAM_BY_REL(path, rel)    \
      43             :     (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
      44             : 
      45             : static void try_partial_mergejoin_path(PlannerInfo *root,
      46             :                                        RelOptInfo *joinrel,
      47             :                                        Path *outer_path,
      48             :                                        Path *inner_path,
      49             :                                        List *pathkeys,
      50             :                                        List *mergeclauses,
      51             :                                        List *outersortkeys,
      52             :                                        List *innersortkeys,
      53             :                                        JoinType jointype,
      54             :                                        JoinPathExtraData *extra);
      55             : static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
      56             :                                  RelOptInfo *outerrel, RelOptInfo *innerrel,
      57             :                                  JoinType jointype, JoinPathExtraData *extra);
      58             : static inline bool clause_sides_match_join(RestrictInfo *rinfo,
      59             :                                            RelOptInfo *outerrel,
      60             :                                            RelOptInfo *innerrel);
      61             : static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
      62             :                                  RelOptInfo *outerrel, RelOptInfo *innerrel,
      63             :                                  JoinType jointype, JoinPathExtraData *extra);
      64             : static void consider_parallel_nestloop(PlannerInfo *root,
      65             :                                        RelOptInfo *joinrel,
      66             :                                        RelOptInfo *outerrel,
      67             :                                        RelOptInfo *innerrel,
      68             :                                        JoinType jointype,
      69             :                                        JoinPathExtraData *extra);
      70             : static void consider_parallel_mergejoin(PlannerInfo *root,
      71             :                                         RelOptInfo *joinrel,
      72             :                                         RelOptInfo *outerrel,
      73             :                                         RelOptInfo *innerrel,
      74             :                                         JoinType jointype,
      75             :                                         JoinPathExtraData *extra,
      76             :                                         Path *inner_cheapest_total);
      77             : static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
      78             :                                  RelOptInfo *outerrel, RelOptInfo *innerrel,
      79             :                                  JoinType jointype, JoinPathExtraData *extra);
      80             : static List *select_mergejoin_clauses(PlannerInfo *root,
      81             :                                       RelOptInfo *joinrel,
      82             :                                       RelOptInfo *outerrel,
      83             :                                       RelOptInfo *innerrel,
      84             :                                       List *restrictlist,
      85             :                                       JoinType jointype,
      86             :                                       bool *mergejoin_allowed);
      87             : static void generate_mergejoin_paths(PlannerInfo *root,
      88             :                                      RelOptInfo *joinrel,
      89             :                                      RelOptInfo *innerrel,
      90             :                                      Path *outerpath,
      91             :                                      JoinType jointype,
      92             :                                      JoinPathExtraData *extra,
      93             :                                      bool useallclauses,
      94             :                                      Path *inner_cheapest_total,
      95             :                                      List *merge_pathkeys,
      96             :                                      bool is_partial);
      97             : 
      98             : 
      99             : /*
     100             :  * add_paths_to_joinrel
     101             :  *    Given a join relation and two component rels from which it can be made,
     102             :  *    consider all possible paths that use the two component rels as outer
     103             :  *    and inner rel respectively.  Add these paths to the join rel's pathlist
     104             :  *    if they survive comparison with other paths (and remove any existing
     105             :  *    paths that are dominated by these paths).
     106             :  *
     107             :  * Modifies the pathlist field of the joinrel node to contain the best
     108             :  * paths found so far.
     109             :  *
     110             :  * jointype is not necessarily the same as sjinfo->jointype; it might be
     111             :  * "flipped around" if we are considering joining the rels in the opposite
     112             :  * direction from what's indicated in sjinfo.
     113             :  *
     114             :  * Also, this routine and others in this module accept the special JoinTypes
     115             :  * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
     116             :  * unique-ify the outer or inner relation and then apply a regular inner
     117             :  * join.  These values are not allowed to propagate outside this module,
     118             :  * however.  Path cost estimation code may need to recognize that it's
     119             :  * dealing with such a case --- the combination of nominal jointype INNER
     120             :  * with sjinfo->jointype == JOIN_SEMI indicates that.
     121             :  */
     122             : void
     123      498796 : add_paths_to_joinrel(PlannerInfo *root,
     124             :                      RelOptInfo *joinrel,
     125             :                      RelOptInfo *outerrel,
     126             :                      RelOptInfo *innerrel,
     127             :                      JoinType jointype,
     128             :                      SpecialJoinInfo *sjinfo,
     129             :                      List *restrictlist)
     130             : {
     131             :     JoinPathExtraData extra;
     132      498796 :     bool        mergejoin_allowed = true;
     133             :     ListCell   *lc;
     134             :     Relids      joinrelids;
     135             : 
     136             :     /*
     137             :      * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
     138             :      * between child relations, even if there is a SpecialJoinInfo node for
     139             :      * the join between the topmost parents. So, while calculating Relids set
     140             :      * representing the restriction, consider relids of topmost parent of
     141             :      * partitions.
     142             :      */
     143      498796 :     if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
     144       10156 :         joinrelids = joinrel->top_parent_relids;
     145             :     else
     146      488640 :         joinrelids = joinrel->relids;
     147             : 
     148      498796 :     extra.restrictlist = restrictlist;
     149      498796 :     extra.mergeclause_list = NIL;
     150      498796 :     extra.sjinfo = sjinfo;
     151      498796 :     extra.param_source_rels = NULL;
     152             : 
     153             :     /*
     154             :      * See if the inner relation is provably unique for this outer rel.
     155             :      *
     156             :      * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
     157             :      * matter since the executor can make the equivalent optimization anyway;
     158             :      * we need not expend planner cycles on proofs.  For JOIN_UNIQUE_INNER, we
     159             :      * must be considering a semijoin whose inner side is not provably unique
     160             :      * (else reduce_unique_semijoins would've simplified it), so there's no
     161             :      * point in calling innerrel_is_unique.  However, if the LHS covers all of
     162             :      * the semijoin's min_lefthand, then it's appropriate to set inner_unique
     163             :      * because the path produced by create_unique_path will be unique relative
     164             :      * to the LHS.  (If we have an LHS that's only part of the min_lefthand,
     165             :      * that is *not* true.)  For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
     166             :      * letting that value escape this module.
     167             :      */
     168      498796 :     switch (jointype)
     169             :     {
     170        7728 :         case JOIN_SEMI:
     171             :         case JOIN_ANTI:
     172             : 
     173             :             /*
     174             :              * XXX it may be worth proving this to allow a Memoize to be
     175             :              * considered for Nested Loop Semi/Anti Joins.
     176             :              */
     177        7728 :             extra.inner_unique = false; /* well, unproven */
     178        7728 :             break;
     179        3586 :         case JOIN_UNIQUE_INNER:
     180        7172 :             extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
     181        3586 :                                                outerrel->relids);
     182        3586 :             break;
     183        3586 :         case JOIN_UNIQUE_OUTER:
     184        3586 :             extra.inner_unique = innerrel_is_unique(root,
     185             :                                                     joinrel->relids,
     186             :                                                     outerrel->relids,
     187             :                                                     innerrel,
     188             :                                                     JOIN_INNER,
     189             :                                                     restrictlist,
     190             :                                                     false);
     191        3586 :             break;
     192      483896 :         default:
     193      483896 :             extra.inner_unique = innerrel_is_unique(root,
     194             :                                                     joinrel->relids,
     195             :                                                     outerrel->relids,
     196             :                                                     innerrel,
     197             :                                                     jointype,
     198             :                                                     restrictlist,
     199             :                                                     false);
     200      483896 :             break;
     201             :     }
     202             : 
     203             :     /*
     204             :      * Find potential mergejoin clauses.  We can skip this if we are not
     205             :      * interested in doing a mergejoin.  However, mergejoin may be our only
     206             :      * way of implementing a full outer join, so override enable_mergejoin if
     207             :      * it's a full join.
     208             :      */
     209      498796 :     if (enable_mergejoin || jointype == JOIN_FULL)
     210      495614 :         extra.mergeclause_list = select_mergejoin_clauses(root,
     211             :                                                           joinrel,
     212             :                                                           outerrel,
     213             :                                                           innerrel,
     214             :                                                           restrictlist,
     215             :                                                           jointype,
     216             :                                                           &mergejoin_allowed);
     217             : 
     218             :     /*
     219             :      * If it's SEMI, ANTI, or inner_unique join, compute correction factors
     220             :      * for cost estimation.  These will be the same for all paths.
     221             :      */
     222      498796 :     if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
     223      159396 :         compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel,
     224             :                                        jointype, sjinfo, restrictlist,
     225             :                                        &extra.semifactors);
     226             : 
     227             :     /*
     228             :      * Decide whether it's sensible to generate parameterized paths for this
     229             :      * joinrel, and if so, which relations such paths should require.  There
     230             :      * is usually no need to create a parameterized result path unless there
     231             :      * is a join order restriction that prevents joining one of our input rels
     232             :      * directly to the parameter source rel instead of joining to the other
     233             :      * input rel.  (But see allow_star_schema_join().)  This restriction
     234             :      * reduces the number of parameterized paths we have to deal with at
     235             :      * higher join levels, without compromising the quality of the resulting
     236             :      * plan.  We express the restriction as a Relids set that must overlap the
     237             :      * parameterization of any proposed join path.  Note: param_source_rels
     238             :      * should contain only baserels, not OJ relids, so starting from
     239             :      * all_baserels not all_query_rels is correct.
     240             :      */
     241     1081568 :     foreach(lc, root->join_info_list)
     242             :     {
     243      582772 :         SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
     244             : 
     245             :         /*
     246             :          * SJ is relevant to this join if we have some part of its RHS
     247             :          * (possibly not all of it), and haven't yet joined to its LHS.  (This
     248             :          * test is pretty simplistic, but should be sufficient considering the
     249             :          * join has already been proven legal.)  If the SJ is relevant, it
     250             :          * presents constraints for joining to anything not in its RHS.
     251             :          */
     252      582772 :         if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
     253      383666 :             !bms_overlap(joinrelids, sjinfo2->min_lefthand))
     254       18962 :             extra.param_source_rels = bms_join(extra.param_source_rels,
     255       18962 :                                                bms_difference(root->all_baserels,
     256       18962 :                                                               sjinfo2->min_righthand));
     257             : 
     258             :         /* full joins constrain both sides symmetrically */
     259      587412 :         if (sjinfo2->jointype == JOIN_FULL &&
     260        4640 :             bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
     261        4584 :             !bms_overlap(joinrelids, sjinfo2->min_righthand))
     262         672 :             extra.param_source_rels = bms_join(extra.param_source_rels,
     263         672 :                                                bms_difference(root->all_baserels,
     264         672 :                                                               sjinfo2->min_lefthand));
     265             :     }
     266             : 
     267             :     /*
     268             :      * However, when a LATERAL subquery is involved, there will simply not be
     269             :      * any paths for the joinrel that aren't parameterized by whatever the
     270             :      * subquery is parameterized by, unless its parameterization is resolved
     271             :      * within the joinrel.  So we might as well allow additional dependencies
     272             :      * on whatever residual lateral dependencies the joinrel will have.
     273             :      */
     274      997592 :     extra.param_source_rels = bms_add_members(extra.param_source_rels,
     275      498796 :                                               joinrel->lateral_relids);
     276             : 
     277             :     /*
     278             :      * 1. Consider mergejoin paths where both relations must be explicitly
     279             :      * sorted.  Skip this if we can't mergejoin.
     280             :      */
     281      498796 :     if (mergejoin_allowed)
     282      491772 :         sort_inner_and_outer(root, joinrel, outerrel, innerrel,
     283             :                              jointype, &extra);
     284             : 
     285             :     /*
     286             :      * 2. Consider paths where the outer relation need not be explicitly
     287             :      * sorted. This includes both nestloops and mergejoins where the outer
     288             :      * path is already ordered.  Again, skip this if we can't mergejoin.
     289             :      * (That's okay because we know that nestloop can't handle
     290             :      * right/right-anti/full joins at all, so it wouldn't work in the
     291             :      * prohibited cases either.)
     292             :      */
     293      498796 :     if (mergejoin_allowed)
     294      491772 :         match_unsorted_outer(root, joinrel, outerrel, innerrel,
     295             :                              jointype, &extra);
     296             : 
     297             : #ifdef NOT_USED
     298             : 
     299             :     /*
     300             :      * 3. Consider paths where the inner relation need not be explicitly
     301             :      * sorted.  This includes mergejoins only (nestloops were already built in
     302             :      * match_unsorted_outer).
     303             :      *
     304             :      * Diked out as redundant 2/13/2000 -- tgl.  There isn't any really
     305             :      * significant difference between the inner and outer side of a mergejoin,
     306             :      * so match_unsorted_inner creates no paths that aren't equivalent to
     307             :      * those made by match_unsorted_outer when add_paths_to_joinrel() is
     308             :      * invoked with the two rels given in the other order.
     309             :      */
     310             :     if (mergejoin_allowed)
     311             :         match_unsorted_inner(root, joinrel, outerrel, innerrel,
     312             :                              jointype, &extra);
     313             : #endif
     314             : 
     315             :     /*
     316             :      * 4. Consider paths where both outer and inner relations must be hashed
     317             :      * before being joined.  As above, disregard enable_hashjoin for full
     318             :      * joins, because there may be no other alternative.
     319             :      */
     320      498796 :     if (enable_hashjoin || jointype == JOIN_FULL)
     321      494232 :         hash_inner_and_outer(root, joinrel, outerrel, innerrel,
     322             :                              jointype, &extra);
     323             : 
     324             :     /*
     325             :      * 5. If inner and outer relations are foreign tables (or joins) belonging
     326             :      * to the same server and assigned to the same user to check access
     327             :      * permissions as, give the FDW a chance to push down joins.
     328             :      */
     329      498796 :     if (joinrel->fdwroutine &&
     330        1222 :         joinrel->fdwroutine->GetForeignJoinPaths)
     331        1218 :         joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
     332             :                                                  outerrel, innerrel,
     333             :                                                  jointype, &extra);
     334             : 
     335             :     /*
     336             :      * 6. Finally, give extensions a chance to manipulate the path list.  They
     337             :      * could add new paths (such as CustomPaths) by calling add_path(), or
     338             :      * add_partial_path() if parallel aware.  They could also delete or modify
     339             :      * paths added by the core code.
     340             :      */
     341      498796 :     if (set_join_pathlist_hook)
     342           0 :         set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
     343             :                                jointype, &extra);
     344      498796 : }
     345             : 
     346             : /*
     347             :  * We override the param_source_rels heuristic to accept nestloop paths in
     348             :  * which the outer rel satisfies some but not all of the inner path's
     349             :  * parameterization.  This is necessary to get good plans for star-schema
     350             :  * scenarios, in which a parameterized path for a large table may require
     351             :  * parameters from multiple small tables that will not get joined directly to
     352             :  * each other.  We can handle that by stacking nestloops that have the small
     353             :  * tables on the outside; but this breaks the rule the param_source_rels
     354             :  * heuristic is based on, namely that parameters should not be passed down
     355             :  * across joins unless there's a join-order-constraint-based reason to do so.
     356             :  * So we ignore the param_source_rels restriction when this case applies.
     357             :  *
     358             :  * allow_star_schema_join() returns true if the param_source_rels restriction
     359             :  * should be overridden, ie, it's okay to perform this join.
     360             :  */
     361             : static inline bool
     362      180658 : allow_star_schema_join(PlannerInfo *root,
     363             :                        Relids outerrelids,
     364             :                        Relids inner_paramrels)
     365             : {
     366             :     /*
     367             :      * It's a star-schema case if the outer rel provides some but not all of
     368             :      * the inner rel's parameterization.
     369             :      */
     370      210194 :     return (bms_overlap(inner_paramrels, outerrelids) &&
     371       29536 :             bms_nonempty_difference(inner_paramrels, outerrelids));
     372             : }
     373             : 
     374             : /*
     375             :  * If the parameterization is only partly satisfied by the outer rel,
     376             :  * the unsatisfied part can't include any outer-join relids that could
     377             :  * null rels of the satisfied part.  That would imply that we're trying
     378             :  * to use a clause involving a Var with nonempty varnullingrels at
     379             :  * a join level where that value isn't yet computable.
     380             :  *
     381             :  * In practice, this test never finds a problem because earlier join order
     382             :  * restrictions prevent us from attempting a join that would cause a problem.
     383             :  * (That's unsurprising, because the code worked before we ever added
     384             :  * outer-join relids to expression relids.)  It still seems worth checking
     385             :  * as a backstop, but we only do so in assert-enabled builds.
     386             :  */
     387             : #ifdef USE_ASSERT_CHECKING
     388             : static inline bool
     389             : have_unsafe_outer_join_ref(PlannerInfo *root,
     390             :                            Relids outerrelids,
     391             :                            Relids inner_paramrels)
     392             : {
     393             :     bool        result = false;
     394             :     Relids      unsatisfied = bms_difference(inner_paramrels, outerrelids);
     395             :     Relids      satisfied = bms_intersect(inner_paramrels, outerrelids);
     396             : 
     397             :     if (bms_overlap(unsatisfied, root->outer_join_rels))
     398             :     {
     399             :         ListCell   *lc;
     400             : 
     401             :         foreach(lc, root->join_info_list)
     402             :         {
     403             :             SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
     404             : 
     405             :             if (!bms_is_member(sjinfo->ojrelid, unsatisfied))
     406             :                 continue;       /* not relevant */
     407             :             if (bms_overlap(satisfied, sjinfo->min_righthand) ||
     408             :                 (sjinfo->jointype == JOIN_FULL &&
     409             :                  bms_overlap(satisfied, sjinfo->min_lefthand)))
     410             :             {
     411             :                 result = true;  /* doesn't work */
     412             :                 break;
     413             :             }
     414             :         }
     415             :     }
     416             : 
     417             :     /* Waste no memory when we reject a path here */
     418             :     bms_free(unsatisfied);
     419             :     bms_free(satisfied);
     420             : 
     421             :     return result;
     422             : }
     423             : #endif                          /* USE_ASSERT_CHECKING */
     424             : 
     425             : /*
     426             :  * paraminfo_get_equal_hashops
     427             :  *      Determine if the clauses in param_info and innerrel's lateral_vars
     428             :  *      can be hashed.
     429             :  *      Returns true if hashing is possible, otherwise false.
     430             :  *
     431             :  * Additionally, on success we collect the outer expressions and the
     432             :  * appropriate equality operators for each hashable parameter to innerrel.
     433             :  * These are returned in parallel lists in *param_exprs and *operators.
     434             :  * We also set *binary_mode to indicate whether strict binary matching is
     435             :  * required.
     436             :  */
     437             : static bool
     438      271498 : paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
     439             :                             RelOptInfo *outerrel, RelOptInfo *innerrel,
     440             :                             List **param_exprs, List **operators,
     441             :                             bool *binary_mode)
     442             : 
     443             : {
     444             :     ListCell   *lc;
     445             : 
     446      271498 :     *param_exprs = NIL;
     447      271498 :     *operators = NIL;
     448      271498 :     *binary_mode = false;
     449             : 
     450             :     /* Add join clauses from param_info to the hash key */
     451      271498 :     if (param_info != NULL)
     452             :     {
     453      271498 :         List       *clauses = param_info->ppi_clauses;
     454             : 
     455      493202 :         foreach(lc, clauses)
     456             :         {
     457      290660 :             RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
     458             :             OpExpr     *opexpr;
     459             :             Node       *expr;
     460             :             Oid         hasheqoperator;
     461             : 
     462      290660 :             opexpr = (OpExpr *) rinfo->clause;
     463             : 
     464             :             /*
     465             :              * Bail if the rinfo is not compatible.  We need a join OpExpr
     466             :              * with 2 args.
     467             :              */
     468      290660 :             if (!IsA(opexpr, OpExpr) || list_length(opexpr->args) != 2 ||
     469      271806 :                 !clause_sides_match_join(rinfo, outerrel, innerrel))
     470             :             {
     471       68812 :                 list_free(*operators);
     472       68812 :                 list_free(*param_exprs);
     473       68956 :                 return false;
     474             :             }
     475             : 
     476      221848 :             if (rinfo->outer_is_left)
     477             :             {
     478      130896 :                 expr = (Node *) linitial(opexpr->args);
     479      130896 :                 hasheqoperator = rinfo->left_hasheqoperator;
     480             :             }
     481             :             else
     482             :             {
     483       90952 :                 expr = (Node *) lsecond(opexpr->args);
     484       90952 :                 hasheqoperator = rinfo->right_hasheqoperator;
     485             :             }
     486             : 
     487             :             /* can't do memoize if we can't hash the outer type */
     488      221848 :             if (!OidIsValid(hasheqoperator))
     489             :             {
     490         144 :                 list_free(*operators);
     491         144 :                 list_free(*param_exprs);
     492         144 :                 return false;
     493             :             }
     494             : 
     495      221704 :             *operators = lappend_oid(*operators, hasheqoperator);
     496      221704 :             *param_exprs = lappend(*param_exprs, expr);
     497             : 
     498             :             /*
     499             :              * When the join operator is not hashable then it's possible that
     500             :              * the operator will be able to distinguish something that the
     501             :              * hash equality operator could not. For example with floating
     502             :              * point types -0.0 and +0.0 are classed as equal by the hash
     503             :              * function and equality function, but some other operator may be
     504             :              * able to tell those values apart.  This means that we must put
     505             :              * memoize into binary comparison mode so that it does bit-by-bit
     506             :              * comparisons rather than a "logical" comparison as it would
     507             :              * using the hash equality operator.
     508             :              */
     509      221704 :             if (!OidIsValid(rinfo->hashjoinoperator))
     510         598 :                 *binary_mode = true;
     511             :         }
     512             :     }
     513             : 
     514             :     /* Now add any lateral vars to the cache key too */
     515      205656 :     foreach(lc, innerrel->lateral_vars)
     516             :     {
     517        3258 :         Node       *expr = (Node *) lfirst(lc);
     518             :         TypeCacheEntry *typentry;
     519             : 
     520             :         /* Reject if there are any volatile functions in lateral vars */
     521        3258 :         if (contain_volatile_functions(expr))
     522             :         {
     523           0 :             list_free(*operators);
     524           0 :             list_free(*param_exprs);
     525         144 :             return false;
     526             :         }
     527             : 
     528        3258 :         typentry = lookup_type_cache(exprType(expr),
     529             :                                      TYPECACHE_HASH_PROC | TYPECACHE_EQ_OPR);
     530             : 
     531             :         /* can't use memoize without a valid hash proc and equals operator */
     532        3258 :         if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr))
     533             :         {
     534         144 :             list_free(*operators);
     535         144 :             list_free(*param_exprs);
     536         144 :             return false;
     537             :         }
     538             : 
     539        3114 :         *operators = lappend_oid(*operators, typentry->eq_opr);
     540        3114 :         *param_exprs = lappend(*param_exprs, expr);
     541             : 
     542             :         /*
     543             :          * We must go into binary mode as we don't have too much of an idea of
     544             :          * how these lateral Vars are being used.  See comment above when we
     545             :          * set *binary_mode for the non-lateral Var case. This could be
     546             :          * relaxed a bit if we had the RestrictInfos and knew the operators
     547             :          * being used, however for cases like Vars that are arguments to
     548             :          * functions we must operate in binary mode as we don't have
     549             :          * visibility into what the function is doing with the Vars.
     550             :          */
     551        3114 :         *binary_mode = true;
     552             :     }
     553             : 
     554             :     /* We're okay to use memoize */
     555      202398 :     return true;
     556             : }
     557             : 
     558             : /*
     559             :  * get_memoize_path
     560             :  *      If possible, make and return a Memoize path atop of 'inner_path'.
     561             :  *      Otherwise return NULL.
     562             :  */
     563             : static Path *
     564     1208692 : get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
     565             :                  RelOptInfo *outerrel, Path *inner_path,
     566             :                  Path *outer_path, JoinType jointype,
     567             :                  JoinPathExtraData *extra)
     568             : {
     569             :     List       *param_exprs;
     570             :     List       *hash_operators;
     571             :     ListCell   *lc;
     572             :     bool        binary_mode;
     573             : 
     574             :     /* Obviously not if it's disabled */
     575     1208692 :     if (!enable_memoize)
     576         388 :         return NULL;
     577             : 
     578             :     /*
     579             :      * We can safely not bother with all this unless we expect to perform more
     580             :      * than one inner scan.  The first scan is always going to be a cache
     581             :      * miss.  This would likely fail later anyway based on costs, so this is
     582             :      * really just to save some wasted effort.
     583             :      */
     584     1208304 :     if (outer_path->parent->rows < 2)
     585      406724 :         return NULL;
     586             : 
     587             :     /*
     588             :      * We can only have a memoize node when there's some kind of cache key,
     589             :      * either parameterized path clauses or lateral Vars.  No cache key sounds
     590             :      * more like something a Materialize node might be more useful for.
     591             :      */
     592      801580 :     if ((inner_path->param_info == NULL ||
     593      331848 :          inner_path->param_info->ppi_clauses == NIL) &&
     594      485176 :         innerrel->lateral_vars == NIL)
     595      482822 :         return NULL;
     596             : 
     597             :     /*
     598             :      * Currently we don't do this for SEMI and ANTI joins unless they're
     599             :      * marked as inner_unique.  This is because nested loop SEMI/ANTI joins
     600             :      * don't scan the inner node to completion, which will mean memoize cannot
     601             :      * mark the cache entry as complete.
     602             :      *
     603             :      * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique
     604             :      * = true.  Should we?  See add_paths_to_joinrel()
     605             :      */
     606      318758 :     if (!extra->inner_unique && (jointype == JOIN_SEMI ||
     607             :                                  jointype == JOIN_ANTI))
     608        7600 :         return NULL;
     609             : 
     610             :     /*
     611             :      * Memoize normally marks cache entries as complete when it runs out of
     612             :      * tuples to read from its subplan.  However, with unique joins, Nested
     613             :      * Loop will skip to the next outer tuple after finding the first matching
     614             :      * inner tuple.  This means that we may not read the inner side of the
     615             :      * join to completion which leaves no opportunity to mark the cache entry
     616             :      * as complete.  To work around that, when the join is unique we
     617             :      * automatically mark cache entries as complete after fetching the first
     618             :      * tuple.  This works when the entire join condition is parameterized.
     619             :      * Otherwise, when the parameterization is only a subset of the join
     620             :      * condition, we can't be sure which part of it causes the join to be
     621             :      * unique.  This means there are no guarantees that only 1 tuple will be
     622             :      * read.  We cannot mark the cache entry as complete after reading the
     623             :      * first tuple without that guarantee.  This means the scope of Memoize
     624             :      * node's usefulness is limited to only outer rows that have no join
     625             :      * partner as this is the only case where Nested Loop would exhaust the
     626             :      * inner scan of a unique join.  Since the scope is limited to that, we
     627             :      * just don't bother making a memoize path in this case.
     628             :      *
     629             :      * Lateral vars needn't be considered here as they're not considered when
     630             :      * determining if the join is unique.
     631             :      *
     632             :      * XXX this could be enabled if the remaining join quals were made part of
     633             :      * the inner scan's filter instead of the join filter.  Maybe it's worth
     634             :      * considering doing that?
     635             :      */
     636      311158 :     if (extra->inner_unique &&
     637      369948 :         (inner_path->param_info == NULL ||
     638      184974 :          list_length(inner_path->param_info->ppi_clauses) <
     639      184974 :          list_length(extra->restrictlist)))
     640       39658 :         return NULL;
     641             : 
     642             :     /*
     643             :      * We can't use a memoize node if there are volatile functions in the
     644             :      * inner rel's target list or restrict list.  A cache hit could reduce the
     645             :      * number of calls to these functions.
     646             :      */
     647      271500 :     if (contain_volatile_functions((Node *) innerrel->reltarget))
     648           0 :         return NULL;
     649             : 
     650      448240 :     foreach(lc, innerrel->baserestrictinfo)
     651             :     {
     652      176740 :         RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
     653             : 
     654      176740 :         if (contain_volatile_functions((Node *) rinfo))
     655           0 :             return NULL;
     656             :     }
     657             : 
     658             :     /*
     659             :      * Also check the parameterized path restrictinfos for volatile functions.
     660             :      * Indexed functions must be immutable so shouldn't have any volatile
     661             :      * functions, however, with a lateral join the inner scan may not be an
     662             :      * index scan.
     663             :      */
     664      271500 :     if (inner_path->param_info != NULL)
     665             :     {
     666      585004 :         foreach(lc, inner_path->param_info->ppi_clauses)
     667             :         {
     668      313506 :             RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
     669             : 
     670      313506 :             if (contain_volatile_functions((Node *) rinfo))
     671           2 :                 return NULL;
     672             :         }
     673             :     }
     674             : 
     675             :     /* Check if we have hash ops for each parameter to the path */
     676      271498 :     if (paraminfo_get_equal_hashops(root,
     677             :                                     inner_path->param_info,
     678      271498 :                                     outerrel->top_parent ?
     679             :                                     outerrel->top_parent : outerrel,
     680             :                                     innerrel,
     681             :                                     &param_exprs,
     682             :                                     &hash_operators,
     683             :                                     &binary_mode))
     684             :     {
     685      202398 :         return (Path *) create_memoize_path(root,
     686             :                                             innerrel,
     687             :                                             inner_path,
     688             :                                             param_exprs,
     689             :                                             hash_operators,
     690      202398 :                                             extra->inner_unique,
     691             :                                             binary_mode,
     692             :                                             outer_path->rows);
     693             :     }
     694             : 
     695       69100 :     return NULL;
     696             : }
     697             : 
     698             : /*
     699             :  * try_nestloop_path
     700             :  *    Consider a nestloop join path; if it appears useful, push it into
     701             :  *    the joinrel's pathlist via add_path().
     702             :  */
     703             : static void
     704     2074656 : try_nestloop_path(PlannerInfo *root,
     705             :                   RelOptInfo *joinrel,
     706             :                   Path *outer_path,
     707             :                   Path *inner_path,
     708             :                   List *pathkeys,
     709             :                   JoinType jointype,
     710             :                   JoinPathExtraData *extra)
     711             : {
     712             :     Relids      required_outer;
     713             :     JoinCostWorkspace workspace;
     714     2074656 :     RelOptInfo *innerrel = inner_path->parent;
     715     2074656 :     RelOptInfo *outerrel = outer_path->parent;
     716             :     Relids      innerrelids;
     717             :     Relids      outerrelids;
     718     2074656 :     Relids      inner_paramrels = PATH_REQ_OUTER(inner_path);
     719     2074656 :     Relids      outer_paramrels = PATH_REQ_OUTER(outer_path);
     720             : 
     721             :     /*
     722             :      * If we are forming an outer join at this join, it's nonsensical to use
     723             :      * an input path that uses the outer join as part of its parameterization.
     724             :      * (This can happen despite our join order restrictions, since those apply
     725             :      * to what is in an input relation not what its parameters are.)
     726             :      */
     727     2619700 :     if (extra->sjinfo->ojrelid != 0 &&
     728     1090088 :         (bms_is_member(extra->sjinfo->ojrelid, inner_paramrels) ||
     729      545044 :          bms_is_member(extra->sjinfo->ojrelid, outer_paramrels)))
     730      180446 :         return;
     731             : 
     732             :     /*
     733             :      * Paths are parameterized by top-level parents, so run parameterization
     734             :      * tests on the parent relids.
     735             :      */
     736     2074536 :     if (innerrel->top_parent_relids)
     737       32500 :         innerrelids = innerrel->top_parent_relids;
     738             :     else
     739     2042036 :         innerrelids = innerrel->relids;
     740             : 
     741     2074536 :     if (outerrel->top_parent_relids)
     742       32500 :         outerrelids = outerrel->top_parent_relids;
     743             :     else
     744     2042036 :         outerrelids = outerrel->relids;
     745             : 
     746             :     /*
     747             :      * Check to see if proposed path is still parameterized, and reject if the
     748             :      * parameterization wouldn't be sensible --- unless allow_star_schema_join
     749             :      * says to allow it anyway.  Also, we must reject if have_dangerous_phv
     750             :      * doesn't like the look of it, which could only happen if the nestloop is
     751             :      * still parameterized.
     752             :      */
     753     2074536 :     required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
     754             :                                                   innerrelids, inner_paramrels);
     755     2074536 :     if (required_outer &&
     756      211084 :         ((!bms_overlap(required_outer, extra->param_source_rels) &&
     757      211512 :           !allow_star_schema_join(root, outerrelids, inner_paramrels)) ||
     758       30854 :          have_dangerous_phv(root, outerrelids, inner_paramrels)))
     759             :     {
     760             :         /* Waste no memory when we reject a path here */
     761      180326 :         bms_free(required_outer);
     762      180326 :         return;
     763             :     }
     764             : 
     765             :     /* If we got past that, we shouldn't have any unsafe outer-join refs */
     766             :     Assert(!have_unsafe_outer_join_ref(root, outerrelids, inner_paramrels));
     767             : 
     768             :     /*
     769             :      * Do a precheck to quickly eliminate obviously-inferior paths.  We
     770             :      * calculate a cheap lower bound on the path's cost and then use
     771             :      * add_path_precheck() to see if the path is clearly going to be dominated
     772             :      * by some existing path for the joinrel.  If not, do the full pushup with
     773             :      * creating a fully valid path structure and submitting it to add_path().
     774             :      * The latter two steps are expensive enough to make this two-phase
     775             :      * methodology worthwhile.
     776             :      */
     777     1894210 :     initial_cost_nestloop(root, &workspace, jointype,
     778             :                           outer_path, inner_path, extra);
     779             : 
     780     1894210 :     if (add_path_precheck(joinrel,
     781             :                           workspace.startup_cost, workspace.total_cost,
     782             :                           pathkeys, required_outer))
     783             :     {
     784             :         /*
     785             :          * If the inner path is parameterized, it is parameterized by the
     786             :          * topmost parent of the outer rel, not the outer rel itself.  Fix
     787             :          * that.
     788             :          */
     789      945562 :         if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
     790             :         {
     791        2638 :             inner_path = reparameterize_path_by_child(root, inner_path,
     792             :                                                       outer_path->parent);
     793             : 
     794             :             /*
     795             :              * If we could not translate the path, we can't create nest loop
     796             :              * path.
     797             :              */
     798        2638 :             if (!inner_path)
     799             :             {
     800           0 :                 bms_free(required_outer);
     801           0 :                 return;
     802             :             }
     803             :         }
     804             : 
     805      945562 :         add_path(joinrel, (Path *)
     806      945562 :                  create_nestloop_path(root,
     807             :                                       joinrel,
     808             :                                       jointype,
     809             :                                       &workspace,
     810             :                                       extra,
     811             :                                       outer_path,
     812             :                                       inner_path,
     813             :                                       extra->restrictlist,
     814             :                                       pathkeys,
     815             :                                       required_outer));
     816             :     }
     817             :     else
     818             :     {
     819             :         /* Waste no memory when we reject a path here */
     820      948648 :         bms_free(required_outer);
     821             :     }
     822             : }
     823             : 
     824             : /*
     825             :  * try_partial_nestloop_path
     826             :  *    Consider a partial nestloop join path; if it appears useful, push it into
     827             :  *    the joinrel's partial_pathlist via add_partial_path().
     828             :  */
     829             : static void
     830       25432 : try_partial_nestloop_path(PlannerInfo *root,
     831             :                           RelOptInfo *joinrel,
     832             :                           Path *outer_path,
     833             :                           Path *inner_path,
     834             :                           List *pathkeys,
     835             :                           JoinType jointype,
     836             :                           JoinPathExtraData *extra)
     837             : {
     838             :     JoinCostWorkspace workspace;
     839             : 
     840             :     /*
     841             :      * If the inner path is parameterized, the parameterization must be fully
     842             :      * satisfied by the proposed outer path.  Parameterized partial paths are
     843             :      * not supported.  The caller should already have verified that no lateral
     844             :      * rels are required here.
     845             :      */
     846             :     Assert(bms_is_empty(joinrel->lateral_relids));
     847       25432 :     if (inner_path->param_info != NULL)
     848             :     {
     849       12598 :         Relids      inner_paramrels = inner_path->param_info->ppi_req_outer;
     850       12598 :         RelOptInfo *outerrel = outer_path->parent;
     851             :         Relids      outerrelids;
     852             : 
     853             :         /*
     854             :          * The inner and outer paths are parameterized, if at all, by the top
     855             :          * level parents, not the child relations, so we must use those relids
     856             :          * for our parameterization tests.
     857             :          */
     858       12598 :         if (outerrel->top_parent_relids)
     859        9396 :             outerrelids = outerrel->top_parent_relids;
     860             :         else
     861        3202 :             outerrelids = outerrel->relids;
     862             : 
     863       12598 :         if (!bms_is_subset(inner_paramrels, outerrelids))
     864       18086 :             return;
     865             :     }
     866             : 
     867             :     /*
     868             :      * Before creating a path, get a quick lower bound on what it is likely to
     869             :      * cost.  Bail out right away if it looks terrible.
     870             :      */
     871       24388 :     initial_cost_nestloop(root, &workspace, jointype,
     872             :                           outer_path, inner_path, extra);
     873       24388 :     if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
     874       17042 :         return;
     875             : 
     876             :     /*
     877             :      * If the inner path is parameterized, it is parameterized by the topmost
     878             :      * parent of the outer rel, not the outer rel itself.  Fix that.
     879             :      */
     880        7346 :     if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
     881             :     {
     882        2526 :         inner_path = reparameterize_path_by_child(root, inner_path,
     883             :                                                   outer_path->parent);
     884             : 
     885             :         /*
     886             :          * If we could not translate the path, we can't create nest loop path.
     887             :          */
     888        2526 :         if (!inner_path)
     889           0 :             return;
     890             :     }
     891             : 
     892             :     /* Might be good enough to be worth trying, so let's try it. */
     893        7346 :     add_partial_path(joinrel, (Path *)
     894        7346 :                      create_nestloop_path(root,
     895             :                                           joinrel,
     896             :                                           jointype,
     897             :                                           &workspace,
     898             :                                           extra,
     899             :                                           outer_path,
     900             :                                           inner_path,
     901             :                                           extra->restrictlist,
     902             :                                           pathkeys,
     903             :                                           NULL));
     904             : }
     905             : 
     906             : /*
     907             :  * try_mergejoin_path
     908             :  *    Consider a merge join path; if it appears useful, push it into
     909             :  *    the joinrel's pathlist via add_path().
     910             :  */
     911             : static void
     912      895284 : try_mergejoin_path(PlannerInfo *root,
     913             :                    RelOptInfo *joinrel,
     914             :                    Path *outer_path,
     915             :                    Path *inner_path,
     916             :                    List *pathkeys,
     917             :                    List *mergeclauses,
     918             :                    List *outersortkeys,
     919             :                    List *innersortkeys,
     920             :                    JoinType jointype,
     921             :                    JoinPathExtraData *extra,
     922             :                    bool is_partial)
     923             : {
     924             :     Relids      required_outer;
     925             :     JoinCostWorkspace workspace;
     926             : 
     927      895284 :     if (is_partial)
     928             :     {
     929        6018 :         try_partial_mergejoin_path(root,
     930             :                                    joinrel,
     931             :                                    outer_path,
     932             :                                    inner_path,
     933             :                                    pathkeys,
     934             :                                    mergeclauses,
     935             :                                    outersortkeys,
     936             :                                    innersortkeys,
     937             :                                    jointype,
     938             :                                    extra);
     939       27860 :         return;
     940             :     }
     941             : 
     942             :     /*
     943             :      * If we are forming an outer join at this join, it's nonsensical to use
     944             :      * an input path that uses the outer join as part of its parameterization.
     945             :      * (This can happen despite our join order restrictions, since those apply
     946             :      * to what is in an input relation not what its parameters are.)
     947             :      */
     948     1230488 :     if (extra->sjinfo->ojrelid != 0 &&
     949      682444 :         (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
     950      341222 :          bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
     951          12 :         return;
     952             : 
     953             :     /*
     954             :      * Check to see if proposed path is still parameterized, and reject if the
     955             :      * parameterization wouldn't be sensible.
     956             :      */
     957      889254 :     required_outer = calc_non_nestloop_required_outer(outer_path,
     958             :                                                       inner_path);
     959      889254 :     if (required_outer &&
     960       24130 :         !bms_overlap(required_outer, extra->param_source_rels))
     961             :     {
     962             :         /* Waste no memory when we reject a path here */
     963       21830 :         bms_free(required_outer);
     964       21830 :         return;
     965             :     }
     966             : 
     967             :     /*
     968             :      * If the given paths are already well enough ordered, we can skip doing
     969             :      * an explicit sort.
     970             :      */
     971     1294018 :     if (outersortkeys &&
     972      426594 :         pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
     973       12100 :         outersortkeys = NIL;
     974     1570406 :     if (innersortkeys &&
     975      702982 :         pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
     976       20004 :         innersortkeys = NIL;
     977             : 
     978             :     /*
     979             :      * See comments in try_nestloop_path().
     980             :      */
     981      867424 :     initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
     982             :                            outer_path, inner_path,
     983             :                            outersortkeys, innersortkeys,
     984             :                            extra);
     985             : 
     986      867424 :     if (add_path_precheck(joinrel,
     987             :                           workspace.startup_cost, workspace.total_cost,
     988             :                           pathkeys, required_outer))
     989             :     {
     990      215406 :         add_path(joinrel, (Path *)
     991      215406 :                  create_mergejoin_path(root,
     992             :                                        joinrel,
     993             :                                        jointype,
     994             :                                        &workspace,
     995             :                                        extra,
     996             :                                        outer_path,
     997             :                                        inner_path,
     998             :                                        extra->restrictlist,
     999             :                                        pathkeys,
    1000             :                                        required_outer,
    1001             :                                        mergeclauses,
    1002             :                                        outersortkeys,
    1003             :                                        innersortkeys));
    1004             :     }
    1005             :     else
    1006             :     {
    1007             :         /* Waste no memory when we reject a path here */
    1008      652018 :         bms_free(required_outer);
    1009             :     }
    1010             : }
    1011             : 
    1012             : /*
    1013             :  * try_partial_mergejoin_path
    1014             :  *    Consider a partial merge join path; if it appears useful, push it into
    1015             :  *    the joinrel's pathlist via add_partial_path().
    1016             :  */
    1017             : static void
    1018       18132 : try_partial_mergejoin_path(PlannerInfo *root,
    1019             :                            RelOptInfo *joinrel,
    1020             :                            Path *outer_path,
    1021             :                            Path *inner_path,
    1022             :                            List *pathkeys,
    1023             :                            List *mergeclauses,
    1024             :                            List *outersortkeys,
    1025             :                            List *innersortkeys,
    1026             :                            JoinType jointype,
    1027             :                            JoinPathExtraData *extra)
    1028             : {
    1029             :     JoinCostWorkspace workspace;
    1030             : 
    1031             :     /*
    1032             :      * See comments in try_partial_hashjoin_path().
    1033             :      */
    1034             :     Assert(bms_is_empty(joinrel->lateral_relids));
    1035       18132 :     if (inner_path->param_info != NULL)
    1036             :     {
    1037           0 :         Relids      inner_paramrels = inner_path->param_info->ppi_req_outer;
    1038             : 
    1039           0 :         if (!bms_is_empty(inner_paramrels))
    1040        9092 :             return;
    1041             :     }
    1042             : 
    1043             :     /*
    1044             :      * If the given paths are already well enough ordered, we can skip doing
    1045             :      * an explicit sort.
    1046             :      */
    1047       30246 :     if (outersortkeys &&
    1048       12114 :         pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
    1049         102 :         outersortkeys = NIL;
    1050       33822 :     if (innersortkeys &&
    1051       15690 :         pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
    1052         252 :         innersortkeys = NIL;
    1053             : 
    1054             :     /*
    1055             :      * See comments in try_partial_nestloop_path().
    1056             :      */
    1057       18132 :     initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
    1058             :                            outer_path, inner_path,
    1059             :                            outersortkeys, innersortkeys,
    1060             :                            extra);
    1061             : 
    1062       18132 :     if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
    1063        9092 :         return;
    1064             : 
    1065             :     /* Might be good enough to be worth trying, so let's try it. */
    1066        9040 :     add_partial_path(joinrel, (Path *)
    1067        9040 :                      create_mergejoin_path(root,
    1068             :                                            joinrel,
    1069             :                                            jointype,
    1070             :                                            &workspace,
    1071             :                                            extra,
    1072             :                                            outer_path,
    1073             :                                            inner_path,
    1074             :                                            extra->restrictlist,
    1075             :                                            pathkeys,
    1076             :                                            NULL,
    1077             :                                            mergeclauses,
    1078             :                                            outersortkeys,
    1079             :                                            innersortkeys));
    1080             : }
    1081             : 
    1082             : /*
    1083             :  * try_hashjoin_path
    1084             :  *    Consider a hash join path; if it appears useful, push it into
    1085             :  *    the joinrel's pathlist via add_path().
    1086             :  */
    1087             : static void
    1088      524270 : try_hashjoin_path(PlannerInfo *root,
    1089             :                   RelOptInfo *joinrel,
    1090             :                   Path *outer_path,
    1091             :                   Path *inner_path,
    1092             :                   List *hashclauses,
    1093             :                   JoinType jointype,
    1094             :                   JoinPathExtraData *extra)
    1095             : {
    1096             :     Relids      required_outer;
    1097             :     JoinCostWorkspace workspace;
    1098             : 
    1099             :     /*
    1100             :      * If we are forming an outer join at this join, it's nonsensical to use
    1101             :      * an input path that uses the outer join as part of its parameterization.
    1102             :      * (This can happen despite our join order restrictions, since those apply
    1103             :      * to what is in an input relation not what its parameters are.)
    1104             :      */
    1105      732202 :     if (extra->sjinfo->ojrelid != 0 &&
    1106      415834 :         (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
    1107      207902 :          bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
    1108       68174 :         return;
    1109             : 
    1110             :     /*
    1111             :      * Check to see if proposed path is still parameterized, and reject if the
    1112             :      * parameterization wouldn't be sensible.
    1113             :      */
    1114      524210 :     required_outer = calc_non_nestloop_required_outer(outer_path,
    1115             :                                                       inner_path);
    1116      524210 :     if (required_outer &&
    1117       79912 :         !bms_overlap(required_outer, extra->param_source_rels))
    1118             :     {
    1119             :         /* Waste no memory when we reject a path here */
    1120       68114 :         bms_free(required_outer);
    1121       68114 :         return;
    1122             :     }
    1123             : 
    1124             :     /*
    1125             :      * See comments in try_nestloop_path().  Also note that hashjoin paths
    1126             :      * never have any output pathkeys, per comments in create_hashjoin_path.
    1127             :      */
    1128      456096 :     initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
    1129             :                           outer_path, inner_path, extra, false);
    1130             : 
    1131      456096 :     if (add_path_precheck(joinrel,
    1132             :                           workspace.startup_cost, workspace.total_cost,
    1133             :                           NIL, required_outer))
    1134             :     {
    1135      186942 :         add_path(joinrel, (Path *)
    1136      186942 :                  create_hashjoin_path(root,
    1137             :                                       joinrel,
    1138             :                                       jointype,
    1139             :                                       &workspace,
    1140             :                                       extra,
    1141             :                                       outer_path,
    1142             :                                       inner_path,
    1143             :                                       false,    /* parallel_hash */
    1144             :                                       extra->restrictlist,
    1145             :                                       required_outer,
    1146             :                                       hashclauses));
    1147             :     }
    1148             :     else
    1149             :     {
    1150             :         /* Waste no memory when we reject a path here */
    1151      269154 :         bms_free(required_outer);
    1152             :     }
    1153             : }
    1154             : 
    1155             : /*
    1156             :  * try_partial_hashjoin_path
    1157             :  *    Consider a partial hashjoin join path; if it appears useful, push it into
    1158             :  *    the joinrel's partial_pathlist via add_partial_path().
    1159             :  *    The outer side is partial.  If parallel_hash is true, then the inner path
    1160             :  *    must be partial and will be run in parallel to create one or more shared
    1161             :  *    hash tables; otherwise the inner path must be complete and a copy of it
    1162             :  *    is run in every process to create separate identical private hash tables.
    1163             :  */
    1164             : static void
    1165       20082 : try_partial_hashjoin_path(PlannerInfo *root,
    1166             :                           RelOptInfo *joinrel,
    1167             :                           Path *outer_path,
    1168             :                           Path *inner_path,
    1169             :                           List *hashclauses,
    1170             :                           JoinType jointype,
    1171             :                           JoinPathExtraData *extra,
    1172             :                           bool parallel_hash)
    1173             : {
    1174             :     JoinCostWorkspace workspace;
    1175             : 
    1176             :     /*
    1177             :      * If the inner path is parameterized, the parameterization must be fully
    1178             :      * satisfied by the proposed outer path.  Parameterized partial paths are
    1179             :      * not supported.  The caller should already have verified that no lateral
    1180             :      * rels are required here.
    1181             :      */
    1182             :     Assert(bms_is_empty(joinrel->lateral_relids));
    1183       20082 :     if (inner_path->param_info != NULL)
    1184             :     {
    1185           0 :         Relids      inner_paramrels = inner_path->param_info->ppi_req_outer;
    1186             : 
    1187           0 :         if (!bms_is_empty(inner_paramrels))
    1188        9508 :             return;
    1189             :     }
    1190             : 
    1191             :     /*
    1192             :      * Before creating a path, get a quick lower bound on what it is likely to
    1193             :      * cost.  Bail out right away if it looks terrible.
    1194             :      */
    1195       20082 :     initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
    1196             :                           outer_path, inner_path, extra, parallel_hash);
    1197       20082 :     if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL))
    1198        9508 :         return;
    1199             : 
    1200             :     /* Might be good enough to be worth trying, so let's try it. */
    1201       10574 :     add_partial_path(joinrel, (Path *)
    1202       10574 :                      create_hashjoin_path(root,
    1203             :                                           joinrel,
    1204             :                                           jointype,
    1205             :                                           &workspace,
    1206             :                                           extra,
    1207             :                                           outer_path,
    1208             :                                           inner_path,
    1209             :                                           parallel_hash,
    1210             :                                           extra->restrictlist,
    1211             :                                           NULL,
    1212             :                                           hashclauses));
    1213             : }
    1214             : 
    1215             : /*
    1216             :  * clause_sides_match_join
    1217             :  *    Determine whether a join clause is of the right form to use in this join.
    1218             :  *
    1219             :  * We already know that the clause is a binary opclause referencing only the
    1220             :  * rels in the current join.  The point here is to check whether it has the
    1221             :  * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
    1222             :  * rather than mixing outer and inner vars on either side.  If it matches,
    1223             :  * we set the transient flag outer_is_left to identify which side is which.
    1224             :  */
    1225             : static inline bool
    1226     1139070 : clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
    1227             :                         RelOptInfo *innerrel)
    1228             : {
    1229     1705348 :     if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
    1230      566278 :         bms_is_subset(rinfo->right_relids, innerrel->relids))
    1231             :     {
    1232             :         /* lefthand side is outer */
    1233      565930 :         rinfo->outer_is_left = true;
    1234      565930 :         return true;
    1235             :     }
    1236     1131208 :     else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
    1237      558068 :              bms_is_subset(rinfo->right_relids, outerrel->relids))
    1238             :     {
    1239             :         /* righthand side is outer */
    1240      522270 :         rinfo->outer_is_left = false;
    1241      522270 :         return true;
    1242             :     }
    1243       50870 :     return false;               /* no good for these input relations */
    1244             : }
    1245             : 
    1246             : /*
    1247             :  * sort_inner_and_outer
    1248             :  *    Create mergejoin join paths by explicitly sorting both the outer and
    1249             :  *    inner join relations on each available merge ordering.
    1250             :  *
    1251             :  * 'joinrel' is the join relation
    1252             :  * 'outerrel' is the outer join relation
    1253             :  * 'innerrel' is the inner join relation
    1254             :  * 'jointype' is the type of join to do
    1255             :  * 'extra' contains additional input values
    1256             :  */
    1257             : static void
    1258      491772 : sort_inner_and_outer(PlannerInfo *root,
    1259             :                      RelOptInfo *joinrel,
    1260             :                      RelOptInfo *outerrel,
    1261             :                      RelOptInfo *innerrel,
    1262             :                      JoinType jointype,
    1263             :                      JoinPathExtraData *extra)
    1264             : {
    1265      491772 :     JoinType    save_jointype = jointype;
    1266             :     Path       *outer_path;
    1267             :     Path       *inner_path;
    1268      491772 :     Path       *cheapest_partial_outer = NULL;
    1269      491772 :     Path       *cheapest_safe_inner = NULL;
    1270             :     List       *all_pathkeys;
    1271             :     ListCell   *l;
    1272             : 
    1273             :     /*
    1274             :      * We only consider the cheapest-total-cost input paths, since we are
    1275             :      * assuming here that a sort is required.  We will consider
    1276             :      * cheapest-startup-cost input paths later, and only if they don't need a
    1277             :      * sort.
    1278             :      *
    1279             :      * This function intentionally does not consider parameterized input
    1280             :      * paths, except when the cheapest-total is parameterized.  If we did so,
    1281             :      * we'd have a combinatorial explosion of mergejoin paths of dubious
    1282             :      * value.  This interacts with decisions elsewhere that also discriminate
    1283             :      * against mergejoins with parameterized inputs; see comments in
    1284             :      * src/backend/optimizer/README.
    1285             :      */
    1286      491772 :     outer_path = outerrel->cheapest_total_path;
    1287      491772 :     inner_path = innerrel->cheapest_total_path;
    1288             : 
    1289             :     /*
    1290             :      * If either cheapest-total path is parameterized by the other rel, we
    1291             :      * can't use a mergejoin.  (There's no use looking for alternative input
    1292             :      * paths, since these should already be the least-parameterized available
    1293             :      * paths.)
    1294             :      */
    1295      491772 :     if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
    1296      481446 :         PATH_PARAM_BY_REL(inner_path, outerrel))
    1297       20688 :         return;
    1298             : 
    1299             :     /*
    1300             :      * If unique-ification is requested, do it and then handle as a plain
    1301             :      * inner join.
    1302             :      */
    1303      471084 :     if (jointype == JOIN_UNIQUE_OUTER)
    1304             :     {
    1305        3586 :         outer_path = (Path *) create_unique_path(root, outerrel,
    1306             :                                                  outer_path, extra->sjinfo);
    1307             :         Assert(outer_path);
    1308        3586 :         jointype = JOIN_INNER;
    1309             :     }
    1310      467498 :     else if (jointype == JOIN_UNIQUE_INNER)
    1311             :     {
    1312        3586 :         inner_path = (Path *) create_unique_path(root, innerrel,
    1313             :                                                  inner_path, extra->sjinfo);
    1314             :         Assert(inner_path);
    1315        3586 :         jointype = JOIN_INNER;
    1316             :     }
    1317             : 
    1318             :     /*
    1319             :      * If the joinrel is parallel-safe, we may be able to consider a partial
    1320             :      * merge join.  However, we can't handle JOIN_UNIQUE_OUTER, because the
    1321             :      * outer path will be partial, and therefore we won't be able to properly
    1322             :      * guarantee uniqueness.  Similarly, we can't handle JOIN_FULL, JOIN_RIGHT
    1323             :      * and JOIN_RIGHT_ANTI, because they can produce false null extended rows.
    1324             :      * Also, the resulting path must not be parameterized.
    1325             :      */
    1326      471084 :     if (joinrel->consider_parallel &&
    1327      409094 :         save_jointype != JOIN_UNIQUE_OUTER &&
    1328      406630 :         save_jointype != JOIN_FULL &&
    1329      334018 :         save_jointype != JOIN_RIGHT &&
    1330      332876 :         save_jointype != JOIN_RIGHT_ANTI &&
    1331      332876 :         outerrel->partial_pathlist != NIL &&
    1332        9720 :         bms_is_empty(joinrel->lateral_relids))
    1333             :     {
    1334        9720 :         cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
    1335             : 
    1336        9720 :         if (inner_path->parallel_safe)
    1337        9372 :             cheapest_safe_inner = inner_path;
    1338         348 :         else if (save_jointype != JOIN_UNIQUE_INNER)
    1339             :             cheapest_safe_inner =
    1340         342 :                 get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
    1341             :     }
    1342             : 
    1343             :     /*
    1344             :      * Each possible ordering of the available mergejoin clauses will generate
    1345             :      * a differently-sorted result path at essentially the same cost.  We have
    1346             :      * no basis for choosing one over another at this level of joining, but
    1347             :      * some sort orders may be more useful than others for higher-level
    1348             :      * mergejoins, so it's worth considering multiple orderings.
    1349             :      *
    1350             :      * Actually, it's not quite true that every mergeclause ordering will
    1351             :      * generate a different path order, because some of the clauses may be
    1352             :      * partially redundant (refer to the same EquivalenceClasses).  Therefore,
    1353             :      * what we do is convert the mergeclause list to a list of canonical
    1354             :      * pathkeys, and then consider different orderings of the pathkeys.
    1355             :      *
    1356             :      * Generating a path for *every* permutation of the pathkeys doesn't seem
    1357             :      * like a winning strategy; the cost in planning time is too high. For
    1358             :      * now, we generate one path for each pathkey, listing that pathkey first
    1359             :      * and the rest in random order.  This should allow at least a one-clause
    1360             :      * mergejoin without re-sorting against any other possible mergejoin
    1361             :      * partner path.  But if we've not guessed the right ordering of secondary
    1362             :      * keys, we may end up evaluating clauses as qpquals when they could have
    1363             :      * been done as mergeclauses.  (In practice, it's rare that there's more
    1364             :      * than two or three mergeclauses, so expending a huge amount of thought
    1365             :      * on that is probably not worth it.)
    1366             :      *
    1367             :      * The pathkey order returned by select_outer_pathkeys_for_merge() has
    1368             :      * some heuristics behind it (see that function), so be sure to try it
    1369             :      * exactly as-is as well as making variants.
    1370             :      */
    1371      471084 :     all_pathkeys = select_outer_pathkeys_for_merge(root,
    1372             :                                                    extra->mergeclause_list,
    1373             :                                                    joinrel);
    1374             : 
    1375      897678 :     foreach(l, all_pathkeys)
    1376             :     {
    1377      426594 :         PathKey    *front_pathkey = (PathKey *) lfirst(l);
    1378             :         List       *cur_mergeclauses;
    1379             :         List       *outerkeys;
    1380             :         List       *innerkeys;
    1381             :         List       *merge_pathkeys;
    1382             : 
    1383             :         /* Make a pathkey list with this guy first */
    1384      426594 :         if (l != list_head(all_pathkeys))
    1385       38118 :             outerkeys = lcons(front_pathkey,
    1386             :                               list_delete_nth_cell(list_copy(all_pathkeys),
    1387             :                                                    foreach_current_index(l)));
    1388             :         else
    1389      388476 :             outerkeys = all_pathkeys;   /* no work at first one... */
    1390             : 
    1391             :         /* Sort the mergeclauses into the corresponding ordering */
    1392             :         cur_mergeclauses =
    1393      426594 :             find_mergeclauses_for_outer_pathkeys(root,
    1394             :                                                  outerkeys,
    1395             :                                                  extra->mergeclause_list);
    1396             : 
    1397             :         /* Should have used them all... */
    1398             :         Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
    1399             : 
    1400             :         /* Build sort pathkeys for the inner side */
    1401      426594 :         innerkeys = make_inner_pathkeys_for_merge(root,
    1402             :                                                   cur_mergeclauses,
    1403             :                                                   outerkeys);
    1404             : 
    1405             :         /* Build pathkeys representing output sort order */
    1406      426594 :         merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
    1407             :                                              outerkeys);
    1408             : 
    1409             :         /*
    1410             :          * And now we can make the path.
    1411             :          *
    1412             :          * Note: it's possible that the cheapest paths will already be sorted
    1413             :          * properly.  try_mergejoin_path will detect that case and suppress an
    1414             :          * explicit sort step, so we needn't do so here.
    1415             :          */
    1416      426594 :         try_mergejoin_path(root,
    1417             :                            joinrel,
    1418             :                            outer_path,
    1419             :                            inner_path,
    1420             :                            merge_pathkeys,
    1421             :                            cur_mergeclauses,
    1422             :                            outerkeys,
    1423             :                            innerkeys,
    1424             :                            jointype,
    1425             :                            extra,
    1426             :                            false);
    1427             : 
    1428             :         /*
    1429             :          * If we have partial outer and parallel safe inner path then try
    1430             :          * partial mergejoin path.
    1431             :          */
    1432      426594 :         if (cheapest_partial_outer && cheapest_safe_inner)
    1433       12114 :             try_partial_mergejoin_path(root,
    1434             :                                        joinrel,
    1435             :                                        cheapest_partial_outer,
    1436             :                                        cheapest_safe_inner,
    1437             :                                        merge_pathkeys,
    1438             :                                        cur_mergeclauses,
    1439             :                                        outerkeys,
    1440             :                                        innerkeys,
    1441             :                                        jointype,
    1442             :                                        extra);
    1443             :     }
    1444             : }
    1445             : 
    1446             : /*
    1447             :  * generate_mergejoin_paths
    1448             :  *  Creates possible mergejoin paths for input outerpath.
    1449             :  *
    1450             :  * We generate mergejoins if mergejoin clauses are available.  We have
    1451             :  * two ways to generate the inner path for a mergejoin: sort the cheapest
    1452             :  * inner path, or use an inner path that is already suitably ordered for the
    1453             :  * merge.  If we have several mergeclauses, it could be that there is no inner
    1454             :  * path (or only a very expensive one) for the full list of mergeclauses, but
    1455             :  * better paths exist if we truncate the mergeclause list (thereby discarding
    1456             :  * some sort key requirements).  So, we consider truncations of the
    1457             :  * mergeclause list as well as the full list.  (Ideally we'd consider all
    1458             :  * subsets of the mergeclause list, but that seems way too expensive.)
    1459             :  */
    1460             : static void
    1461      893206 : generate_mergejoin_paths(PlannerInfo *root,
    1462             :                          RelOptInfo *joinrel,
    1463             :                          RelOptInfo *innerrel,
    1464             :                          Path *outerpath,
    1465             :                          JoinType jointype,
    1466             :                          JoinPathExtraData *extra,
    1467             :                          bool useallclauses,
    1468             :                          Path *inner_cheapest_total,
    1469             :                          List *merge_pathkeys,
    1470             :                          bool is_partial)
    1471             : {
    1472             :     List       *mergeclauses;
    1473             :     List       *innersortkeys;
    1474             :     List       *trialsortkeys;
    1475             :     Path       *cheapest_startup_inner;
    1476             :     Path       *cheapest_total_inner;
    1477      893206 :     JoinType    save_jointype = jointype;
    1478             :     int         num_sortkeys;
    1479             :     int         sortkeycnt;
    1480             : 
    1481      893206 :     if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
    1482        6320 :         jointype = JOIN_INNER;
    1483             : 
    1484             :     /* Look for useful mergeclauses (if any) */
    1485             :     mergeclauses =
    1486      893206 :         find_mergeclauses_for_outer_pathkeys(root,
    1487             :                                              outerpath->pathkeys,
    1488             :                                              extra->mergeclause_list);
    1489             : 
    1490             :     /*
    1491             :      * Done with this outer path if no chance for a mergejoin.
    1492             :      *
    1493             :      * Special corner case: for "x FULL JOIN y ON true", there will be no join
    1494             :      * clauses at all.  Ordinarily we'd generate a clauseless nestloop path,
    1495             :      * but since mergejoin is our only join type that supports FULL JOIN
    1496             :      * without any join clauses, it's necessary to generate a clauseless
    1497             :      * mergejoin path instead.
    1498             :      */
    1499      893206 :     if (mergeclauses == NIL)
    1500             :     {
    1501      594098 :         if (jointype == JOIN_FULL)
    1502             :              /* okay to try for mergejoin */ ;
    1503             :         else
    1504      590918 :             return;
    1505             :     }
    1506      379010 :     if (useallclauses &&
    1507       76722 :         list_length(mergeclauses) != list_length(extra->mergeclause_list))
    1508        9592 :         return;
    1509             : 
    1510             :     /* Compute the required ordering of the inner path */
    1511      292696 :     innersortkeys = make_inner_pathkeys_for_merge(root,
    1512             :                                                   mergeclauses,
    1513             :                                                   outerpath->pathkeys);
    1514             : 
    1515             :     /*
    1516             :      * Generate a mergejoin on the basis of sorting the cheapest inner. Since
    1517             :      * a sort will be needed, only cheapest total cost matters. (But
    1518             :      * try_mergejoin_path will do the right thing if inner_cheapest_total is
    1519             :      * already correctly sorted.)
    1520             :      */
    1521      292696 :     try_mergejoin_path(root,
    1522             :                        joinrel,
    1523             :                        outerpath,
    1524             :                        inner_cheapest_total,
    1525             :                        merge_pathkeys,
    1526             :                        mergeclauses,
    1527             :                        NIL,
    1528             :                        innersortkeys,
    1529             :                        jointype,
    1530             :                        extra,
    1531             :                        is_partial);
    1532             : 
    1533             :     /* Can't do anything else if inner path needs to be unique'd */
    1534      292696 :     if (save_jointype == JOIN_UNIQUE_INNER)
    1535        1218 :         return;
    1536             : 
    1537             :     /*
    1538             :      * Look for presorted inner paths that satisfy the innersortkey list ---
    1539             :      * or any truncation thereof, if we are allowed to build a mergejoin using
    1540             :      * a subset of the merge clauses.  Here, we consider both cheap startup
    1541             :      * cost and cheap total cost.
    1542             :      *
    1543             :      * Currently we do not consider parameterized inner paths here. This
    1544             :      * interacts with decisions elsewhere that also discriminate against
    1545             :      * mergejoins with parameterized inputs; see comments in
    1546             :      * src/backend/optimizer/README.
    1547             :      *
    1548             :      * As we shorten the sortkey list, we should consider only paths that are
    1549             :      * strictly cheaper than (in particular, not the same as) any path found
    1550             :      * in an earlier iteration.  Otherwise we'd be intentionally using fewer
    1551             :      * merge keys than a given path allows (treating the rest as plain
    1552             :      * joinquals), which is unlikely to be a good idea.  Also, eliminating
    1553             :      * paths here on the basis of compare_path_costs is a lot cheaper than
    1554             :      * building the mergejoin path only to throw it away.
    1555             :      *
    1556             :      * If inner_cheapest_total is well enough sorted to have not required a
    1557             :      * sort in the path made above, we shouldn't make a duplicate path with
    1558             :      * it, either.  We handle that case with the same logic that handles the
    1559             :      * previous consideration, by initializing the variables that track
    1560             :      * cheapest-so-far properly.  Note that we do NOT reject
    1561             :      * inner_cheapest_total if we find it matches some shorter set of
    1562             :      * pathkeys.  That case corresponds to using fewer mergekeys to avoid
    1563             :      * sorting inner_cheapest_total, whereas we did sort it above, so the
    1564             :      * plans being considered are different.
    1565             :      */
    1566      291478 :     if (pathkeys_contained_in(innersortkeys,
    1567             :                               inner_cheapest_total->pathkeys))
    1568             :     {
    1569             :         /* inner_cheapest_total didn't require a sort */
    1570        8586 :         cheapest_startup_inner = inner_cheapest_total;
    1571        8586 :         cheapest_total_inner = inner_cheapest_total;
    1572             :     }
    1573             :     else
    1574             :     {
    1575             :         /* it did require a sort, at least for the full set of keys */
    1576      282892 :         cheapest_startup_inner = NULL;
    1577      282892 :         cheapest_total_inner = NULL;
    1578             :     }
    1579      291478 :     num_sortkeys = list_length(innersortkeys);
    1580      291478 :     if (num_sortkeys > 1 && !useallclauses)
    1581        8636 :         trialsortkeys = list_copy(innersortkeys);   /* need modifiable copy */
    1582             :     else
    1583      282842 :         trialsortkeys = innersortkeys;  /* won't really truncate */
    1584             : 
    1585      524598 :     for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
    1586             :     {
    1587             :         Path       *innerpath;
    1588      300106 :         List       *newclauses = NIL;
    1589             : 
    1590             :         /*
    1591             :          * Look for an inner path ordered well enough for the first
    1592             :          * 'sortkeycnt' innersortkeys.  NB: trialsortkeys list is modified
    1593             :          * destructively, which is why we made a copy...
    1594             :          */
    1595      300106 :         trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
    1596      300106 :         innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
    1597             :                                                    trialsortkeys,
    1598             :                                                    NULL,
    1599             :                                                    TOTAL_COST,
    1600             :                                                    is_partial);
    1601      300106 :         if (innerpath != NULL &&
    1602       13220 :             (cheapest_total_inner == NULL ||
    1603       13220 :              compare_path_costs(innerpath, cheapest_total_inner,
    1604             :                                 TOTAL_COST) < 0))
    1605             :         {
    1606             :             /* Found a cheap (or even-cheaper) sorted path */
    1607             :             /* Select the right mergeclauses, if we didn't already */
    1608      175196 :             if (sortkeycnt < num_sortkeys)
    1609             :             {
    1610             :                 newclauses =
    1611        2292 :                     trim_mergeclauses_for_inner_pathkeys(root,
    1612             :                                                          mergeclauses,
    1613             :                                                          trialsortkeys);
    1614             :                 Assert(newclauses != NIL);
    1615             :             }
    1616             :             else
    1617      172904 :                 newclauses = mergeclauses;
    1618      175196 :             try_mergejoin_path(root,
    1619             :                                joinrel,
    1620             :                                outerpath,
    1621             :                                innerpath,
    1622             :                                merge_pathkeys,
    1623             :                                newclauses,
    1624             :                                NIL,
    1625             :                                NIL,
    1626             :                                jointype,
    1627             :                                extra,
    1628             :                                is_partial);
    1629      175196 :             cheapest_total_inner = innerpath;
    1630             :         }
    1631             :         /* Same on the basis of cheapest startup cost ... */
    1632      300106 :         innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
    1633             :                                                    trialsortkeys,
    1634             :                                                    NULL,
    1635             :                                                    STARTUP_COST,
    1636             :                                                    is_partial);
    1637      300106 :         if (innerpath != NULL &&
    1638       13220 :             (cheapest_startup_inner == NULL ||
    1639       13220 :              compare_path_costs(innerpath, cheapest_startup_inner,
    1640             :                                 STARTUP_COST) < 0))
    1641             :         {
    1642             :             /* Found a cheap (or even-cheaper) sorted path */
    1643      174020 :             if (innerpath != cheapest_total_inner)
    1644             :             {
    1645             :                 /*
    1646             :                  * Avoid rebuilding clause list if we already made one; saves
    1647             :                  * memory in big join trees...
    1648             :                  */
    1649         798 :                 if (newclauses == NIL)
    1650             :                 {
    1651           2 :                     if (sortkeycnt < num_sortkeys)
    1652             :                     {
    1653             :                         newclauses =
    1654           0 :                             trim_mergeclauses_for_inner_pathkeys(root,
    1655             :                                                                  mergeclauses,
    1656             :                                                                  trialsortkeys);
    1657             :                         Assert(newclauses != NIL);
    1658             :                     }
    1659             :                     else
    1660           2 :                         newclauses = mergeclauses;
    1661             :                 }
    1662         798 :                 try_mergejoin_path(root,
    1663             :                                    joinrel,
    1664             :                                    outerpath,
    1665             :                                    innerpath,
    1666             :                                    merge_pathkeys,
    1667             :                                    newclauses,
    1668             :                                    NIL,
    1669             :                                    NIL,
    1670             :                                    jointype,
    1671             :                                    extra,
    1672             :                                    is_partial);
    1673             :             }
    1674      174020 :             cheapest_startup_inner = innerpath;
    1675             :         }
    1676             : 
    1677             :         /*
    1678             :          * Don't consider truncated sortkeys if we need all clauses.
    1679             :          */
    1680      300106 :         if (useallclauses)
    1681       66986 :             break;
    1682             :     }
    1683             : }
    1684             : 
    1685             : /*
    1686             :  * match_unsorted_outer
    1687             :  *    Creates possible join paths for processing a single join relation
    1688             :  *    'joinrel' by employing either iterative substitution or
    1689             :  *    mergejoining on each of its possible outer paths (considering
    1690             :  *    only outer paths that are already ordered well enough for merging).
    1691             :  *
    1692             :  * We always generate a nestloop path for each available outer path.
    1693             :  * In fact we may generate as many as five: one on the cheapest-total-cost
    1694             :  * inner path, one on the same with materialization, one on the
    1695             :  * cheapest-startup-cost inner path (if different), one on the
    1696             :  * cheapest-total inner-indexscan path (if any), and one on the
    1697             :  * cheapest-startup inner-indexscan path (if different).
    1698             :  *
    1699             :  * We also consider mergejoins if mergejoin clauses are available.  See
    1700             :  * detailed comments in generate_mergejoin_paths.
    1701             :  *
    1702             :  * 'joinrel' is the join relation
    1703             :  * 'outerrel' is the outer join relation
    1704             :  * 'innerrel' is the inner join relation
    1705             :  * 'jointype' is the type of join to do
    1706             :  * 'extra' contains additional input values
    1707             :  */
    1708             : static void
    1709      491772 : match_unsorted_outer(PlannerInfo *root,
    1710             :                      RelOptInfo *joinrel,
    1711             :                      RelOptInfo *outerrel,
    1712             :                      RelOptInfo *innerrel,
    1713             :                      JoinType jointype,
    1714             :                      JoinPathExtraData *extra)
    1715             : {
    1716      491772 :     JoinType    save_jointype = jointype;
    1717             :     bool        nestjoinOK;
    1718             :     bool        useallclauses;
    1719      491772 :     Path       *inner_cheapest_total = innerrel->cheapest_total_path;
    1720      491772 :     Path       *matpath = NULL;
    1721             :     ListCell   *lc1;
    1722             : 
    1723             :     /*
    1724             :      * Nestloop only supports inner, left, semi, and anti joins.  Also, if we
    1725             :      * are doing a right, right-anti or full mergejoin, we must use *all* the
    1726             :      * mergeclauses as join clauses, else we will not have a valid plan.
    1727             :      * (Although these two flags are currently inverses, keep them separate
    1728             :      * for clarity and possible future changes.)
    1729             :      */
    1730      491772 :     switch (jointype)
    1731             :     {
    1732      401632 :         case JOIN_INNER:
    1733             :         case JOIN_LEFT:
    1734             :         case JOIN_SEMI:
    1735             :         case JOIN_ANTI:
    1736      401632 :             nestjoinOK = true;
    1737      401632 :             useallclauses = false;
    1738      401632 :             break;
    1739       82968 :         case JOIN_RIGHT:
    1740             :         case JOIN_RIGHT_ANTI:
    1741             :         case JOIN_FULL:
    1742       82968 :             nestjoinOK = false;
    1743       82968 :             useallclauses = true;
    1744       82968 :             break;
    1745        7172 :         case JOIN_UNIQUE_OUTER:
    1746             :         case JOIN_UNIQUE_INNER:
    1747        7172 :             jointype = JOIN_INNER;
    1748        7172 :             nestjoinOK = true;
    1749        7172 :             useallclauses = false;
    1750        7172 :             break;
    1751           0 :         default:
    1752           0 :             elog(ERROR, "unrecognized join type: %d",
    1753             :                  (int) jointype);
    1754             :             nestjoinOK = false; /* keep compiler quiet */
    1755             :             useallclauses = false;
    1756             :             break;
    1757             :     }
    1758             : 
    1759             :     /*
    1760             :      * If inner_cheapest_total is parameterized by the outer rel, ignore it;
    1761             :      * we will consider it below as a member of cheapest_parameterized_paths,
    1762             :      * but the other possibilities considered in this routine aren't usable.
    1763             :      */
    1764      491772 :     if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
    1765       10362 :         inner_cheapest_total = NULL;
    1766             : 
    1767             :     /*
    1768             :      * If we need to unique-ify the inner path, we will consider only the
    1769             :      * cheapest-total inner.
    1770             :      */
    1771      491772 :     if (save_jointype == JOIN_UNIQUE_INNER)
    1772             :     {
    1773             :         /* No way to do this with an inner path parameterized by outer rel */
    1774        3586 :         if (inner_cheapest_total == NULL)
    1775           0 :             return;
    1776             :         inner_cheapest_total = (Path *)
    1777        3586 :             create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
    1778             :         Assert(inner_cheapest_total);
    1779             :     }
    1780      488186 :     else if (nestjoinOK)
    1781             :     {
    1782             :         /*
    1783             :          * Consider materializing the cheapest inner path, unless
    1784             :          * enable_material is off or the path in question materializes its
    1785             :          * output anyway.
    1786             :          */
    1787      405218 :         if (enable_material && inner_cheapest_total != NULL &&
    1788      394430 :             !ExecMaterializesOutput(inner_cheapest_total->pathtype))
    1789             :             matpath = (Path *)
    1790      376530 :                 create_material_path(innerrel, inner_cheapest_total);
    1791             :     }
    1792             : 
    1793     1574650 :     foreach(lc1, outerrel->pathlist)
    1794             :     {
    1795     1082878 :         Path       *outerpath = (Path *) lfirst(lc1);
    1796             :         List       *merge_pathkeys;
    1797             : 
    1798             :         /*
    1799             :          * We cannot use an outer path that is parameterized by the inner rel.
    1800             :          */
    1801     1082878 :         if (PATH_PARAM_BY_REL(outerpath, innerrel))
    1802      187564 :             continue;
    1803             : 
    1804             :         /*
    1805             :          * If we need to unique-ify the outer path, it's pointless to consider
    1806             :          * any but the cheapest outer.  (XXX we don't consider parameterized
    1807             :          * outers, nor inners, for unique-ified cases.  Should we?)
    1808             :          */
    1809      895314 :         if (save_jointype == JOIN_UNIQUE_OUTER)
    1810             :         {
    1811        4618 :             if (outerpath != outerrel->cheapest_total_path)
    1812        1032 :                 continue;
    1813        3586 :             outerpath = (Path *) create_unique_path(root, outerrel,
    1814             :                                                     outerpath, extra->sjinfo);
    1815             :             Assert(outerpath);
    1816             :         }
    1817             : 
    1818             :         /*
    1819             :          * The result will have this sort order (even if it is implemented as
    1820             :          * a nestloop, and even if some of the mergeclauses are implemented by
    1821             :          * qpquals rather than as true mergeclauses):
    1822             :          */
    1823      894282 :         merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
    1824             :                                              outerpath->pathkeys);
    1825             : 
    1826      894282 :         if (save_jointype == JOIN_UNIQUE_INNER)
    1827             :         {
    1828             :             /*
    1829             :              * Consider nestloop join, but only with the unique-ified cheapest
    1830             :              * inner path
    1831             :              */
    1832        5474 :             try_nestloop_path(root,
    1833             :                               joinrel,
    1834             :                               outerpath,
    1835             :                               inner_cheapest_total,
    1836             :                               merge_pathkeys,
    1837             :                               jointype,
    1838             :                               extra);
    1839             :         }
    1840      888808 :         else if (nestjoinOK)
    1841             :         {
    1842             :             /*
    1843             :              * Consider nestloop joins using this outer path and various
    1844             :              * available paths for the inner relation.  We consider the
    1845             :              * cheapest-total paths for each available parameterization of the
    1846             :              * inner relation, including the unparameterized case.
    1847             :              */
    1848             :             ListCell   *lc2;
    1849             : 
    1850     1916456 :             foreach(lc2, innerrel->cheapest_parameterized_paths)
    1851             :             {
    1852     1188220 :                 Path       *innerpath = (Path *) lfirst(lc2);
    1853             :                 Path       *mpath;
    1854             : 
    1855     1188220 :                 try_nestloop_path(root,
    1856             :                                   joinrel,
    1857             :                                   outerpath,
    1858             :                                   innerpath,
    1859             :                                   merge_pathkeys,
    1860             :                                   jointype,
    1861             :                                   extra);
    1862             : 
    1863             :                 /*
    1864             :                  * Try generating a memoize path and see if that makes the
    1865             :                  * nested loop any cheaper.
    1866             :                  */
    1867     1188220 :                 mpath = get_memoize_path(root, innerrel, outerrel,
    1868             :                                          innerpath, outerpath, jointype,
    1869             :                                          extra);
    1870     1188220 :                 if (mpath != NULL)
    1871      197438 :                     try_nestloop_path(root,
    1872             :                                       joinrel,
    1873             :                                       outerpath,
    1874             :                                       mpath,
    1875             :                                       merge_pathkeys,
    1876             :                                       jointype,
    1877             :                                       extra);
    1878             :             }
    1879             : 
    1880             :             /* Also consider materialized form of the cheapest inner path */
    1881      728236 :             if (matpath != NULL)
    1882      683524 :                 try_nestloop_path(root,
    1883             :                                   joinrel,
    1884             :                                   outerpath,
    1885             :                                   matpath,
    1886             :                                   merge_pathkeys,
    1887             :                                   jointype,
    1888             :                                   extra);
    1889             :         }
    1890             : 
    1891             :         /* Can't do anything else if outer path needs to be unique'd */
    1892      894282 :         if (save_jointype == JOIN_UNIQUE_OUTER)
    1893        3586 :             continue;
    1894             : 
    1895             :         /* Can't do anything else if inner rel is parameterized by outer */
    1896      890696 :         if (inner_cheapest_total == NULL)
    1897       10684 :             continue;
    1898             : 
    1899             :         /* Generate merge join paths */
    1900      880012 :         generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
    1901             :                                  save_jointype, extra, useallclauses,
    1902             :                                  inner_cheapest_total, merge_pathkeys,
    1903             :                                  false);
    1904             :     }
    1905             : 
    1906             :     /*
    1907             :      * Consider partial nestloop and mergejoin plan if outerrel has any
    1908             :      * partial path and the joinrel is parallel-safe.  However, we can't
    1909             :      * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
    1910             :      * therefore we won't be able to properly guarantee uniqueness.  Nor can
    1911             :      * we handle joins needing lateral rels, since partial paths must not be
    1912             :      * parameterized. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT and
    1913             :      * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
    1914             :      */
    1915      491772 :     if (joinrel->consider_parallel &&
    1916      413748 :         save_jointype != JOIN_UNIQUE_OUTER &&
    1917      411284 :         save_jointype != JOIN_FULL &&
    1918      338372 :         save_jointype != JOIN_RIGHT &&
    1919      337230 :         save_jointype != JOIN_RIGHT_ANTI &&
    1920      337230 :         outerrel->partial_pathlist != NIL &&
    1921        9858 :         bms_is_empty(joinrel->lateral_relids))
    1922             :     {
    1923        9858 :         if (nestjoinOK)
    1924        9858 :             consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
    1925             :                                        save_jointype, extra);
    1926             : 
    1927             :         /*
    1928             :          * If inner_cheapest_total is NULL or non parallel-safe then find the
    1929             :          * cheapest total parallel safe path.  If doing JOIN_UNIQUE_INNER, we
    1930             :          * can't use any alternative inner path.
    1931             :          */
    1932        9858 :         if (inner_cheapest_total == NULL ||
    1933        9720 :             !inner_cheapest_total->parallel_safe)
    1934             :         {
    1935         486 :             if (save_jointype == JOIN_UNIQUE_INNER)
    1936           6 :                 return;
    1937             : 
    1938         480 :             inner_cheapest_total = get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
    1939             :         }
    1940             : 
    1941        9852 :         if (inner_cheapest_total)
    1942        9708 :             consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
    1943             :                                         save_jointype, extra,
    1944             :                                         inner_cheapest_total);
    1945             :     }
    1946             : }
    1947             : 
    1948             : /*
    1949             :  * consider_parallel_mergejoin
    1950             :  *    Try to build partial paths for a joinrel by joining a partial path
    1951             :  *    for the outer relation to a complete path for the inner relation.
    1952             :  *
    1953             :  * 'joinrel' is the join relation
    1954             :  * 'outerrel' is the outer join relation
    1955             :  * 'innerrel' is the inner join relation
    1956             :  * 'jointype' is the type of join to do
    1957             :  * 'extra' contains additional input values
    1958             :  * 'inner_cheapest_total' cheapest total path for innerrel
    1959             :  */
    1960             : static void
    1961        9708 : consider_parallel_mergejoin(PlannerInfo *root,
    1962             :                             RelOptInfo *joinrel,
    1963             :                             RelOptInfo *outerrel,
    1964             :                             RelOptInfo *innerrel,
    1965             :                             JoinType jointype,
    1966             :                             JoinPathExtraData *extra,
    1967             :                             Path *inner_cheapest_total)
    1968             : {
    1969             :     ListCell   *lc1;
    1970             : 
    1971             :     /* generate merge join path for each partial outer path */
    1972       22902 :     foreach(lc1, outerrel->partial_pathlist)
    1973             :     {
    1974       13194 :         Path       *outerpath = (Path *) lfirst(lc1);
    1975             :         List       *merge_pathkeys;
    1976             : 
    1977             :         /*
    1978             :          * Figure out what useful ordering any paths we create will have.
    1979             :          */
    1980       13194 :         merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
    1981             :                                              outerpath->pathkeys);
    1982             : 
    1983       13194 :         generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
    1984             :                                  extra, false, inner_cheapest_total,
    1985             :                                  merge_pathkeys, true);
    1986             :     }
    1987        9708 : }
    1988             : 
    1989             : /*
    1990             :  * consider_parallel_nestloop
    1991             :  *    Try to build partial paths for a joinrel by joining a partial path for the
    1992             :  *    outer relation to a complete path for the inner relation.
    1993             :  *
    1994             :  * 'joinrel' is the join relation
    1995             :  * 'outerrel' is the outer join relation
    1996             :  * 'innerrel' is the inner join relation
    1997             :  * 'jointype' is the type of join to do
    1998             :  * 'extra' contains additional input values
    1999             :  */
    2000             : static void
    2001        9858 : consider_parallel_nestloop(PlannerInfo *root,
    2002             :                            RelOptInfo *joinrel,
    2003             :                            RelOptInfo *outerrel,
    2004             :                            RelOptInfo *innerrel,
    2005             :                            JoinType jointype,
    2006             :                            JoinPathExtraData *extra)
    2007             : {
    2008        9858 :     JoinType    save_jointype = jointype;
    2009             :     ListCell   *lc1;
    2010             : 
    2011        9858 :     if (jointype == JOIN_UNIQUE_INNER)
    2012         564 :         jointype = JOIN_INNER;
    2013             : 
    2014       23250 :     foreach(lc1, outerrel->partial_pathlist)
    2015             :     {
    2016       13392 :         Path       *outerpath = (Path *) lfirst(lc1);
    2017             :         List       *pathkeys;
    2018             :         ListCell   *lc2;
    2019             : 
    2020             :         /* Figure out what useful ordering any paths we create will have. */
    2021       13392 :         pathkeys = build_join_pathkeys(root, joinrel, jointype,
    2022             :                                        outerpath->pathkeys);
    2023             : 
    2024             :         /*
    2025             :          * Try the cheapest parameterized paths; only those which will produce
    2026             :          * an unparameterized path when joined to this outerrel will survive
    2027             :          * try_partial_nestloop_path.  The cheapest unparameterized path is
    2028             :          * also in this list.
    2029             :          */
    2030       35136 :         foreach(lc2, innerrel->cheapest_parameterized_paths)
    2031             :         {
    2032       21744 :             Path       *innerpath = (Path *) lfirst(lc2);
    2033             :             Path       *mpath;
    2034             : 
    2035             :             /* Can't join to an inner path that is not parallel-safe */
    2036       21744 :             if (!innerpath->parallel_safe)
    2037         396 :                 continue;
    2038             : 
    2039             :             /*
    2040             :              * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
    2041             :              * cheapest_total_path, and we have to unique-ify it.  (We might
    2042             :              * be able to relax this to allow other safe, unparameterized
    2043             :              * inner paths, but right now create_unique_path is not on board
    2044             :              * with that.)
    2045             :              */
    2046       21348 :             if (save_jointype == JOIN_UNIQUE_INNER)
    2047             :             {
    2048        1722 :                 if (innerpath != innerrel->cheapest_total_path)
    2049         876 :                     continue;
    2050         846 :                 innerpath = (Path *) create_unique_path(root, innerrel,
    2051             :                                                         innerpath,
    2052             :                                                         extra->sjinfo);
    2053             :                 Assert(innerpath);
    2054             :             }
    2055             : 
    2056       20472 :             try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
    2057             :                                       pathkeys, jointype, extra);
    2058             : 
    2059             :             /*
    2060             :              * Try generating a memoize path and see if that makes the nested
    2061             :              * loop any cheaper.
    2062             :              */
    2063       20472 :             mpath = get_memoize_path(root, innerrel, outerrel,
    2064             :                                      innerpath, outerpath, jointype,
    2065             :                                      extra);
    2066       20472 :             if (mpath != NULL)
    2067        4960 :                 try_partial_nestloop_path(root, joinrel, outerpath, mpath,
    2068             :                                           pathkeys, jointype, extra);
    2069             :         }
    2070             :     }
    2071        9858 : }
    2072             : 
    2073             : /*
    2074             :  * hash_inner_and_outer
    2075             :  *    Create hashjoin join paths by explicitly hashing both the outer and
    2076             :  *    inner keys of each available hash clause.
    2077             :  *
    2078             :  * 'joinrel' is the join relation
    2079             :  * 'outerrel' is the outer join relation
    2080             :  * 'innerrel' is the inner join relation
    2081             :  * 'jointype' is the type of join to do
    2082             :  * 'extra' contains additional input values
    2083             :  */
    2084             : static void
    2085      494232 : hash_inner_and_outer(PlannerInfo *root,
    2086             :                      RelOptInfo *joinrel,
    2087             :                      RelOptInfo *outerrel,
    2088             :                      RelOptInfo *innerrel,
    2089             :                      JoinType jointype,
    2090             :                      JoinPathExtraData *extra)
    2091             : {
    2092      494232 :     JoinType    save_jointype = jointype;
    2093      494232 :     bool        isouterjoin = IS_OUTER_JOIN(jointype);
    2094             :     List       *hashclauses;
    2095             :     ListCell   *l;
    2096             : 
    2097             :     /*
    2098             :      * We need to build only one hashclauses list for any given pair of outer
    2099             :      * and inner relations; all of the hashable clauses will be used as keys.
    2100             :      *
    2101             :      * Scan the join's restrictinfo list to find hashjoinable clauses that are
    2102             :      * usable with this pair of sub-relations.
    2103             :      */
    2104      494232 :     hashclauses = NIL;
    2105     1003016 :     foreach(l, extra->restrictlist)
    2106             :     {
    2107      508784 :         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
    2108             : 
    2109             :         /*
    2110             :          * If processing an outer join, only use its own join clauses for
    2111             :          * hashing.  For inner joins we need not be so picky.
    2112             :          */
    2113      508784 :         if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
    2114        7964 :             continue;
    2115             : 
    2116      500820 :         if (!restrictinfo->can_join ||
    2117      447690 :             restrictinfo->hashjoinoperator == InvalidOid)
    2118       68412 :             continue;           /* not hashjoinable */
    2119             : 
    2120             :         /*
    2121             :          * Check if clause has the form "outer op inner" or "inner op outer".
    2122             :          */
    2123      432408 :         if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
    2124         144 :             continue;           /* no good for these input relations */
    2125             : 
    2126      432264 :         hashclauses = lappend(hashclauses, restrictinfo);
    2127             :     }
    2128             : 
    2129             :     /* If we found any usable hashclauses, make paths */
    2130      494232 :     if (hashclauses)
    2131             :     {
    2132             :         /*
    2133             :          * We consider both the cheapest-total-cost and cheapest-startup-cost
    2134             :          * outer paths.  There's no need to consider any but the
    2135             :          * cheapest-total-cost inner path, however.
    2136             :          */
    2137      394144 :         Path       *cheapest_startup_outer = outerrel->cheapest_startup_path;
    2138      394144 :         Path       *cheapest_total_outer = outerrel->cheapest_total_path;
    2139      394144 :         Path       *cheapest_total_inner = innerrel->cheapest_total_path;
    2140             : 
    2141             :         /*
    2142             :          * If either cheapest-total path is parameterized by the other rel, we
    2143             :          * can't use a hashjoin.  (There's no use looking for alternative
    2144             :          * input paths, since these should already be the least-parameterized
    2145             :          * available paths.)
    2146             :          */
    2147      394144 :         if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
    2148      393762 :             PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
    2149         764 :             return;
    2150             : 
    2151             :         /* Unique-ify if need be; we ignore parameterized possibilities */
    2152      393380 :         if (jointype == JOIN_UNIQUE_OUTER)
    2153             :         {
    2154             :             cheapest_total_outer = (Path *)
    2155        1726 :                 create_unique_path(root, outerrel,
    2156             :                                    cheapest_total_outer, extra->sjinfo);
    2157             :             Assert(cheapest_total_outer);
    2158        1726 :             jointype = JOIN_INNER;
    2159        1726 :             try_hashjoin_path(root,
    2160             :                               joinrel,
    2161             :                               cheapest_total_outer,
    2162             :                               cheapest_total_inner,
    2163             :                               hashclauses,
    2164             :                               jointype,
    2165             :                               extra);
    2166             :             /* no possibility of cheap startup here */
    2167             :         }
    2168      391654 :         else if (jointype == JOIN_UNIQUE_INNER)
    2169             :         {
    2170             :             cheapest_total_inner = (Path *)
    2171        1726 :                 create_unique_path(root, innerrel,
    2172             :                                    cheapest_total_inner, extra->sjinfo);
    2173             :             Assert(cheapest_total_inner);
    2174        1726 :             jointype = JOIN_INNER;
    2175        1726 :             try_hashjoin_path(root,
    2176             :                               joinrel,
    2177             :                               cheapest_total_outer,
    2178             :                               cheapest_total_inner,
    2179             :                               hashclauses,
    2180             :                               jointype,
    2181             :                               extra);
    2182        1726 :             if (cheapest_startup_outer != NULL &&
    2183             :                 cheapest_startup_outer != cheapest_total_outer)
    2184          56 :                 try_hashjoin_path(root,
    2185             :                                   joinrel,
    2186             :                                   cheapest_startup_outer,
    2187             :                                   cheapest_total_inner,
    2188             :                                   hashclauses,
    2189             :                                   jointype,
    2190             :                                   extra);
    2191             :         }
    2192             :         else
    2193             :         {
    2194             :             /*
    2195             :              * For other jointypes, we consider the cheapest startup outer
    2196             :              * together with the cheapest total inner, and then consider
    2197             :              * pairings of cheapest-total paths including parameterized ones.
    2198             :              * There is no use in generating parameterized paths on the basis
    2199             :              * of possibly cheap startup cost, so this is sufficient.
    2200             :              */
    2201             :             ListCell   *lc1;
    2202             :             ListCell   *lc2;
    2203             : 
    2204      389928 :             if (cheapest_startup_outer != NULL)
    2205      389310 :                 try_hashjoin_path(root,
    2206             :                                   joinrel,
    2207             :                                   cheapest_startup_outer,
    2208             :                                   cheapest_total_inner,
    2209             :                                   hashclauses,
    2210             :                                   jointype,
    2211             :                                   extra);
    2212             : 
    2213      994450 :             foreach(lc1, outerrel->cheapest_parameterized_paths)
    2214             :             {
    2215      604522 :                 Path       *outerpath = (Path *) lfirst(lc1);
    2216             : 
    2217             :                 /*
    2218             :                  * We cannot use an outer path that is parameterized by the
    2219             :                  * inner rel.
    2220             :                  */
    2221      604522 :                 if (PATH_PARAM_BY_REL(outerpath, innerrel))
    2222      179474 :                     continue;
    2223             : 
    2224     1094116 :                 foreach(lc2, innerrel->cheapest_parameterized_paths)
    2225             :                 {
    2226      669068 :                     Path       *innerpath = (Path *) lfirst(lc2);
    2227             : 
    2228             :                     /*
    2229             :                      * We cannot use an inner path that is parameterized by
    2230             :                      * the outer rel, either.
    2231             :                      */
    2232      669068 :                     if (PATH_PARAM_BY_REL(innerpath, outerrel))
    2233      200242 :                         continue;
    2234             : 
    2235      468826 :                     if (outerpath == cheapest_startup_outer &&
    2236             :                         innerpath == cheapest_total_inner)
    2237      337374 :                         continue;   /* already tried it */
    2238             : 
    2239      131452 :                     try_hashjoin_path(root,
    2240             :                                       joinrel,
    2241             :                                       outerpath,
    2242             :                                       innerpath,
    2243             :                                       hashclauses,
    2244             :                                       jointype,
    2245             :                                       extra);
    2246             :                 }
    2247             :             }
    2248             :         }
    2249             : 
    2250             :         /*
    2251             :          * If the joinrel is parallel-safe, we may be able to consider a
    2252             :          * partial hash join.  However, we can't handle JOIN_UNIQUE_OUTER,
    2253             :          * because the outer path will be partial, and therefore we won't be
    2254             :          * able to properly guarantee uniqueness.  Also, the resulting path
    2255             :          * must not be parameterized.
    2256             :          */
    2257      393380 :         if (joinrel->consider_parallel &&
    2258      352556 :             save_jointype != JOIN_UNIQUE_OUTER &&
    2259      352556 :             outerrel->partial_pathlist != NIL &&
    2260       12666 :             bms_is_empty(joinrel->lateral_relids))
    2261             :         {
    2262             :             Path       *cheapest_partial_outer;
    2263       12666 :             Path       *cheapest_partial_inner = NULL;
    2264       12666 :             Path       *cheapest_safe_inner = NULL;
    2265             : 
    2266       12666 :             cheapest_partial_outer =
    2267       12666 :                 (Path *) linitial(outerrel->partial_pathlist);
    2268             : 
    2269             :             /*
    2270             :              * Can we use a partial inner plan too, so that we can build a
    2271             :              * shared hash table in parallel?  We can't handle
    2272             :              * JOIN_UNIQUE_INNER because we can't guarantee uniqueness.
    2273             :              */
    2274       12666 :             if (innerrel->partial_pathlist != NIL &&
    2275       11850 :                 save_jointype != JOIN_UNIQUE_INNER &&
    2276             :                 enable_parallel_hash)
    2277             :             {
    2278       11574 :                 cheapest_partial_inner =
    2279       11574 :                     (Path *) linitial(innerrel->partial_pathlist);
    2280       11574 :                 try_partial_hashjoin_path(root, joinrel,
    2281             :                                           cheapest_partial_outer,
    2282             :                                           cheapest_partial_inner,
    2283             :                                           hashclauses, jointype, extra,
    2284             :                                           true /* parallel_hash */ );
    2285             :             }
    2286             : 
    2287             :             /*
    2288             :              * Normally, given that the joinrel is parallel-safe, the cheapest
    2289             :              * total inner path will also be parallel-safe, but if not, we'll
    2290             :              * have to search for the cheapest safe, unparameterized inner
    2291             :              * path.  If doing JOIN_UNIQUE_INNER, we can't use any alternative
    2292             :              * inner path.  If full, right, or right-anti join, we can't use
    2293             :              * parallelism (building the hash table in each backend) because
    2294             :              * no one process has all the match bits.
    2295             :              */
    2296       12666 :             if (save_jointype == JOIN_FULL ||
    2297        8850 :                 save_jointype == JOIN_RIGHT ||
    2298             :                 save_jointype == JOIN_RIGHT_ANTI)
    2299        4146 :                 cheapest_safe_inner = NULL;
    2300        8520 :             else if (cheapest_total_inner->parallel_safe)
    2301        8292 :                 cheapest_safe_inner = cheapest_total_inner;
    2302         228 :             else if (save_jointype != JOIN_UNIQUE_INNER)
    2303             :                 cheapest_safe_inner =
    2304         222 :                     get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
    2305             : 
    2306       12666 :             if (cheapest_safe_inner != NULL)
    2307        8508 :                 try_partial_hashjoin_path(root, joinrel,
    2308             :                                           cheapest_partial_outer,
    2309             :                                           cheapest_safe_inner,
    2310             :                                           hashclauses, jointype, extra,
    2311             :                                           false /* parallel_hash */ );
    2312             :         }
    2313             :     }
    2314             : }
    2315             : 
    2316             : /*
    2317             :  * select_mergejoin_clauses
    2318             :  *    Select mergejoin clauses that are usable for a particular join.
    2319             :  *    Returns a list of RestrictInfo nodes for those clauses.
    2320             :  *
    2321             :  * *mergejoin_allowed is normally set to true, but it is set to false if
    2322             :  * this is a right/right-anti/full join and there are nonmergejoinable join
    2323             :  * clauses.  The executor's mergejoin machinery cannot handle such cases, so
    2324             :  * we have to avoid generating a mergejoin plan.  (Note that this flag does
    2325             :  * NOT consider whether there are actually any mergejoinable clauses.  This is
    2326             :  * correct because in some cases we need to build a clauseless mergejoin.
    2327             :  * Simply returning NIL is therefore not enough to distinguish safe from
    2328             :  * unsafe cases.)
    2329             :  *
    2330             :  * We also mark each selected RestrictInfo to show which side is currently
    2331             :  * being considered as outer.  These are transient markings that are only
    2332             :  * good for the duration of the current add_paths_to_joinrel() call!
    2333             :  *
    2334             :  * We examine each restrictinfo clause known for the join to see
    2335             :  * if it is mergejoinable and involves vars from the two sub-relations
    2336             :  * currently of interest.
    2337             :  */
    2338             : static List *
    2339      495614 : select_mergejoin_clauses(PlannerInfo *root,
    2340             :                          RelOptInfo *joinrel,
    2341             :                          RelOptInfo *outerrel,
    2342             :                          RelOptInfo *innerrel,
    2343             :                          List *restrictlist,
    2344             :                          JoinType jointype,
    2345             :                          bool *mergejoin_allowed)
    2346             : {
    2347      495614 :     List       *result_list = NIL;
    2348      495614 :     bool        isouterjoin = IS_OUTER_JOIN(jointype);
    2349      495614 :     bool        have_nonmergeable_joinclause = false;
    2350             :     ListCell   *l;
    2351             : 
    2352     1005764 :     foreach(l, restrictlist)
    2353             :     {
    2354      510150 :         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
    2355             : 
    2356             :         /*
    2357             :          * If processing an outer join, only use its own join clauses in the
    2358             :          * merge.  For inner joins we can use pushed-down clauses too. (Note:
    2359             :          * we don't set have_nonmergeable_joinclause here because pushed-down
    2360             :          * clauses will become otherquals not joinquals.)
    2361             :          */
    2362      510150 :         if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
    2363        7988 :             continue;
    2364             : 
    2365             :         /* Check that clause is a mergeable operator clause */
    2366      502162 :         if (!restrictinfo->can_join ||
    2367      449032 :             restrictinfo->mergeopfamilies == NIL)
    2368             :         {
    2369             :             /*
    2370             :              * The executor can handle extra joinquals that are constants, but
    2371             :              * not anything else, when doing right/right-anti/full merge join.
    2372             :              * (The reason to support constants is so we can do FULL JOIN ON
    2373             :              * FALSE.)
    2374             :              */
    2375       67306 :             if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
    2376       59398 :                 have_nonmergeable_joinclause = true;
    2377       67306 :             continue;           /* not mergejoinable */
    2378             :         }
    2379             : 
    2380             :         /*
    2381             :          * Check if clause has the form "outer op inner" or "inner op outer".
    2382             :          */
    2383      434856 :         if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
    2384             :         {
    2385         768 :             have_nonmergeable_joinclause = true;
    2386         768 :             continue;           /* no good for these input relations */
    2387             :         }
    2388             : 
    2389             :         /*
    2390             :          * Insist that each side have a non-redundant eclass.  This
    2391             :          * restriction is needed because various bits of the planner expect
    2392             :          * that each clause in a merge be associable with some pathkey in a
    2393             :          * canonical pathkey list, but redundant eclasses can't appear in
    2394             :          * canonical sort orderings.  (XXX it might be worth relaxing this,
    2395             :          * but not enough time to address it for 8.3.)
    2396             :          */
    2397      434088 :         update_mergeclause_eclasses(root, restrictinfo);
    2398             : 
    2399      434088 :         if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
    2400      434028 :             EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
    2401             :         {
    2402         140 :             have_nonmergeable_joinclause = true;
    2403         140 :             continue;           /* can't handle redundant eclasses */
    2404             :         }
    2405             : 
    2406      433948 :         result_list = lappend(result_list, restrictinfo);
    2407             :     }
    2408             : 
    2409             :     /*
    2410             :      * Report whether mergejoin is allowed (see comment at top of function).
    2411             :      */
    2412      495614 :     switch (jointype)
    2413             :     {
    2414       89830 :         case JOIN_RIGHT:
    2415             :         case JOIN_RIGHT_ANTI:
    2416             :         case JOIN_FULL:
    2417       89830 :             *mergejoin_allowed = !have_nonmergeable_joinclause;
    2418       89830 :             break;
    2419      405784 :         default:
    2420      405784 :             *mergejoin_allowed = true;
    2421      405784 :             break;
    2422             :     }
    2423             : 
    2424      495614 :     return result_list;
    2425             : }

Generated by: LCOV version 1.14