LCOV - code coverage report
Current view: top level - src/backend/optimizer/plan - analyzejoins.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 710 728 97.5 %
Date: 2025-12-15 06:18:08 Functions: 27 27 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * analyzejoins.c
       4             :  *    Routines for simplifying joins after initial query analysis
       5             :  *
       6             :  * While we do a great deal of join simplification in prep/prepjointree.c,
       7             :  * certain optimizations cannot be performed at that stage for lack of
       8             :  * detailed information about the query.  The routines here are invoked
       9             :  * after initsplan.c has done its work, and can do additional join removal
      10             :  * and simplification steps based on the information extracted.  The penalty
      11             :  * is that we have to work harder to clean up after ourselves when we modify
      12             :  * the query, since the derived data structures have to be updated too.
      13             :  *
      14             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      15             :  * Portions Copyright (c) 1994, Regents of the University of California
      16             :  *
      17             :  *
      18             :  * IDENTIFICATION
      19             :  *    src/backend/optimizer/plan/analyzejoins.c
      20             :  *
      21             :  *-------------------------------------------------------------------------
      22             :  */
      23             : #include "postgres.h"
      24             : 
      25             : #include "catalog/pg_class.h"
      26             : #include "nodes/nodeFuncs.h"
      27             : #include "optimizer/joininfo.h"
      28             : #include "optimizer/optimizer.h"
      29             : #include "optimizer/pathnode.h"
      30             : #include "optimizer/paths.h"
      31             : #include "optimizer/placeholder.h"
      32             : #include "optimizer/planmain.h"
      33             : #include "optimizer/restrictinfo.h"
      34             : #include "parser/parse_agg.h"
      35             : #include "rewrite/rewriteManip.h"
      36             : #include "utils/lsyscache.h"
      37             : 
      38             : /*
      39             :  * Utility structure.  A sorting procedure is needed to simplify the search
      40             :  * of SJE-candidate baserels referencing the same database relation.  Having
      41             :  * collected all baserels from the query jointree, the planner sorts them
      42             :  * according to the reloid value, groups them with the next pass and attempts
      43             :  * to remove self-joins.
      44             :  *
      45             :  * Preliminary sorting prevents quadratic behavior that can be harmful in the
      46             :  * case of numerous joins.
      47             :  */
      48             : typedef struct
      49             : {
      50             :     int         relid;
      51             :     Oid         reloid;
      52             : } SelfJoinCandidate;
      53             : 
      54             : bool        enable_self_join_elimination;
      55             : 
      56             : /* local functions */
      57             : static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
      58             : static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
      59             :                                           SpecialJoinInfo *sjinfo);
      60             : static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
      61             :                                          int relid, int ojrelid);
      62             : static void remove_rel_from_eclass(EquivalenceClass *ec,
      63             :                                    SpecialJoinInfo *sjinfo,
      64             :                                    int relid, int subst);
      65             : static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
      66             : static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
      67             : static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
      68             :                                 List *clause_list, List **extra_clauses);
      69             : static Oid  distinct_col_search(int colno, List *colnos, List *opids);
      70             : static bool is_innerrel_unique_for(PlannerInfo *root,
      71             :                                    Relids joinrelids,
      72             :                                    Relids outerrelids,
      73             :                                    RelOptInfo *innerrel,
      74             :                                    JoinType jointype,
      75             :                                    List *restrictlist,
      76             :                                    List **extra_clauses);
      77             : static int  self_join_candidates_cmp(const void *a, const void *b);
      78             : static bool replace_relid_callback(Node *node,
      79             :                                    ChangeVarNodes_context *context);
      80             : 
      81             : 
      82             : /*
      83             :  * remove_useless_joins
      84             :  *      Check for relations that don't actually need to be joined at all,
      85             :  *      and remove them from the query.
      86             :  *
      87             :  * We are passed the current joinlist and return the updated list.  Other
      88             :  * data structures that have to be updated are accessible via "root".
      89             :  */
      90             : List *
      91      341504 : remove_useless_joins(PlannerInfo *root, List *joinlist)
      92             : {
      93             :     ListCell   *lc;
      94             : 
      95             :     /*
      96             :      * We are only interested in relations that are left-joined to, so we can
      97             :      * scan the join_info_list to find them easily.
      98             :      */
      99      352912 : restart:
     100      396760 :     foreach(lc, root->join_info_list)
     101             :     {
     102       55256 :         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
     103             :         int         innerrelid;
     104             :         int         nremoved;
     105             : 
     106             :         /* Skip if not removable */
     107       55256 :         if (!join_is_removable(root, sjinfo))
     108       43848 :             continue;
     109             : 
     110             :         /*
     111             :          * Currently, join_is_removable can only succeed when the sjinfo's
     112             :          * righthand is a single baserel.  Remove that rel from the query and
     113             :          * joinlist.
     114             :          */
     115       11408 :         innerrelid = bms_singleton_member(sjinfo->min_righthand);
     116             : 
     117       11408 :         remove_leftjoinrel_from_query(root, innerrelid, sjinfo);
     118             : 
     119             :         /* We verify that exactly one reference gets removed from joinlist */
     120       11408 :         nremoved = 0;
     121       11408 :         joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
     122       11408 :         if (nremoved != 1)
     123           0 :             elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
     124             : 
     125             :         /*
     126             :          * We can delete this SpecialJoinInfo from the list too, since it's no
     127             :          * longer of interest.  (Since we'll restart the foreach loop
     128             :          * immediately, we don't bother with foreach_delete_current.)
     129             :          */
     130       11408 :         root->join_info_list = list_delete_cell(root->join_info_list, lc);
     131             : 
     132             :         /*
     133             :          * Restart the scan.  This is necessary to ensure we find all
     134             :          * removable joins independently of ordering of the join_info_list
     135             :          * (note that removal of attr_needed bits may make a join appear
     136             :          * removable that did not before).
     137             :          */
     138       11408 :         goto restart;
     139             :     }
     140             : 
     141      341504 :     return joinlist;
     142             : }
     143             : 
     144             : /*
     145             :  * join_is_removable
     146             :  *    Check whether we need not perform this special join at all, because
     147             :  *    it will just duplicate its left input.
     148             :  *
     149             :  * This is true for a left join for which the join condition cannot match
     150             :  * more than one inner-side row.  (There are other possibly interesting
     151             :  * cases, but we don't have the infrastructure to prove them.)  We also
     152             :  * have to check that the inner side doesn't generate any variables needed
     153             :  * above the join.
     154             :  */
     155             : static bool
     156       55256 : join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
     157             : {
     158             :     int         innerrelid;
     159             :     RelOptInfo *innerrel;
     160             :     Relids      inputrelids;
     161             :     Relids      joinrelids;
     162       55256 :     List       *clause_list = NIL;
     163             :     ListCell   *l;
     164             :     int         attroff;
     165             : 
     166             :     /*
     167             :      * Must be a left join to a single baserel, else we aren't going to be
     168             :      * able to do anything with it.
     169             :      */
     170       55256 :     if (sjinfo->jointype != JOIN_LEFT)
     171       10104 :         return false;
     172             : 
     173       45152 :     if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
     174        1308 :         return false;
     175             : 
     176             :     /*
     177             :      * Never try to eliminate a left join to the query result rel.  Although
     178             :      * the case is syntactically impossible in standard SQL, MERGE will build
     179             :      * a join tree that looks exactly like that.
     180             :      */
     181       43844 :     if (innerrelid == root->parse->resultRelation)
     182         800 :         return false;
     183             : 
     184       43044 :     innerrel = find_base_rel(root, innerrelid);
     185             : 
     186             :     /*
     187             :      * Before we go to the effort of checking whether any innerrel variables
     188             :      * are needed above the join, make a quick check to eliminate cases in
     189             :      * which we will surely be unable to prove uniqueness of the innerrel.
     190             :      */
     191       43044 :     if (!rel_supports_distinctness(root, innerrel))
     192        3078 :         return false;
     193             : 
     194             :     /* Compute the relid set for the join we are considering */
     195       39966 :     inputrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
     196             :     Assert(sjinfo->ojrelid != 0);
     197       39966 :     joinrelids = bms_copy(inputrelids);
     198       39966 :     joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
     199             : 
     200             :     /*
     201             :      * We can't remove the join if any inner-rel attributes are used above the
     202             :      * join.  Here, "above" the join includes pushed-down conditions, so we
     203             :      * should reject if attr_needed includes the OJ's own relid; therefore,
     204             :      * compare to inputrelids not joinrelids.
     205             :      *
     206             :      * As a micro-optimization, it seems better to start with max_attr and
     207             :      * count down rather than starting with min_attr and counting up, on the
     208             :      * theory that the system attributes are somewhat less likely to be wanted
     209             :      * and should be tested last.
     210             :      */
     211       39966 :     for (attroff = innerrel->max_attr - innerrel->min_attr;
     212      375334 :          attroff >= 0;
     213      335368 :          attroff--)
     214             :     {
     215      363636 :         if (!bms_is_subset(innerrel->attr_needed[attroff], inputrelids))
     216       28268 :             return false;
     217             :     }
     218             : 
     219             :     /*
     220             :      * Similarly check that the inner rel isn't needed by any PlaceHolderVars
     221             :      * that will be used above the join.  The PHV case is a little bit more
     222             :      * complicated, because PHVs may have been assigned a ph_eval_at location
     223             :      * that includes the innerrel, yet their contained expression might not
     224             :      * actually reference the innerrel (it could be just a constant, for
     225             :      * instance).  If such a PHV is due to be evaluated above the join then it
     226             :      * needn't prevent join removal.
     227             :      */
     228       11902 :     foreach(l, root->placeholder_list)
     229             :     {
     230         240 :         PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
     231             : 
     232         240 :         if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
     233          36 :             return false;       /* it references innerrel laterally */
     234         240 :         if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
     235          54 :             continue;           /* it definitely doesn't reference innerrel */
     236         186 :         if (bms_is_subset(phinfo->ph_needed, inputrelids))
     237           6 :             continue;           /* PHV is not used above the join */
     238         180 :         if (!bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
     239          30 :             return false;       /* it has to be evaluated below the join */
     240             : 
     241             :         /*
     242             :          * We need to be sure there will still be a place to evaluate the PHV
     243             :          * if we remove the join, ie that ph_eval_at wouldn't become empty.
     244             :          */
     245         150 :         if (!bms_overlap(sjinfo->min_lefthand, phinfo->ph_eval_at))
     246           6 :             return false;       /* there isn't any other place to eval PHV */
     247             :         /* Check contained expression last, since this is a bit expensive */
     248         144 :         if (bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
     249         144 :                         innerrel->relids))
     250           0 :             return false;       /* contained expression references innerrel */
     251             :     }
     252             : 
     253             :     /*
     254             :      * Search for mergejoinable clauses that constrain the inner rel against
     255             :      * either the outer rel or a pseudoconstant.  If an operator is
     256             :      * mergejoinable then it behaves like equality for some btree opclass, so
     257             :      * it's what we want.  The mergejoinability test also eliminates clauses
     258             :      * containing volatile functions, which we couldn't depend on.
     259             :      */
     260       23496 :     foreach(l, innerrel->joininfo)
     261             :     {
     262       11834 :         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
     263             : 
     264             :         /*
     265             :          * If the current join commutes with some other outer join(s) via
     266             :          * outer join identity 3, there will be multiple clones of its join
     267             :          * clauses in the joininfo list.  We want to consider only the
     268             :          * has_clone form of such clauses.  Processing more than one form
     269             :          * would be wasteful, and also some of the others would confuse the
     270             :          * RINFO_IS_PUSHED_DOWN test below.
     271             :          */
     272       11834 :         if (restrictinfo->is_clone)
     273         100 :             continue;           /* ignore it */
     274             : 
     275             :         /*
     276             :          * If it's not a join clause for this outer join, we can't use it.
     277             :          * Note that if the clause is pushed-down, then it is logically from
     278             :          * above the outer join, even if it references no other rels (it might
     279             :          * be from WHERE, for example).
     280             :          */
     281       11734 :         if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
     282         126 :             continue;           /* ignore; not useful here */
     283             : 
     284             :         /* Ignore if it's not a mergejoinable clause */
     285       11608 :         if (!restrictinfo->can_join ||
     286       11490 :             restrictinfo->mergeopfamilies == NIL)
     287         118 :             continue;           /* not mergejoinable */
     288             : 
     289             :         /*
     290             :          * Check if the clause has the form "outer op inner" or "inner op
     291             :          * outer", and if so mark which side is inner.
     292             :          */
     293       11490 :         if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
     294             :                                      innerrel->relids))
     295           6 :             continue;           /* no good for these input relations */
     296             : 
     297             :         /* OK, add to list */
     298       11484 :         clause_list = lappend(clause_list, restrictinfo);
     299             :     }
     300             : 
     301             :     /*
     302             :      * Now that we have the relevant equality join clauses, try to prove the
     303             :      * innerrel distinct.
     304             :      */
     305       11662 :     if (rel_is_distinct_for(root, innerrel, clause_list, NULL))
     306       11408 :         return true;
     307             : 
     308             :     /*
     309             :      * Some day it would be nice to check for other methods of establishing
     310             :      * distinctness.
     311             :      */
     312         254 :     return false;
     313             : }
     314             : 
     315             : 
     316             : /*
     317             :  * Remove the target rel->relid and references to the target join from the
     318             :  * planner's data structures, having determined that there is no need
     319             :  * to include them in the query. Optionally replace them with subst if subst
     320             :  * is non-negative.
     321             :  *
     322             :  * This function updates only parts needed for both left-join removal and
     323             :  * self-join removal.
     324             :  */
     325             : static void
     326       12008 : remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel,
     327             :                       int subst, SpecialJoinInfo *sjinfo,
     328             :                       Relids joinrelids)
     329             : {
     330       12008 :     int         relid = rel->relid;
     331             :     Index       rti;
     332             :     ListCell   *l;
     333             : 
     334             :     /*
     335             :      * Update all_baserels and related relid sets.
     336             :      */
     337       12008 :     root->all_baserels = adjust_relid_set(root->all_baserels, relid, subst);
     338       12008 :     root->all_query_rels = adjust_relid_set(root->all_query_rels, relid, subst);
     339             : 
     340       12008 :     if (sjinfo != NULL)
     341             :     {
     342       22816 :         root->outer_join_rels = bms_del_member(root->outer_join_rels,
     343       11408 :                                                sjinfo->ojrelid);
     344       11408 :         root->all_query_rels = bms_del_member(root->all_query_rels,
     345       11408 :                                               sjinfo->ojrelid);
     346             :     }
     347             : 
     348             :     /*
     349             :      * Likewise remove references from SpecialJoinInfo data structures.
     350             :      *
     351             :      * This is relevant in case the outer join we're deleting is nested inside
     352             :      * other outer joins: the upper joins' relid sets have to be adjusted. The
     353             :      * RHS of the target outer join will be made empty here, but that's OK
     354             :      * since caller will delete that SpecialJoinInfo entirely.
     355             :      */
     356       27736 :     foreach(l, root->join_info_list)
     357             :     {
     358       15728 :         SpecialJoinInfo *sjinf = (SpecialJoinInfo *) lfirst(l);
     359             : 
     360             :         /*
     361             :          * initsplan.c is fairly cavalier about allowing SpecialJoinInfos'
     362             :          * lefthand/righthand relid sets to be shared with other data
     363             :          * structures.  Ensure that we don't modify the original relid sets.
     364             :          * (The commute_xxx sets are always per-SpecialJoinInfo though.)
     365             :          */
     366       15728 :         sjinf->min_lefthand = bms_copy(sjinf->min_lefthand);
     367       15728 :         sjinf->min_righthand = bms_copy(sjinf->min_righthand);
     368       15728 :         sjinf->syn_lefthand = bms_copy(sjinf->syn_lefthand);
     369       15728 :         sjinf->syn_righthand = bms_copy(sjinf->syn_righthand);
     370             :         /* Now remove relid from the sets: */
     371       15728 :         sjinf->min_lefthand = adjust_relid_set(sjinf->min_lefthand, relid, subst);
     372       15728 :         sjinf->min_righthand = adjust_relid_set(sjinf->min_righthand, relid, subst);
     373       15728 :         sjinf->syn_lefthand = adjust_relid_set(sjinf->syn_lefthand, relid, subst);
     374       15728 :         sjinf->syn_righthand = adjust_relid_set(sjinf->syn_righthand, relid, subst);
     375             : 
     376       15728 :         if (sjinfo != NULL)
     377             :         {
     378             :             Assert(subst <= 0);
     379             : 
     380             :             /* Remove sjinfo->ojrelid bits from the sets: */
     381       31252 :             sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand,
     382       15626 :                                                  sjinfo->ojrelid);
     383       31252 :             sjinf->min_righthand = bms_del_member(sjinf->min_righthand,
     384       15626 :                                                   sjinfo->ojrelid);
     385       31252 :             sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand,
     386       15626 :                                                  sjinfo->ojrelid);
     387       31252 :             sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand,
     388       15626 :                                                   sjinfo->ojrelid);
     389             :             /* relid cannot appear in these fields, but ojrelid can: */
     390       31252 :             sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l,
     391       15626 :                                                     sjinfo->ojrelid);
     392       31252 :             sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r,
     393       15626 :                                                     sjinfo->ojrelid);
     394       31252 :             sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l,
     395       15626 :                                                     sjinfo->ojrelid);
     396       15626 :             sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r,
     397       15626 :                                                     sjinfo->ojrelid);
     398             :         }
     399             :         else
     400             :         {
     401             :             Assert(subst > 0);
     402             : 
     403         102 :             ChangeVarNodesExtended((Node *) sjinf->semi_rhs_exprs, relid, subst,
     404             :                                    0, replace_relid_callback);
     405             :         }
     406             :     }
     407             : 
     408             :     /*
     409             :      * Likewise remove references from PlaceHolderVar data structures,
     410             :      * removing any no-longer-needed placeholders entirely.  We remove PHV
     411             :      * only for left-join removal.  With self-join elimination, PHVs already
     412             :      * get moved to the remaining relation, where they might still be needed.
     413             :      * It might also happen that we skip the removal of some PHVs that could
     414             :      * be removed.  However, the overhead of extra PHVs is small compared to
     415             :      * the complexity of analysis needed to remove them.
     416             :      *
     417             :      * Removal is a bit trickier than it might seem: we can remove PHVs that
     418             :      * are used at the target rel and/or in the join qual, but not those that
     419             :      * are used at join partner rels or above the join.  It's not that easy to
     420             :      * distinguish PHVs used at partner rels from those used in the join qual,
     421             :      * since they will both have ph_needed sets that are subsets of
     422             :      * joinrelids.  However, a PHV used at a partner rel could not have the
     423             :      * target rel in ph_eval_at, so we check that while deciding whether to
     424             :      * remove or just update the PHV.  There is no corresponding test in
     425             :      * join_is_removable because it doesn't need to distinguish those cases.
     426             :      */
     427       12236 :     foreach(l, root->placeholder_list)
     428             :     {
     429         228 :         PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
     430             : 
     431             :         Assert(sjinfo == NULL || !bms_is_member(relid, phinfo->ph_lateral));
     432         414 :         if (sjinfo != NULL &&
     433         216 :             bms_is_subset(phinfo->ph_needed, joinrelids) &&
     434          30 :             bms_is_member(relid, phinfo->ph_eval_at) &&
     435          12 :             !bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
     436             :         {
     437             :             /*
     438             :              * This code shouldn't be executed if one relation is substituted
     439             :              * with another: in this case, the placeholder may be employed in
     440             :              * a filter inside the scan node the SJE removes.
     441             :              */
     442           6 :             root->placeholder_list = foreach_delete_current(root->placeholder_list,
     443             :                                                             l);
     444           6 :             root->placeholder_array[phinfo->phid] = NULL;
     445             :         }
     446             :         else
     447             :         {
     448         222 :             PlaceHolderVar *phv = phinfo->ph_var;
     449             : 
     450         222 :             phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at, relid, subst);
     451         222 :             if (sjinfo != NULL)
     452         180 :                 phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at,
     453         180 :                                                       sjinfo->ojrelid, subst);
     454             :             Assert(!bms_is_empty(phinfo->ph_eval_at));   /* checked previously */
     455             :             /* Reduce ph_needed to contain only "relation 0"; see below */
     456         222 :             if (bms_is_member(0, phinfo->ph_needed))
     457         108 :                 phinfo->ph_needed = bms_make_singleton(0);
     458             :             else
     459         114 :                 phinfo->ph_needed = NULL;
     460             : 
     461         222 :             phinfo->ph_lateral = adjust_relid_set(phinfo->ph_lateral, relid, subst);
     462             : 
     463             :             /*
     464             :              * ph_lateral might contain rels mentioned in ph_eval_at after the
     465             :              * replacement, remove them.
     466             :              */
     467         222 :             phinfo->ph_lateral = bms_difference(phinfo->ph_lateral, phinfo->ph_eval_at);
     468             :             /* ph_lateral might or might not be empty */
     469             : 
     470         222 :             phv->phrels = adjust_relid_set(phv->phrels, relid, subst);
     471         222 :             if (sjinfo != NULL)
     472         180 :                 phv->phrels = adjust_relid_set(phv->phrels,
     473         180 :                                                sjinfo->ojrelid, subst);
     474             :             Assert(!bms_is_empty(phv->phrels));
     475             : 
     476         222 :             ChangeVarNodesExtended((Node *) phv->phexpr, relid, subst, 0,
     477             :                                    replace_relid_callback);
     478             : 
     479             :             Assert(phv->phnullingrels == NULL); /* no need to adjust */
     480             :         }
     481             :     }
     482             : 
     483             :     /*
     484             :      * Likewise remove references from EquivalenceClasses.
     485             :      */
     486       63332 :     foreach(l, root->eq_classes)
     487             :     {
     488       51324 :         EquivalenceClass *ec = (EquivalenceClass *) lfirst(l);
     489             : 
     490       51324 :         if (bms_is_member(relid, ec->ec_relids) ||
     491       34442 :             (sjinfo == NULL || bms_is_member(sjinfo->ojrelid, ec->ec_relids)))
     492       16882 :             remove_rel_from_eclass(ec, sjinfo, relid, subst);
     493             :     }
     494             : 
     495             :     /*
     496             :      * Finally, we must recompute per-Var attr_needed and per-PlaceHolderVar
     497             :      * ph_needed relid sets.  These have to be known accurately, else we may
     498             :      * fail to remove other now-removable outer joins.  And our removal of the
     499             :      * join clause(s) for this outer join may mean that Vars that were
     500             :      * formerly needed no longer are.  So we have to do this honestly by
     501             :      * repeating the construction of those relid sets.  We can cheat to one
     502             :      * small extent: we can avoid re-examining the targetlist and HAVING qual
     503             :      * by preserving "relation 0" bits from the existing relid sets.  This is
     504             :      * safe because we'd never remove such references.
     505             :      *
     506             :      * So, start by removing all other bits from attr_needed sets and
     507             :      * lateral_vars lists.  (We already did this above for ph_needed.)
     508             :      */
     509       76666 :     for (rti = 1; rti < root->simple_rel_array_size; rti++)
     510             :     {
     511       64658 :         RelOptInfo *otherrel = root->simple_rel_array[rti];
     512             :         int         attroff;
     513             : 
     514             :         /* there may be empty slots corresponding to non-baserel RTEs */
     515       64658 :         if (otherrel == NULL)
     516       32028 :             continue;
     517             : 
     518             :         Assert(otherrel->relid == rti); /* sanity check on array */
     519             : 
     520       32630 :         for (attroff = otherrel->max_attr - otherrel->min_attr;
     521      730224 :              attroff >= 0;
     522      697594 :              attroff--)
     523             :         {
     524      697594 :             if (bms_is_member(0, otherrel->attr_needed[attroff]))
     525       47314 :                 otherrel->attr_needed[attroff] = bms_make_singleton(0);
     526             :             else
     527      650280 :                 otherrel->attr_needed[attroff] = NULL;
     528             :         }
     529             : 
     530       32630 :         if (subst > 0)
     531        1496 :             ChangeVarNodesExtended((Node *) otherrel->lateral_vars, relid,
     532             :                                    subst, 0, replace_relid_callback);
     533             :     }
     534       12008 : }
     535             : 
     536             : /*
     537             :  * Remove the target relid and references to the target join from the
     538             :  * planner's data structures, having determined that there is no need
     539             :  * to include them in the query.
     540             :  *
     541             :  * We are not terribly thorough here.  We only bother to update parts of
     542             :  * the planner's data structures that will actually be consulted later.
     543             :  */
     544             : static void
     545       11408 : remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
     546             :                               SpecialJoinInfo *sjinfo)
     547             : {
     548       11408 :     RelOptInfo *rel = find_base_rel(root, relid);
     549       11408 :     int         ojrelid = sjinfo->ojrelid;
     550             :     Relids      joinrelids;
     551             :     Relids      join_plus_commute;
     552             :     List       *joininfos;
     553             :     ListCell   *l;
     554             : 
     555             :     /* Compute the relid set for the join we are considering */
     556       11408 :     joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
     557             :     Assert(ojrelid != 0);
     558       11408 :     joinrelids = bms_add_member(joinrelids, ojrelid);
     559             : 
     560       11408 :     remove_rel_from_query(root, rel, -1, sjinfo, joinrelids);
     561             : 
     562             :     /*
     563             :      * Remove any joinquals referencing the rel from the joininfo lists.
     564             :      *
     565             :      * In some cases, a joinqual has to be put back after deleting its
     566             :      * reference to the target rel.  This can occur for pseudoconstant and
     567             :      * outerjoin-delayed quals, which can get marked as requiring the rel in
     568             :      * order to force them to be evaluated at or above the join.  We can't
     569             :      * just discard them, though.  Only quals that logically belonged to the
     570             :      * outer join being discarded should be removed from the query.
     571             :      *
     572             :      * We might encounter a qual that is a clone of a deletable qual with some
     573             :      * outer-join relids added (see deconstruct_distribute_oj_quals).  To
     574             :      * ensure we get rid of such clones as well, add the relids of all OJs
     575             :      * commutable with this one to the set we test against for
     576             :      * pushed-down-ness.
     577             :      */
     578       11408 :     join_plus_commute = bms_union(joinrelids,
     579       11408 :                                   sjinfo->commute_above_r);
     580       11408 :     join_plus_commute = bms_add_members(join_plus_commute,
     581       11408 :                                         sjinfo->commute_below_l);
     582             : 
     583             :     /*
     584             :      * We must make a copy of the rel's old joininfo list before starting the
     585             :      * loop, because otherwise remove_join_clause_from_rels would destroy the
     586             :      * list while we're scanning it.
     587             :      */
     588       11408 :     joininfos = list_copy(rel->joininfo);
     589       23012 :     foreach(l, joininfos)
     590             :     {
     591       11604 :         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
     592             : 
     593       11604 :         remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
     594             : 
     595       11604 :         if (RINFO_IS_PUSHED_DOWN(rinfo, join_plus_commute))
     596             :         {
     597             :             /*
     598             :              * There might be references to relid or ojrelid in the
     599             :              * RestrictInfo's relid sets, as a consequence of PHVs having had
     600             :              * ph_eval_at sets that include those.  We already checked above
     601             :              * that any such PHV is safe (and updated its ph_eval_at), so we
     602             :              * can just drop those references.
     603             :              */
     604         126 :             remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
     605             : 
     606             :             /*
     607             :              * Cross-check that the clause itself does not reference the
     608             :              * target rel or join.
     609             :              */
     610             : #ifdef USE_ASSERT_CHECKING
     611             :             {
     612             :                 Relids      clause_varnos = pull_varnos(root,
     613             :                                                         (Node *) rinfo->clause);
     614             : 
     615             :                 Assert(!bms_is_member(relid, clause_varnos));
     616             :                 Assert(!bms_is_member(ojrelid, clause_varnos));
     617             :             }
     618             : #endif
     619             :             /* Now throw it back into the joininfo lists */
     620         126 :             distribute_restrictinfo_to_rels(root, rinfo);
     621             :         }
     622             :     }
     623             : 
     624             :     /*
     625             :      * There may be references to the rel in root->fkey_list, but if so,
     626             :      * match_foreign_keys_to_quals() will get rid of them.
     627             :      */
     628             : 
     629             :     /*
     630             :      * Now remove the rel from the baserel array to prevent it from being
     631             :      * referenced again.  (We can't do this earlier because
     632             :      * remove_join_clause_from_rels will touch it.)
     633             :      */
     634       11408 :     root->simple_rel_array[relid] = NULL;
     635       11408 :     root->simple_rte_array[relid] = NULL;
     636             : 
     637             :     /* And nuke the RelOptInfo, just in case there's another access path */
     638       11408 :     pfree(rel);
     639             : 
     640             :     /*
     641             :      * Now repeat construction of attr_needed bits coming from all other
     642             :      * sources.
     643             :      */
     644       11408 :     rebuild_placeholder_attr_needed(root);
     645       11408 :     rebuild_joinclause_attr_needed(root);
     646       11408 :     rebuild_eclass_attr_needed(root);
     647       11408 :     rebuild_lateral_attr_needed(root);
     648       11408 : }
     649             : 
     650             : /*
     651             :  * Remove any references to relid or ojrelid from the RestrictInfo.
     652             :  *
     653             :  * We only bother to clean out bits in clause_relids and required_relids,
     654             :  * not nullingrel bits in contained Vars and PHVs.  (This might have to be
     655             :  * improved sometime.)  However, if the RestrictInfo contains an OR clause
     656             :  * we have to also clean up the sub-clauses.
     657             :  */
     658             : static void
     659        4632 : remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid)
     660             : {
     661             :     /*
     662             :      * initsplan.c is fairly cavalier about allowing RestrictInfos to share
     663             :      * relid sets with other RestrictInfos, and SpecialJoinInfos too.  Make
     664             :      * sure this RestrictInfo has its own relid sets before we modify them.
     665             :      * (In present usage, clause_relids is probably not shared, but
     666             :      * required_relids could be; let's not assume anything.)
     667             :      */
     668        4632 :     rinfo->clause_relids = bms_copy(rinfo->clause_relids);
     669        4632 :     rinfo->clause_relids = bms_del_member(rinfo->clause_relids, relid);
     670        4632 :     rinfo->clause_relids = bms_del_member(rinfo->clause_relids, ojrelid);
     671             :     /* Likewise for required_relids */
     672        4632 :     rinfo->required_relids = bms_copy(rinfo->required_relids);
     673        4632 :     rinfo->required_relids = bms_del_member(rinfo->required_relids, relid);
     674        4632 :     rinfo->required_relids = bms_del_member(rinfo->required_relids, ojrelid);
     675             : 
     676             :     /* If it's an OR, recurse to clean up sub-clauses */
     677        4632 :     if (restriction_is_or_clause(rinfo))
     678             :     {
     679             :         ListCell   *lc;
     680             : 
     681             :         Assert(is_orclause(rinfo->orclause));
     682          18 :         foreach(lc, ((BoolExpr *) rinfo->orclause)->args)
     683             :         {
     684          12 :             Node       *orarg = (Node *) lfirst(lc);
     685             : 
     686             :             /* OR arguments should be ANDs or sub-RestrictInfos */
     687          12 :             if (is_andclause(orarg))
     688             :             {
     689           0 :                 List       *andargs = ((BoolExpr *) orarg)->args;
     690             :                 ListCell   *lc2;
     691             : 
     692           0 :                 foreach(lc2, andargs)
     693             :                 {
     694           0 :                     RestrictInfo *rinfo2 = lfirst_node(RestrictInfo, lc2);
     695             : 
     696           0 :                     remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
     697             :                 }
     698             :             }
     699             :             else
     700             :             {
     701          12 :                 RestrictInfo *rinfo2 = castNode(RestrictInfo, orarg);
     702             : 
     703          12 :                 remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
     704             :             }
     705             :         }
     706             :     }
     707        4632 : }
     708             : 
     709             : /*
     710             :  * Remove any references to relid or sjinfo->ojrelid (if sjinfo != NULL)
     711             :  * from the EquivalenceClass.
     712             :  *
     713             :  * Like remove_rel_from_restrictinfo, we don't worry about cleaning out
     714             :  * any nullingrel bits in contained Vars and PHVs.  (This might have to be
     715             :  * improved sometime.)  We do need to fix the EC and EM relid sets to ensure
     716             :  * that implied join equalities will be generated at the appropriate join
     717             :  * level(s).
     718             :  */
     719             : static void
     720       16882 : remove_rel_from_eclass(EquivalenceClass *ec, SpecialJoinInfo *sjinfo,
     721             :                        int relid, int subst)
     722             : {
     723             :     ListCell   *lc;
     724             : 
     725             :     /* Fix up the EC's overall relids */
     726       16882 :     ec->ec_relids = adjust_relid_set(ec->ec_relids, relid, subst);
     727       16882 :     if (sjinfo != NULL)
     728       15814 :         ec->ec_relids = adjust_relid_set(ec->ec_relids,
     729       15814 :                                          sjinfo->ojrelid, subst);
     730             : 
     731             :     /*
     732             :      * We don't expect any EC child members to exist at this point.  Ensure
     733             :      * that's the case, otherwise, we might be getting asked to do something
     734             :      * this function hasn't been coded for.
     735             :      */
     736             :     Assert(ec->ec_childmembers == NULL);
     737             : 
     738             :     /*
     739             :      * Fix up the member expressions.  Any non-const member that ends with
     740             :      * empty em_relids must be a Var or PHV of the removed relation.  We don't
     741             :      * need it anymore, so we can drop it.
     742             :      */
     743       39040 :     foreach(lc, ec->ec_members)
     744             :     {
     745       22158 :         EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
     746             : 
     747       22158 :         if (bms_is_member(relid, cur_em->em_relids) ||
     748        4494 :             (sjinfo != NULL && bms_is_member(sjinfo->ojrelid,
     749        4494 :                                              cur_em->em_relids)))
     750             :         {
     751             :             Assert(!cur_em->em_is_const);
     752       15814 :             cur_em->em_relids = adjust_relid_set(cur_em->em_relids, relid, subst);
     753       15814 :             if (sjinfo != NULL)
     754       15814 :                 cur_em->em_relids = adjust_relid_set(cur_em->em_relids,
     755       15814 :                                                      sjinfo->ojrelid, subst);
     756       15814 :             if (bms_is_empty(cur_em->em_relids))
     757       15802 :                 ec->ec_members = foreach_delete_current(ec->ec_members, lc);
     758             :         }
     759             :     }
     760             : 
     761             :     /* Fix up the source clauses, in case we can re-use them later */
     762       22432 :     foreach(lc, ec->ec_sources)
     763             :     {
     764        5550 :         RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
     765             : 
     766        5550 :         if (sjinfo == NULL)
     767        1056 :             ChangeVarNodesExtended((Node *) rinfo, relid, subst, 0,
     768             :                                    replace_relid_callback);
     769             :         else
     770        4494 :             remove_rel_from_restrictinfo(rinfo, relid, sjinfo->ojrelid);
     771             :     }
     772             : 
     773             :     /*
     774             :      * Rather than expend code on fixing up any already-derived clauses, just
     775             :      * drop them.  (At this point, any such clauses would be base restriction
     776             :      * clauses, which we'd not need anymore anyway.)
     777             :      */
     778       16882 :     ec_clear_derived_clauses(ec);
     779       16882 : }
     780             : 
     781             : /*
     782             :  * Remove any occurrences of the target relid from a joinlist structure.
     783             :  *
     784             :  * It's easiest to build a whole new list structure, so we handle it that
     785             :  * way.  Efficiency is not a big deal here.
     786             :  *
     787             :  * *nremoved is incremented by the number of occurrences removed (there
     788             :  * should be exactly one, but the caller checks that).
     789             :  */
     790             : static List *
     791       12272 : remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
     792             : {
     793       12272 :     List       *result = NIL;
     794             :     ListCell   *jl;
     795             : 
     796       45166 :     foreach(jl, joinlist)
     797             :     {
     798       32894 :         Node       *jlnode = (Node *) lfirst(jl);
     799             : 
     800       32894 :         if (IsA(jlnode, RangeTblRef))
     801             :         {
     802       32630 :             int         varno = ((RangeTblRef *) jlnode)->rtindex;
     803             : 
     804       32630 :             if (varno == relid)
     805       12008 :                 (*nremoved)++;
     806             :             else
     807       20622 :                 result = lappend(result, jlnode);
     808             :         }
     809         264 :         else if (IsA(jlnode, List))
     810             :         {
     811             :             /* Recurse to handle subproblem */
     812             :             List       *sublist;
     813             : 
     814         264 :             sublist = remove_rel_from_joinlist((List *) jlnode,
     815             :                                                relid, nremoved);
     816             :             /* Avoid including empty sub-lists in the result */
     817         264 :             if (sublist)
     818         264 :                 result = lappend(result, sublist);
     819             :         }
     820             :         else
     821             :         {
     822           0 :             elog(ERROR, "unrecognized joinlist node type: %d",
     823             :                  (int) nodeTag(jlnode));
     824             :         }
     825             :     }
     826             : 
     827       12272 :     return result;
     828             : }
     829             : 
     830             : 
     831             : /*
     832             :  * reduce_unique_semijoins
     833             :  *      Check for semijoins that can be simplified to plain inner joins
     834             :  *      because the inner relation is provably unique for the join clauses.
     835             :  *
     836             :  * Ideally this would happen during reduce_outer_joins, but we don't have
     837             :  * enough information at that point.
     838             :  *
     839             :  * To perform the strength reduction when applicable, we need only delete
     840             :  * the semijoin's SpecialJoinInfo from root->join_info_list.  (We don't
     841             :  * bother fixing the join type attributed to it in the query jointree,
     842             :  * since that won't be consulted again.)
     843             :  */
     844             : void
     845      341504 : reduce_unique_semijoins(PlannerInfo *root)
     846             : {
     847             :     ListCell   *lc;
     848             : 
     849             :     /*
     850             :      * Scan the join_info_list to find semijoins.
     851             :      */
     852      384908 :     foreach(lc, root->join_info_list)
     853             :     {
     854       43404 :         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
     855             :         int         innerrelid;
     856             :         RelOptInfo *innerrel;
     857             :         Relids      joinrelids;
     858             :         List       *restrictlist;
     859             : 
     860             :         /*
     861             :          * Must be a semijoin to a single baserel, else we aren't going to be
     862             :          * able to do anything with it.
     863             :          */
     864       43404 :         if (sjinfo->jointype != JOIN_SEMI)
     865       43018 :             continue;
     866             : 
     867        5142 :         if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
     868         182 :             continue;
     869             : 
     870        4960 :         innerrel = find_base_rel(root, innerrelid);
     871             : 
     872             :         /*
     873             :          * Before we trouble to run generate_join_implied_equalities, make a
     874             :          * quick check to eliminate cases in which we will surely be unable to
     875             :          * prove uniqueness of the innerrel.
     876             :          */
     877        4960 :         if (!rel_supports_distinctness(root, innerrel))
     878        1070 :             continue;
     879             : 
     880             :         /* Compute the relid set for the join we are considering */
     881        3890 :         joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
     882             :         Assert(sjinfo->ojrelid == 0);    /* SEMI joins don't have RT indexes */
     883             : 
     884             :         /*
     885             :          * Since we're only considering a single-rel RHS, any join clauses it
     886             :          * has must be clauses linking it to the semijoin's min_lefthand.  We
     887             :          * can also consider EC-derived join clauses.
     888             :          */
     889             :         restrictlist =
     890        3890 :             list_concat(generate_join_implied_equalities(root,
     891             :                                                          joinrelids,
     892             :                                                          sjinfo->min_lefthand,
     893             :                                                          innerrel,
     894             :                                                          NULL),
     895        3890 :                         innerrel->joininfo);
     896             : 
     897             :         /* Test whether the innerrel is unique for those clauses. */
     898        3890 :         if (!innerrel_is_unique(root,
     899             :                                 joinrelids, sjinfo->min_lefthand, innerrel,
     900             :                                 JOIN_SEMI, restrictlist, true))
     901        3504 :             continue;
     902             : 
     903             :         /* OK, remove the SpecialJoinInfo from the list. */
     904         386 :         root->join_info_list = foreach_delete_current(root->join_info_list, lc);
     905             :     }
     906      341504 : }
     907             : 
     908             : 
     909             : /*
     910             :  * rel_supports_distinctness
     911             :  *      Could the relation possibly be proven distinct on some set of columns?
     912             :  *
     913             :  * This is effectively a pre-checking function for rel_is_distinct_for().
     914             :  * It must return true if rel_is_distinct_for() could possibly return true
     915             :  * with this rel, but it should not expend a lot of cycles.  The idea is
     916             :  * that callers can avoid doing possibly-expensive processing to compute
     917             :  * rel_is_distinct_for()'s argument lists if the call could not possibly
     918             :  * succeed.
     919             :  */
     920             : static bool
     921      693684 : rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
     922             : {
     923             :     /* We only know about baserels ... */
     924      693684 :     if (rel->reloptkind != RELOPT_BASEREL)
     925      252120 :         return false;
     926      441564 :     if (rel->rtekind == RTE_RELATION)
     927             :     {
     928             :         /*
     929             :          * For a plain relation, we only know how to prove uniqueness by
     930             :          * reference to unique indexes.  Make sure there's at least one
     931             :          * suitable unique index.  It must be immediately enforced, and not a
     932             :          * partial index. (Keep these conditions in sync with
     933             :          * relation_has_unique_index_for!)
     934             :          */
     935             :         ListCell   *lc;
     936             : 
     937      546440 :         foreach(lc, rel->indexlist)
     938             :         {
     939      479252 :             IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
     940             : 
     941      479252 :             if (ind->unique && ind->immediate && ind->indpred == NIL)
     942      336796 :                 return true;
     943             :         }
     944             :     }
     945       37580 :     else if (rel->rtekind == RTE_SUBQUERY)
     946             :     {
     947       10852 :         Query      *subquery = root->simple_rte_array[rel->relid]->subquery;
     948             : 
     949             :         /* Check if the subquery has any qualities that support distinctness */
     950       10852 :         if (query_supports_distinctness(subquery))
     951        9020 :             return true;
     952             :     }
     953             :     /* We have no proof rules for any other rtekinds. */
     954       95748 :     return false;
     955             : }
     956             : 
     957             : /*
     958             :  * rel_is_distinct_for
     959             :  *      Does the relation return only distinct rows according to clause_list?
     960             :  *
     961             :  * clause_list is a list of join restriction clauses involving this rel and
     962             :  * some other one.  Return true if no two rows emitted by this rel could
     963             :  * possibly join to the same row of the other rel.
     964             :  *
     965             :  * The caller must have already determined that each condition is a
     966             :  * mergejoinable equality with an expression in this relation on one side, and
     967             :  * an expression not involving this relation on the other.  The transient
     968             :  * outer_is_left flag is used to identify which side references this relation:
     969             :  * left side if outer_is_left is false, right side if it is true.
     970             :  *
     971             :  * Note that the passed-in clause_list may be destructively modified!  This
     972             :  * is OK for current uses, because the clause_list is built by the caller for
     973             :  * the sole purpose of passing to this function.
     974             :  *
     975             :  * (*extra_clauses) to be set to the right sides of baserestrictinfo clauses,
     976             :  * looking like "x = const" if distinctness is derived from such clauses, not
     977             :  * joininfo clauses.  Pass NULL to the extra_clauses if this value is not
     978             :  * needed.
     979             :  */
     980             : static bool
     981      226954 : rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list,
     982             :                     List **extra_clauses)
     983             : {
     984             :     /*
     985             :      * We could skip a couple of tests here if we assume all callers checked
     986             :      * rel_supports_distinctness first, but it doesn't seem worth taking any
     987             :      * risk for.
     988             :      */
     989      226954 :     if (rel->reloptkind != RELOPT_BASEREL)
     990           0 :         return false;
     991      226954 :     if (rel->rtekind == RTE_RELATION)
     992             :     {
     993             :         /*
     994             :          * Examine the indexes to see if we have a matching unique index.
     995             :          * relation_has_unique_index_for automatically adds any usable
     996             :          * restriction clauses for the rel, so we needn't do that here.
     997             :          */
     998      221728 :         if (relation_has_unique_index_for(root, rel, clause_list, extra_clauses))
     999      125964 :             return true;
    1000             :     }
    1001        5226 :     else if (rel->rtekind == RTE_SUBQUERY)
    1002             :     {
    1003        5226 :         Index       relid = rel->relid;
    1004        5226 :         Query      *subquery = root->simple_rte_array[relid]->subquery;
    1005        5226 :         List       *colnos = NIL;
    1006        5226 :         List       *opids = NIL;
    1007             :         ListCell   *l;
    1008             : 
    1009             :         /*
    1010             :          * Build the argument lists for query_is_distinct_for: a list of
    1011             :          * output column numbers that the query needs to be distinct over, and
    1012             :          * a list of equality operators that the output columns need to be
    1013             :          * distinct according to.
    1014             :          *
    1015             :          * (XXX we are not considering restriction clauses attached to the
    1016             :          * subquery; is that worth doing?)
    1017             :          */
    1018       10272 :         foreach(l, clause_list)
    1019             :         {
    1020        5046 :             RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
    1021             :             Oid         op;
    1022             :             Var        *var;
    1023             : 
    1024             :             /*
    1025             :              * Get the equality operator we need uniqueness according to.
    1026             :              * (This might be a cross-type operator and thus not exactly the
    1027             :              * same operator the subquery would consider; that's all right
    1028             :              * since query_is_distinct_for can resolve such cases.)  The
    1029             :              * caller's mergejoinability test should have selected only
    1030             :              * OpExprs.
    1031             :              */
    1032        5046 :             op = castNode(OpExpr, rinfo->clause)->opno;
    1033             : 
    1034             :             /* caller identified the inner side for us */
    1035        5046 :             if (rinfo->outer_is_left)
    1036        4696 :                 var = (Var *) get_rightop(rinfo->clause);
    1037             :             else
    1038         350 :                 var = (Var *) get_leftop(rinfo->clause);
    1039             : 
    1040             :             /*
    1041             :              * We may ignore any RelabelType node above the operand.  (There
    1042             :              * won't be more than one, since eval_const_expressions() has been
    1043             :              * applied already.)
    1044             :              */
    1045        5046 :             if (var && IsA(var, RelabelType))
    1046        3236 :                 var = (Var *) ((RelabelType *) var)->arg;
    1047             : 
    1048             :             /*
    1049             :              * If inner side isn't a Var referencing a subquery output column,
    1050             :              * this clause doesn't help us.
    1051             :              */
    1052        5046 :             if (!var || !IsA(var, Var) ||
    1053        5034 :                 var->varno != relid || var->varlevelsup != 0)
    1054          12 :                 continue;
    1055             : 
    1056        5034 :             colnos = lappend_int(colnos, var->varattno);
    1057        5034 :             opids = lappend_oid(opids, op);
    1058             :         }
    1059             : 
    1060        5226 :         if (query_is_distinct_for(subquery, colnos, opids))
    1061         468 :             return true;
    1062             :     }
    1063      100522 :     return false;
    1064             : }
    1065             : 
    1066             : 
    1067             : /*
    1068             :  * query_supports_distinctness - could the query possibly be proven distinct
    1069             :  *      on some set of output columns?
    1070             :  *
    1071             :  * This is effectively a pre-checking function for query_is_distinct_for().
    1072             :  * It must return true if query_is_distinct_for() could possibly return true
    1073             :  * with this query, but it should not expend a lot of cycles.  The idea is
    1074             :  * that callers can avoid doing possibly-expensive processing to compute
    1075             :  * query_is_distinct_for()'s argument lists if the call could not possibly
    1076             :  * succeed.
    1077             :  */
    1078             : bool
    1079       10852 : query_supports_distinctness(Query *query)
    1080             : {
    1081             :     /* SRFs break distinctness except with DISTINCT, see below */
    1082       10852 :     if (query->hasTargetSRFs && query->distinctClause == NIL)
    1083        1036 :         return false;
    1084             : 
    1085             :     /* check for features we can prove distinctness with */
    1086        9816 :     if (query->distinctClause != NIL ||
    1087        9666 :         query->groupClause != NIL ||
    1088        9468 :         query->groupingSets != NIL ||
    1089        9420 :         query->hasAggs ||
    1090        9004 :         query->havingQual ||
    1091        9004 :         query->setOperations)
    1092        9020 :         return true;
    1093             : 
    1094         796 :     return false;
    1095             : }
    1096             : 
    1097             : /*
    1098             :  * query_is_distinct_for - does query never return duplicates of the
    1099             :  *      specified columns?
    1100             :  *
    1101             :  * query is a not-yet-planned subquery (in current usage, it's always from
    1102             :  * a subquery RTE, which the planner avoids scribbling on).
    1103             :  *
    1104             :  * colnos is an integer list of output column numbers (resno's).  We are
    1105             :  * interested in whether rows consisting of just these columns are certain
    1106             :  * to be distinct.  "Distinctness" is defined according to whether the
    1107             :  * corresponding upper-level equality operators listed in opids would think
    1108             :  * the values are distinct.  (Note: the opids entries could be cross-type
    1109             :  * operators, and thus not exactly the equality operators that the subquery
    1110             :  * would use itself.  We use equality_ops_are_compatible() to check
    1111             :  * compatibility.  That looks at opfamily membership for index AMs that have
    1112             :  * declared that they support consistent equality semantics within an
    1113             :  * opfamily, and so should give trustworthy answers for all operators that we
    1114             :  * might need to deal with here.)
    1115             :  */
    1116             : bool
    1117        5226 : query_is_distinct_for(Query *query, List *colnos, List *opids)
    1118             : {
    1119             :     ListCell   *l;
    1120             :     Oid         opid;
    1121             : 
    1122             :     Assert(list_length(colnos) == list_length(opids));
    1123             : 
    1124             :     /*
    1125             :      * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
    1126             :      * columns in the DISTINCT clause appear in colnos and operator semantics
    1127             :      * match.  This is true even if there are SRFs in the DISTINCT columns or
    1128             :      * elsewhere in the tlist.
    1129             :      */
    1130        5226 :     if (query->distinctClause)
    1131             :     {
    1132         168 :         foreach(l, query->distinctClause)
    1133             :         {
    1134         126 :             SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
    1135         126 :             TargetEntry *tle = get_sortgroupclause_tle(sgc,
    1136             :                                                        query->targetList);
    1137             : 
    1138         126 :             opid = distinct_col_search(tle->resno, colnos, opids);
    1139         126 :             if (!OidIsValid(opid) ||
    1140          60 :                 !equality_ops_are_compatible(opid, sgc->eqop))
    1141             :                 break;          /* exit early if no match */
    1142             :         }
    1143         108 :         if (l == NULL)          /* had matches for all? */
    1144          42 :             return true;
    1145             :     }
    1146             : 
    1147             :     /*
    1148             :      * Otherwise, a set-returning function in the query's targetlist can
    1149             :      * result in returning duplicate rows, despite any grouping that might
    1150             :      * occur before tlist evaluation.  (If all tlist SRFs are within GROUP BY
    1151             :      * columns, it would be safe because they'd be expanded before grouping.
    1152             :      * But it doesn't currently seem worth the effort to check for that.)
    1153             :      */
    1154        5184 :     if (query->hasTargetSRFs)
    1155           0 :         return false;
    1156             : 
    1157             :     /*
    1158             :      * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
    1159             :      * the grouped columns appear in colnos and operator semantics match.
    1160             :      */
    1161        5184 :     if (query->groupClause && !query->groupingSets)
    1162             :     {
    1163         228 :         foreach(l, query->groupClause)
    1164             :         {
    1165         158 :             SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
    1166         158 :             TargetEntry *tle = get_sortgroupclause_tle(sgc,
    1167             :                                                        query->targetList);
    1168             : 
    1169         158 :             opid = distinct_col_search(tle->resno, colnos, opids);
    1170         158 :             if (!OidIsValid(opid) ||
    1171         112 :                 !equality_ops_are_compatible(opid, sgc->eqop))
    1172             :                 break;          /* exit early if no match */
    1173             :         }
    1174         116 :         if (l == NULL)          /* had matches for all? */
    1175          70 :             return true;
    1176             :     }
    1177        5068 :     else if (query->groupingSets)
    1178             :     {
    1179             :         List       *gsets;
    1180             : 
    1181             :         /*
    1182             :          * If we have grouping sets with expressions, we probably don't have
    1183             :          * uniqueness and analysis would be hard. Punt.
    1184             :          */
    1185          60 :         if (query->groupClause)
    1186          12 :             return false;
    1187             : 
    1188             :         /*
    1189             :          * If we have no groupClause (therefore no grouping expressions), we
    1190             :          * might have one or many empty grouping sets.  If there's just one,
    1191             :          * or if the DISTINCT clause is used on the GROUP BY, then we're
    1192             :          * returning only one row and are certainly unique.  But otherwise, we
    1193             :          * know we're certainly not unique.
    1194             :          */
    1195          48 :         if (query->groupDistinct)
    1196           6 :             return true;
    1197             : 
    1198          42 :         gsets = expand_grouping_sets(query->groupingSets, false, -1);
    1199             : 
    1200          42 :         return (list_length(gsets) == 1);
    1201             :     }
    1202             :     else
    1203             :     {
    1204             :         /*
    1205             :          * If we have no GROUP BY, but do have aggregates or HAVING, then the
    1206             :          * result is at most one row so it's surely unique, for any operators.
    1207             :          */
    1208        5008 :         if (query->hasAggs || query->havingQual)
    1209         244 :             return true;
    1210             :     }
    1211             : 
    1212             :     /*
    1213             :      * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
    1214             :      * except with ALL.
    1215             :      */
    1216        4810 :     if (query->setOperations)
    1217             :     {
    1218        4698 :         SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations);
    1219             : 
    1220             :         Assert(topop->op != SETOP_NONE);
    1221             : 
    1222        4698 :         if (!topop->all)
    1223             :         {
    1224             :             ListCell   *lg;
    1225             : 
    1226             :             /* We're good if all the nonjunk output columns are in colnos */
    1227         142 :             lg = list_head(topop->groupClauses);
    1228         236 :             foreach(l, query->targetList)
    1229             :             {
    1230         148 :                 TargetEntry *tle = (TargetEntry *) lfirst(l);
    1231             :                 SortGroupClause *sgc;
    1232             : 
    1233         148 :                 if (tle->resjunk)
    1234           0 :                     continue;   /* ignore resjunk columns */
    1235             : 
    1236             :                 /* non-resjunk columns should have grouping clauses */
    1237             :                 Assert(lg != NULL);
    1238         148 :                 sgc = (SortGroupClause *) lfirst(lg);
    1239         148 :                 lg = lnext(topop->groupClauses, lg);
    1240             : 
    1241         148 :                 opid = distinct_col_search(tle->resno, colnos, opids);
    1242         148 :                 if (!OidIsValid(opid) ||
    1243          94 :                     !equality_ops_are_compatible(opid, sgc->eqop))
    1244             :                     break;      /* exit early if no match */
    1245             :             }
    1246         142 :             if (l == NULL)      /* had matches for all? */
    1247          88 :                 return true;
    1248             :         }
    1249             :     }
    1250             : 
    1251             :     /*
    1252             :      * XXX Are there any other cases in which we can easily see the result
    1253             :      * must be distinct?
    1254             :      *
    1255             :      * If you do add more smarts to this function, be sure to update
    1256             :      * query_supports_distinctness() to match.
    1257             :      */
    1258             : 
    1259        4722 :     return false;
    1260             : }
    1261             : 
    1262             : /*
    1263             :  * distinct_col_search - subroutine for query_is_distinct_for
    1264             :  *
    1265             :  * If colno is in colnos, return the corresponding element of opids,
    1266             :  * else return InvalidOid.  (Ordinarily colnos would not contain duplicates,
    1267             :  * but if it does, we arbitrarily select the first match.)
    1268             :  */
    1269             : static Oid
    1270         432 : distinct_col_search(int colno, List *colnos, List *opids)
    1271             : {
    1272             :     ListCell   *lc1,
    1273             :                *lc2;
    1274             : 
    1275         626 :     forboth(lc1, colnos, lc2, opids)
    1276             :     {
    1277         460 :         if (colno == lfirst_int(lc1))
    1278         266 :             return lfirst_oid(lc2);
    1279             :     }
    1280         166 :     return InvalidOid;
    1281             : }
    1282             : 
    1283             : 
    1284             : /*
    1285             :  * innerrel_is_unique
    1286             :  *    Check if the innerrel provably contains at most one tuple matching any
    1287             :  *    tuple from the outerrel, based on join clauses in the 'restrictlist'.
    1288             :  *
    1289             :  * We need an actual RelOptInfo for the innerrel, but it's sufficient to
    1290             :  * identify the outerrel by its Relids.  This asymmetry supports use of this
    1291             :  * function before joinrels have been built.  (The caller is expected to
    1292             :  * also supply the joinrelids, just to save recalculating that.)
    1293             :  *
    1294             :  * The proof must be made based only on clauses that will be "joinquals"
    1295             :  * rather than "otherquals" at execution.  For an inner join there's no
    1296             :  * difference; but if the join is outer, we must ignore pushed-down quals,
    1297             :  * as those will become "otherquals".  Note that this means the answer might
    1298             :  * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
    1299             :  * answer without regard to that, callers must take care not to call this
    1300             :  * with jointypes that would be classified differently by IS_OUTER_JOIN().
    1301             :  *
    1302             :  * The actual proof is undertaken by is_innerrel_unique_for(); this function
    1303             :  * is a frontend that is mainly concerned with caching the answers.
    1304             :  * In particular, the force_cache argument allows overriding the internal
    1305             :  * heuristic about whether to cache negative answers; it should be "true"
    1306             :  * if making an inquiry that is not part of the normal bottom-up join search
    1307             :  * sequence.
    1308             :  */
    1309             : bool
    1310      750262 : innerrel_is_unique(PlannerInfo *root,
    1311             :                    Relids joinrelids,
    1312             :                    Relids outerrelids,
    1313             :                    RelOptInfo *innerrel,
    1314             :                    JoinType jointype,
    1315             :                    List *restrictlist,
    1316             :                    bool force_cache)
    1317             : {
    1318      750262 :     return innerrel_is_unique_ext(root, joinrelids, outerrelids, innerrel,
    1319             :                                   jointype, restrictlist, force_cache, NULL);
    1320             : }
    1321             : 
    1322             : /*
    1323             :  * innerrel_is_unique_ext
    1324             :  *    Do the same as innerrel_is_unique(), but also set to (*extra_clauses)
    1325             :  *    additional clauses from a baserestrictinfo list used to prove the
    1326             :  *    uniqueness.
    1327             :  *
    1328             :  * A non-NULL extra_clauses indicates that we're checking for self-join and
    1329             :  * correspondingly dealing with filtered clauses.
    1330             :  */
    1331             : bool
    1332      752304 : innerrel_is_unique_ext(PlannerInfo *root,
    1333             :                        Relids joinrelids,
    1334             :                        Relids outerrelids,
    1335             :                        RelOptInfo *innerrel,
    1336             :                        JoinType jointype,
    1337             :                        List *restrictlist,
    1338             :                        bool force_cache,
    1339             :                        List **extra_clauses)
    1340             : {
    1341             :     MemoryContext old_context;
    1342             :     ListCell   *lc;
    1343             :     UniqueRelInfo *uniqueRelInfo;
    1344      752304 :     List       *outer_exprs = NIL;
    1345      752304 :     bool        self_join = (extra_clauses != NULL);
    1346             : 
    1347             :     /* Certainly can't prove uniqueness when there are no joinclauses */
    1348      752304 :     if (restrictlist == NIL)
    1349      106624 :         return false;
    1350             : 
    1351             :     /*
    1352             :      * Make a quick check to eliminate cases in which we will surely be unable
    1353             :      * to prove uniqueness of the innerrel.
    1354             :      */
    1355      645680 :     if (!rel_supports_distinctness(root, innerrel))
    1356      343720 :         return false;
    1357             : 
    1358             :     /*
    1359             :      * Query the cache to see if we've managed to prove that innerrel is
    1360             :      * unique for any subset of this outerrel.  For non-self-join search, we
    1361             :      * don't need an exact match, as extra outerrels can't make the innerrel
    1362             :      * any less unique (or more formally, the restrictlist for a join to a
    1363             :      * superset outerrel must be a superset of the conditions we successfully
    1364             :      * used before). For self-join search, we require an exact match of
    1365             :      * outerrels because we need extra clauses to be valid for our case. Also,
    1366             :      * for self-join checking we've filtered the clauses list.  Thus, we can
    1367             :      * match only the result cached for a self-join search for another
    1368             :      * self-join check.
    1369             :      */
    1370      333888 :     foreach(lc, innerrel->unique_for_rels)
    1371             :     {
    1372      118236 :         uniqueRelInfo = (UniqueRelInfo *) lfirst(lc);
    1373             : 
    1374      118236 :         if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) ||
    1375          74 :             (self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) &&
    1376          56 :              uniqueRelInfo->self_join))
    1377             :         {
    1378       86308 :             if (extra_clauses)
    1379          12 :                 *extra_clauses = uniqueRelInfo->extra_clauses;
    1380       86308 :             return true;        /* Success! */
    1381             :         }
    1382             :     }
    1383             : 
    1384             :     /*
    1385             :      * Conversely, we may have already determined that this outerrel, or some
    1386             :      * superset thereof, cannot prove this innerrel to be unique.
    1387             :      */
    1388      216208 :     foreach(lc, innerrel->non_unique_for_rels)
    1389             :     {
    1390         916 :         Relids      unique_for_rels = (Relids) lfirst(lc);
    1391             : 
    1392         916 :         if (bms_is_subset(outerrelids, unique_for_rels))
    1393         360 :             return false;
    1394             :     }
    1395             : 
    1396             :     /* No cached information, so try to make the proof. */
    1397      215292 :     if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
    1398             :                                jointype, restrictlist,
    1399             :                                self_join ? &outer_exprs : NULL))
    1400             :     {
    1401             :         /*
    1402             :          * Cache the positive result for future probes, being sure to keep it
    1403             :          * in the planner_cxt even if we are working in GEQO.
    1404             :          *
    1405             :          * Note: one might consider trying to isolate the minimal subset of
    1406             :          * the outerrels that proved the innerrel unique.  But it's not worth
    1407             :          * the trouble, because the planner builds up joinrels incrementally
    1408             :          * and so we'll see the minimally sufficient outerrels before any
    1409             :          * supersets of them anyway.
    1410             :          */
    1411      115024 :         old_context = MemoryContextSwitchTo(root->planner_cxt);
    1412      115024 :         uniqueRelInfo = makeNode(UniqueRelInfo);
    1413      115024 :         uniqueRelInfo->outerrelids = bms_copy(outerrelids);
    1414      115024 :         uniqueRelInfo->self_join = self_join;
    1415      115024 :         uniqueRelInfo->extra_clauses = outer_exprs;
    1416      115024 :         innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
    1417             :                                             uniqueRelInfo);
    1418      115024 :         MemoryContextSwitchTo(old_context);
    1419             : 
    1420      115024 :         if (extra_clauses)
    1421         654 :             *extra_clauses = outer_exprs;
    1422      115024 :         return true;            /* Success! */
    1423             :     }
    1424             :     else
    1425             :     {
    1426             :         /*
    1427             :          * None of the join conditions for outerrel proved innerrel unique, so
    1428             :          * we can safely reject this outerrel or any subset of it in future
    1429             :          * checks.
    1430             :          *
    1431             :          * However, in normal planning mode, caching this knowledge is totally
    1432             :          * pointless; it won't be queried again, because we build up joinrels
    1433             :          * from smaller to larger.  It's only useful when using GEQO or
    1434             :          * another planner extension that attempts planning multiple times.
    1435             :          *
    1436             :          * Also, allow callers to override that heuristic and force caching;
    1437             :          * that's useful for reduce_unique_semijoins, which calls here before
    1438             :          * the normal join search starts.
    1439             :          */
    1440      100268 :         if (force_cache || root->assumeReplanning)
    1441             :         {
    1442        3864 :             old_context = MemoryContextSwitchTo(root->planner_cxt);
    1443        3864 :             innerrel->non_unique_for_rels =
    1444        3864 :                 lappend(innerrel->non_unique_for_rels,
    1445        3864 :                         bms_copy(outerrelids));
    1446        3864 :             MemoryContextSwitchTo(old_context);
    1447             :         }
    1448             : 
    1449      100268 :         return false;
    1450             :     }
    1451             : }
    1452             : 
    1453             : /*
    1454             :  * is_innerrel_unique_for
    1455             :  *    Check if the innerrel provably contains at most one tuple matching any
    1456             :  *    tuple from the outerrel, based on join clauses in the 'restrictlist'.
    1457             :  */
    1458             : static bool
    1459      215292 : is_innerrel_unique_for(PlannerInfo *root,
    1460             :                        Relids joinrelids,
    1461             :                        Relids outerrelids,
    1462             :                        RelOptInfo *innerrel,
    1463             :                        JoinType jointype,
    1464             :                        List *restrictlist,
    1465             :                        List **extra_clauses)
    1466             : {
    1467      215292 :     List       *clause_list = NIL;
    1468             :     ListCell   *lc;
    1469             : 
    1470             :     /*
    1471             :      * Search for mergejoinable clauses that constrain the inner rel against
    1472             :      * the outer rel.  If an operator is mergejoinable then it behaves like
    1473             :      * equality for some btree opclass, so it's what we want.  The
    1474             :      * mergejoinability test also eliminates clauses containing volatile
    1475             :      * functions, which we couldn't depend on.
    1476             :      */
    1477      482462 :     foreach(lc, restrictlist)
    1478             :     {
    1479      267170 :         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
    1480             : 
    1481             :         /*
    1482             :          * As noted above, if it's a pushed-down clause and we're at an outer
    1483             :          * join, we can't use it.
    1484             :          */
    1485      267170 :         if (IS_OUTER_JOIN(jointype) &&
    1486      108178 :             RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
    1487        4324 :             continue;
    1488             : 
    1489             :         /* Ignore if it's not a mergejoinable clause */
    1490      262846 :         if (!restrictinfo->can_join ||
    1491      246206 :             restrictinfo->mergeopfamilies == NIL)
    1492       17628 :             continue;           /* not mergejoinable */
    1493             : 
    1494             :         /*
    1495             :          * Check if the clause has the form "outer op inner" or "inner op
    1496             :          * outer", and if so mark which side is inner.
    1497             :          */
    1498      245218 :         if (!clause_sides_match_join(restrictinfo, outerrelids,
    1499             :                                      innerrel->relids))
    1500          40 :             continue;           /* no good for these input relations */
    1501             : 
    1502             :         /* OK, add to the list */
    1503      245178 :         clause_list = lappend(clause_list, restrictinfo);
    1504             :     }
    1505             : 
    1506             :     /* Let rel_is_distinct_for() do the hard work */
    1507      215292 :     return rel_is_distinct_for(root, innerrel, clause_list, extra_clauses);
    1508             : }
    1509             : 
    1510             : /*
    1511             :  * Update EC members to point to the remaining relation instead of the removed
    1512             :  * one, removing duplicates.
    1513             :  *
    1514             :  * Restriction clauses for base relations are already distributed to
    1515             :  * the respective baserestrictinfo lists (see
    1516             :  * generate_implied_equalities_for_column). The above code has already processed
    1517             :  * this list and updated these clauses to reference the remaining
    1518             :  * relation, so that we can skip them here based on their relids.
    1519             :  *
    1520             :  * Likewise, we have already processed the join clauses that join the
    1521             :  * removed relation to the remaining one.
    1522             :  *
    1523             :  * Finally, there might be join clauses tying the removed relation to
    1524             :  * some third relation.  We can't just delete the source clauses and
    1525             :  * regenerate them from the EC because the corresponding equality
    1526             :  * operators might be missing (see the handling of ec_broken).
    1527             :  * Therefore, we will update the references in the source clauses.
    1528             :  *
    1529             :  * Derived clauses can be generated again, so it is simpler just to
    1530             :  * delete them.
    1531             :  */
    1532             : static void
    1533         924 : update_eclasses(EquivalenceClass *ec, int from, int to)
    1534             : {
    1535         924 :     List       *new_members = NIL;
    1536         924 :     List       *new_sources = NIL;
    1537             : 
    1538             :     /*
    1539             :      * We don't expect any EC child members to exist at this point.  Ensure
    1540             :      * that's the case, otherwise, we might be getting asked to do something
    1541             :      * this function hasn't been coded for.
    1542             :      */
    1543             :     Assert(ec->ec_childmembers == NULL);
    1544             : 
    1545        3726 :     foreach_node(EquivalenceMember, em, ec->ec_members)
    1546             :     {
    1547        1878 :         bool        is_redundant = false;
    1548             : 
    1549        1878 :         if (!bms_is_member(from, em->em_relids))
    1550             :         {
    1551         936 :             new_members = lappend(new_members, em);
    1552         936 :             continue;
    1553             :         }
    1554             : 
    1555         942 :         em->em_relids = adjust_relid_set(em->em_relids, from, to);
    1556         942 :         em->em_jdomain->jd_relids = adjust_relid_set(em->em_jdomain->jd_relids, from, to);
    1557             : 
    1558             :         /* We only process inner joins */
    1559         942 :         ChangeVarNodesExtended((Node *) em->em_expr, from, to, 0,
    1560             :                                replace_relid_callback);
    1561             : 
    1562        1908 :         foreach_node(EquivalenceMember, other, new_members)
    1563             :         {
    1564         304 :             if (!equal(em->em_relids, other->em_relids))
    1565          24 :                 continue;
    1566             : 
    1567         280 :             if (equal(em->em_expr, other->em_expr))
    1568             :             {
    1569         280 :                 is_redundant = true;
    1570         280 :                 break;
    1571             :             }
    1572             :         }
    1573             : 
    1574         942 :         if (!is_redundant)
    1575         662 :             new_members = lappend(new_members, em);
    1576             :     }
    1577             : 
    1578         924 :     list_free(ec->ec_members);
    1579         924 :     ec->ec_members = new_members;
    1580             : 
    1581         924 :     ec_clear_derived_clauses(ec);
    1582             : 
    1583             :     /* Update EC source expressions */
    1584        2802 :     foreach_node(RestrictInfo, rinfo, ec->ec_sources)
    1585             :     {
    1586         954 :         bool        is_redundant = false;
    1587             : 
    1588         954 :         if (!bms_is_member(from, rinfo->required_relids))
    1589             :         {
    1590         118 :             new_sources = lappend(new_sources, rinfo);
    1591         118 :             continue;
    1592             :         }
    1593             : 
    1594         836 :         ChangeVarNodesExtended((Node *) rinfo, from, to, 0,
    1595             :                                replace_relid_callback);
    1596             : 
    1597             :         /*
    1598             :          * After switching the clause to the remaining relation, check it for
    1599             :          * redundancy with existing ones. We don't have to check for
    1600             :          * redundancy with derived clauses, because we've just deleted them.
    1601             :          */
    1602        1696 :         foreach_node(RestrictInfo, other, new_sources)
    1603             :         {
    1604          36 :             if (!equal(rinfo->clause_relids, other->clause_relids))
    1605          24 :                 continue;
    1606             : 
    1607          12 :             if (equal(rinfo->clause, other->clause))
    1608             :             {
    1609          12 :                 is_redundant = true;
    1610          12 :                 break;
    1611             :             }
    1612             :         }
    1613             : 
    1614         836 :         if (!is_redundant)
    1615         824 :             new_sources = lappend(new_sources, rinfo);
    1616             :     }
    1617             : 
    1618         924 :     list_free(ec->ec_sources);
    1619         924 :     ec->ec_sources = new_sources;
    1620         924 :     ec->ec_relids = adjust_relid_set(ec->ec_relids, from, to);
    1621         924 : }
    1622             : 
    1623             : /*
    1624             :  * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field,
    1625             :  * which makes almost every RestrictInfo unique.  This type of comparison is
    1626             :  * useful when removing duplicates while moving RestrictInfo's from removed
    1627             :  * relation to remaining relation during self-join elimination.
    1628             :  *
    1629             :  * XXX: In the future, we might remove the 'rinfo_serial' field completely and
    1630             :  * get rid of this function.
    1631             :  */
    1632             : static bool
    1633         512 : restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b)
    1634             : {
    1635         512 :     int         saved_rinfo_serial = a->rinfo_serial;
    1636             :     bool        result;
    1637             : 
    1638         512 :     a->rinfo_serial = b->rinfo_serial;
    1639         512 :     result = equal(a, b);
    1640         512 :     a->rinfo_serial = saved_rinfo_serial;
    1641             : 
    1642         512 :     return result;
    1643             : }
    1644             : 
    1645             : /*
    1646             :  * This function adds all non-redundant clauses to the keeping relation
    1647             :  * during self-join elimination.  That is a contradictory operation. On the
    1648             :  * one hand, we reduce the length of the `restrict` lists, which can
    1649             :  * impact planning or executing time.  Additionally, we improve the
    1650             :  * accuracy of cardinality estimation.  On the other hand, it is one more
    1651             :  * place that can make planning time much longer in specific cases.  It
    1652             :  * would have been better to avoid calling the equal() function here, but
    1653             :  * it's the only way to detect duplicated inequality expressions.
    1654             :  *
    1655             :  * (*keep_rinfo_list) is given by pointer because it might be altered by
    1656             :  * distribute_restrictinfo_to_rels().
    1657             :  */
    1658             : static void
    1659        1200 : add_non_redundant_clauses(PlannerInfo *root,
    1660             :                           List *rinfo_candidates,
    1661             :                           List **keep_rinfo_list,
    1662             :                           Index removed_relid)
    1663             : {
    1664        3328 :     foreach_node(RestrictInfo, rinfo, rinfo_candidates)
    1665             :     {
    1666         928 :         bool        is_redundant = false;
    1667             : 
    1668             :         Assert(!bms_is_member(removed_relid, rinfo->required_relids));
    1669             : 
    1670        2232 :         foreach_node(RestrictInfo, src, (*keep_rinfo_list))
    1671             :         {
    1672         530 :             if (!bms_equal(src->clause_relids, rinfo->clause_relids))
    1673             :                 /* Can't compare trivially different clauses */
    1674          12 :                 continue;
    1675             : 
    1676         518 :             if (src == rinfo ||
    1677         518 :                 (rinfo->parent_ec != NULL &&
    1678         816 :                  src->parent_ec == rinfo->parent_ec) ||
    1679         512 :                 restrict_infos_logically_equal(rinfo, src))
    1680             :             {
    1681         154 :                 is_redundant = true;
    1682         154 :                 break;
    1683             :             }
    1684             :         }
    1685         928 :         if (!is_redundant)
    1686         774 :             distribute_restrictinfo_to_rels(root, rinfo);
    1687             :     }
    1688        1200 : }
    1689             : 
    1690             : /*
    1691             :  * A custom callback for ChangeVarNodesExtended() providing Self-join
    1692             :  * elimination (SJE) related functionality
    1693             :  *
    1694             :  * SJE needs to skip the RangeTblRef node type.  During SJE's last
    1695             :  * step, remove_rel_from_joinlist() removes remaining RangeTblRefs
    1696             :  * with target relid.  If ChangeVarNodes() replaces the target relid
    1697             :  * before, remove_rel_from_joinlist() would fail to identify the nodes
    1698             :  * to delete.
    1699             :  *
    1700             :  * SJE also needs to change the relids within RestrictInfo's.
    1701             :  */
    1702             : static bool
    1703       41060 : replace_relid_callback(Node *node, ChangeVarNodes_context *context)
    1704             : {
    1705       41060 :     if (IsA(node, RangeTblRef))
    1706             :     {
    1707        1544 :         return true;
    1708             :     }
    1709       39516 :     else if (IsA(node, RestrictInfo))
    1710             :     {
    1711        2844 :         RestrictInfo *rinfo = (RestrictInfo *) node;
    1712        2844 :         int         relid = -1;
    1713        2844 :         bool        is_req_equal =
    1714        2844 :             (rinfo->required_relids == rinfo->clause_relids);
    1715        2844 :         bool        clause_relids_is_multiple =
    1716        2844 :             (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
    1717             : 
    1718             :         /*
    1719             :          * Recurse down into clauses if the target relation is present in
    1720             :          * clause_relids or required_relids.  We must check required_relids
    1721             :          * because the relation not present in clause_relids might still be
    1722             :          * present somewhere in orclause.
    1723             :          */
    1724        3960 :         if (bms_is_member(context->rt_index, rinfo->clause_relids) ||
    1725        1116 :             bms_is_member(context->rt_index, rinfo->required_relids))
    1726             :         {
    1727             :             Relids      new_clause_relids;
    1728             : 
    1729        1776 :             ChangeVarNodesWalkExpression((Node *) rinfo->clause, context);
    1730        1776 :             ChangeVarNodesWalkExpression((Node *) rinfo->orclause, context);
    1731             : 
    1732        1776 :             new_clause_relids = adjust_relid_set(rinfo->clause_relids,
    1733             :                                                  context->rt_index,
    1734             :                                                  context->new_index);
    1735             : 
    1736             :             /*
    1737             :              * Incrementally adjust num_base_rels based on the change of
    1738             :              * clause_relids, which could contain both base relids and
    1739             :              * outer-join relids.  This operation is legal until we remove
    1740             :              * only baserels.
    1741             :              */
    1742        1776 :             rinfo->num_base_rels -= bms_num_members(rinfo->clause_relids) -
    1743        1776 :                 bms_num_members(new_clause_relids);
    1744             : 
    1745        1776 :             rinfo->clause_relids = new_clause_relids;
    1746        1776 :             rinfo->left_relids =
    1747        1776 :                 adjust_relid_set(rinfo->left_relids, context->rt_index, context->new_index);
    1748        1776 :             rinfo->right_relids =
    1749        1776 :                 adjust_relid_set(rinfo->right_relids, context->rt_index, context->new_index);
    1750             :         }
    1751             : 
    1752        2844 :         if (is_req_equal)
    1753          24 :             rinfo->required_relids = rinfo->clause_relids;
    1754             :         else
    1755        2820 :             rinfo->required_relids =
    1756        2820 :                 adjust_relid_set(rinfo->required_relids, context->rt_index, context->new_index);
    1757             : 
    1758        2844 :         rinfo->outer_relids =
    1759        2844 :             adjust_relid_set(rinfo->outer_relids, context->rt_index, context->new_index);
    1760        2844 :         rinfo->incompatible_relids =
    1761        2844 :             adjust_relid_set(rinfo->incompatible_relids, context->rt_index, context->new_index);
    1762             : 
    1763        4836 :         if (rinfo->mergeopfamilies &&
    1764        3704 :             bms_get_singleton_member(rinfo->clause_relids, &relid) &&
    1765        1272 :             clause_relids_is_multiple &&
    1766        1272 :             relid == context->new_index && IsA(rinfo->clause, OpExpr))
    1767             :         {
    1768             :             Expr       *leftOp;
    1769             :             Expr       *rightOp;
    1770             : 
    1771        1272 :             leftOp = (Expr *) get_leftop(rinfo->clause);
    1772        1272 :             rightOp = (Expr *) get_rightop(rinfo->clause);
    1773             : 
    1774             :             /*
    1775             :              * For self-join elimination, changing varnos could transform
    1776             :              * "t1.a = t2.a" into "t1.a = t1.a".  That is always true as long
    1777             :              * as "t1.a" is not null.  We use equal() to check for such a
    1778             :              * case, and then we replace the qual with a check for not null
    1779             :              * (NullTest).
    1780             :              */
    1781        1272 :             if (leftOp != NULL && equal(leftOp, rightOp))
    1782             :             {
    1783        1260 :                 NullTest   *ntest = makeNode(NullTest);
    1784             : 
    1785        1260 :                 ntest->arg = leftOp;
    1786        1260 :                 ntest->nulltesttype = IS_NOT_NULL;
    1787        1260 :                 ntest->argisrow = false;
    1788        1260 :                 ntest->location = -1;
    1789        1260 :                 rinfo->clause = (Expr *) ntest;
    1790        1260 :                 rinfo->mergeopfamilies = NIL;
    1791        1260 :                 rinfo->left_em = NULL;
    1792        1260 :                 rinfo->right_em = NULL;
    1793             :             }
    1794             :             Assert(rinfo->orclause == NULL);
    1795             :         }
    1796        2844 :         return true;
    1797             :     }
    1798             : 
    1799       36672 :     return false;
    1800             : }
    1801             : 
    1802             : /*
    1803             :  * Remove a relation after we have proven that it participates only in an
    1804             :  * unneeded unique self-join.
    1805             :  *
    1806             :  * Replace any links in planner info structures.
    1807             :  *
    1808             :  * Transfer join and restriction clauses from the removed relation to the
    1809             :  * remaining one. We change the Vars of the clause to point to the
    1810             :  * remaining relation instead of the removed one. The clauses that require
    1811             :  * a subset of joinrelids become restriction clauses of the remaining
    1812             :  * relation, and others remain join clauses. We append them to
    1813             :  * baserestrictinfo and joininfo, respectively, trying not to introduce
    1814             :  * duplicates.
    1815             :  *
    1816             :  * We also have to process the 'joinclauses' list here, because it
    1817             :  * contains EC-derived join clauses which must become filter clauses. It
    1818             :  * is not enough to just correct the ECs because the EC-derived
    1819             :  * restrictions are generated before join removal (see
    1820             :  * generate_base_implied_equalities).
    1821             :  *
    1822             :  * NOTE: Remember to keep the code in sync with PlannerInfo to be sure all
    1823             :  * cached relids and relid bitmapsets can be correctly cleaned during the
    1824             :  * self-join elimination procedure.
    1825             :  */
    1826             : static void
    1827         600 : remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark,
    1828             :                      RelOptInfo *toKeep, RelOptInfo *toRemove,
    1829             :                      List *restrictlist)
    1830             : {
    1831             :     List       *joininfos;
    1832             :     ListCell   *lc;
    1833             :     int         i;
    1834         600 :     List       *jinfo_candidates = NIL;
    1835         600 :     List       *binfo_candidates = NIL;
    1836             : 
    1837             :     Assert(toKeep->relid > 0);
    1838             :     Assert(toRemove->relid > 0);
    1839             : 
    1840             :     /*
    1841             :      * Replace the index of the removing table with the keeping one. The
    1842             :      * technique of removing/distributing restrictinfo is used here to attach
    1843             :      * just appeared (for keeping relation) join clauses and avoid adding
    1844             :      * duplicates of those that already exist in the joininfo list.
    1845             :      */
    1846         600 :     joininfos = list_copy(toRemove->joininfo);
    1847        1290 :     foreach_node(RestrictInfo, rinfo, joininfos)
    1848             :     {
    1849          90 :         remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
    1850          90 :         ChangeVarNodesExtended((Node *) rinfo, toRemove->relid, toKeep->relid,
    1851             :                                0, replace_relid_callback);
    1852             : 
    1853          90 :         if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
    1854          72 :             jinfo_candidates = lappend(jinfo_candidates, rinfo);
    1855             :         else
    1856          18 :             binfo_candidates = lappend(binfo_candidates, rinfo);
    1857             :     }
    1858             : 
    1859             :     /*
    1860             :      * Concatenate restrictlist to the list of base restrictions of the
    1861             :      * removing table just to simplify the replacement procedure: all of them
    1862             :      * weren't connected to any keeping relations and need to be added to some
    1863             :      * rels.
    1864             :      */
    1865         600 :     toRemove->baserestrictinfo = list_concat(toRemove->baserestrictinfo,
    1866             :                                              restrictlist);
    1867        2038 :     foreach_node(RestrictInfo, rinfo, toRemove->baserestrictinfo)
    1868             :     {
    1869         838 :         ChangeVarNodesExtended((Node *) rinfo, toRemove->relid, toKeep->relid,
    1870             :                                0, replace_relid_callback);
    1871             : 
    1872         838 :         if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
    1873           0 :             jinfo_candidates = lappend(jinfo_candidates, rinfo);
    1874             :         else
    1875         838 :             binfo_candidates = lappend(binfo_candidates, rinfo);
    1876             :     }
    1877             : 
    1878             :     /*
    1879             :      * Now, add all non-redundant clauses to the keeping relation.
    1880             :      */
    1881         600 :     add_non_redundant_clauses(root, binfo_candidates,
    1882             :                               &toKeep->baserestrictinfo, toRemove->relid);
    1883         600 :     add_non_redundant_clauses(root, jinfo_candidates,
    1884             :                               &toKeep->joininfo, toRemove->relid);
    1885             : 
    1886         600 :     list_free(binfo_candidates);
    1887         600 :     list_free(jinfo_candidates);
    1888             : 
    1889             :     /*
    1890             :      * Arrange equivalence classes, mentioned removing a table, with the
    1891             :      * keeping one: varno of removing table should be replaced in members and
    1892             :      * sources lists. Also, remove duplicated elements if this replacement
    1893             :      * procedure created them.
    1894             :      */
    1895         600 :     i = -1;
    1896        1524 :     while ((i = bms_next_member(toRemove->eclass_indexes, i)) >= 0)
    1897             :     {
    1898         924 :         EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
    1899             : 
    1900         924 :         update_eclasses(ec, toRemove->relid, toKeep->relid);
    1901         924 :         toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i);
    1902             :     }
    1903             : 
    1904             :     /*
    1905             :      * Transfer the targetlist and attr_needed flags.
    1906             :      */
    1907             : 
    1908        2396 :     foreach(lc, toRemove->reltarget->exprs)
    1909             :     {
    1910        1796 :         Node       *node = lfirst(lc);
    1911             : 
    1912        1796 :         ChangeVarNodesExtended(node, toRemove->relid, toKeep->relid, 0,
    1913             :                                replace_relid_callback);
    1914        1796 :         if (!list_member(toKeep->reltarget->exprs, node))
    1915         180 :             toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node);
    1916             :     }
    1917             : 
    1918        7590 :     for (i = toKeep->min_attr; i <= toKeep->max_attr; i++)
    1919             :     {
    1920        6990 :         int         attno = i - toKeep->min_attr;
    1921             : 
    1922       13980 :         toRemove->attr_needed[attno] = adjust_relid_set(toRemove->attr_needed[attno],
    1923        6990 :                                                         toRemove->relid, toKeep->relid);
    1924        6990 :         toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno],
    1925        6990 :                                                      toRemove->attr_needed[attno]);
    1926             :     }
    1927             : 
    1928             :     /*
    1929             :      * If the removed relation has a row mark, transfer it to the remaining
    1930             :      * one.
    1931             :      *
    1932             :      * If both rels have row marks, just keep the one corresponding to the
    1933             :      * remaining relation because we verified earlier that they have the same
    1934             :      * strength.
    1935             :      */
    1936         600 :     if (rmark)
    1937             :     {
    1938          74 :         if (kmark)
    1939             :         {
    1940             :             Assert(kmark->markType == rmark->markType);
    1941             : 
    1942          74 :             root->rowMarks = list_delete_ptr(root->rowMarks, rmark);
    1943             :         }
    1944             :         else
    1945             :         {
    1946             :             /* Shouldn't have inheritance children here. */
    1947             :             Assert(rmark->rti == rmark->prti);
    1948             : 
    1949           0 :             rmark->rti = rmark->prti = toKeep->relid;
    1950             :         }
    1951             :     }
    1952             : 
    1953             :     /*
    1954             :      * Replace varno in all the query structures, except nodes RangeTblRef
    1955             :      * otherwise later remove_rel_from_joinlist will yield errors.
    1956             :      */
    1957         600 :     ChangeVarNodesExtended((Node *) root->parse, toRemove->relid, toKeep->relid,
    1958             :                            0, replace_relid_callback);
    1959             : 
    1960             :     /* Replace links in the planner info */
    1961         600 :     remove_rel_from_query(root, toRemove, toKeep->relid, NULL, NULL);
    1962             : 
    1963             :     /* At last, replace varno in root targetlist and HAVING clause */
    1964         600 :     ChangeVarNodesExtended((Node *) root->processed_tlist, toRemove->relid,
    1965         600 :                            toKeep->relid, 0, replace_relid_callback);
    1966         600 :     ChangeVarNodesExtended((Node *) root->processed_groupClause,
    1967         600 :                            toRemove->relid, toKeep->relid, 0,
    1968             :                            replace_relid_callback);
    1969             : 
    1970         600 :     adjust_relid_set(root->all_result_relids, toRemove->relid, toKeep->relid);
    1971         600 :     adjust_relid_set(root->leaf_result_relids, toRemove->relid, toKeep->relid);
    1972             : 
    1973             :     /*
    1974             :      * There may be references to the rel in root->fkey_list, but if so,
    1975             :      * match_foreign_keys_to_quals() will get rid of them.
    1976             :      */
    1977             : 
    1978             :     /*
    1979             :      * Finally, remove the rel from the baserel array to prevent it from being
    1980             :      * referenced again.  (We can't do this earlier because
    1981             :      * remove_join_clause_from_rels will touch it.)
    1982             :      */
    1983         600 :     root->simple_rel_array[toRemove->relid] = NULL;
    1984         600 :     root->simple_rte_array[toRemove->relid] = NULL;
    1985             : 
    1986             :     /* And nuke the RelOptInfo, just in case there's another access path. */
    1987         600 :     pfree(toRemove);
    1988             : 
    1989             : 
    1990             :     /*
    1991             :      * Now repeat construction of attr_needed bits coming from all other
    1992             :      * sources.
    1993             :      */
    1994         600 :     rebuild_placeholder_attr_needed(root);
    1995         600 :     rebuild_joinclause_attr_needed(root);
    1996         600 :     rebuild_eclass_attr_needed(root);
    1997         600 :     rebuild_lateral_attr_needed(root);
    1998         600 : }
    1999             : 
    2000             : /*
    2001             :  * split_selfjoin_quals
    2002             :  *      Processes 'joinquals' by building two lists: one containing the quals
    2003             :  *      where the columns/exprs are on either side of the join match and
    2004             :  *      another one containing the remaining quals.
    2005             :  *
    2006             :  * 'joinquals' must only contain quals for a RTE_RELATION being joined to
    2007             :  * itself.
    2008             :  */
    2009             : static void
    2010        2042 : split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals,
    2011             :                      List **otherjoinquals, int from, int to)
    2012             : {
    2013        2042 :     List       *sjoinquals = NIL;
    2014        2042 :     List       *ojoinquals = NIL;
    2015             : 
    2016        6260 :     foreach_node(RestrictInfo, rinfo, joinquals)
    2017             :     {
    2018             :         OpExpr     *expr;
    2019             :         Node       *leftexpr;
    2020             :         Node       *rightexpr;
    2021             : 
    2022             :         /* In general, clause looks like F(arg1) = G(arg2) */
    2023        4352 :         if (!rinfo->mergeopfamilies ||
    2024        4352 :             bms_num_members(rinfo->clause_relids) != 2 ||
    2025        4352 :             bms_membership(rinfo->left_relids) != BMS_SINGLETON ||
    2026        2176 :             bms_membership(rinfo->right_relids) != BMS_SINGLETON)
    2027             :         {
    2028           0 :             ojoinquals = lappend(ojoinquals, rinfo);
    2029           0 :             continue;
    2030             :         }
    2031             : 
    2032        2176 :         expr = (OpExpr *) rinfo->clause;
    2033             : 
    2034        2176 :         if (!IsA(expr, OpExpr) || list_length(expr->args) != 2)
    2035             :         {
    2036           0 :             ojoinquals = lappend(ojoinquals, rinfo);
    2037           0 :             continue;
    2038             :         }
    2039             : 
    2040        2176 :         leftexpr = get_leftop(rinfo->clause);
    2041        2176 :         rightexpr = copyObject(get_rightop(rinfo->clause));
    2042             : 
    2043        2176 :         if (leftexpr && IsA(leftexpr, RelabelType))
    2044          12 :             leftexpr = (Node *) ((RelabelType *) leftexpr)->arg;
    2045        2176 :         if (rightexpr && IsA(rightexpr, RelabelType))
    2046           6 :             rightexpr = (Node *) ((RelabelType *) rightexpr)->arg;
    2047             : 
    2048             :         /*
    2049             :          * Quite an expensive operation, narrowing the use case. For example,
    2050             :          * when we have cast of the same var to different (but compatible)
    2051             :          * types.
    2052             :          */
    2053        2176 :         ChangeVarNodesExtended(rightexpr,
    2054        2176 :                                bms_singleton_member(rinfo->right_relids),
    2055        2176 :                                bms_singleton_member(rinfo->left_relids), 0,
    2056             :                                replace_relid_callback);
    2057             : 
    2058        2176 :         if (equal(leftexpr, rightexpr))
    2059        1672 :             sjoinquals = lappend(sjoinquals, rinfo);
    2060             :         else
    2061         504 :             ojoinquals = lappend(ojoinquals, rinfo);
    2062             :     }
    2063             : 
    2064        2042 :     *selfjoinquals = sjoinquals;
    2065        2042 :     *otherjoinquals = ojoinquals;
    2066        2042 : }
    2067             : 
    2068             : /*
    2069             :  * Check for a case when uniqueness is at least partly derived from a
    2070             :  * baserestrictinfo clause. In this case, we have a chance to return only
    2071             :  * one row (if such clauses on both sides of SJ are equal) or nothing (if they
    2072             :  * are different).
    2073             :  */
    2074             : static bool
    2075         666 : match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses,
    2076             :                      Index relid)
    2077             : {
    2078        1350 :     foreach_node(RestrictInfo, rinfo, uclauses)
    2079             :     {
    2080             :         Expr       *clause;
    2081             :         Node       *iclause;
    2082             :         Node       *c1;
    2083         150 :         bool        matched = false;
    2084             : 
    2085             :         Assert(outer->relid > 0 && relid > 0);
    2086             : 
    2087             :         /* Only filters like f(R.x1,...,R.xN) == expr we should consider. */
    2088             :         Assert(bms_is_empty(rinfo->left_relids) ^
    2089             :                bms_is_empty(rinfo->right_relids));
    2090             : 
    2091         150 :         clause = (Expr *) copyObject(rinfo->clause);
    2092         150 :         ChangeVarNodesExtended((Node *) clause, relid, outer->relid, 0,
    2093             :                                replace_relid_callback);
    2094             : 
    2095         150 :         iclause = bms_is_empty(rinfo->left_relids) ? get_rightop(clause) :
    2096         144 :             get_leftop(clause);
    2097         150 :         c1 = bms_is_empty(rinfo->left_relids) ? get_leftop(clause) :
    2098         144 :             get_rightop(clause);
    2099             : 
    2100             :         /*
    2101             :          * Compare these left and right sides with the corresponding sides of
    2102             :          * the outer's filters. If no one is detected - return immediately.
    2103             :          */
    2104         408 :         foreach_node(RestrictInfo, orinfo, outer->baserestrictinfo)
    2105             :         {
    2106             :             Node       *oclause;
    2107             :             Node       *c2;
    2108             : 
    2109         192 :             if (orinfo->mergeopfamilies == NIL)
    2110             :                 /* Don't consider clauses that aren't similar to 'F(X)=G(Y)' */
    2111          60 :                 continue;
    2112             : 
    2113             :             Assert(is_opclause(orinfo->clause));
    2114             : 
    2115         264 :             oclause = bms_is_empty(orinfo->left_relids) ?
    2116         132 :                 get_rightop(orinfo->clause) : get_leftop(orinfo->clause);
    2117         264 :             c2 = (bms_is_empty(orinfo->left_relids) ?
    2118         132 :                   get_leftop(orinfo->clause) : get_rightop(orinfo->clause));
    2119             : 
    2120         132 :             if (equal(iclause, oclause) && equal(c1, c2))
    2121             :             {
    2122          84 :                 matched = true;
    2123          84 :                 break;
    2124             :             }
    2125             :         }
    2126             : 
    2127         150 :         if (!matched)
    2128          66 :             return false;
    2129             :     }
    2130             : 
    2131         600 :     return true;
    2132             : }
    2133             : 
    2134             : /*
    2135             :  * Find and remove unique self-joins in a group of base relations that have
    2136             :  * the same Oid.
    2137             :  *
    2138             :  * Returns a set of relids that were removed.
    2139             :  */
    2140             : static Relids
    2141       11132 : remove_self_joins_one_group(PlannerInfo *root, Relids relids)
    2142             : {
    2143       11132 :     Relids      result = NULL;
    2144             :     int         k;              /* Index of kept relation */
    2145       11132 :     int         r = -1;         /* Index of removed relation */
    2146             : 
    2147       34706 :     while ((r = bms_next_member(relids, r)) > 0)
    2148             :     {
    2149       23574 :         RelOptInfo *rrel = root->simple_rel_array[r];
    2150             : 
    2151       23574 :         k = r;
    2152             : 
    2153       36992 :         while ((k = bms_next_member(relids, k)) > 0)
    2154             :         {
    2155       14018 :             Relids      joinrelids = NULL;
    2156       14018 :             RelOptInfo *krel = root->simple_rel_array[k];
    2157             :             List       *restrictlist;
    2158             :             List       *selfjoinquals;
    2159             :             List       *otherjoinquals;
    2160             :             ListCell   *lc;
    2161       14018 :             bool        jinfo_check = true;
    2162       14018 :             PlanRowMark *kmark = NULL;
    2163       14018 :             PlanRowMark *rmark = NULL;
    2164       14018 :             List       *uclauses = NIL;
    2165             : 
    2166             :             /* A sanity check: the relations have the same Oid. */
    2167             :             Assert(root->simple_rte_array[k]->relid ==
    2168             :                    root->simple_rte_array[r]->relid);
    2169             : 
    2170             :             /*
    2171             :              * It is impossible to eliminate the join of two relations if they
    2172             :              * belong to different rules of order. Otherwise, the planner
    2173             :              * can't find any variants of the correct query plan.
    2174             :              */
    2175       17358 :             foreach(lc, root->join_info_list)
    2176             :             {
    2177       11090 :                 SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc);
    2178             : 
    2179       22180 :                 if ((bms_is_member(k, info->syn_lefthand) ^
    2180       15746 :                      bms_is_member(r, info->syn_lefthand)) ||
    2181        4656 :                     (bms_is_member(k, info->syn_righthand) ^
    2182        4656 :                      bms_is_member(r, info->syn_righthand)))
    2183             :                 {
    2184        7750 :                     jinfo_check = false;
    2185        7750 :                     break;
    2186             :                 }
    2187             :             }
    2188       14018 :             if (!jinfo_check)
    2189       13418 :                 continue;
    2190             : 
    2191             :             /*
    2192             :              * Check Row Marks equivalence. We can't remove the join if the
    2193             :              * relations have row marks of different strength (e.g., one is
    2194             :              * locked FOR UPDATE, and another just has ROW_MARK_REFERENCE for
    2195             :              * EvalPlanQual rechecking).
    2196             :              */
    2197        6476 :             foreach(lc, root->rowMarks)
    2198             :             {
    2199         380 :                 PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc);
    2200             : 
    2201         380 :                 if (rowMark->rti == r)
    2202             :                 {
    2203             :                     Assert(rmark == NULL);
    2204         172 :                     rmark = rowMark;
    2205             :                 }
    2206         208 :                 else if (rowMark->rti == k)
    2207             :                 {
    2208             :                     Assert(kmark == NULL);
    2209         172 :                     kmark = rowMark;
    2210             :                 }
    2211             : 
    2212         380 :                 if (kmark && rmark)
    2213         172 :                     break;
    2214             :             }
    2215        6268 :             if (kmark && rmark && kmark->markType != rmark->markType)
    2216          52 :                 continue;
    2217             : 
    2218             :             /*
    2219             :              * We only deal with base rels here, so their relids bitset
    2220             :              * contains only one member -- their relid.
    2221             :              */
    2222        6216 :             joinrelids = bms_add_member(joinrelids, r);
    2223        6216 :             joinrelids = bms_add_member(joinrelids, k);
    2224             : 
    2225             :             /*
    2226             :              * PHVs should not impose any constraints on removing self-joins.
    2227             :              */
    2228             : 
    2229             :             /*
    2230             :              * At this stage, joininfo lists of inner and outer can contain
    2231             :              * only clauses required for a superior outer join that can't
    2232             :              * influence this optimization. So, we can avoid to call the
    2233             :              * build_joinrel_restrictlist() routine.
    2234             :              */
    2235        6216 :             restrictlist = generate_join_implied_equalities(root, joinrelids,
    2236             :                                                             rrel->relids,
    2237             :                                                             krel, NULL);
    2238        6216 :             if (restrictlist == NIL)
    2239        4174 :                 continue;
    2240             : 
    2241             :             /*
    2242             :              * Process restrictlist to separate the self-join quals from the
    2243             :              * other quals. e.g., "x = x" goes to selfjoinquals and "a = b" to
    2244             :              * otherjoinquals.
    2245             :              */
    2246        2042 :             split_selfjoin_quals(root, restrictlist, &selfjoinquals,
    2247        2042 :                                  &otherjoinquals, rrel->relid, krel->relid);
    2248             : 
    2249             :             Assert(list_length(restrictlist) ==
    2250             :                    (list_length(selfjoinquals) + list_length(otherjoinquals)));
    2251             : 
    2252             :             /*
    2253             :              * To enable SJE for the only degenerate case without any self
    2254             :              * join clauses at all, add baserestrictinfo to this list. The
    2255             :              * degenerate case works only if both sides have the same clause.
    2256             :              * So doesn't matter which side to add.
    2257             :              */
    2258        2042 :             selfjoinquals = list_concat(selfjoinquals, krel->baserestrictinfo);
    2259             : 
    2260             :             /*
    2261             :              * Determine if the rrel can duplicate outer rows. We must bypass
    2262             :              * the unique rel cache here since we're possibly using a subset
    2263             :              * of join quals. We can use 'force_cache' == true when all join
    2264             :              * quals are self-join quals.  Otherwise, we could end up putting
    2265             :              * false negatives in the cache.
    2266             :              */
    2267        2042 :             if (!innerrel_is_unique_ext(root, joinrelids, rrel->relids,
    2268             :                                         krel, JOIN_INNER, selfjoinquals,
    2269        2042 :                                         list_length(otherjoinquals) == 0,
    2270             :                                         &uclauses))
    2271        1376 :                 continue;
    2272             : 
    2273             :             /*
    2274             :              * 'uclauses' is the copy of outer->baserestrictinfo that are
    2275             :              * associated with an index.  We proved by matching selfjoinquals
    2276             :              * to a unique index that the outer relation has at most one
    2277             :              * matching row for each inner row.  Sometimes that is not enough.
    2278             :              * e.g. "WHERE s1.b = s2.b AND s1.a = 1 AND s2.a = 2" when the
    2279             :              * unique index is (a,b).  Having non-empty uclauses, we must
    2280             :              * validate that the inner baserestrictinfo contains the same
    2281             :              * expressions, or we won't match the same row on each side of the
    2282             :              * join.
    2283             :              */
    2284         666 :             if (!match_unique_clauses(root, rrel, uclauses, krel->relid))
    2285          66 :                 continue;
    2286             : 
    2287             :             /*
    2288             :              * Remove rrel ReloptInfo from the planner structures and the
    2289             :              * corresponding row mark.
    2290             :              */
    2291         600 :             remove_self_join_rel(root, kmark, rmark, krel, rrel, restrictlist);
    2292             : 
    2293         600 :             result = bms_add_member(result, r);
    2294             : 
    2295             :             /* We have removed the outer relation, try the next one. */
    2296         600 :             break;
    2297             :         }
    2298             :     }
    2299             : 
    2300       11132 :     return result;
    2301             : }
    2302             : 
    2303             : /*
    2304             :  * Gather indexes of base relations from the joinlist and try to eliminate self
    2305             :  * joins.
    2306             :  */
    2307             : static Relids
    2308      105850 : remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove)
    2309             : {
    2310             :     ListCell   *jl;
    2311      105850 :     Relids      relids = NULL;
    2312      105850 :     SelfJoinCandidate *candidates = NULL;
    2313             :     int         i;
    2314             :     int         j;
    2315             :     int         numRels;
    2316             : 
    2317             :     /* Collect indexes of base relations of the join tree */
    2318      352952 :     foreach(jl, joinlist)
    2319             :     {
    2320      247102 :         Node       *jlnode = (Node *) lfirst(jl);
    2321             : 
    2322      247102 :         if (IsA(jlnode, RangeTblRef))
    2323             :         {
    2324      243706 :             int         varno = ((RangeTblRef *) jlnode)->rtindex;
    2325      243706 :             RangeTblEntry *rte = root->simple_rte_array[varno];
    2326             : 
    2327             :             /*
    2328             :              * We only consider ordinary relations as candidates to be
    2329             :              * removed, and these relations should not have TABLESAMPLE
    2330             :              * clauses specified.  Removing a relation with TABLESAMPLE clause
    2331             :              * could potentially change the syntax of the query. Because of
    2332             :              * UPDATE/DELETE EPQ mechanism, currently Query->resultRelation or
    2333             :              * Query->mergeTargetRelation associated rel cannot be eliminated.
    2334             :              */
    2335      243706 :             if (rte->rtekind == RTE_RELATION &&
    2336      212142 :                 rte->relkind == RELKIND_RELATION &&
    2337      206450 :                 rte->tablesample == NULL &&
    2338      206426 :                 varno != root->parse->resultRelation &&
    2339      204550 :                 varno != root->parse->mergeTargetRelation)
    2340             :             {
    2341             :                 Assert(!bms_is_member(varno, relids));
    2342      204550 :                 relids = bms_add_member(relids, varno);
    2343             :             }
    2344             :         }
    2345        3396 :         else if (IsA(jlnode, List))
    2346             :         {
    2347             :             /* Recursively go inside the sub-joinlist */
    2348        3396 :             toRemove = remove_self_joins_recurse(root, (List *) jlnode,
    2349             :                                                  toRemove);
    2350             :         }
    2351             :         else
    2352           0 :             elog(ERROR, "unrecognized joinlist node type: %d",
    2353             :                  (int) nodeTag(jlnode));
    2354             :     }
    2355             : 
    2356      105850 :     numRels = bms_num_members(relids);
    2357             : 
    2358             :     /* Need at least two relations for the join */
    2359      105850 :     if (numRels < 2)
    2360       31246 :         return toRemove;
    2361             : 
    2362             :     /*
    2363             :      * In order to find relations with the same oid we first build an array of
    2364             :      * candidates and then sort it by oid.
    2365             :      */
    2366       74604 :     candidates = palloc_array(SelfJoinCandidate, numRels);
    2367       74604 :     i = -1;
    2368       74604 :     j = 0;
    2369      256774 :     while ((i = bms_next_member(relids, i)) >= 0)
    2370             :     {
    2371      182170 :         candidates[j].relid = i;
    2372      182170 :         candidates[j].reloid = root->simple_rte_array[i]->relid;
    2373      182170 :         j++;
    2374             :     }
    2375             : 
    2376       74604 :     qsort(candidates, numRels, sizeof(SelfJoinCandidate),
    2377             :           self_join_candidates_cmp);
    2378             : 
    2379             :     /*
    2380             :      * Iteratively form a group of relation indexes with the same oid and
    2381             :      * launch the routine that detects self-joins in this group and removes
    2382             :      * excessive range table entries.
    2383             :      *
    2384             :      * At the end of the iteration, exclude the group from the overall relids
    2385             :      * list. So each next iteration of the cycle will involve less and less
    2386             :      * value of relids.
    2387             :      */
    2388       74604 :     i = 0;
    2389      256774 :     for (j = 1; j < numRels + 1; j++)
    2390             :     {
    2391      182170 :         if (j == numRels || candidates[j].reloid != candidates[i].reloid)
    2392             :         {
    2393      169812 :             if (j - i >= 2)
    2394             :             {
    2395             :                 /* Create a group of relation indexes with the same oid */
    2396       11054 :                 Relids      group = NULL;
    2397             :                 Relids      removed;
    2398             : 
    2399       34466 :                 while (i < j)
    2400             :                 {
    2401       23412 :                     group = bms_add_member(group, candidates[i].relid);
    2402       23412 :                     i++;
    2403             :                 }
    2404       11054 :                 relids = bms_del_members(relids, group);
    2405             : 
    2406             :                 /*
    2407             :                  * Try to remove self-joins from a group of identical entries.
    2408             :                  * Make the next attempt iteratively - if something is deleted
    2409             :                  * from a group, changes in clauses and equivalence classes
    2410             :                  * can give us a chance to find more candidates.
    2411             :                  */
    2412             :                 do
    2413             :                 {
    2414             :                     Assert(!bms_overlap(group, toRemove));
    2415       11132 :                     removed = remove_self_joins_one_group(root, group);
    2416       11132 :                     toRemove = bms_add_members(toRemove, removed);
    2417       11132 :                     group = bms_del_members(group, removed);
    2418       11708 :                 } while (!bms_is_empty(removed) &&
    2419         576 :                          bms_membership(group) == BMS_MULTIPLE);
    2420       11054 :                 bms_free(removed);
    2421       11054 :                 bms_free(group);
    2422             :             }
    2423             :             else
    2424             :             {
    2425             :                 /* Single relation, just remove it from the set */
    2426      158758 :                 relids = bms_del_member(relids, candidates[i].relid);
    2427      158758 :                 i = j;
    2428             :             }
    2429             :         }
    2430             :     }
    2431             : 
    2432             :     Assert(bms_is_empty(relids));
    2433             : 
    2434       74604 :     return toRemove;
    2435             : }
    2436             : 
    2437             : /*
    2438             :  * Compare self-join candidates by their oids.
    2439             :  */
    2440             : static int
    2441      133986 : self_join_candidates_cmp(const void *a, const void *b)
    2442             : {
    2443      133986 :     const SelfJoinCandidate *ca = (const SelfJoinCandidate *) a;
    2444      133986 :     const SelfJoinCandidate *cb = (const SelfJoinCandidate *) b;
    2445             : 
    2446      133986 :     if (ca->reloid != cb->reloid)
    2447      121574 :         return (ca->reloid < cb->reloid ? -1 : 1);
    2448             :     else
    2449       12412 :         return 0;
    2450             : }
    2451             : 
    2452             : /*
    2453             :  * Find and remove useless self joins.
    2454             :  *
    2455             :  * Search for joins where a relation is joined to itself. If the join clause
    2456             :  * for each tuple from one side of the join is proven to match the same
    2457             :  * physical row (or nothing) on the other side, that self-join can be
    2458             :  * eliminated from the query.  Suitable join clauses are assumed to be in the
    2459             :  * form of X = X, and can be replaced with NOT NULL clauses.
    2460             :  *
    2461             :  * For the sake of simplicity, we don't apply this optimization to special
    2462             :  * joins. Here is a list of what we could do in some particular cases:
    2463             :  * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins,
    2464             :  * and then removed normally.
    2465             :  * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND
    2466             :  * (IS NULL on join columns OR NOT inner quals)'.
    2467             :  * 'a a1 left join a a2': could simplify to a scan like inner but without
    2468             :  * NOT NULL conditions on join columns.
    2469             :  * 'a a1 left join (a a2 join b)': can't simplify this, because join to b
    2470             :  * can both remove rows and introduce duplicates.
    2471             :  *
    2472             :  * To search for removable joins, we order all the relations on their Oid,
    2473             :  * go over each set with the same Oid, and consider each pair of relations
    2474             :  * in this set.
    2475             :  *
    2476             :  * To remove the join, we mark one of the participating relations as dead
    2477             :  * and rewrite all references to it to point to the remaining relation.
    2478             :  * This includes modifying RestrictInfos, EquivalenceClasses, and
    2479             :  * EquivalenceMembers. We also have to modify the row marks. The join clauses
    2480             :  * of the removed relation become either restriction or join clauses, based on
    2481             :  * whether they reference any relations not participating in the removed join.
    2482             :  *
    2483             :  * 'joinlist' is the top-level joinlist of the query. If it has any
    2484             :  * references to the removed relations, we update them to point to the
    2485             :  * remaining ones.
    2486             :  */
    2487             : List *
    2488      341504 : remove_useless_self_joins(PlannerInfo *root, List *joinlist)
    2489             : {
    2490      341504 :     Relids      toRemove = NULL;
    2491      341504 :     int         relid = -1;
    2492             : 
    2493      683008 :     if (!enable_self_join_elimination || joinlist == NIL ||
    2494      581434 :         (list_length(joinlist) == 1 && !IsA(linitial(joinlist), List)))
    2495      239050 :         return joinlist;
    2496             : 
    2497             :     /*
    2498             :      * Merge pairs of relations participated in self-join. Remove unnecessary
    2499             :      * range table entries.
    2500             :      */
    2501      102454 :     toRemove = remove_self_joins_recurse(root, joinlist, toRemove);
    2502             : 
    2503      102454 :     if (unlikely(toRemove != NULL))
    2504             :     {
    2505             :         /* At the end, remove orphaned relation links */
    2506        1170 :         while ((relid = bms_next_member(toRemove, relid)) >= 0)
    2507             :         {
    2508         600 :             int         nremoved = 0;
    2509             : 
    2510         600 :             joinlist = remove_rel_from_joinlist(joinlist, relid, &nremoved);
    2511         600 :             if (nremoved != 1)
    2512           0 :                 elog(ERROR, "failed to find relation %d in joinlist", relid);
    2513             :         }
    2514             :     }
    2515             : 
    2516      102454 :     return joinlist;
    2517             : }

Generated by: LCOV version 1.16