LCOV - code coverage report
Current view: top level - src/backend/optimizer/plan - analyzejoins.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 630 654 96.3 %
Date: 2025-02-22 07:14:56 Functions: 26 26 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * analyzejoins.c
       4             :  *    Routines for simplifying joins after initial query analysis
       5             :  *
       6             :  * While we do a great deal of join simplification in prep/prepjointree.c,
       7             :  * certain optimizations cannot be performed at that stage for lack of
       8             :  * detailed information about the query.  The routines here are invoked
       9             :  * after initsplan.c has done its work, and can do additional join removal
      10             :  * and simplification steps based on the information extracted.  The penalty
      11             :  * is that we have to work harder to clean up after ourselves when we modify
      12             :  * the query, since the derived data structures have to be updated too.
      13             :  *
      14             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      15             :  * Portions Copyright (c) 1994, Regents of the University of California
      16             :  *
      17             :  *
      18             :  * IDENTIFICATION
      19             :  *    src/backend/optimizer/plan/analyzejoins.c
      20             :  *
      21             :  *-------------------------------------------------------------------------
      22             :  */
      23             : #include "postgres.h"
      24             : 
      25             : #include "catalog/pg_class.h"
      26             : #include "nodes/nodeFuncs.h"
      27             : #include "optimizer/joininfo.h"
      28             : #include "optimizer/optimizer.h"
      29             : #include "optimizer/pathnode.h"
      30             : #include "optimizer/paths.h"
      31             : #include "optimizer/placeholder.h"
      32             : #include "optimizer/planmain.h"
      33             : #include "optimizer/restrictinfo.h"
      34             : #include "rewrite/rewriteManip.h"
      35             : #include "utils/lsyscache.h"
      36             : 
      37             : /*
      38             :  * Utility structure.  A sorting procedure is needed to simplify the search
      39             :  * of SJE-candidate baserels referencing the same database relation.  Having
      40             :  * collected all baserels from the query jointree, the planner sorts them
      41             :  * according to the reloid value, groups them with the next pass and attempts
      42             :  * to remove self-joins.
      43             :  *
      44             :  * Preliminary sorting prevents quadratic behavior that can be harmful in the
      45             :  * case of numerous joins.
      46             :  */
      47             : typedef struct
      48             : {
      49             :     int         relid;
      50             :     Oid         reloid;
      51             : } SelfJoinCandidate;
      52             : 
      53             : bool        enable_self_join_elimination;
      54             : 
      55             : /* local functions */
      56             : static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
      57             : static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
      58             :                                           SpecialJoinInfo *sjinfo);
      59             : static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
      60             :                                          int relid, int ojrelid);
      61             : static void remove_rel_from_eclass(EquivalenceClass *ec,
      62             :                                    int relid, int ojrelid, int subst);
      63             : static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
      64             : static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
      65             : static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
      66             :                                 List *clause_list, List **extra_clauses);
      67             : static Oid  distinct_col_search(int colno, List *colnos, List *opids);
      68             : static bool is_innerrel_unique_for(PlannerInfo *root,
      69             :                                    Relids joinrelids,
      70             :                                    Relids outerrelids,
      71             :                                    RelOptInfo *innerrel,
      72             :                                    JoinType jointype,
      73             :                                    List *restrictlist,
      74             :                                    List **extra_clauses);
      75             : static int  self_join_candidates_cmp(const void *a, const void *b);
      76             : 
      77             : 
      78             : /*
      79             :  * remove_useless_joins
      80             :  *      Check for relations that don't actually need to be joined at all,
      81             :  *      and remove them from the query.
      82             :  *
      83             :  * We are passed the current joinlist and return the updated list.  Other
      84             :  * data structures that have to be updated are accessible via "root".
      85             :  */
      86             : List *
      87      346038 : remove_useless_joins(PlannerInfo *root, List *joinlist)
      88             : {
      89             :     ListCell   *lc;
      90             : 
      91             :     /*
      92             :      * We are only interested in relations that are left-joined to, so we can
      93             :      * scan the join_info_list to find them easily.
      94             :      */
      95      346038 : restart:
      96      383394 :     foreach(lc, root->join_info_list)
      97             :     {
      98       47264 :         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
      99             :         int         innerrelid;
     100             :         int         nremoved;
     101             : 
     102             :         /* Skip if not removable */
     103       47264 :         if (!join_is_removable(root, sjinfo))
     104       37356 :             continue;
     105             : 
     106             :         /*
     107             :          * Currently, join_is_removable can only succeed when the sjinfo's
     108             :          * righthand is a single baserel.  Remove that rel from the query and
     109             :          * joinlist.
     110             :          */
     111        9908 :         innerrelid = bms_singleton_member(sjinfo->min_righthand);
     112             : 
     113        9908 :         remove_leftjoinrel_from_query(root, innerrelid, sjinfo);
     114             : 
     115             :         /* We verify that exactly one reference gets removed from joinlist */
     116        9908 :         nremoved = 0;
     117        9908 :         joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
     118        9908 :         if (nremoved != 1)
     119           0 :             elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
     120             : 
     121             :         /*
     122             :          * We can delete this SpecialJoinInfo from the list too, since it's no
     123             :          * longer of interest.  (Since we'll restart the foreach loop
     124             :          * immediately, we don't bother with foreach_delete_current.)
     125             :          */
     126        9908 :         root->join_info_list = list_delete_cell(root->join_info_list, lc);
     127             : 
     128             :         /*
     129             :          * Restart the scan.  This is necessary to ensure we find all
     130             :          * removable joins independently of ordering of the join_info_list
     131             :          * (note that removal of attr_needed bits may make a join appear
     132             :          * removable that did not before).
     133             :          */
     134        9908 :         goto restart;
     135             :     }
     136             : 
     137      336130 :     return joinlist;
     138             : }
     139             : 
     140             : /*
     141             :  * join_is_removable
     142             :  *    Check whether we need not perform this special join at all, because
     143             :  *    it will just duplicate its left input.
     144             :  *
     145             :  * This is true for a left join for which the join condition cannot match
     146             :  * more than one inner-side row.  (There are other possibly interesting
     147             :  * cases, but we don't have the infrastructure to prove them.)  We also
     148             :  * have to check that the inner side doesn't generate any variables needed
     149             :  * above the join.
     150             :  */
     151             : static bool
     152       47264 : join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
     153             : {
     154             :     int         innerrelid;
     155             :     RelOptInfo *innerrel;
     156             :     Relids      inputrelids;
     157             :     Relids      joinrelids;
     158       47264 :     List       *clause_list = NIL;
     159             :     ListCell   *l;
     160             :     int         attroff;
     161             : 
     162             :     /*
     163             :      * Must be a left join to a single baserel, else we aren't going to be
     164             :      * able to do anything with it.
     165             :      */
     166       47264 :     if (sjinfo->jointype != JOIN_LEFT)
     167        6608 :         return false;
     168             : 
     169       40656 :     if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
     170        1266 :         return false;
     171             : 
     172             :     /*
     173             :      * Never try to eliminate a left join to the query result rel.  Although
     174             :      * the case is syntactically impossible in standard SQL, MERGE will build
     175             :      * a join tree that looks exactly like that.
     176             :      */
     177       39390 :     if (innerrelid == root->parse->resultRelation)
     178         716 :         return false;
     179             : 
     180       38674 :     innerrel = find_base_rel(root, innerrelid);
     181             : 
     182             :     /*
     183             :      * Before we go to the effort of checking whether any innerrel variables
     184             :      * are needed above the join, make a quick check to eliminate cases in
     185             :      * which we will surely be unable to prove uniqueness of the innerrel.
     186             :      */
     187       38674 :     if (!rel_supports_distinctness(root, innerrel))
     188        2898 :         return false;
     189             : 
     190             :     /* Compute the relid set for the join we are considering */
     191       35776 :     inputrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
     192             :     Assert(sjinfo->ojrelid != 0);
     193       35776 :     joinrelids = bms_copy(inputrelids);
     194       35776 :     joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
     195             : 
     196             :     /*
     197             :      * We can't remove the join if any inner-rel attributes are used above the
     198             :      * join.  Here, "above" the join includes pushed-down conditions, so we
     199             :      * should reject if attr_needed includes the OJ's own relid; therefore,
     200             :      * compare to inputrelids not joinrelids.
     201             :      *
     202             :      * As a micro-optimization, it seems better to start with max_attr and
     203             :      * count down rather than starting with min_attr and counting up, on the
     204             :      * theory that the system attributes are somewhat less likely to be wanted
     205             :      * and should be tested last.
     206             :      */
     207      319270 :     for (attroff = innerrel->max_attr - innerrel->min_attr;
     208             :          attroff >= 0;
     209      283494 :          attroff--)
     210             :     {
     211      309188 :         if (!bms_is_subset(innerrel->attr_needed[attroff], inputrelids))
     212       25694 :             return false;
     213             :     }
     214             : 
     215             :     /*
     216             :      * Similarly check that the inner rel isn't needed by any PlaceHolderVars
     217             :      * that will be used above the join.  The PHV case is a little bit more
     218             :      * complicated, because PHVs may have been assigned a ph_eval_at location
     219             :      * that includes the innerrel, yet their contained expression might not
     220             :      * actually reference the innerrel (it could be just a constant, for
     221             :      * instance).  If such a PHV is due to be evaluated above the join then it
     222             :      * needn't prevent join removal.
     223             :      */
     224       10238 :     foreach(l, root->placeholder_list)
     225             :     {
     226         192 :         PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
     227             : 
     228         192 :         if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
     229          36 :             return false;       /* it references innerrel laterally */
     230         192 :         if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
     231          54 :             continue;           /* it definitely doesn't reference innerrel */
     232         138 :         if (bms_is_subset(phinfo->ph_needed, inputrelids))
     233           6 :             continue;           /* PHV is not used above the join */
     234         132 :         if (!bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
     235          30 :             return false;       /* it has to be evaluated below the join */
     236             : 
     237             :         /*
     238             :          * We need to be sure there will still be a place to evaluate the PHV
     239             :          * if we remove the join, ie that ph_eval_at wouldn't become empty.
     240             :          */
     241         102 :         if (!bms_overlap(sjinfo->min_lefthand, phinfo->ph_eval_at))
     242           6 :             return false;       /* there isn't any other place to eval PHV */
     243             :         /* Check contained expression last, since this is a bit expensive */
     244          96 :         if (bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
     245          96 :                         innerrel->relids))
     246           0 :             return false;       /* contained expression references innerrel */
     247             :     }
     248             : 
     249             :     /*
     250             :      * Search for mergejoinable clauses that constrain the inner rel against
     251             :      * either the outer rel or a pseudoconstant.  If an operator is
     252             :      * mergejoinable then it behaves like equality for some btree opclass, so
     253             :      * it's what we want.  The mergejoinability test also eliminates clauses
     254             :      * containing volatile functions, which we couldn't depend on.
     255             :      */
     256       20432 :     foreach(l, innerrel->joininfo)
     257             :     {
     258       10386 :         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
     259             : 
     260             :         /*
     261             :          * If the current join commutes with some other outer join(s) via
     262             :          * outer join identity 3, there will be multiple clones of its join
     263             :          * clauses in the joininfo list.  We want to consider only the
     264             :          * has_clone form of such clauses.  Processing more than one form
     265             :          * would be wasteful, and also some of the others would confuse the
     266             :          * RINFO_IS_PUSHED_DOWN test below.
     267             :          */
     268       10386 :         if (restrictinfo->is_clone)
     269         136 :             continue;           /* ignore it */
     270             : 
     271             :         /*
     272             :          * If it's not a join clause for this outer join, we can't use it.
     273             :          * Note that if the clause is pushed-down, then it is logically from
     274             :          * above the outer join, even if it references no other rels (it might
     275             :          * be from WHERE, for example).
     276             :          */
     277       10250 :         if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
     278         114 :             continue;           /* ignore; not useful here */
     279             : 
     280             :         /* Ignore if it's not a mergejoinable clause */
     281       10136 :         if (!restrictinfo->can_join ||
     282       10058 :             restrictinfo->mergeopfamilies == NIL)
     283          78 :             continue;           /* not mergejoinable */
     284             : 
     285             :         /*
     286             :          * Check if the clause has the form "outer op inner" or "inner op
     287             :          * outer", and if so mark which side is inner.
     288             :          */
     289       10058 :         if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
     290             :                                      innerrel->relids))
     291           6 :             continue;           /* no good for these input relations */
     292             : 
     293             :         /* OK, add to list */
     294       10052 :         clause_list = lappend(clause_list, restrictinfo);
     295             :     }
     296             : 
     297             :     /*
     298             :      * Now that we have the relevant equality join clauses, try to prove the
     299             :      * innerrel distinct.
     300             :      */
     301       10046 :     if (rel_is_distinct_for(root, innerrel, clause_list, NULL))
     302        9908 :         return true;
     303             : 
     304             :     /*
     305             :      * Some day it would be nice to check for other methods of establishing
     306             :      * distinctness.
     307             :      */
     308         138 :     return false;
     309             : }
     310             : 
     311             : 
     312             : /*
     313             :  * Remove the target rel->relid and references to the target join from the
     314             :  * planner's data structures, having determined that there is no need
     315             :  * to include them in the query. Optionally replace them with subst if subst
     316             :  * is non-negative.
     317             :  *
     318             :  * This function updates only parts needed for both left-join removal and
     319             :  * self-join removal.
     320             :  */
     321             : static void
     322       10496 : remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel,
     323             :                       int subst, SpecialJoinInfo *sjinfo,
     324             :                       Relids joinrelids)
     325             : {
     326       10496 :     int         relid = rel->relid;
     327       10496 :     int         ojrelid = (sjinfo != NULL) ? sjinfo->ojrelid : -1;
     328             :     Index       rti;
     329             :     ListCell   *l;
     330             : 
     331             :     /*
     332             :      * Update all_baserels and related relid sets.
     333             :      */
     334       10496 :     root->all_baserels = adjust_relid_set(root->all_baserels, relid, subst);
     335       10496 :     root->outer_join_rels = adjust_relid_set(root->outer_join_rels, ojrelid, subst);
     336       10496 :     root->all_query_rels = adjust_relid_set(root->all_query_rels, relid, subst);
     337       10496 :     root->all_query_rels = adjust_relid_set(root->all_query_rels, ojrelid, subst);
     338             : 
     339             :     /*
     340             :      * Likewise remove references from SpecialJoinInfo data structures.
     341             :      *
     342             :      * This is relevant in case the outer join we're deleting is nested inside
     343             :      * other outer joins: the upper joins' relid sets have to be adjusted. The
     344             :      * RHS of the target outer join will be made empty here, but that's OK
     345             :      * since caller will delete that SpecialJoinInfo entirely.
     346             :      */
     347       24182 :     foreach(l, root->join_info_list)
     348             :     {
     349       13686 :         SpecialJoinInfo *sjinf = (SpecialJoinInfo *) lfirst(l);
     350             : 
     351             :         /*
     352             :          * initsplan.c is fairly cavalier about allowing SpecialJoinInfos'
     353             :          * lefthand/righthand relid sets to be shared with other data
     354             :          * structures.  Ensure that we don't modify the original relid sets.
     355             :          * (The commute_xxx sets are always per-SpecialJoinInfo though.)
     356             :          */
     357       13686 :         sjinf->min_lefthand = bms_copy(sjinf->min_lefthand);
     358       13686 :         sjinf->min_righthand = bms_copy(sjinf->min_righthand);
     359       13686 :         sjinf->syn_lefthand = bms_copy(sjinf->syn_lefthand);
     360       13686 :         sjinf->syn_righthand = bms_copy(sjinf->syn_righthand);
     361             :         /* Now remove relid from the sets: */
     362       13686 :         sjinf->min_lefthand = adjust_relid_set(sjinf->min_lefthand, relid, subst);
     363       13686 :         sjinf->min_righthand = adjust_relid_set(sjinf->min_righthand, relid, subst);
     364       13686 :         sjinf->syn_lefthand = adjust_relid_set(sjinf->syn_lefthand, relid, subst);
     365       13686 :         sjinf->syn_righthand = adjust_relid_set(sjinf->syn_righthand, relid, subst);
     366             : 
     367       13686 :         if (sjinfo != NULL)
     368             :         {
     369             :             Assert(subst <= 0 && ojrelid > 0);
     370             : 
     371             :             /* Remove ojrelid bits from the sets: */
     372       13590 :             sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, ojrelid);
     373       13590 :             sjinf->min_righthand = bms_del_member(sjinf->min_righthand, ojrelid);
     374       13590 :             sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, ojrelid);
     375       13590 :             sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, ojrelid);
     376             :             /* relid cannot appear in these fields, but ojrelid can: */
     377       13590 :             sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l, ojrelid);
     378       13590 :             sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r, ojrelid);
     379       13590 :             sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l, ojrelid);
     380       13590 :             sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r, ojrelid);
     381             :         }
     382             :         else
     383             :         {
     384             :             Assert(subst > 0 && ojrelid == -1);
     385             : 
     386          96 :             ChangeVarNodes((Node *) sjinf->semi_rhs_exprs, relid, subst, 0);
     387             :         }
     388             :     }
     389             : 
     390             :     /*
     391             :      * Likewise remove references from PlaceHolderVar data structures,
     392             :      * removing any no-longer-needed placeholders entirely.
     393             :      *
     394             :      * Removal is a bit trickier than it might seem: we can remove PHVs that
     395             :      * are used at the target rel and/or in the join qual, but not those that
     396             :      * are used at join partner rels or above the join.  It's not that easy to
     397             :      * distinguish PHVs used at partner rels from those used in the join qual,
     398             :      * since they will both have ph_needed sets that are subsets of
     399             :      * joinrelids.  However, a PHV used at a partner rel could not have the
     400             :      * target rel in ph_eval_at, so we check that while deciding whether to
     401             :      * remove or just update the PHV.  There is no corresponding test in
     402             :      * join_is_removable because it doesn't need to distinguish those cases.
     403             :      */
     404       10670 :     foreach(l, root->placeholder_list)
     405             :     {
     406         174 :         PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
     407             : 
     408             :         Assert(sjinfo == NULL || !bms_is_member(relid, phinfo->ph_lateral));
     409         210 :         if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
     410          54 :             bms_is_member(relid, phinfo->ph_eval_at) &&
     411          12 :             (sjinfo == NULL || !bms_is_member(ojrelid, phinfo->ph_eval_at)))
     412             :         {
     413          12 :             root->placeholder_list = foreach_delete_current(root->placeholder_list,
     414             :                                                             l);
     415          12 :             root->placeholder_array[phinfo->phid] = NULL;
     416             :         }
     417             :         else
     418             :         {
     419         162 :             PlaceHolderVar *phv = phinfo->ph_var;
     420             : 
     421         162 :             phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at, relid, subst);
     422         162 :             phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at, ojrelid, subst);
     423             :             Assert(!bms_is_empty(phinfo->ph_eval_at));   /* checked previously */
     424             :             /* Reduce ph_needed to contain only "relation 0"; see below */
     425         162 :             if (bms_is_member(0, phinfo->ph_needed))
     426          84 :                 phinfo->ph_needed = bms_make_singleton(0);
     427             :             else
     428          78 :                 phinfo->ph_needed = NULL;
     429             : 
     430         162 :             phinfo->ph_lateral = adjust_relid_set(phinfo->ph_lateral, relid, subst);
     431             : 
     432             :             /*
     433             :              * ph_lateral might contain rels mentioned in ph_eval_at after the
     434             :              * replacement, remove them.
     435             :              */
     436         162 :             phinfo->ph_lateral = bms_difference(phinfo->ph_lateral, phinfo->ph_eval_at);
     437             :             /* ph_lateral might or might not be empty */
     438             : 
     439         162 :             phv->phrels = adjust_relid_set(phv->phrels, relid, subst);
     440         162 :             phv->phrels = adjust_relid_set(phv->phrels, ojrelid, subst);
     441             :             Assert(!bms_is_empty(phv->phrels));
     442             : 
     443         162 :             ChangeVarNodes((Node *) phv->phexpr, relid, subst, 0);
     444             : 
     445             :             Assert(phv->phnullingrels == NULL); /* no need to adjust */
     446             :         }
     447             :     }
     448             : 
     449             :     /*
     450             :      * Likewise remove references from EquivalenceClasses.
     451             :      */
     452       56036 :     foreach(l, root->eq_classes)
     453             :     {
     454       45540 :         EquivalenceClass *ec = (EquivalenceClass *) lfirst(l);
     455             : 
     456       45540 :         if (bms_is_member(relid, ec->ec_relids) ||
     457       30294 :             (sjinfo == NULL || bms_is_member(ojrelid, ec->ec_relids)))
     458       15246 :             remove_rel_from_eclass(ec, relid, ojrelid, subst);
     459             :     }
     460             : 
     461             :     /*
     462             :      * Finally, we must recompute per-Var attr_needed and per-PlaceHolderVar
     463             :      * ph_needed relid sets.  These have to be known accurately, else we may
     464             :      * fail to remove other now-removable outer joins.  And our removal of the
     465             :      * join clause(s) for this outer join may mean that Vars that were
     466             :      * formerly needed no longer are.  So we have to do this honestly by
     467             :      * repeating the construction of those relid sets.  We can cheat to one
     468             :      * small extent: we can avoid re-examining the targetlist and HAVING qual
     469             :      * by preserving "relation 0" bits from the existing relid sets.  This is
     470             :      * safe because we'd never remove such references.
     471             :      *
     472             :      * So, start by removing all other bits from attr_needed sets and
     473             :      * lateral_vars lists.  (We already did this above for ph_needed.)
     474             :      */
     475       65178 :     for (rti = 1; rti < root->simple_rel_array_size; rti++)
     476             :     {
     477       54682 :         RelOptInfo *otherrel = root->simple_rel_array[rti];
     478             :         int         attroff;
     479             : 
     480             :         /* there may be empty slots corresponding to non-baserel RTEs */
     481       54682 :         if (otherrel == NULL)
     482       26300 :             continue;
     483             : 
     484             :         Assert(otherrel->relid == rti); /* sanity check on array */
     485             : 
     486      625790 :         for (attroff = otherrel->max_attr - otherrel->min_attr;
     487             :              attroff >= 0;
     488      597408 :              attroff--)
     489             :         {
     490      597408 :             if (bms_is_member(0, otherrel->attr_needed[attroff]))
     491       45194 :                 otherrel->attr_needed[attroff] = bms_make_singleton(0);
     492             :             else
     493      552214 :                 otherrel->attr_needed[attroff] = NULL;
     494             :         }
     495             : 
     496       28382 :         if (subst > 0)
     497        1460 :             ChangeVarNodes((Node *) otherrel->lateral_vars, relid, subst, 0);
     498             :     }
     499       10496 : }
     500             : 
     501             : /*
     502             :  * Remove the target relid and references to the target join from the
     503             :  * planner's data structures, having determined that there is no need
     504             :  * to include them in the query.
     505             :  *
     506             :  * We are not terribly thorough here.  We only bother to update parts of
     507             :  * the planner's data structures that will actually be consulted later.
     508             :  */
     509             : static void
     510        9908 : remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
     511             :                               SpecialJoinInfo *sjinfo)
     512             : {
     513        9908 :     RelOptInfo *rel = find_base_rel(root, relid);
     514        9908 :     int         ojrelid = sjinfo->ojrelid;
     515             :     Relids      joinrelids;
     516             :     Relids      join_plus_commute;
     517             :     List       *joininfos;
     518             :     ListCell   *l;
     519             : 
     520             :     /* Compute the relid set for the join we are considering */
     521        9908 :     joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
     522             :     Assert(ojrelid != 0);
     523        9908 :     joinrelids = bms_add_member(joinrelids, ojrelid);
     524             : 
     525        9908 :     remove_rel_from_query(root, rel, -1, sjinfo, joinrelids);
     526             : 
     527             :     /*
     528             :      * Remove any joinquals referencing the rel from the joininfo lists.
     529             :      *
     530             :      * In some cases, a joinqual has to be put back after deleting its
     531             :      * reference to the target rel.  This can occur for pseudoconstant and
     532             :      * outerjoin-delayed quals, which can get marked as requiring the rel in
     533             :      * order to force them to be evaluated at or above the join.  We can't
     534             :      * just discard them, though.  Only quals that logically belonged to the
     535             :      * outer join being discarded should be removed from the query.
     536             :      *
     537             :      * We might encounter a qual that is a clone of a deletable qual with some
     538             :      * outer-join relids added (see deconstruct_distribute_oj_quals).  To
     539             :      * ensure we get rid of such clones as well, add the relids of all OJs
     540             :      * commutable with this one to the set we test against for
     541             :      * pushed-down-ness.
     542             :      */
     543        9908 :     join_plus_commute = bms_union(joinrelids,
     544        9908 :                                   sjinfo->commute_above_r);
     545        9908 :     join_plus_commute = bms_add_members(join_plus_commute,
     546        9908 :                                         sjinfo->commute_below_l);
     547             : 
     548             :     /*
     549             :      * We must make a copy of the rel's old joininfo list before starting the
     550             :      * loop, because otherwise remove_join_clause_from_rels would destroy the
     551             :      * list while we're scanning it.
     552             :      */
     553        9908 :     joininfos = list_copy(rel->joininfo);
     554       20162 :     foreach(l, joininfos)
     555             :     {
     556       10254 :         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
     557             : 
     558       10254 :         remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
     559             : 
     560       10254 :         if (RINFO_IS_PUSHED_DOWN(rinfo, join_plus_commute))
     561             :         {
     562             :             /*
     563             :              * There might be references to relid or ojrelid in the
     564             :              * RestrictInfo's relid sets, as a consequence of PHVs having had
     565             :              * ph_eval_at sets that include those.  We already checked above
     566             :              * that any such PHV is safe (and updated its ph_eval_at), so we
     567             :              * can just drop those references.
     568             :              */
     569         108 :             remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
     570             : 
     571             :             /*
     572             :              * Cross-check that the clause itself does not reference the
     573             :              * target rel or join.
     574             :              */
     575             : #ifdef USE_ASSERT_CHECKING
     576             :             {
     577             :                 Relids      clause_varnos = pull_varnos(root,
     578             :                                                         (Node *) rinfo->clause);
     579             : 
     580             :                 Assert(!bms_is_member(relid, clause_varnos));
     581             :                 Assert(!bms_is_member(ojrelid, clause_varnos));
     582             :             }
     583             : #endif
     584             :             /* Now throw it back into the joininfo lists */
     585         108 :             distribute_restrictinfo_to_rels(root, rinfo);
     586             :         }
     587             :     }
     588             : 
     589             :     /*
     590             :      * There may be references to the rel in root->fkey_list, but if so,
     591             :      * match_foreign_keys_to_quals() will get rid of them.
     592             :      */
     593             : 
     594             :     /*
     595             :      * Now remove the rel from the baserel array to prevent it from being
     596             :      * referenced again.  (We can't do this earlier because
     597             :      * remove_join_clause_from_rels will touch it.)
     598             :      */
     599        9908 :     root->simple_rel_array[relid] = NULL;
     600             : 
     601             :     /* And nuke the RelOptInfo, just in case there's another access path */
     602        9908 :     pfree(rel);
     603             : 
     604             :     /*
     605             :      * Now repeat construction of attr_needed bits coming from all other
     606             :      * sources.
     607             :      */
     608        9908 :     rebuild_placeholder_attr_needed(root);
     609        9908 :     rebuild_joinclause_attr_needed(root);
     610        9908 :     rebuild_eclass_attr_needed(root);
     611        9908 :     rebuild_lateral_attr_needed(root);
     612        9908 : }
     613             : 
     614             : /*
     615             :  * Remove any references to relid or ojrelid from the RestrictInfo.
     616             :  *
     617             :  * We only bother to clean out bits in clause_relids and required_relids,
     618             :  * not nullingrel bits in contained Vars and PHVs.  (This might have to be
     619             :  * improved sometime.)  However, if the RestrictInfo contains an OR clause
     620             :  * we have to also clean up the sub-clauses.
     621             :  */
     622             : static void
     623        4318 : remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid)
     624             : {
     625             :     /*
     626             :      * initsplan.c is fairly cavalier about allowing RestrictInfos to share
     627             :      * relid sets with other RestrictInfos, and SpecialJoinInfos too.  Make
     628             :      * sure this RestrictInfo has its own relid sets before we modify them.
     629             :      * (In present usage, clause_relids is probably not shared, but
     630             :      * required_relids could be; let's not assume anything.)
     631             :      */
     632        4318 :     rinfo->clause_relids = bms_copy(rinfo->clause_relids);
     633        4318 :     rinfo->clause_relids = bms_del_member(rinfo->clause_relids, relid);
     634        4318 :     rinfo->clause_relids = bms_del_member(rinfo->clause_relids, ojrelid);
     635             :     /* Likewise for required_relids */
     636        4318 :     rinfo->required_relids = bms_copy(rinfo->required_relids);
     637        4318 :     rinfo->required_relids = bms_del_member(rinfo->required_relids, relid);
     638        4318 :     rinfo->required_relids = bms_del_member(rinfo->required_relids, ojrelid);
     639             : 
     640             :     /* If it's an OR, recurse to clean up sub-clauses */
     641        4318 :     if (restriction_is_or_clause(rinfo))
     642             :     {
     643             :         ListCell   *lc;
     644             : 
     645             :         Assert(is_orclause(rinfo->orclause));
     646          18 :         foreach(lc, ((BoolExpr *) rinfo->orclause)->args)
     647             :         {
     648          12 :             Node       *orarg = (Node *) lfirst(lc);
     649             : 
     650             :             /* OR arguments should be ANDs or sub-RestrictInfos */
     651          12 :             if (is_andclause(orarg))
     652             :             {
     653           0 :                 List       *andargs = ((BoolExpr *) orarg)->args;
     654             :                 ListCell   *lc2;
     655             : 
     656           0 :                 foreach(lc2, andargs)
     657             :                 {
     658           0 :                     RestrictInfo *rinfo2 = lfirst_node(RestrictInfo, lc2);
     659             : 
     660           0 :                     remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
     661             :                 }
     662             :             }
     663             :             else
     664             :             {
     665          12 :                 RestrictInfo *rinfo2 = castNode(RestrictInfo, orarg);
     666             : 
     667          12 :                 remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
     668             :             }
     669             :         }
     670             :     }
     671        4318 : }
     672             : 
     673             : /*
     674             :  * Remove any references to relid or ojrelid from the EquivalenceClass.
     675             :  *
     676             :  * Like remove_rel_from_restrictinfo, we don't worry about cleaning out
     677             :  * any nullingrel bits in contained Vars and PHVs.  (This might have to be
     678             :  * improved sometime.)  We do need to fix the EC and EM relid sets to ensure
     679             :  * that implied join equalities will be generated at the appropriate join
     680             :  * level(s).
     681             :  */
     682             : static void
     683       15246 : remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid, int subst)
     684             : {
     685             :     ListCell   *lc;
     686             : 
     687             :     /* Fix up the EC's overall relids */
     688       15246 :     ec->ec_relids = adjust_relid_set(ec->ec_relids, relid, subst);
     689       15246 :     ec->ec_relids = adjust_relid_set(ec->ec_relids, ojrelid, subst);
     690             : 
     691             :     /*
     692             :      * Fix up the member expressions.  Any non-const member that ends with
     693             :      * empty em_relids must be a Var or PHV of the removed relation.  We don't
     694             :      * need it anymore, so we can drop it.
     695             :      */
     696       35460 :     foreach(lc, ec->ec_members)
     697             :     {
     698       20214 :         EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
     699             : 
     700       20214 :         if (bms_is_member(relid, cur_em->em_relids) ||
     701        4198 :             (ojrelid != -1 && bms_is_member(ojrelid, cur_em->em_relids)))
     702             :         {
     703             :             Assert(!cur_em->em_is_const);
     704       14208 :             cur_em->em_relids = adjust_relid_set(cur_em->em_relids, relid, subst);
     705       14208 :             cur_em->em_relids = adjust_relid_set(cur_em->em_relids, ojrelid, subst);
     706       14208 :             if (bms_is_empty(cur_em->em_relids))
     707       14196 :                 ec->ec_members = foreach_delete_current(ec->ec_members, lc);
     708             :         }
     709             :     }
     710             : 
     711             :     /* Fix up the source clauses, in case we can re-use them later */
     712       20482 :     foreach(lc, ec->ec_sources)
     713             :     {
     714        5236 :         RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
     715             : 
     716        5236 :         if (ojrelid == -1)
     717        1038 :             ChangeVarNodes((Node *) rinfo, relid, subst, 0);
     718             :         else
     719        4198 :             remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
     720             :     }
     721             : 
     722             :     /*
     723             :      * Rather than expend code on fixing up any already-derived clauses, just
     724             :      * drop them.  (At this point, any such clauses would be base restriction
     725             :      * clauses, which we'd not need anymore anyway.)
     726             :      */
     727       15246 :     ec->ec_derives = NIL;
     728       15246 : }
     729             : 
     730             : /*
     731             :  * Remove any occurrences of the target relid from a joinlist structure.
     732             :  *
     733             :  * It's easiest to build a whole new list structure, so we handle it that
     734             :  * way.  Efficiency is not a big deal here.
     735             :  *
     736             :  * *nremoved is incremented by the number of occurrences removed (there
     737             :  * should be exactly one, but the caller checks that).
     738             :  */
     739             : static List *
     740       10760 : remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
     741             : {
     742       10760 :     List       *result = NIL;
     743             :     ListCell   *jl;
     744             : 
     745       39406 :     foreach(jl, joinlist)
     746             :     {
     747       28646 :         Node       *jlnode = (Node *) lfirst(jl);
     748             : 
     749       28646 :         if (IsA(jlnode, RangeTblRef))
     750             :         {
     751       28382 :             int         varno = ((RangeTblRef *) jlnode)->rtindex;
     752             : 
     753       28382 :             if (varno == relid)
     754       10496 :                 (*nremoved)++;
     755             :             else
     756       17886 :                 result = lappend(result, jlnode);
     757             :         }
     758         264 :         else if (IsA(jlnode, List))
     759             :         {
     760             :             /* Recurse to handle subproblem */
     761             :             List       *sublist;
     762             : 
     763         264 :             sublist = remove_rel_from_joinlist((List *) jlnode,
     764             :                                                relid, nremoved);
     765             :             /* Avoid including empty sub-lists in the result */
     766         264 :             if (sublist)
     767         264 :                 result = lappend(result, sublist);
     768             :         }
     769             :         else
     770             :         {
     771           0 :             elog(ERROR, "unrecognized joinlist node type: %d",
     772             :                  (int) nodeTag(jlnode));
     773             :         }
     774             :     }
     775             : 
     776       10760 :     return result;
     777             : }
     778             : 
     779             : 
     780             : /*
     781             :  * reduce_unique_semijoins
     782             :  *      Check for semijoins that can be simplified to plain inner joins
     783             :  *      because the inner relation is provably unique for the join clauses.
     784             :  *
     785             :  * Ideally this would happen during reduce_outer_joins, but we don't have
     786             :  * enough information at that point.
     787             :  *
     788             :  * To perform the strength reduction when applicable, we need only delete
     789             :  * the semijoin's SpecialJoinInfo from root->join_info_list.  (We don't
     790             :  * bother fixing the join type attributed to it in the query jointree,
     791             :  * since that won't be consulted again.)
     792             :  */
     793             : void
     794      336130 : reduce_unique_semijoins(PlannerInfo *root)
     795             : {
     796             :     ListCell   *lc;
     797             : 
     798             :     /*
     799             :      * Scan the join_info_list to find semijoins.
     800             :      */
     801      373260 :     foreach(lc, root->join_info_list)
     802             :     {
     803       37130 :         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
     804             :         int         innerrelid;
     805             :         RelOptInfo *innerrel;
     806             :         Relids      joinrelids;
     807             :         List       *restrictlist;
     808             : 
     809             :         /*
     810             :          * Must be a semijoin to a single baserel, else we aren't going to be
     811             :          * able to do anything with it.
     812             :          */
     813       37130 :         if (sjinfo->jointype != JOIN_SEMI)
     814       36876 :             continue;
     815             : 
     816        2010 :         if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
     817         164 :             continue;
     818             : 
     819        1846 :         innerrel = find_base_rel(root, innerrelid);
     820             : 
     821             :         /*
     822             :          * Before we trouble to run generate_join_implied_equalities, make a
     823             :          * quick check to eliminate cases in which we will surely be unable to
     824             :          * prove uniqueness of the innerrel.
     825             :          */
     826        1846 :         if (!rel_supports_distinctness(root, innerrel))
     827         810 :             continue;
     828             : 
     829             :         /* Compute the relid set for the join we are considering */
     830        1036 :         joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
     831             :         Assert(sjinfo->ojrelid == 0);    /* SEMI joins don't have RT indexes */
     832             : 
     833             :         /*
     834             :          * Since we're only considering a single-rel RHS, any join clauses it
     835             :          * has must be clauses linking it to the semijoin's min_lefthand.  We
     836             :          * can also consider EC-derived join clauses.
     837             :          */
     838             :         restrictlist =
     839        1036 :             list_concat(generate_join_implied_equalities(root,
     840             :                                                          joinrelids,
     841             :                                                          sjinfo->min_lefthand,
     842             :                                                          innerrel,
     843             :                                                          NULL),
     844        1036 :                         innerrel->joininfo);
     845             : 
     846             :         /* Test whether the innerrel is unique for those clauses. */
     847        1036 :         if (!innerrel_is_unique(root,
     848             :                                 joinrelids, sjinfo->min_lefthand, innerrel,
     849             :                                 JOIN_SEMI, restrictlist, true))
     850         782 :             continue;
     851             : 
     852             :         /* OK, remove the SpecialJoinInfo from the list. */
     853         254 :         root->join_info_list = foreach_delete_current(root->join_info_list, lc);
     854             :     }
     855      336130 : }
     856             : 
     857             : 
     858             : /*
     859             :  * rel_supports_distinctness
     860             :  *      Could the relation possibly be proven distinct on some set of columns?
     861             :  *
     862             :  * This is effectively a pre-checking function for rel_is_distinct_for().
     863             :  * It must return true if rel_is_distinct_for() could possibly return true
     864             :  * with this rel, but it should not expend a lot of cycles.  The idea is
     865             :  * that callers can avoid doing possibly-expensive processing to compute
     866             :  * rel_is_distinct_for()'s argument lists if the call could not possibly
     867             :  * succeed.
     868             :  */
     869             : static bool
     870      966076 : rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
     871             : {
     872             :     /* We only know about baserels ... */
     873      966076 :     if (rel->reloptkind != RELOPT_BASEREL)
     874      333092 :         return false;
     875      632984 :     if (rel->rtekind == RTE_RELATION)
     876             :     {
     877             :         /*
     878             :          * For a plain relation, we only know how to prove uniqueness by
     879             :          * reference to unique indexes.  Make sure there's at least one
     880             :          * suitable unique index.  It must be immediately enforced, and not a
     881             :          * partial index. (Keep these conditions in sync with
     882             :          * relation_has_unique_index_for!)
     883             :          */
     884             :         ListCell   *lc;
     885             : 
     886      817092 :         foreach(lc, rel->indexlist)
     887             :         {
     888      768822 :             IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
     889             : 
     890      768822 :             if (ind->unique && ind->immediate && ind->indpred == NIL)
     891      559754 :                 return true;
     892             :         }
     893             :     }
     894       24960 :     else if (rel->rtekind == RTE_SUBQUERY)
     895             :     {
     896        4116 :         Query      *subquery = root->simple_rte_array[rel->relid]->subquery;
     897             : 
     898             :         /* Check if the subquery has any qualities that support distinctness */
     899        4116 :         if (query_supports_distinctness(subquery))
     900        2662 :             return true;
     901             :     }
     902             :     /* We have no proof rules for any other rtekinds. */
     903       70568 :     return false;
     904             : }
     905             : 
     906             : /*
     907             :  * rel_is_distinct_for
     908             :  *      Does the relation return only distinct rows according to clause_list?
     909             :  *
     910             :  * clause_list is a list of join restriction clauses involving this rel and
     911             :  * some other one.  Return true if no two rows emitted by this rel could
     912             :  * possibly join to the same row of the other rel.
     913             :  *
     914             :  * The caller must have already determined that each condition is a
     915             :  * mergejoinable equality with an expression in this relation on one side, and
     916             :  * an expression not involving this relation on the other.  The transient
     917             :  * outer_is_left flag is used to identify which side references this relation:
     918             :  * left side if outer_is_left is false, right side if it is true.
     919             :  *
     920             :  * Note that the passed-in clause_list may be destructively modified!  This
     921             :  * is OK for current uses, because the clause_list is built by the caller for
     922             :  * the sole purpose of passing to this function.
     923             :  *
     924             :  * (*extra_clauses) to be set to the right sides of baserestrictinfo clauses,
     925             :  * looking like "x = const" if distinctness is derived from such clauses, not
     926             :  * joininfo clauses.  Pass NULL to the extra_clauses if this value is not
     927             :  * needed.
     928             :  */
     929             : static bool
     930      367050 : rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list,
     931             :                     List **extra_clauses)
     932             : {
     933             :     /*
     934             :      * We could skip a couple of tests here if we assume all callers checked
     935             :      * rel_supports_distinctness first, but it doesn't seem worth taking any
     936             :      * risk for.
     937             :      */
     938      367050 :     if (rel->reloptkind != RELOPT_BASEREL)
     939           0 :         return false;
     940      367050 :     if (rel->rtekind == RTE_RELATION)
     941             :     {
     942             :         /*
     943             :          * Examine the indexes to see if we have a matching unique index.
     944             :          * relation_has_unique_index_ext automatically adds any usable
     945             :          * restriction clauses for the rel, so we needn't do that here.
     946             :          */
     947      365316 :         if (relation_has_unique_index_ext(root, rel, clause_list, NIL, NIL,
     948             :                                           extra_clauses))
     949      213330 :             return true;
     950             :     }
     951        1734 :     else if (rel->rtekind == RTE_SUBQUERY)
     952             :     {
     953        1734 :         Index       relid = rel->relid;
     954        1734 :         Query      *subquery = root->simple_rte_array[relid]->subquery;
     955        1734 :         List       *colnos = NIL;
     956        1734 :         List       *opids = NIL;
     957             :         ListCell   *l;
     958             : 
     959             :         /*
     960             :          * Build the argument lists for query_is_distinct_for: a list of
     961             :          * output column numbers that the query needs to be distinct over, and
     962             :          * a list of equality operators that the output columns need to be
     963             :          * distinct according to.
     964             :          *
     965             :          * (XXX we are not considering restriction clauses attached to the
     966             :          * subquery; is that worth doing?)
     967             :          */
     968        3438 :         foreach(l, clause_list)
     969             :         {
     970        1704 :             RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
     971             :             Oid         op;
     972             :             Var        *var;
     973             : 
     974             :             /*
     975             :              * Get the equality operator we need uniqueness according to.
     976             :              * (This might be a cross-type operator and thus not exactly the
     977             :              * same operator the subquery would consider; that's all right
     978             :              * since query_is_distinct_for can resolve such cases.)  The
     979             :              * caller's mergejoinability test should have selected only
     980             :              * OpExprs.
     981             :              */
     982        1704 :             op = castNode(OpExpr, rinfo->clause)->opno;
     983             : 
     984             :             /* caller identified the inner side for us */
     985        1704 :             if (rinfo->outer_is_left)
     986        1390 :                 var = (Var *) get_rightop(rinfo->clause);
     987             :             else
     988         314 :                 var = (Var *) get_leftop(rinfo->clause);
     989             : 
     990             :             /*
     991             :              * We may ignore any RelabelType node above the operand.  (There
     992             :              * won't be more than one, since eval_const_expressions() has been
     993             :              * applied already.)
     994             :              */
     995        1704 :             if (var && IsA(var, RelabelType))
     996         638 :                 var = (Var *) ((RelabelType *) var)->arg;
     997             : 
     998             :             /*
     999             :              * If inner side isn't a Var referencing a subquery output column,
    1000             :              * this clause doesn't help us.
    1001             :              */
    1002        1704 :             if (!var || !IsA(var, Var) ||
    1003        1692 :                 var->varno != relid || var->varlevelsup != 0)
    1004          12 :                 continue;
    1005             : 
    1006        1692 :             colnos = lappend_int(colnos, var->varattno);
    1007        1692 :             opids = lappend_oid(opids, op);
    1008             :         }
    1009             : 
    1010        1734 :         if (query_is_distinct_for(subquery, colnos, opids))
    1011         212 :             return true;
    1012             :     }
    1013      153508 :     return false;
    1014             : }
    1015             : 
    1016             : 
    1017             : /*
    1018             :  * query_supports_distinctness - could the query possibly be proven distinct
    1019             :  *      on some set of output columns?
    1020             :  *
    1021             :  * This is effectively a pre-checking function for query_is_distinct_for().
    1022             :  * It must return true if query_is_distinct_for() could possibly return true
    1023             :  * with this query, but it should not expend a lot of cycles.  The idea is
    1024             :  * that callers can avoid doing possibly-expensive processing to compute
    1025             :  * query_is_distinct_for()'s argument lists if the call could not possibly
    1026             :  * succeed.
    1027             :  */
    1028             : bool
    1029        4788 : query_supports_distinctness(Query *query)
    1030             : {
    1031             :     /* SRFs break distinctness except with DISTINCT, see below */
    1032        4788 :     if (query->hasTargetSRFs && query->distinctClause == NIL)
    1033         956 :         return false;
    1034             : 
    1035             :     /* check for features we can prove distinctness with */
    1036        3832 :     if (query->distinctClause != NIL ||
    1037        3688 :         query->groupClause != NIL ||
    1038        3500 :         query->groupingSets != NIL ||
    1039        3500 :         query->hasAggs ||
    1040        3228 :         query->havingQual ||
    1041        3228 :         query->setOperations)
    1042        3298 :         return true;
    1043             : 
    1044         534 :     return false;
    1045             : }
    1046             : 
    1047             : /*
    1048             :  * query_is_distinct_for - does query never return duplicates of the
    1049             :  *      specified columns?
    1050             :  *
    1051             :  * query is a not-yet-planned subquery (in current usage, it's always from
    1052             :  * a subquery RTE, which the planner avoids scribbling on).
    1053             :  *
    1054             :  * colnos is an integer list of output column numbers (resno's).  We are
    1055             :  * interested in whether rows consisting of just these columns are certain
    1056             :  * to be distinct.  "Distinctness" is defined according to whether the
    1057             :  * corresponding upper-level equality operators listed in opids would think
    1058             :  * the values are distinct.  (Note: the opids entries could be cross-type
    1059             :  * operators, and thus not exactly the equality operators that the subquery
    1060             :  * would use itself.  We use equality_ops_are_compatible() to check
    1061             :  * compatibility.  That looks at btree or hash opfamily membership, and so
    1062             :  * should give trustworthy answers for all operators that we might need
    1063             :  * to deal with here.)
    1064             :  */
    1065             : bool
    1066        1856 : query_is_distinct_for(Query *query, List *colnos, List *opids)
    1067             : {
    1068             :     ListCell   *l;
    1069             :     Oid         opid;
    1070             : 
    1071             :     Assert(list_length(colnos) == list_length(opids));
    1072             : 
    1073             :     /*
    1074             :      * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
    1075             :      * columns in the DISTINCT clause appear in colnos and operator semantics
    1076             :      * match.  This is true even if there are SRFs in the DISTINCT columns or
    1077             :      * elsewhere in the tlist.
    1078             :      */
    1079        1856 :     if (query->distinctClause)
    1080             :     {
    1081         150 :         foreach(l, query->distinctClause)
    1082             :         {
    1083         120 :             SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
    1084         120 :             TargetEntry *tle = get_sortgroupclause_tle(sgc,
    1085             :                                                        query->targetList);
    1086             : 
    1087         120 :             opid = distinct_col_search(tle->resno, colnos, opids);
    1088         120 :             if (!OidIsValid(opid) ||
    1089          48 :                 !equality_ops_are_compatible(opid, sgc->eqop))
    1090             :                 break;          /* exit early if no match */
    1091             :         }
    1092         102 :         if (l == NULL)          /* had matches for all? */
    1093          30 :             return true;
    1094             :     }
    1095             : 
    1096             :     /*
    1097             :      * Otherwise, a set-returning function in the query's targetlist can
    1098             :      * result in returning duplicate rows, despite any grouping that might
    1099             :      * occur before tlist evaluation.  (If all tlist SRFs are within GROUP BY
    1100             :      * columns, it would be safe because they'd be expanded before grouping.
    1101             :      * But it doesn't currently seem worth the effort to check for that.)
    1102             :      */
    1103        1826 :     if (query->hasTargetSRFs)
    1104           0 :         return false;
    1105             : 
    1106             :     /*
    1107             :      * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
    1108             :      * the grouped columns appear in colnos and operator semantics match.
    1109             :      */
    1110        1826 :     if (query->groupClause && !query->groupingSets)
    1111             :     {
    1112         234 :         foreach(l, query->groupClause)
    1113             :         {
    1114         164 :             SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
    1115         164 :             TargetEntry *tle = get_sortgroupclause_tle(sgc,
    1116             :                                                        query->targetList);
    1117             : 
    1118         164 :             opid = distinct_col_search(tle->resno, colnos, opids);
    1119         164 :             if (!OidIsValid(opid) ||
    1120         112 :                 !equality_ops_are_compatible(opid, sgc->eqop))
    1121             :                 break;          /* exit early if no match */
    1122             :         }
    1123         122 :         if (l == NULL)          /* had matches for all? */
    1124          70 :             return true;
    1125             :     }
    1126        1704 :     else if (query->groupingSets)
    1127             :     {
    1128             :         /*
    1129             :          * If we have grouping sets with expressions, we probably don't have
    1130             :          * uniqueness and analysis would be hard. Punt.
    1131             :          */
    1132           0 :         if (query->groupClause)
    1133           0 :             return false;
    1134             : 
    1135             :         /*
    1136             :          * If we have no groupClause (therefore no grouping expressions), we
    1137             :          * might have one or many empty grouping sets. If there's just one,
    1138             :          * then we're returning only one row and are certainly unique. But
    1139             :          * otherwise, we know we're certainly not unique.
    1140             :          */
    1141           0 :         if (list_length(query->groupingSets) == 1 &&
    1142           0 :             ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
    1143           0 :             return true;
    1144             :         else
    1145           0 :             return false;
    1146             :     }
    1147             :     else
    1148             :     {
    1149             :         /*
    1150             :          * If we have no GROUP BY, but do have aggregates or HAVING, then the
    1151             :          * result is at most one row so it's surely unique, for any operators.
    1152             :          */
    1153        1704 :         if (query->hasAggs || query->havingQual)
    1154         100 :             return true;
    1155             :     }
    1156             : 
    1157             :     /*
    1158             :      * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
    1159             :      * except with ALL.
    1160             :      */
    1161        1656 :     if (query->setOperations)
    1162             :     {
    1163        1532 :         SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations);
    1164             : 
    1165             :         Assert(topop->op != SETOP_NONE);
    1166             : 
    1167        1532 :         if (!topop->all)
    1168             :         {
    1169             :             ListCell   *lg;
    1170             : 
    1171             :             /* We're good if all the nonjunk output columns are in colnos */
    1172          72 :             lg = list_head(topop->groupClauses);
    1173          90 :             foreach(l, query->targetList)
    1174             :             {
    1175          78 :                 TargetEntry *tle = (TargetEntry *) lfirst(l);
    1176             :                 SortGroupClause *sgc;
    1177             : 
    1178          78 :                 if (tle->resjunk)
    1179           0 :                     continue;   /* ignore resjunk columns */
    1180             : 
    1181             :                 /* non-resjunk columns should have grouping clauses */
    1182             :                 Assert(lg != NULL);
    1183          78 :                 sgc = (SortGroupClause *) lfirst(lg);
    1184          78 :                 lg = lnext(topop->groupClauses, lg);
    1185             : 
    1186          78 :                 opid = distinct_col_search(tle->resno, colnos, opids);
    1187          78 :                 if (!OidIsValid(opid) ||
    1188          18 :                     !equality_ops_are_compatible(opid, sgc->eqop))
    1189             :                     break;      /* exit early if no match */
    1190             :             }
    1191          72 :             if (l == NULL)      /* had matches for all? */
    1192          12 :                 return true;
    1193             :         }
    1194             :     }
    1195             : 
    1196             :     /*
    1197             :      * XXX Are there any other cases in which we can easily see the result
    1198             :      * must be distinct?
    1199             :      *
    1200             :      * If you do add more smarts to this function, be sure to update
    1201             :      * query_supports_distinctness() to match.
    1202             :      */
    1203             : 
    1204        1644 :     return false;
    1205             : }
    1206             : 
    1207             : /*
    1208             :  * distinct_col_search - subroutine for query_is_distinct_for
    1209             :  *
    1210             :  * If colno is in colnos, return the corresponding element of opids,
    1211             :  * else return InvalidOid.  (Ordinarily colnos would not contain duplicates,
    1212             :  * but if it does, we arbitrarily select the first match.)
    1213             :  */
    1214             : static Oid
    1215         362 : distinct_col_search(int colno, List *colnos, List *opids)
    1216             : {
    1217             :     ListCell   *lc1,
    1218             :                *lc2;
    1219             : 
    1220         574 :     forboth(lc1, colnos, lc2, opids)
    1221             :     {
    1222         390 :         if (colno == lfirst_int(lc1))
    1223         178 :             return lfirst_oid(lc2);
    1224             :     }
    1225         184 :     return InvalidOid;
    1226             : }
    1227             : 
    1228             : 
    1229             : /*
    1230             :  * innerrel_is_unique
    1231             :  *    Check if the innerrel provably contains at most one tuple matching any
    1232             :  *    tuple from the outerrel, based on join clauses in the 'restrictlist'.
    1233             :  *
    1234             :  * We need an actual RelOptInfo for the innerrel, but it's sufficient to
    1235             :  * identify the outerrel by its Relids.  This asymmetry supports use of this
    1236             :  * function before joinrels have been built.  (The caller is expected to
    1237             :  * also supply the joinrelids, just to save recalculating that.)
    1238             :  *
    1239             :  * The proof must be made based only on clauses that will be "joinquals"
    1240             :  * rather than "otherquals" at execution.  For an inner join there's no
    1241             :  * difference; but if the join is outer, we must ignore pushed-down quals,
    1242             :  * as those will become "otherquals".  Note that this means the answer might
    1243             :  * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
    1244             :  * answer without regard to that, callers must take care not to call this
    1245             :  * with jointypes that would be classified differently by IS_OUTER_JOIN().
    1246             :  *
    1247             :  * The actual proof is undertaken by is_innerrel_unique_for(); this function
    1248             :  * is a frontend that is mainly concerned with caching the answers.
    1249             :  * In particular, the force_cache argument allows overriding the internal
    1250             :  * heuristic about whether to cache negative answers; it should be "true"
    1251             :  * if making an inquiry that is not part of the normal bottom-up join search
    1252             :  * sequence.
    1253             :  */
    1254             : bool
    1255     1097774 : innerrel_is_unique(PlannerInfo *root,
    1256             :                    Relids joinrelids,
    1257             :                    Relids outerrelids,
    1258             :                    RelOptInfo *innerrel,
    1259             :                    JoinType jointype,
    1260             :                    List *restrictlist,
    1261             :                    bool force_cache)
    1262             : {
    1263     1097774 :     return innerrel_is_unique_ext(root, joinrelids, outerrelids, innerrel,
    1264             :                                   jointype, restrictlist, force_cache, NULL);
    1265             : }
    1266             : 
    1267             : /*
    1268             :  * innerrel_is_unique_ext
    1269             :  *    Do the same as innerrel_is_unique(), but also set to (*extra_clauses)
    1270             :  *    additional clauses from a baserestrictinfo list used to prove the
    1271             :  *    uniqueness.
    1272             :  *
    1273             :  * A non-NULL extra_clauses indicates that we're checking for self-join and
    1274             :  * correspondingly dealing with filtered clauses.
    1275             :  */
    1276             : bool
    1277     1099750 : innerrel_is_unique_ext(PlannerInfo *root,
    1278             :                        Relids joinrelids,
    1279             :                        Relids outerrelids,
    1280             :                        RelOptInfo *innerrel,
    1281             :                        JoinType jointype,
    1282             :                        List *restrictlist,
    1283             :                        bool force_cache,
    1284             :                        List **extra_clauses)
    1285             : {
    1286             :     MemoryContext old_context;
    1287             :     ListCell   *lc;
    1288             :     UniqueRelInfo *uniqueRelInfo;
    1289     1099750 :     List       *outer_exprs = NIL;
    1290     1099750 :     bool        self_join = (extra_clauses != NULL);
    1291             : 
    1292             :     /* Certainly can't prove uniqueness when there are no joinclauses */
    1293     1099750 :     if (restrictlist == NIL)
    1294      174194 :         return false;
    1295             : 
    1296             :     /*
    1297             :      * Make a quick check to eliminate cases in which we will surely be unable
    1298             :      * to prove uniqueness of the innerrel.
    1299             :      */
    1300      925556 :     if (!rel_supports_distinctness(root, innerrel))
    1301      399952 :         return false;
    1302             : 
    1303             :     /*
    1304             :      * Query the cache to see if we've managed to prove that innerrel is
    1305             :      * unique for any subset of this outerrel.  For non-self-join search, we
    1306             :      * don't need an exact match, as extra outerrels can't make the innerrel
    1307             :      * any less unique (or more formally, the restrictlist for a join to a
    1308             :      * superset outerrel must be a superset of the conditions we successfully
    1309             :      * used before). For self-join search, we require an exact match of
    1310             :      * outerrels because we need extra clauses to be valid for our case. Also,
    1311             :      * for self-join checking we've filtered the clauses list.  Thus, we can
    1312             :      * match only the result cached for a self-join search for another
    1313             :      * self-join check.
    1314             :      */
    1315      609684 :     foreach(lc, innerrel->unique_for_rels)
    1316             :     {
    1317      252356 :         uniqueRelInfo = (UniqueRelInfo *) lfirst(lc);
    1318             : 
    1319      252356 :         if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) ||
    1320          68 :             (self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) &&
    1321          56 :              uniqueRelInfo->self_join))
    1322             :         {
    1323      168276 :             if (extra_clauses)
    1324          12 :                 *extra_clauses = uniqueRelInfo->extra_clauses;
    1325      168276 :             return true;        /* Success! */
    1326             :         }
    1327             :     }
    1328             : 
    1329             :     /*
    1330             :      * Conversely, we may have already determined that this outerrel, or some
    1331             :      * superset thereof, cannot prove this innerrel to be unique.
    1332             :      */
    1333      357812 :     foreach(lc, innerrel->non_unique_for_rels)
    1334             :     {
    1335         808 :         Relids      unique_for_rels = (Relids) lfirst(lc);
    1336             : 
    1337         808 :         if (bms_is_subset(outerrelids, unique_for_rels))
    1338         324 :             return false;
    1339             :     }
    1340             : 
    1341             :     /* No cached information, so try to make the proof. */
    1342      357004 :     if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
    1343             :                                jointype, restrictlist,
    1344             :                                self_join ? &outer_exprs : NULL))
    1345             :     {
    1346             :         /*
    1347             :          * Cache the positive result for future probes, being sure to keep it
    1348             :          * in the planner_cxt even if we are working in GEQO.
    1349             :          *
    1350             :          * Note: one might consider trying to isolate the minimal subset of
    1351             :          * the outerrels that proved the innerrel unique.  But it's not worth
    1352             :          * the trouble, because the planner builds up joinrels incrementally
    1353             :          * and so we'll see the minimally sufficient outerrels before any
    1354             :          * supersets of them anyway.
    1355             :          */
    1356      203634 :         old_context = MemoryContextSwitchTo(root->planner_cxt);
    1357      203634 :         uniqueRelInfo = makeNode(UniqueRelInfo);
    1358      203634 :         uniqueRelInfo->outerrelids = bms_copy(outerrelids);
    1359      203634 :         uniqueRelInfo->self_join = self_join;
    1360      203634 :         uniqueRelInfo->extra_clauses = outer_exprs;
    1361      203634 :         innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
    1362             :                                             uniqueRelInfo);
    1363      203634 :         MemoryContextSwitchTo(old_context);
    1364             : 
    1365      203634 :         if (extra_clauses)
    1366         642 :             *extra_clauses = outer_exprs;
    1367      203634 :         return true;            /* Success! */
    1368             :     }
    1369             :     else
    1370             :     {
    1371             :         /*
    1372             :          * None of the join conditions for outerrel proved innerrel unique, so
    1373             :          * we can safely reject this outerrel or any subset of it in future
    1374             :          * checks.
    1375             :          *
    1376             :          * However, in normal planning mode, caching this knowledge is totally
    1377             :          * pointless; it won't be queried again, because we build up joinrels
    1378             :          * from smaller to larger.  It is useful in GEQO mode, where the
    1379             :          * knowledge can be carried across successive planning attempts; and
    1380             :          * it's likely to be useful when using join-search plugins, too. Hence
    1381             :          * cache when join_search_private is non-NULL.  (Yeah, that's a hack,
    1382             :          * but it seems reasonable.)
    1383             :          *
    1384             :          * Also, allow callers to override that heuristic and force caching;
    1385             :          * that's useful for reduce_unique_semijoins, which calls here before
    1386             :          * the normal join search starts.
    1387             :          */
    1388      153370 :         if (force_cache || root->join_search_private)
    1389             :         {
    1390        1106 :             old_context = MemoryContextSwitchTo(root->planner_cxt);
    1391        1106 :             innerrel->non_unique_for_rels =
    1392        1106 :                 lappend(innerrel->non_unique_for_rels,
    1393        1106 :                         bms_copy(outerrelids));
    1394        1106 :             MemoryContextSwitchTo(old_context);
    1395             :         }
    1396             : 
    1397      153370 :         return false;
    1398             :     }
    1399             : }
    1400             : 
    1401             : /*
    1402             :  * is_innerrel_unique_for
    1403             :  *    Check if the innerrel provably contains at most one tuple matching any
    1404             :  *    tuple from the outerrel, based on join clauses in the 'restrictlist'.
    1405             :  */
    1406             : static bool
    1407      357004 : is_innerrel_unique_for(PlannerInfo *root,
    1408             :                        Relids joinrelids,
    1409             :                        Relids outerrelids,
    1410             :                        RelOptInfo *innerrel,
    1411             :                        JoinType jointype,
    1412             :                        List *restrictlist,
    1413             :                        List **extra_clauses)
    1414             : {
    1415      357004 :     List       *clause_list = NIL;
    1416             :     ListCell   *lc;
    1417             : 
    1418             :     /*
    1419             :      * Search for mergejoinable clauses that constrain the inner rel against
    1420             :      * the outer rel.  If an operator is mergejoinable then it behaves like
    1421             :      * equality for some btree opclass, so it's what we want.  The
    1422             :      * mergejoinability test also eliminates clauses containing volatile
    1423             :      * functions, which we couldn't depend on.
    1424             :      */
    1425      828182 :     foreach(lc, restrictlist)
    1426             :     {
    1427      471178 :         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
    1428             : 
    1429             :         /*
    1430             :          * As noted above, if it's a pushed-down clause and we're at an outer
    1431             :          * join, we can't use it.
    1432             :          */
    1433      471178 :         if (IS_OUTER_JOIN(jointype) &&
    1434       89096 :             RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
    1435        3510 :             continue;
    1436             : 
    1437             :         /* Ignore if it's not a mergejoinable clause */
    1438      467668 :         if (!restrictinfo->can_join ||
    1439      417188 :             restrictinfo->mergeopfamilies == NIL)
    1440       51380 :             continue;           /* not mergejoinable */
    1441             : 
    1442             :         /*
    1443             :          * Check if the clause has the form "outer op inner" or "inner op
    1444             :          * outer", and if so mark which side is inner.
    1445             :          */
    1446      416288 :         if (!clause_sides_match_join(restrictinfo, outerrelids,
    1447             :                                      innerrel->relids))
    1448          14 :             continue;           /* no good for these input relations */
    1449             : 
    1450             :         /* OK, add to the list */
    1451      416274 :         clause_list = lappend(clause_list, restrictinfo);
    1452             :     }
    1453             : 
    1454             :     /* Let rel_is_distinct_for() do the hard work */
    1455      357004 :     return rel_is_distinct_for(root, innerrel, clause_list, extra_clauses);
    1456             : }
    1457             : 
    1458             : /*
    1459             :  * Update EC members to point to the remaining relation instead of the removed
    1460             :  * one, removing duplicates.
    1461             :  *
    1462             :  * Restriction clauses for base relations are already distributed to
    1463             :  * the respective baserestrictinfo lists (see
    1464             :  * generate_implied_equalities_for_column). The above code has already processed
    1465             :  * this list and updated these clauses to reference the remaining
    1466             :  * relation, so that we can skip them here based on their relids.
    1467             :  *
    1468             :  * Likewise, we have already processed the join clauses that join the
    1469             :  * removed relation to the remaining one.
    1470             :  *
    1471             :  * Finally, there might be join clauses tying the removed relation to
    1472             :  * some third relation.  We can't just delete the source clauses and
    1473             :  * regenerate them from the EC because the corresponding equality
    1474             :  * operators might be missing (see the handling of ec_broken).
    1475             :  * Therefore, we will update the references in the source clauses.
    1476             :  *
    1477             :  * Derived clauses can be generated again, so it is simpler just to
    1478             :  * delete them.
    1479             :  */
    1480             : static void
    1481         900 : update_eclasses(EquivalenceClass *ec, int from, int to)
    1482             : {
    1483         900 :     List       *new_members = NIL;
    1484         900 :     List       *new_sources = NIL;
    1485             : 
    1486        3636 :     foreach_node(EquivalenceMember, em, ec->ec_members)
    1487             :     {
    1488        1836 :         bool        is_redundant = false;
    1489             : 
    1490        1836 :         if (!bms_is_member(from, em->em_relids))
    1491             :         {
    1492         918 :             new_members = lappend(new_members, em);
    1493         918 :             continue;
    1494             :         }
    1495             : 
    1496         918 :         em->em_relids = adjust_relid_set(em->em_relids, from, to);
    1497         918 :         em->em_jdomain->jd_relids = adjust_relid_set(em->em_jdomain->jd_relids, from, to);
    1498             : 
    1499             :         /* We only process inner joins */
    1500         918 :         ChangeVarNodes((Node *) em->em_expr, from, to, 0);
    1501             : 
    1502        1860 :         foreach_node(EquivalenceMember, other, new_members)
    1503             :         {
    1504         298 :             if (!equal(em->em_relids, other->em_relids))
    1505          24 :                 continue;
    1506             : 
    1507         274 :             if (equal(em->em_expr, other->em_expr))
    1508             :             {
    1509         274 :                 is_redundant = true;
    1510         274 :                 break;
    1511             :             }
    1512             :         }
    1513             : 
    1514         918 :         if (!is_redundant)
    1515         644 :             new_members = lappend(new_members, em);
    1516             :     }
    1517             : 
    1518         900 :     list_free(ec->ec_members);
    1519         900 :     ec->ec_members = new_members;
    1520             : 
    1521         900 :     list_free(ec->ec_derives);
    1522         900 :     ec->ec_derives = NULL;
    1523             : 
    1524             :     /* Update EC source expressions */
    1525        2736 :     foreach_node(RestrictInfo, rinfo, ec->ec_sources)
    1526             :     {
    1527         936 :         bool        is_redundant = false;
    1528             : 
    1529         936 :         if (!bms_is_member(from, rinfo->required_relids))
    1530             :         {
    1531         118 :             new_sources = lappend(new_sources, rinfo);
    1532         118 :             continue;
    1533             :         }
    1534             : 
    1535         818 :         ChangeVarNodes((Node *) rinfo, from, to, 0);
    1536             : 
    1537             :         /*
    1538             :          * After switching the clause to the remaining relation, check it for
    1539             :          * redundancy with existing ones. We don't have to check for
    1540             :          * redundancy with derived clauses, because we've just deleted them.
    1541             :          */
    1542        1660 :         foreach_node(RestrictInfo, other, new_sources)
    1543             :         {
    1544          36 :             if (!equal(rinfo->clause_relids, other->clause_relids))
    1545          24 :                 continue;
    1546             : 
    1547          12 :             if (equal(rinfo->clause, other->clause))
    1548             :             {
    1549          12 :                 is_redundant = true;
    1550          12 :                 break;
    1551             :             }
    1552             :         }
    1553             : 
    1554         818 :         if (!is_redundant)
    1555         806 :             new_sources = lappend(new_sources, rinfo);
    1556             :     }
    1557             : 
    1558         900 :     list_free(ec->ec_sources);
    1559         900 :     ec->ec_sources = new_sources;
    1560         900 :     ec->ec_relids = adjust_relid_set(ec->ec_relids, from, to);
    1561         900 : }
    1562             : 
    1563             : /*
    1564             :  * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field,
    1565             :  * which makes almost every RestrictInfo unique.  This type of comparison is
    1566             :  * useful when removing duplicates while moving RestrictInfo's from removed
    1567             :  * relation to remaining relation during self-join elimination.
    1568             :  *
    1569             :  * XXX: In the future, we might remove the 'rinfo_serial' field completely and
    1570             :  * get rid of this function.
    1571             :  */
    1572             : static bool
    1573         506 : restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b)
    1574             : {
    1575         506 :     int         saved_rinfo_serial = a->rinfo_serial;
    1576             :     bool        result;
    1577             : 
    1578         506 :     a->rinfo_serial = b->rinfo_serial;
    1579         506 :     result = equal(a, b);
    1580         506 :     a->rinfo_serial = saved_rinfo_serial;
    1581             : 
    1582         506 :     return result;
    1583             : }
    1584             : 
    1585             : /*
    1586             :  * This function adds all non-redundant clauses to the keeping relation
    1587             :  * during self-join elimination.  That is a contradictory operation. On the
    1588             :  * one hand, we reduce the length of the `restrict` lists, which can
    1589             :  * impact planning or executing time.  Additionally, we improve the
    1590             :  * accuracy of cardinality estimation.  On the other hand, it is one more
    1591             :  * place that can make planning time much longer in specific cases.  It
    1592             :  * would have been better to avoid calling the equal() function here, but
    1593             :  * it's the only way to detect duplicated inequality expressions.
    1594             :  *
    1595             :  * (*keep_rinfo_list) is given by pointer because it might be altered by
    1596             :  * distribute_restrictinfo_to_rels().
    1597             :  */
    1598             : static void
    1599        1176 : add_non_redundant_clauses(PlannerInfo *root,
    1600             :                           List *rinfo_candidates,
    1601             :                           List **keep_rinfo_list,
    1602             :                           Index removed_relid)
    1603             : {
    1604        3250 :     foreach_node(RestrictInfo, rinfo, rinfo_candidates)
    1605             :     {
    1606         898 :         bool        is_redundant = false;
    1607             : 
    1608             :         Assert(!bms_is_member(removed_relid, rinfo->required_relids));
    1609             : 
    1610        2160 :         foreach_node(RestrictInfo, src, (*keep_rinfo_list))
    1611             :         {
    1612         518 :             if (!bms_equal(src->clause_relids, rinfo->clause_relids))
    1613             :                 /* Can't compare trivially different clauses */
    1614           6 :                 continue;
    1615             : 
    1616         512 :             if (src == rinfo ||
    1617         512 :                 (rinfo->parent_ec != NULL &&
    1618         804 :                  src->parent_ec == rinfo->parent_ec) ||
    1619         506 :                 restrict_infos_logically_equal(rinfo, src))
    1620             :             {
    1621         154 :                 is_redundant = true;
    1622         154 :                 break;
    1623             :             }
    1624             :         }
    1625         898 :         if (!is_redundant)
    1626         744 :             distribute_restrictinfo_to_rels(root, rinfo);
    1627             :     }
    1628        1176 : }
    1629             : 
    1630             : /*
    1631             :  * Remove a relation after we have proven that it participates only in an
    1632             :  * unneeded unique self-join.
    1633             :  *
    1634             :  * Replace any links in planner info structures.
    1635             :  *
    1636             :  * Transfer join and restriction clauses from the removed relation to the
    1637             :  * remaining one. We change the Vars of the clause to point to the
    1638             :  * remaining relation instead of the removed one. The clauses that require
    1639             :  * a subset of joinrelids become restriction clauses of the remaining
    1640             :  * relation, and others remain join clauses. We append them to
    1641             :  * baserestrictinfo and joininfo, respectively, trying not to introduce
    1642             :  * duplicates.
    1643             :  *
    1644             :  * We also have to process the 'joinclauses' list here, because it
    1645             :  * contains EC-derived join clauses which must become filter clauses. It
    1646             :  * is not enough to just correct the ECs because the EC-derived
    1647             :  * restrictions are generated before join removal (see
    1648             :  * generate_base_implied_equalities).
    1649             :  *
    1650             :  * NOTE: Remember to keep the code in sync with PlannerInfo to be sure all
    1651             :  * cached relids and relid bitmapsets can be correctly cleaned during the
    1652             :  * self-join elimination procedure.
    1653             :  */
    1654             : static void
    1655         588 : remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark,
    1656             :                      RelOptInfo *toKeep, RelOptInfo *toRemove,
    1657             :                      List *restrictlist)
    1658             : {
    1659             :     List       *joininfos;
    1660             :     ListCell   *lc;
    1661             :     int         i;
    1662         588 :     List       *jinfo_candidates = NIL;
    1663         588 :     List       *binfo_candidates = NIL;
    1664             : 
    1665             :     Assert(toKeep->relid > 0);
    1666             :     Assert(toRemove->relid > 0);
    1667             : 
    1668             :     /*
    1669             :      * Replace the index of the removing table with the keeping one. The
    1670             :      * technique of removing/distributing restrictinfo is used here to attach
    1671             :      * just appeared (for keeping relation) join clauses and avoid adding
    1672             :      * duplicates of those that already exist in the joininfo list.
    1673             :      */
    1674         588 :     joininfos = list_copy(toRemove->joininfo);
    1675        1254 :     foreach_node(RestrictInfo, rinfo, joininfos)
    1676             :     {
    1677          78 :         remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
    1678          78 :         ChangeVarNodes((Node *) rinfo, toRemove->relid, toKeep->relid, 0);
    1679             : 
    1680          78 :         if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
    1681          60 :             jinfo_candidates = lappend(jinfo_candidates, rinfo);
    1682             :         else
    1683          18 :             binfo_candidates = lappend(binfo_candidates, rinfo);
    1684             :     }
    1685             : 
    1686             :     /*
    1687             :      * Concatenate restrictlist to the list of base restrictions of the
    1688             :      * removing table just to simplify the replacement procedure: all of them
    1689             :      * weren't connected to any keeping relations and need to be added to some
    1690             :      * rels.
    1691             :      */
    1692         588 :     toRemove->baserestrictinfo = list_concat(toRemove->baserestrictinfo,
    1693             :                                              restrictlist);
    1694        1996 :     foreach_node(RestrictInfo, rinfo, toRemove->baserestrictinfo)
    1695             :     {
    1696         820 :         ChangeVarNodes((Node *) rinfo, toRemove->relid, toKeep->relid, 0);
    1697             : 
    1698         820 :         if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
    1699           0 :             jinfo_candidates = lappend(jinfo_candidates, rinfo);
    1700             :         else
    1701         820 :             binfo_candidates = lappend(binfo_candidates, rinfo);
    1702             :     }
    1703             : 
    1704             :     /*
    1705             :      * Now, add all non-redundant clauses to the keeping relation.
    1706             :      */
    1707         588 :     add_non_redundant_clauses(root, binfo_candidates,
    1708             :                               &toKeep->baserestrictinfo, toRemove->relid);
    1709         588 :     add_non_redundant_clauses(root, jinfo_candidates,
    1710             :                               &toKeep->joininfo, toRemove->relid);
    1711             : 
    1712         588 :     list_free(binfo_candidates);
    1713         588 :     list_free(jinfo_candidates);
    1714             : 
    1715             :     /*
    1716             :      * Arrange equivalence classes, mentioned removing a table, with the
    1717             :      * keeping one: varno of removing table should be replaced in members and
    1718             :      * sources lists. Also, remove duplicated elements if this replacement
    1719             :      * procedure created them.
    1720             :      */
    1721         588 :     i = -1;
    1722        1488 :     while ((i = bms_next_member(toRemove->eclass_indexes, i)) >= 0)
    1723             :     {
    1724         900 :         EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
    1725             : 
    1726         900 :         update_eclasses(ec, toRemove->relid, toKeep->relid);
    1727         900 :         toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i);
    1728             :     }
    1729             : 
    1730             :     /*
    1731             :      * Transfer the targetlist and attr_needed flags.
    1732             :      */
    1733             : 
    1734        2360 :     foreach(lc, toRemove->reltarget->exprs)
    1735             :     {
    1736        1772 :         Node       *node = lfirst(lc);
    1737             : 
    1738        1772 :         ChangeVarNodes(node, toRemove->relid, toKeep->relid, 0);
    1739        1772 :         if (!list_member(toKeep->reltarget->exprs, node))
    1740         174 :             toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node);
    1741             :     }
    1742             : 
    1743        7470 :     for (i = toKeep->min_attr; i <= toKeep->max_attr; i++)
    1744             :     {
    1745        6882 :         int         attno = i - toKeep->min_attr;
    1746             : 
    1747       13764 :         toRemove->attr_needed[attno] = adjust_relid_set(toRemove->attr_needed[attno],
    1748        6882 :                                                         toRemove->relid, toKeep->relid);
    1749        6882 :         toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno],
    1750        6882 :                                                      toRemove->attr_needed[attno]);
    1751             :     }
    1752             : 
    1753             :     /*
    1754             :      * If the removed relation has a row mark, transfer it to the remaining
    1755             :      * one.
    1756             :      *
    1757             :      * If both rels have row marks, just keep the one corresponding to the
    1758             :      * remaining relation because we verified earlier that they have the same
    1759             :      * strength.
    1760             :      */
    1761         588 :     if (rmark)
    1762             :     {
    1763          74 :         if (kmark)
    1764             :         {
    1765             :             Assert(kmark->markType == rmark->markType);
    1766             : 
    1767          74 :             root->rowMarks = list_delete_ptr(root->rowMarks, rmark);
    1768             :         }
    1769             :         else
    1770             :         {
    1771             :             /* Shouldn't have inheritance children here. */
    1772             :             Assert(rmark->rti == rmark->prti);
    1773             : 
    1774           0 :             rmark->rti = rmark->prti = toKeep->relid;
    1775             :         }
    1776             :     }
    1777             : 
    1778             :     /*
    1779             :      * Replace varno in all the query structures, except nodes RangeTblRef
    1780             :      * otherwise later remove_rel_from_joinlist will yield errors.
    1781             :      */
    1782         588 :     ChangeVarNodesExtended((Node *) root->parse, toRemove->relid, toKeep->relid, 0, false);
    1783             : 
    1784             :     /* Replace links in the planner info */
    1785         588 :     remove_rel_from_query(root, toRemove, toKeep->relid, NULL, NULL);
    1786             : 
    1787             :     /* At last, replace varno in root targetlist and HAVING clause */
    1788         588 :     ChangeVarNodes((Node *) root->processed_tlist, toRemove->relid, toKeep->relid, 0);
    1789         588 :     ChangeVarNodes((Node *) root->processed_groupClause, toRemove->relid, toKeep->relid, 0);
    1790             : 
    1791         588 :     adjust_relid_set(root->all_result_relids, toRemove->relid, toKeep->relid);
    1792         588 :     adjust_relid_set(root->leaf_result_relids, toRemove->relid, toKeep->relid);
    1793             : 
    1794             :     /*
    1795             :      * There may be references to the rel in root->fkey_list, but if so,
    1796             :      * match_foreign_keys_to_quals() will get rid of them.
    1797             :      */
    1798             : 
    1799             :     /*
    1800             :      * Finally, remove the rel from the baserel array to prevent it from being
    1801             :      * referenced again.  (We can't do this earlier because
    1802             :      * remove_join_clause_from_rels will touch it.)
    1803             :      */
    1804         588 :     root->simple_rel_array[toRemove->relid] = NULL;
    1805             : 
    1806             :     /* And nuke the RelOptInfo, just in case there's another access path. */
    1807         588 :     pfree(toRemove);
    1808             : 
    1809             :     /*
    1810             :      * Now repeat construction of attr_needed bits coming from all other
    1811             :      * sources.
    1812             :      */
    1813         588 :     rebuild_placeholder_attr_needed(root);
    1814         588 :     rebuild_joinclause_attr_needed(root);
    1815         588 :     rebuild_eclass_attr_needed(root);
    1816         588 :     rebuild_lateral_attr_needed(root);
    1817         588 : }
    1818             : 
    1819             : /*
    1820             :  * split_selfjoin_quals
    1821             :  *      Processes 'joinquals' by building two lists: one containing the quals
    1822             :  *      where the columns/exprs are on either side of the join match and
    1823             :  *      another one containing the remaining quals.
    1824             :  *
    1825             :  * 'joinquals' must only contain quals for a RTE_RELATION being joined to
    1826             :  * itself.
    1827             :  */
    1828             : static void
    1829        1976 : split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals,
    1830             :                      List **otherjoinquals, int from, int to)
    1831             : {
    1832        1976 :     List       *sjoinquals = NIL;
    1833        1976 :     List       *ojoinquals = NIL;
    1834             : 
    1835        6062 :     foreach_node(RestrictInfo, rinfo, joinquals)
    1836             :     {
    1837             :         OpExpr     *expr;
    1838             :         Node       *leftexpr;
    1839             :         Node       *rightexpr;
    1840             : 
    1841             :         /* In general, clause looks like F(arg1) = G(arg2) */
    1842        4220 :         if (!rinfo->mergeopfamilies ||
    1843        4220 :             bms_num_members(rinfo->clause_relids) != 2 ||
    1844        4220 :             bms_membership(rinfo->left_relids) != BMS_SINGLETON ||
    1845        2110 :             bms_membership(rinfo->right_relids) != BMS_SINGLETON)
    1846             :         {
    1847           0 :             ojoinquals = lappend(ojoinquals, rinfo);
    1848           0 :             continue;
    1849             :         }
    1850             : 
    1851        2110 :         expr = (OpExpr *) rinfo->clause;
    1852             : 
    1853        2110 :         if (!IsA(expr, OpExpr) || list_length(expr->args) != 2)
    1854             :         {
    1855           0 :             ojoinquals = lappend(ojoinquals, rinfo);
    1856           0 :             continue;
    1857             :         }
    1858             : 
    1859        2110 :         leftexpr = get_leftop(rinfo->clause);
    1860        2110 :         rightexpr = copyObject(get_rightop(rinfo->clause));
    1861             : 
    1862        2110 :         if (leftexpr && IsA(leftexpr, RelabelType))
    1863          12 :             leftexpr = (Node *) ((RelabelType *) leftexpr)->arg;
    1864        2110 :         if (rightexpr && IsA(rightexpr, RelabelType))
    1865           6 :             rightexpr = (Node *) ((RelabelType *) rightexpr)->arg;
    1866             : 
    1867             :         /*
    1868             :          * Quite an expensive operation, narrowing the use case. For example,
    1869             :          * when we have cast of the same var to different (but compatible)
    1870             :          * types.
    1871             :          */
    1872        2110 :         ChangeVarNodes(rightexpr, bms_singleton_member(rinfo->right_relids),
    1873        2110 :                        bms_singleton_member(rinfo->left_relids), 0);
    1874             : 
    1875        2110 :         if (equal(leftexpr, rightexpr))
    1876        1624 :             sjoinquals = lappend(sjoinquals, rinfo);
    1877             :         else
    1878         486 :             ojoinquals = lappend(ojoinquals, rinfo);
    1879             :     }
    1880             : 
    1881        1976 :     *selfjoinquals = sjoinquals;
    1882        1976 :     *otherjoinquals = ojoinquals;
    1883        1976 : }
    1884             : 
    1885             : /*
    1886             :  * Check for a case when uniqueness is at least partly derived from a
    1887             :  * baserestrictinfo clause. In this case, we have a chance to return only
    1888             :  * one row (if such clauses on both sides of SJ are equal) or nothing (if they
    1889             :  * are different).
    1890             :  */
    1891             : static bool
    1892         654 : match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses,
    1893             :                      Index relid)
    1894             : {
    1895        1326 :     foreach_node(RestrictInfo, rinfo, uclauses)
    1896             :     {
    1897             :         Expr       *clause;
    1898             :         Node       *iclause;
    1899             :         Node       *c1;
    1900         150 :         bool        matched = false;
    1901             : 
    1902             :         Assert(outer->relid > 0 && relid > 0);
    1903             : 
    1904             :         /* Only filters like f(R.x1,...,R.xN) == expr we should consider. */
    1905             :         Assert(bms_is_empty(rinfo->left_relids) ^
    1906             :                bms_is_empty(rinfo->right_relids));
    1907             : 
    1908         150 :         clause = (Expr *) copyObject(rinfo->clause);
    1909         150 :         ChangeVarNodes((Node *) clause, relid, outer->relid, 0);
    1910             : 
    1911         150 :         iclause = bms_is_empty(rinfo->left_relids) ? get_rightop(clause) :
    1912         144 :             get_leftop(clause);
    1913         150 :         c1 = bms_is_empty(rinfo->left_relids) ? get_leftop(clause) :
    1914         144 :             get_rightop(clause);
    1915             : 
    1916             :         /*
    1917             :          * Compare these left and right sides with the corresponding sides of
    1918             :          * the outer's filters. If no one is detected - return immediately.
    1919             :          */
    1920         408 :         foreach_node(RestrictInfo, orinfo, outer->baserestrictinfo)
    1921             :         {
    1922             :             Node       *oclause;
    1923             :             Node       *c2;
    1924             : 
    1925         192 :             if (orinfo->mergeopfamilies == NIL)
    1926             :                 /* Don't consider clauses that aren't similar to 'F(X)=G(Y)' */
    1927          60 :                 continue;
    1928             : 
    1929             :             Assert(is_opclause(orinfo->clause));
    1930             : 
    1931         264 :             oclause = bms_is_empty(orinfo->left_relids) ?
    1932         132 :                 get_rightop(orinfo->clause) : get_leftop(orinfo->clause);
    1933         264 :             c2 = (bms_is_empty(orinfo->left_relids) ?
    1934         132 :                   get_leftop(orinfo->clause) : get_rightop(orinfo->clause));
    1935             : 
    1936         132 :             if (equal(iclause, oclause) && equal(c1, c2))
    1937             :             {
    1938          84 :                 matched = true;
    1939          84 :                 break;
    1940             :             }
    1941             :         }
    1942             : 
    1943         150 :         if (!matched)
    1944          66 :             return false;
    1945             :     }
    1946             : 
    1947         588 :     return true;
    1948             : }
    1949             : 
    1950             : /*
    1951             :  * Find and remove unique self-joins in a group of base relations that have
    1952             :  * the same Oid.
    1953             :  *
    1954             :  * Returns a set of relids that were removed.
    1955             :  */
    1956             : static Relids
    1957       10436 : remove_self_joins_one_group(PlannerInfo *root, Relids relids)
    1958             : {
    1959       10436 :     Relids      result = NULL;
    1960             :     int         k;              /* Index of kept relation */
    1961       10436 :     int         r = -1;         /* Index of removed relation */
    1962             : 
    1963       32452 :     while ((r = bms_next_member(relids, r)) > 0)
    1964             :     {
    1965       22016 :         RelOptInfo *inner = root->simple_rel_array[r];
    1966             : 
    1967       22016 :         k = r;
    1968             : 
    1969       34370 :         while ((k = bms_next_member(relids, k)) > 0)
    1970             :         {
    1971       12942 :             Relids      joinrelids = NULL;
    1972       12942 :             RelOptInfo *outer = root->simple_rel_array[k];
    1973             :             List       *restrictlist;
    1974             :             List       *selfjoinquals;
    1975             :             List       *otherjoinquals;
    1976             :             ListCell   *lc;
    1977       12942 :             bool        jinfo_check = true;
    1978       12942 :             PlanRowMark *omark = NULL;
    1979       12942 :             PlanRowMark *imark = NULL;
    1980       12942 :             List       *uclauses = NIL;
    1981             : 
    1982             :             /* A sanity check: the relations have the same Oid. */
    1983             :             Assert(root->simple_rte_array[k]->relid ==
    1984             :                    root->simple_rte_array[r]->relid);
    1985             : 
    1986             :             /*
    1987             :              * It is impossible to eliminate the join of two relations if they
    1988             :              * belong to different rules of order. Otherwise, the planner
    1989             :              * can't find any variants of the correct query plan.
    1990             :              */
    1991       15980 :             foreach(lc, root->join_info_list)
    1992             :             {
    1993       10088 :                 SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc);
    1994             : 
    1995       20176 :                 if ((bms_is_member(k, info->syn_lefthand) ^
    1996       14342 :                      bms_is_member(r, info->syn_lefthand)) ||
    1997        4254 :                     (bms_is_member(k, info->syn_righthand) ^
    1998        4254 :                      bms_is_member(r, info->syn_righthand)))
    1999             :                 {
    2000        7050 :                     jinfo_check = false;
    2001        7050 :                     break;
    2002             :                 }
    2003             :             }
    2004       12942 :             if (!jinfo_check)
    2005       12354 :                 continue;
    2006             : 
    2007             :             /*
    2008             :              * Check Row Marks equivalence. We can't remove the join if the
    2009             :              * relations have row marks of different strength (e.g., one is
    2010             :              * locked FOR UPDATE, and another just has ROW_MARK_REFERENCE for
    2011             :              * EvalPlanQual rechecking).
    2012             :              */
    2013        6100 :             foreach(lc, root->rowMarks)
    2014             :             {
    2015         380 :                 PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc);
    2016             : 
    2017         380 :                 if (rowMark->rti == k)
    2018             :                 {
    2019             :                     Assert(imark == NULL);
    2020         172 :                     imark = rowMark;
    2021             :                 }
    2022         208 :                 else if (rowMark->rti == r)
    2023             :                 {
    2024             :                     Assert(omark == NULL);
    2025         172 :                     omark = rowMark;
    2026             :                 }
    2027             : 
    2028         380 :                 if (omark && imark)
    2029         172 :                     break;
    2030             :             }
    2031        5892 :             if (omark && imark && omark->markType != imark->markType)
    2032          52 :                 continue;
    2033             : 
    2034             :             /*
    2035             :              * We only deal with base rels here, so their relids bitset
    2036             :              * contains only one member -- their relid.
    2037             :              */
    2038        5840 :             joinrelids = bms_add_member(joinrelids, r);
    2039        5840 :             joinrelids = bms_add_member(joinrelids, k);
    2040             : 
    2041             :             /*
    2042             :              * PHVs should not impose any constraints on removing self-joins.
    2043             :              */
    2044             : 
    2045             :             /*
    2046             :              * At this stage, joininfo lists of inner and outer can contain
    2047             :              * only clauses required for a superior outer join that can't
    2048             :              * influence this optimization. So, we can avoid to call the
    2049             :              * build_joinrel_restrictlist() routine.
    2050             :              */
    2051        5840 :             restrictlist = generate_join_implied_equalities(root, joinrelids,
    2052             :                                                             inner->relids,
    2053             :                                                             outer, NULL);
    2054        5840 :             if (restrictlist == NIL)
    2055        3864 :                 continue;
    2056             : 
    2057             :             /*
    2058             :              * Process restrictlist to separate the self-join quals from the
    2059             :              * other quals. e.g., "x = x" goes to selfjoinquals and "a = b" to
    2060             :              * otherjoinquals.
    2061             :              */
    2062        1976 :             split_selfjoin_quals(root, restrictlist, &selfjoinquals,
    2063        1976 :                                  &otherjoinquals, inner->relid, outer->relid);
    2064             : 
    2065             :             Assert(list_length(restrictlist) ==
    2066             :                    (list_length(selfjoinquals) + list_length(otherjoinquals)));
    2067             : 
    2068             :             /*
    2069             :              * To enable SJE for the only degenerate case without any self
    2070             :              * join clauses at all, add baserestrictinfo to this list. The
    2071             :              * degenerate case works only if both sides have the same clause.
    2072             :              * So doesn't matter which side to add.
    2073             :              */
    2074        1976 :             selfjoinquals = list_concat(selfjoinquals, outer->baserestrictinfo);
    2075             : 
    2076             :             /*
    2077             :              * Determine if the inner table can duplicate outer rows.  We must
    2078             :              * bypass the unique rel cache here since we're possibly using a
    2079             :              * subset of join quals. We can use 'force_cache' == true when all
    2080             :              * join quals are self-join quals.  Otherwise, we could end up
    2081             :              * putting false negatives in the cache.
    2082             :              */
    2083        1976 :             if (!innerrel_is_unique_ext(root, joinrelids, inner->relids,
    2084             :                                         outer, JOIN_INNER, selfjoinquals,
    2085        1976 :                                         list_length(otherjoinquals) == 0,
    2086             :                                         &uclauses))
    2087        1322 :                 continue;
    2088             : 
    2089             :             /*
    2090             :              * 'uclauses' is the copy of outer->baserestrictinfo that are
    2091             :              * associated with an index.  We proved by matching selfjoinquals
    2092             :              * to a unique index that the outer relation has at most one
    2093             :              * matching row for each inner row.  Sometimes that is not enough.
    2094             :              * e.g. "WHERE s1.b = s2.b AND s1.a = 1 AND s2.a = 2" when the
    2095             :              * unique index is (a,b).  Having non-empty uclauses, we must
    2096             :              * validate that the inner baserestrictinfo contains the same
    2097             :              * expressions, or we won't match the same row on each side of the
    2098             :              * join.
    2099             :              */
    2100         654 :             if (!match_unique_clauses(root, inner, uclauses, outer->relid))
    2101          66 :                 continue;
    2102             : 
    2103             :             /*
    2104             :              * We can remove either relation, so remove the inner one in order
    2105             :              * to simplify this loop.
    2106             :              */
    2107         588 :             remove_self_join_rel(root, omark, imark, outer, inner, restrictlist);
    2108             : 
    2109         588 :             result = bms_add_member(result, r);
    2110             : 
    2111             :             /* We have removed the outer relation, try the next one. */
    2112         588 :             break;
    2113             :         }
    2114             :     }
    2115             : 
    2116       10436 :     return result;
    2117             : }
    2118             : 
    2119             : /*
    2120             :  * Gather indexes of base relations from the joinlist and try to eliminate self
    2121             :  * joins.
    2122             :  */
    2123             : static Relids
    2124      128114 : remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove)
    2125             : {
    2126             :     ListCell   *jl;
    2127      128114 :     Relids      relids = NULL;
    2128      128114 :     SelfJoinCandidate *candidates = NULL;
    2129             :     int         i;
    2130             :     int         j;
    2131             :     int         numRels;
    2132             : 
    2133             :     /* Collect indexes of base relations of the join tree */
    2134      449488 :     foreach(jl, joinlist)
    2135             :     {
    2136      321374 :         Node       *jlnode = (Node *) lfirst(jl);
    2137             : 
    2138      321374 :         if (IsA(jlnode, RangeTblRef))
    2139             :         {
    2140      318044 :             int         varno = ((RangeTblRef *) jlnode)->rtindex;
    2141      318044 :             RangeTblEntry *rte = root->simple_rte_array[varno];
    2142             : 
    2143             :             /*
    2144             :              * We only consider ordinary relations as candidates to be
    2145             :              * removed, and these relations should not have TABLESAMPLE
    2146             :              * clauses specified.  Removing a relation with TABLESAMPLE clause
    2147             :              * could potentially change the syntax of the query. Because of
    2148             :              * UPDATE/DELETE EPQ mechanism, currently Query->resultRelation or
    2149             :              * Query->mergeTargetRelation associated rel cannot be eliminated.
    2150             :              */
    2151      318044 :             if (rte->rtekind == RTE_RELATION &&
    2152      280832 :                 rte->relkind == RELKIND_RELATION &&
    2153      275770 :                 rte->tablesample == NULL &&
    2154      275746 :                 varno != root->parse->resultRelation &&
    2155      273992 :                 varno != root->parse->mergeTargetRelation)
    2156             :             {
    2157             :                 Assert(!bms_is_member(varno, relids));
    2158      273992 :                 relids = bms_add_member(relids, varno);
    2159             :             }
    2160             :         }
    2161        3330 :         else if (IsA(jlnode, List))
    2162             :         {
    2163             :             /* Recursively go inside the sub-joinlist */
    2164        3330 :             toRemove = remove_self_joins_recurse(root, (List *) jlnode,
    2165             :                                                  toRemove);
    2166             :         }
    2167             :         else
    2168           0 :             elog(ERROR, "unrecognized joinlist node type: %d",
    2169             :                  (int) nodeTag(jlnode));
    2170             :     }
    2171             : 
    2172      128114 :     numRels = bms_num_members(relids);
    2173             : 
    2174             :     /* Need at least two relations for the join */
    2175      128114 :     if (numRels < 2)
    2176       26250 :         return toRemove;
    2177             : 
    2178             :     /*
    2179             :      * In order to find relations with the same oid we first build an array of
    2180             :      * candidates and then sort it by oid.
    2181             :      */
    2182      101864 :     candidates = (SelfJoinCandidate *) palloc(sizeof(SelfJoinCandidate) *
    2183             :                                               numRels);
    2184      101864 :     i = -1;
    2185      101864 :     j = 0;
    2186      357510 :     while ((i = bms_next_member(relids, i)) >= 0)
    2187             :     {
    2188      255646 :         candidates[j].relid = i;
    2189      255646 :         candidates[j].reloid = root->simple_rte_array[i]->relid;
    2190      255646 :         j++;
    2191             :     }
    2192             : 
    2193      101864 :     qsort(candidates, numRels, sizeof(SelfJoinCandidate),
    2194             :           self_join_candidates_cmp);
    2195             : 
    2196             :     /*
    2197             :      * Iteratively form a group of relation indexes with the same oid and
    2198             :      * launch the routine that detects self-joins in this group and removes
    2199             :      * excessive range table entries.
    2200             :      *
    2201             :      * At the end of the iteration, exclude the group from the overall relids
    2202             :      * list. So each next iteration of the cycle will involve less and less
    2203             :      * value of relids.
    2204             :      */
    2205      101864 :     i = 0;
    2206      357510 :     for (j = 1; j < numRels + 1; j++)
    2207             :     {
    2208      255646 :         if (j == numRels || candidates[j].reloid != candidates[i].reloid)
    2209             :         {
    2210      244138 :             if (j - i >= 2)
    2211             :             {
    2212             :                 /* Create a group of relation indexes with the same oid */
    2213       10370 :                 Relids      group = NULL;
    2214             :                 Relids      removed;
    2215             : 
    2216       32248 :                 while (i < j)
    2217             :                 {
    2218       21878 :                     group = bms_add_member(group, candidates[i].relid);
    2219       21878 :                     i++;
    2220             :                 }
    2221       10370 :                 relids = bms_del_members(relids, group);
    2222             : 
    2223             :                 /*
    2224             :                  * Try to remove self-joins from a group of identical entries.
    2225             :                  * Make the next attempt iteratively - if something is deleted
    2226             :                  * from a group, changes in clauses and equivalence classes
    2227             :                  * can give us a chance to find more candidates.
    2228             :                  */
    2229             :                 do
    2230             :                 {
    2231             :                     Assert(!bms_overlap(group, toRemove));
    2232       10436 :                     removed = remove_self_joins_one_group(root, group);
    2233       10436 :                     toRemove = bms_add_members(toRemove, removed);
    2234       10436 :                     group = bms_del_members(group, removed);
    2235         564 :                 } while (!bms_is_empty(removed) &&
    2236       10436 :                          bms_membership(group) == BMS_MULTIPLE);
    2237       10370 :                 bms_free(removed);
    2238       10370 :                 bms_free(group);
    2239             :             }
    2240             :             else
    2241             :             {
    2242             :                 /* Single relation, just remove it from the set */
    2243      233768 :                 relids = bms_del_member(relids, candidates[i].relid);
    2244      233768 :                 i = j;
    2245             :             }
    2246             :         }
    2247             :     }
    2248             : 
    2249             :     Assert(bms_is_empty(relids));
    2250             : 
    2251      101864 :     return toRemove;
    2252             : }
    2253             : 
    2254             : /*
    2255             :  * Compare self-join candidates by their oids.
    2256             :  */
    2257             : static int
    2258      199900 : self_join_candidates_cmp(const void *a, const void *b)
    2259             : {
    2260      199900 :     const SelfJoinCandidate *ca = (const SelfJoinCandidate *) a;
    2261      199900 :     const SelfJoinCandidate *cb = (const SelfJoinCandidate *) b;
    2262             : 
    2263      199900 :     if (ca->reloid != cb->reloid)
    2264      188338 :         return (ca->reloid < cb->reloid ? -1 : 1);
    2265             :     else
    2266       11562 :         return 0;
    2267             : }
    2268             : 
    2269             : /*
    2270             :  * Find and remove useless self joins.
    2271             :  *
    2272             :  * Search for joins where a relation is joined to itself. If the join clause
    2273             :  * for each tuple from one side of the join is proven to match the same
    2274             :  * physical row (or nothing) on the other side, that self-join can be
    2275             :  * eliminated from the query.  Suitable join clauses are assumed to be in the
    2276             :  * form of X = X, and can be replaced with NOT NULL clauses.
    2277             :  *
    2278             :  * For the sake of simplicity, we don't apply this optimization to special
    2279             :  * joins. Here is a list of what we could do in some particular cases:
    2280             :  * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins,
    2281             :  * and then removed normally.
    2282             :  * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND
    2283             :  * (IS NULL on join columns OR NOT inner quals)'.
    2284             :  * 'a a1 left join a a2': could simplify to a scan like inner but without
    2285             :  * NOT NULL conditions on join columns.
    2286             :  * 'a a1 left join (a a2 join b)': can't simplify this, because join to b
    2287             :  * can both remove rows and introduce duplicates.
    2288             :  *
    2289             :  * To search for removable joins, we order all the relations on their Oid,
    2290             :  * go over each set with the same Oid, and consider each pair of relations
    2291             :  * in this set.
    2292             :  *
    2293             :  * To remove the join, we mark one of the participating relations as dead
    2294             :  * and rewrite all references to it to point to the remaining relation.
    2295             :  * This includes modifying RestrictInfos, EquivalenceClasses, and
    2296             :  * EquivalenceMembers. We also have to modify the row marks. The join clauses
    2297             :  * of the removed relation become either restriction or join clauses, based on
    2298             :  * whether they reference any relations not participating in the removed join.
    2299             :  *
    2300             :  * 'joinlist' is the top-level joinlist of the query. If it has any
    2301             :  * references to the removed relations, we update them to point to the
    2302             :  * remaining ones.
    2303             :  */
    2304             : List *
    2305      336130 : remove_useless_self_joins(PlannerInfo *root, List *joinlist)
    2306             : {
    2307      336130 :     Relids      toRemove = NULL;
    2308      336130 :     int         relid = -1;
    2309             : 
    2310      672260 :     if (!enable_self_join_elimination || joinlist == NIL ||
    2311      548344 :         (list_length(joinlist) == 1 && !IsA(linitial(joinlist), List)))
    2312      211346 :         return joinlist;
    2313             : 
    2314             :     /*
    2315             :      * Merge pairs of relations participated in self-join. Remove unnecessary
    2316             :      * range table entries.
    2317             :      */
    2318      124784 :     toRemove = remove_self_joins_recurse(root, joinlist, toRemove);
    2319             : 
    2320      124784 :     if (unlikely(toRemove != NULL))
    2321             :     {
    2322             :         /* At the end, remove orphaned relation links */
    2323        1146 :         while ((relid = bms_next_member(toRemove, relid)) >= 0)
    2324             :         {
    2325         588 :             int         nremoved = 0;
    2326             : 
    2327         588 :             joinlist = remove_rel_from_joinlist(joinlist, relid, &nremoved);
    2328         588 :             if (nremoved != 1)
    2329           0 :                 elog(ERROR, "failed to find relation %d in joinlist", relid);
    2330             :         }
    2331             :     }
    2332             : 
    2333      124784 :     return joinlist;
    2334             : }

Generated by: LCOV version 1.14