LCOV - code coverage report
Current view: top level - src/backend/optimizer/plan - analyzejoins.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 719 747 96.3 %
Date: 2024-04-23 16:11:19 Functions: 29 29 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * analyzejoins.c
       4             :  *    Routines for simplifying joins after initial query analysis
       5             :  *
       6             :  * While we do a great deal of join simplification in prep/prepjointree.c,
       7             :  * certain optimizations cannot be performed at that stage for lack of
       8             :  * detailed information about the query.  The routines here are invoked
       9             :  * after initsplan.c has done its work, and can do additional join removal
      10             :  * and simplification steps based on the information extracted.  The penalty
      11             :  * is that we have to work harder to clean up after ourselves when we modify
      12             :  * the query, since the derived data structures have to be updated too.
      13             :  *
      14             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      15             :  * Portions Copyright (c) 1994, Regents of the University of California
      16             :  *
      17             :  *
      18             :  * IDENTIFICATION
      19             :  *    src/backend/optimizer/plan/analyzejoins.c
      20             :  *
      21             :  *-------------------------------------------------------------------------
      22             :  */
      23             : #include "postgres.h"
      24             : 
      25             : #include "catalog/pg_class.h"
      26             : #include "nodes/nodeFuncs.h"
      27             : #include "optimizer/joininfo.h"
      28             : #include "optimizer/optimizer.h"
      29             : #include "optimizer/pathnode.h"
      30             : #include "optimizer/paths.h"
      31             : #include "optimizer/planmain.h"
      32             : #include "optimizer/restrictinfo.h"
      33             : #include "utils/lsyscache.h"
      34             : 
      35             : /*
      36             :  * The struct containing self-join candidate.  Used to find duplicate reloids.
      37             :  */
      38             : typedef struct
      39             : {
      40             :     int         relid;
      41             :     Oid         reloid;
      42             : } SelfJoinCandidate;
      43             : 
      44             : bool        enable_self_join_removal;
      45             : 
      46             : /* local functions */
      47             : static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
      48             : 
      49             : static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
      50             :                                           SpecialJoinInfo *sjinfo);
      51             : static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
      52             :                                          int relid, int ojrelid);
      53             : static void remove_rel_from_eclass(EquivalenceClass *ec,
      54             :                                    int relid, int ojrelid);
      55             : static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
      56             : static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
      57             : static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
      58             :                                 List *clause_list, List **extra_clauses);
      59             : static Oid  distinct_col_search(int colno, List *colnos, List *opids);
      60             : static bool is_innerrel_unique_for(PlannerInfo *root,
      61             :                                    Relids joinrelids,
      62             :                                    Relids outerrelids,
      63             :                                    RelOptInfo *innerrel,
      64             :                                    JoinType jointype,
      65             :                                    List *restrictlist,
      66             :                                    List **extra_clauses);
      67             : static void replace_varno(Node *node, int from, int to);
      68             : static Bitmapset *replace_relid(Relids relids, int oldId, int newId);
      69             : static int  self_join_candidates_cmp(const void *a, const void *b);
      70             : 
      71             : 
      72             : /*
      73             :  * remove_useless_joins
      74             :  *      Check for relations that don't actually need to be joined at all,
      75             :  *      and remove them from the query.
      76             :  *
      77             :  * We are passed the current joinlist and return the updated list.  Other
      78             :  * data structures that have to be updated are accessible via "root".
      79             :  */
      80             : List *
      81      285668 : remove_useless_joins(PlannerInfo *root, List *joinlist)
      82             : {
      83             :     ListCell   *lc;
      84             : 
      85             :     /*
      86             :      * We are only interested in relations that are left-joined to, so we can
      87             :      * scan the join_info_list to find them easily.
      88             :      */
      89      285668 : restart:
      90      324132 :     foreach(lc, root->join_info_list)
      91             :     {
      92       47558 :         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
      93             :         int         innerrelid;
      94             :         int         nremoved;
      95             : 
      96             :         /* Skip if not removable */
      97       47558 :         if (!join_is_removable(root, sjinfo))
      98       38464 :             continue;
      99             : 
     100             :         /*
     101             :          * Currently, join_is_removable can only succeed when the sjinfo's
     102             :          * righthand is a single baserel.  Remove that rel from the query and
     103             :          * joinlist.
     104             :          */
     105        9094 :         innerrelid = bms_singleton_member(sjinfo->min_righthand);
     106             : 
     107        9094 :         remove_leftjoinrel_from_query(root, innerrelid, sjinfo);
     108             : 
     109             :         /* We verify that exactly one reference gets removed from joinlist */
     110        9094 :         nremoved = 0;
     111        9094 :         joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
     112        9094 :         if (nremoved != 1)
     113           0 :             elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
     114             : 
     115             :         /*
     116             :          * We can delete this SpecialJoinInfo from the list too, since it's no
     117             :          * longer of interest.  (Since we'll restart the foreach loop
     118             :          * immediately, we don't bother with foreach_delete_current.)
     119             :          */
     120        9094 :         root->join_info_list = list_delete_cell(root->join_info_list, lc);
     121             : 
     122             :         /*
     123             :          * Restart the scan.  This is necessary to ensure we find all
     124             :          * removable joins independently of ordering of the join_info_list
     125             :          * (note that removal of attr_needed bits may make a join appear
     126             :          * removable that did not before).
     127             :          */
     128        9094 :         goto restart;
     129             :     }
     130             : 
     131      276574 :     return joinlist;
     132             : }
     133             : 
     134             : /*
     135             :  * clause_sides_match_join
     136             :  *    Determine whether a join clause is of the right form to use in this join.
     137             :  *
     138             :  * We already know that the clause is a binary opclause referencing only the
     139             :  * rels in the current join.  The point here is to check whether it has the
     140             :  * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
     141             :  * rather than mixing outer and inner vars on either side.  If it matches,
     142             :  * we set the transient flag outer_is_left to identify which side is which.
     143             :  */
     144             : static inline bool
     145      179134 : clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
     146             :                         Relids innerrelids)
     147             : {
     148      272148 :     if (bms_is_subset(rinfo->left_relids, outerrelids) &&
     149       93014 :         bms_is_subset(rinfo->right_relids, innerrelids))
     150             :     {
     151             :         /* lefthand side is outer */
     152       93008 :         rinfo->outer_is_left = true;
     153       93008 :         return true;
     154             :     }
     155      172232 :     else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
     156       86106 :              bms_is_subset(rinfo->right_relids, outerrelids))
     157             :     {
     158             :         /* righthand side is outer */
     159       86106 :         rinfo->outer_is_left = false;
     160       86106 :         return true;
     161             :     }
     162          20 :     return false;               /* no good for these input relations */
     163             : }
     164             : 
     165             : /*
     166             :  * join_is_removable
     167             :  *    Check whether we need not perform this special join at all, because
     168             :  *    it will just duplicate its left input.
     169             :  *
     170             :  * This is true for a left join for which the join condition cannot match
     171             :  * more than one inner-side row.  (There are other possibly interesting
     172             :  * cases, but we don't have the infrastructure to prove them.)  We also
     173             :  * have to check that the inner side doesn't generate any variables needed
     174             :  * above the join.
     175             :  */
     176             : static bool
     177       47558 : join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
     178             : {
     179             :     int         innerrelid;
     180             :     RelOptInfo *innerrel;
     181             :     Relids      inputrelids;
     182             :     Relids      joinrelids;
     183       47558 :     List       *clause_list = NIL;
     184             :     ListCell   *l;
     185             :     int         attroff;
     186             : 
     187             :     /*
     188             :      * Must be a left join to a single baserel, else we aren't going to be
     189             :      * able to do anything with it.
     190             :      */
     191       47558 :     if (sjinfo->jointype != JOIN_LEFT)
     192        6382 :         return false;
     193             : 
     194       41176 :     if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
     195        1154 :         return false;
     196             : 
     197             :     /*
     198             :      * Never try to eliminate a left join to the query result rel.  Although
     199             :      * the case is syntactically impossible in standard SQL, MERGE will build
     200             :      * a join tree that looks exactly like that.
     201             :      */
     202       40022 :     if (innerrelid == root->parse->resultRelation)
     203         694 :         return false;
     204             : 
     205       39328 :     innerrel = find_base_rel(root, innerrelid);
     206             : 
     207             :     /*
     208             :      * Before we go to the effort of checking whether any innerrel variables
     209             :      * are needed above the join, make a quick check to eliminate cases in
     210             :      * which we will surely be unable to prove uniqueness of the innerrel.
     211             :      */
     212       39328 :     if (!rel_supports_distinctness(root, innerrel))
     213        2746 :         return false;
     214             : 
     215             :     /* Compute the relid set for the join we are considering */
     216       36582 :     inputrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
     217             :     Assert(sjinfo->ojrelid != 0);
     218       36582 :     joinrelids = bms_copy(inputrelids);
     219       36582 :     joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
     220             : 
     221             :     /*
     222             :      * We can't remove the join if any inner-rel attributes are used above the
     223             :      * join.  Here, "above" the join includes pushed-down conditions, so we
     224             :      * should reject if attr_needed includes the OJ's own relid; therefore,
     225             :      * compare to inputrelids not joinrelids.
     226             :      *
     227             :      * As a micro-optimization, it seems better to start with max_attr and
     228             :      * count down rather than starting with min_attr and counting up, on the
     229             :      * theory that the system attributes are somewhat less likely to be wanted
     230             :      * and should be tested last.
     231             :      */
     232      455560 :     for (attroff = innerrel->max_attr - innerrel->min_attr;
     233             :          attroff >= 0;
     234      418978 :          attroff--)
     235             :     {
     236      446340 :         if (!bms_is_subset(innerrel->attr_needed[attroff], inputrelids))
     237       27362 :             return false;
     238             :     }
     239             : 
     240             :     /*
     241             :      * Similarly check that the inner rel isn't needed by any PlaceHolderVars
     242             :      * that will be used above the join.  The PHV case is a little bit more
     243             :      * complicated, because PHVs may have been assigned a ph_eval_at location
     244             :      * that includes the innerrel, yet their contained expression might not
     245             :      * actually reference the innerrel (it could be just a constant, for
     246             :      * instance).  If such a PHV is due to be evaluated above the join then it
     247             :      * needn't prevent join removal.
     248             :      */
     249        9370 :     foreach(l, root->placeholder_list)
     250             :     {
     251         180 :         PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
     252             : 
     253         180 :         if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
     254          30 :             return false;       /* it references innerrel laterally */
     255         180 :         if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
     256          54 :             continue;           /* it definitely doesn't reference innerrel */
     257         126 :         if (bms_is_subset(phinfo->ph_needed, inputrelids))
     258           6 :             continue;           /* PHV is not used above the join */
     259         120 :         if (!bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
     260          30 :             return false;       /* it has to be evaluated below the join */
     261             : 
     262             :         /*
     263             :          * We need to be sure there will still be a place to evaluate the PHV
     264             :          * if we remove the join, ie that ph_eval_at wouldn't become empty.
     265             :          */
     266          90 :         if (!bms_overlap(sjinfo->min_lefthand, phinfo->ph_eval_at))
     267           0 :             return false;       /* there isn't any other place to eval PHV */
     268             :         /* Check contained expression last, since this is a bit expensive */
     269          90 :         if (bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
     270          90 :                         innerrel->relids))
     271           0 :             return false;       /* contained expression references innerrel */
     272             :     }
     273             : 
     274             :     /*
     275             :      * Search for mergejoinable clauses that constrain the inner rel against
     276             :      * either the outer rel or a pseudoconstant.  If an operator is
     277             :      * mergejoinable then it behaves like equality for some btree opclass, so
     278             :      * it's what we want.  The mergejoinability test also eliminates clauses
     279             :      * containing volatile functions, which we couldn't depend on.
     280             :      */
     281       18694 :     foreach(l, innerrel->joininfo)
     282             :     {
     283        9504 :         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
     284             : 
     285             :         /*
     286             :          * If the current join commutes with some other outer join(s) via
     287             :          * outer join identity 3, there will be multiple clones of its join
     288             :          * clauses in the joininfo list.  We want to consider only the
     289             :          * has_clone form of such clauses.  Processing more than one form
     290             :          * would be wasteful, and also some of the others would confuse the
     291             :          * RINFO_IS_PUSHED_DOWN test below.
     292             :          */
     293        9504 :         if (restrictinfo->is_clone)
     294         122 :             continue;           /* ignore it */
     295             : 
     296             :         /*
     297             :          * If it's not a join clause for this outer join, we can't use it.
     298             :          * Note that if the clause is pushed-down, then it is logically from
     299             :          * above the outer join, even if it references no other rels (it might
     300             :          * be from WHERE, for example).
     301             :          */
     302        9382 :         if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
     303         102 :             continue;           /* ignore; not useful here */
     304             : 
     305             :         /* Ignore if it's not a mergejoinable clause */
     306        9280 :         if (!restrictinfo->can_join ||
     307        9226 :             restrictinfo->mergeopfamilies == NIL)
     308          54 :             continue;           /* not mergejoinable */
     309             : 
     310             :         /*
     311             :          * Check if clause has the form "outer op inner" or "inner op outer",
     312             :          * and if so mark which side is inner.
     313             :          */
     314        9226 :         if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
     315             :                                      innerrel->relids))
     316           6 :             continue;           /* no good for these input relations */
     317             : 
     318             :         /* OK, add to list */
     319        9220 :         clause_list = lappend(clause_list, restrictinfo);
     320             :     }
     321             : 
     322             :     /*
     323             :      * Now that we have the relevant equality join clauses, try to prove the
     324             :      * innerrel distinct.
     325             :      */
     326        9190 :     if (rel_is_distinct_for(root, innerrel, clause_list, NULL))
     327        9094 :         return true;
     328             : 
     329             :     /*
     330             :      * Some day it would be nice to check for other methods of establishing
     331             :      * distinctness.
     332             :      */
     333          96 :     return false;
     334             : }
     335             : 
     336             : 
     337             : /*
     338             :  * Remove the target rel->relid and references to the target join from the
     339             :  * planner's data structures, having determined that there is no need
     340             :  * to include them in the query.  Optionally replace them with subst if subst
     341             :  * is non-negative.
     342             :  *
     343             :  * This function updates only parts needed for both left-join removal and
     344             :  * self-join removal.
     345             :  */
     346             : static void
     347        9720 : remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel,
     348             :                       int subst, SpecialJoinInfo *sjinfo,
     349             :                       Relids joinrelids)
     350             : {
     351        9720 :     int         relid = rel->relid;
     352        9720 :     int         ojrelid = (sjinfo != NULL) ? sjinfo->ojrelid : -1;
     353             :     Index       rti;
     354             :     ListCell   *l;
     355             : 
     356             :     /*
     357             :      * Remove references to the rel from other baserels' attr_needed arrays
     358             :      * and lateral_vars lists.
     359             :      */
     360       60574 :     for (rti = 1; rti < root->simple_rel_array_size; rti++)
     361             :     {
     362       50854 :         RelOptInfo *otherrel = root->simple_rel_array[rti];
     363             :         int         attroff;
     364             : 
     365             :         /* there may be empty slots corresponding to non-baserel RTEs */
     366       50854 :         if (otherrel == NULL)
     367       24416 :             continue;
     368             : 
     369             :         Assert(otherrel->relid == rti); /* sanity check on array */
     370             : 
     371             :         /* no point in processing target rel itself */
     372       26438 :         if (otherrel == rel)
     373        9720 :             continue;
     374             : 
     375      382328 :         for (attroff = otherrel->max_attr - otherrel->min_attr;
     376             :              attroff >= 0;
     377      365610 :              attroff--)
     378             :         {
     379      731220 :             otherrel->attr_needed[attroff] =
     380      365610 :                 replace_relid(otherrel->attr_needed[attroff], relid, subst);
     381      365610 :             otherrel->attr_needed[attroff] =
     382      365610 :                 replace_relid(otherrel->attr_needed[attroff], ojrelid, subst);
     383             :         }
     384             : 
     385             :         /* Update lateral_vars list. */
     386       16718 :         replace_varno((Node *) otherrel->lateral_vars, relid, subst);
     387             :     }
     388             : 
     389             :     /*
     390             :      * Update all_baserels and related relid sets.
     391             :      */
     392        9720 :     root->all_baserels = replace_relid(root->all_baserels, relid, subst);
     393        9720 :     root->outer_join_rels = replace_relid(root->outer_join_rels, ojrelid, subst);
     394        9720 :     root->all_query_rels = replace_relid(root->all_query_rels, relid, subst);
     395        9720 :     root->all_query_rels = replace_relid(root->all_query_rels, ojrelid, subst);
     396             : 
     397             :     /*
     398             :      * Likewise remove references from SpecialJoinInfo data structures.
     399             :      *
     400             :      * This is relevant in case the outer join we're deleting is nested inside
     401             :      * other outer joins: the upper joins' relid sets have to be adjusted. The
     402             :      * RHS of the target outer join will be made empty here, but that's OK
     403             :      * since caller will delete that SpecialJoinInfo entirely.
     404             :      */
     405       22314 :     foreach(l, root->join_info_list)
     406             :     {
     407       12594 :         SpecialJoinInfo *sjinf = (SpecialJoinInfo *) lfirst(l);
     408             : 
     409       12594 :         sjinf->min_lefthand = replace_relid(sjinf->min_lefthand, relid, subst);
     410       12594 :         sjinf->min_righthand = replace_relid(sjinf->min_righthand, relid, subst);
     411       12594 :         sjinf->syn_lefthand = replace_relid(sjinf->syn_lefthand, relid, subst);
     412       12594 :         sjinf->syn_righthand = replace_relid(sjinf->syn_righthand, relid, subst);
     413       12594 :         sjinf->min_lefthand = replace_relid(sjinf->min_lefthand, ojrelid, subst);
     414       12594 :         sjinf->min_righthand = replace_relid(sjinf->min_righthand, ojrelid, subst);
     415       12594 :         sjinf->syn_lefthand = replace_relid(sjinf->syn_lefthand, ojrelid, subst);
     416       12594 :         sjinf->syn_righthand = replace_relid(sjinf->syn_righthand, ojrelid, subst);
     417             :         /* relid cannot appear in these fields, but ojrelid can: */
     418       12594 :         sjinf->commute_above_l = replace_relid(sjinf->commute_above_l, ojrelid, subst);
     419       12594 :         sjinf->commute_above_r = replace_relid(sjinf->commute_above_r, ojrelid, subst);
     420       12594 :         sjinf->commute_below_l = replace_relid(sjinf->commute_below_l, ojrelid, subst);
     421       12594 :         sjinf->commute_below_r = replace_relid(sjinf->commute_below_r, ojrelid, subst);
     422             : 
     423       12594 :         replace_varno((Node *) sjinf->semi_rhs_exprs, relid, subst);
     424             :     }
     425             : 
     426             :     /*
     427             :      * Likewise remove references from PlaceHolderVar data structures,
     428             :      * removing any no-longer-needed placeholders entirely.
     429             :      *
     430             :      * Removal is a bit trickier than it might seem: we can remove PHVs that
     431             :      * are used at the target rel and/or in the join qual, but not those that
     432             :      * are used at join partner rels or above the join.  It's not that easy to
     433             :      * distinguish PHVs used at partner rels from those used in the join qual,
     434             :      * since they will both have ph_needed sets that are subsets of
     435             :      * joinrelids.  However, a PHV used at a partner rel could not have the
     436             :      * target rel in ph_eval_at, so we check that while deciding whether to
     437             :      * remove or just update the PHV.  There is no corresponding test in
     438             :      * join_is_removable because it doesn't need to distinguish those cases.
     439             :      */
     440        9882 :     foreach(l, root->placeholder_list)
     441             :     {
     442         162 :         PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
     443             : 
     444             :         Assert(sjinfo == NULL || !bms_is_member(relid, phinfo->ph_lateral));
     445         198 :         if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
     446          54 :             bms_is_member(relid, phinfo->ph_eval_at) &&
     447          12 :             (sjinfo == NULL || !bms_is_member(ojrelid, phinfo->ph_eval_at)))
     448             :         {
     449          12 :             root->placeholder_list = foreach_delete_current(root->placeholder_list,
     450             :                                                             l);
     451          12 :             root->placeholder_array[phinfo->phid] = NULL;
     452             :         }
     453             :         else
     454             :         {
     455         150 :             PlaceHolderVar *phv = phinfo->ph_var;
     456             : 
     457         150 :             phinfo->ph_eval_at = replace_relid(phinfo->ph_eval_at, relid, subst);
     458         150 :             phinfo->ph_eval_at = replace_relid(phinfo->ph_eval_at, ojrelid, subst);
     459             :             Assert(!bms_is_empty(phinfo->ph_eval_at));   /* checked previously */
     460         150 :             phinfo->ph_needed = replace_relid(phinfo->ph_needed, relid, subst);
     461         150 :             phinfo->ph_needed = replace_relid(phinfo->ph_needed, ojrelid, subst);
     462             :             /* ph_needed might or might not become empty */
     463         150 :             phinfo->ph_lateral = replace_relid(phinfo->ph_lateral, relid, subst);
     464             :             /* ph_lateral might or might not be empty */
     465         150 :             phv->phrels = replace_relid(phv->phrels, relid, subst);
     466         150 :             phv->phrels = replace_relid(phv->phrels, ojrelid, subst);
     467             :             Assert(!bms_is_empty(phv->phrels));
     468         150 :             replace_varno((Node *) phv->phexpr, relid, subst);
     469             :             Assert(phv->phnullingrels == NULL); /* no need to adjust */
     470             :         }
     471             :     }
     472        9720 : }
     473             : 
     474             : /*
     475             :  * Remove the target relid and references to the target join from the
     476             :  * planner's data structures, having determined that there is no need
     477             :  * to include them in the query.
     478             :  *
     479             :  * We are not terribly thorough here.  We only bother to update parts of
     480             :  * the planner's data structures that will actually be consulted later.
     481             :  */
     482             : static void
     483        9094 : remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
     484             :                               SpecialJoinInfo *sjinfo)
     485             : {
     486             :     List       *joininfos;
     487        9094 :     int         ojrelid = sjinfo->ojrelid;
     488        9094 :     RelOptInfo *rel = find_base_rel(root, relid);
     489             :     Relids      join_plus_commute;
     490             :     Relids      joinrelids;
     491             :     ListCell   *l;
     492             : 
     493             :     /* Compute the relid set for the join we are considering */
     494        9094 :     joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
     495             :     Assert(ojrelid != 0);
     496        9094 :     joinrelids = bms_add_member(joinrelids, ojrelid);
     497             : 
     498        9094 :     remove_rel_from_query(root, rel, -1, sjinfo, joinrelids);
     499             : 
     500             :     /*
     501             :      * Remove any joinquals referencing the rel from the joininfo lists.
     502             :      *
     503             :      * In some cases, a joinqual has to be put back after deleting its
     504             :      * reference to the target rel.  This can occur for pseudoconstant and
     505             :      * outerjoin-delayed quals, which can get marked as requiring the rel in
     506             :      * order to force them to be evaluated at or above the join.  We can't
     507             :      * just discard them, though.  Only quals that logically belonged to the
     508             :      * outer join being discarded should be removed from the query.
     509             :      *
     510             :      * We might encounter a qual that is a clone of a deletable qual with some
     511             :      * outer-join relids added (see deconstruct_distribute_oj_quals).  To
     512             :      * ensure we get rid of such clones as well, add the relids of all OJs
     513             :      * commutable with this one to the set we test against for
     514             :      * pushed-down-ness.
     515             :      */
     516        9094 :     join_plus_commute = bms_union(joinrelids,
     517        9094 :                                   sjinfo->commute_above_r);
     518        9094 :     join_plus_commute = bms_add_members(join_plus_commute,
     519        9094 :                                         sjinfo->commute_below_l);
     520             : 
     521             :     /*
     522             :      * We must make a copy of the rel's old joininfo list before starting the
     523             :      * loop, because otherwise remove_join_clause_from_rels would destroy the
     524             :      * list while we're scanning it.
     525             :      */
     526        9094 :     joininfos = list_copy(rel->joininfo);
     527       18520 :     foreach(l, joininfos)
     528             :     {
     529        9426 :         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
     530             : 
     531        9426 :         remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
     532             : 
     533        9426 :         if (RINFO_IS_PUSHED_DOWN(rinfo, join_plus_commute))
     534             :         {
     535             :             /*
     536             :              * There might be references to relid or ojrelid in the
     537             :              * RestrictInfo's relid sets, as a consequence of PHVs having had
     538             :              * ph_eval_at sets that include those.  We already checked above
     539             :              * that any such PHV is safe (and updated its ph_eval_at), so we
     540             :              * can just drop those references.
     541             :              */
     542         102 :             remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
     543             : 
     544             :             /*
     545             :              * Cross-check that the clause itself does not reference the
     546             :              * target rel or join.
     547             :              */
     548             : #ifdef USE_ASSERT_CHECKING
     549             :             {
     550             :                 Relids      clause_varnos = pull_varnos(root,
     551             :                                                         (Node *) rinfo->clause);
     552             : 
     553             :                 Assert(!bms_is_member(relid, clause_varnos));
     554             :                 Assert(!bms_is_member(ojrelid, clause_varnos));
     555             :             }
     556             : #endif
     557             :             /* Now throw it back into the joininfo lists */
     558         102 :             distribute_restrictinfo_to_rels(root, rinfo);
     559             :         }
     560             :     }
     561             : 
     562             :     /*
     563             :      * Likewise remove references from EquivalenceClasses.
     564             :      */
     565       49946 :     foreach(l, root->eq_classes)
     566             :     {
     567       40852 :         EquivalenceClass *ec = (EquivalenceClass *) lfirst(l);
     568             : 
     569       68690 :         if (bms_is_member(relid, ec->ec_relids) ||
     570       27838 :             bms_is_member(ojrelid, ec->ec_relids))
     571       13014 :             remove_rel_from_eclass(ec, relid, ojrelid);
     572             :     }
     573             : 
     574             :     /*
     575             :      * There may be references to the rel in root->fkey_list, but if so,
     576             :      * match_foreign_keys_to_quals() will get rid of them.
     577             :      */
     578             : 
     579             :     /*
     580             :      * Finally, remove the rel from the baserel array to prevent it from being
     581             :      * referenced again.  (We can't do this earlier because
     582             :      * remove_join_clause_from_rels will touch it.)
     583             :      */
     584        9094 :     root->simple_rel_array[relid] = NULL;
     585             : 
     586             :     /* And nuke the RelOptInfo, just in case there's another access path */
     587        9094 :     pfree(rel);
     588        9094 : }
     589             : 
     590             : /*
     591             :  * Remove any references to relid or ojrelid from the RestrictInfo.
     592             :  *
     593             :  * We only bother to clean out bits in clause_relids and required_relids,
     594             :  * not nullingrel bits in contained Vars and PHVs.  (This might have to be
     595             :  * improved sometime.)  However, if the RestrictInfo contains an OR clause
     596             :  * we have to also clean up the sub-clauses.
     597             :  */
     598             : static void
     599        3932 : remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid)
     600             : {
     601             :     /*
     602             :      * The clause_relids probably aren't shared with anything else, but let's
     603             :      * copy them just to be sure.
     604             :      */
     605        3932 :     rinfo->clause_relids = bms_copy(rinfo->clause_relids);
     606        3932 :     rinfo->clause_relids = bms_del_member(rinfo->clause_relids, relid);
     607        3932 :     rinfo->clause_relids = bms_del_member(rinfo->clause_relids, ojrelid);
     608             :     /* Likewise for required_relids */
     609        3932 :     rinfo->required_relids = bms_copy(rinfo->required_relids);
     610        3932 :     rinfo->required_relids = bms_del_member(rinfo->required_relids, relid);
     611        3932 :     rinfo->required_relids = bms_del_member(rinfo->required_relids, ojrelid);
     612             : 
     613             :     /* If it's an OR, recurse to clean up sub-clauses */
     614        3932 :     if (restriction_is_or_clause(rinfo))
     615             :     {
     616             :         ListCell   *lc;
     617             : 
     618             :         Assert(is_orclause(rinfo->orclause));
     619          18 :         foreach(lc, ((BoolExpr *) rinfo->orclause)->args)
     620             :         {
     621          12 :             Node       *orarg = (Node *) lfirst(lc);
     622             : 
     623             :             /* OR arguments should be ANDs or sub-RestrictInfos */
     624          12 :             if (is_andclause(orarg))
     625             :             {
     626           0 :                 List       *andargs = ((BoolExpr *) orarg)->args;
     627             :                 ListCell   *lc2;
     628             : 
     629           0 :                 foreach(lc2, andargs)
     630             :                 {
     631           0 :                     RestrictInfo *rinfo2 = lfirst_node(RestrictInfo, lc2);
     632             : 
     633           0 :                     remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
     634             :                 }
     635             :             }
     636             :             else
     637             :             {
     638          12 :                 RestrictInfo *rinfo2 = castNode(RestrictInfo, orarg);
     639             : 
     640          12 :                 remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
     641             :             }
     642             :         }
     643             :     }
     644        3932 : }
     645             : 
     646             : /*
     647             :  * Remove any references to relid or ojrelid from the EquivalenceClass.
     648             :  *
     649             :  * Like remove_rel_from_restrictinfo, we don't worry about cleaning out
     650             :  * any nullingrel bits in contained Vars and PHVs.  (This might have to be
     651             :  * improved sometime.)  We do need to fix the EC and EM relid sets to ensure
     652             :  * that implied join equalities will be generated at the appropriate join
     653             :  * level(s).
     654             :  */
     655             : static void
     656       13014 : remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid)
     657             : {
     658             :     ListCell   *lc;
     659             : 
     660             :     /* Fix up the EC's overall relids */
     661       13014 :     ec->ec_relids = bms_del_member(ec->ec_relids, relid);
     662       13014 :     ec->ec_relids = bms_del_member(ec->ec_relids, ojrelid);
     663             : 
     664             :     /*
     665             :      * Fix up the member expressions.  Any non-const member that ends with
     666             :      * empty em_relids must be a Var or PHV of the removed relation.  We don't
     667             :      * need it anymore, so we can drop it.
     668             :      */
     669       29846 :     foreach(lc, ec->ec_members)
     670             :     {
     671       16832 :         EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
     672             : 
     673       20650 :         if (bms_is_member(relid, cur_em->em_relids) ||
     674        3818 :             bms_is_member(ojrelid, cur_em->em_relids))
     675             :         {
     676             :             Assert(!cur_em->em_is_const);
     677       13014 :             cur_em->em_relids = bms_del_member(cur_em->em_relids, relid);
     678       13014 :             cur_em->em_relids = bms_del_member(cur_em->em_relids, ojrelid);
     679       13014 :             if (bms_is_empty(cur_em->em_relids))
     680       13002 :                 ec->ec_members = foreach_delete_current(ec->ec_members, lc);
     681             :         }
     682             :     }
     683             : 
     684             :     /* Fix up the source clauses, in case we can re-use them later */
     685       16832 :     foreach(lc, ec->ec_sources)
     686             :     {
     687        3818 :         RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
     688             : 
     689        3818 :         remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
     690             :     }
     691             : 
     692             :     /*
     693             :      * Rather than expend code on fixing up any already-derived clauses, just
     694             :      * drop them.  (At this point, any such clauses would be base restriction
     695             :      * clauses, which we'd not need anymore anyway.)
     696             :      */
     697       13014 :     ec->ec_derives = NIL;
     698       13014 : }
     699             : 
     700             : /*
     701             :  * Remove any occurrences of the target relid from a joinlist structure.
     702             :  *
     703             :  * It's easiest to build a whole new list structure, so we handle it that
     704             :  * way.  Efficiency is not a big deal here.
     705             :  *
     706             :  * *nremoved is incremented by the number of occurrences removed (there
     707             :  * should be exactly one, but the caller checks that).
     708             :  */
     709             : static List *
     710        9984 : remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
     711             : {
     712        9984 :     List       *result = NIL;
     713             :     ListCell   *jl;
     714             : 
     715       36686 :     foreach(jl, joinlist)
     716             :     {
     717       26702 :         Node       *jlnode = (Node *) lfirst(jl);
     718             : 
     719       26702 :         if (IsA(jlnode, RangeTblRef))
     720             :         {
     721       26438 :             int         varno = ((RangeTblRef *) jlnode)->rtindex;
     722             : 
     723       26438 :             if (varno == relid)
     724        9720 :                 (*nremoved)++;
     725             :             else
     726       16718 :                 result = lappend(result, jlnode);
     727             :         }
     728         264 :         else if (IsA(jlnode, List))
     729             :         {
     730             :             /* Recurse to handle subproblem */
     731             :             List       *sublist;
     732             : 
     733         264 :             sublist = remove_rel_from_joinlist((List *) jlnode,
     734             :                                                relid, nremoved);
     735             :             /* Avoid including empty sub-lists in the result */
     736         264 :             if (sublist)
     737         264 :                 result = lappend(result, sublist);
     738             :         }
     739             :         else
     740             :         {
     741           0 :             elog(ERROR, "unrecognized joinlist node type: %d",
     742             :                  (int) nodeTag(jlnode));
     743             :         }
     744             :     }
     745             : 
     746        9984 :     return result;
     747             : }
     748             : 
     749             : 
     750             : /*
     751             :  * reduce_unique_semijoins
     752             :  *      Check for semijoins that can be simplified to plain inner joins
     753             :  *      because the inner relation is provably unique for the join clauses.
     754             :  *
     755             :  * Ideally this would happen during reduce_outer_joins, but we don't have
     756             :  * enough information at that point.
     757             :  *
     758             :  * To perform the strength reduction when applicable, we need only delete
     759             :  * the semijoin's SpecialJoinInfo from root->join_info_list.  (We don't
     760             :  * bother fixing the join type attributed to it in the query jointree,
     761             :  * since that won't be consulted again.)
     762             :  */
     763             : void
     764      276574 : reduce_unique_semijoins(PlannerInfo *root)
     765             : {
     766             :     ListCell   *lc;
     767             : 
     768             :     /*
     769             :      * Scan the join_info_list to find semijoins.
     770             :      */
     771      314826 :     foreach(lc, root->join_info_list)
     772             :     {
     773       38252 :         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
     774             :         int         innerrelid;
     775             :         RelOptInfo *innerrel;
     776             :         Relids      joinrelids;
     777             :         List       *restrictlist;
     778             : 
     779             :         /*
     780             :          * Must be a semijoin to a single baserel, else we aren't going to be
     781             :          * able to do anything with it.
     782             :          */
     783       38252 :         if (sjinfo->jointype != JOIN_SEMI)
     784       37998 :             continue;
     785             : 
     786        1952 :         if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
     787         164 :             continue;
     788             : 
     789        1788 :         innerrel = find_base_rel(root, innerrelid);
     790             : 
     791             :         /*
     792             :          * Before we trouble to run generate_join_implied_equalities, make a
     793             :          * quick check to eliminate cases in which we will surely be unable to
     794             :          * prove uniqueness of the innerrel.
     795             :          */
     796        1788 :         if (!rel_supports_distinctness(root, innerrel))
     797         778 :             continue;
     798             : 
     799             :         /* Compute the relid set for the join we are considering */
     800        1010 :         joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
     801             :         Assert(sjinfo->ojrelid == 0);    /* SEMI joins don't have RT indexes */
     802             : 
     803             :         /*
     804             :          * Since we're only considering a single-rel RHS, any join clauses it
     805             :          * has must be clauses linking it to the semijoin's min_lefthand.  We
     806             :          * can also consider EC-derived join clauses.
     807             :          */
     808             :         restrictlist =
     809        1010 :             list_concat(generate_join_implied_equalities(root,
     810             :                                                          joinrelids,
     811             :                                                          sjinfo->min_lefthand,
     812             :                                                          innerrel,
     813             :                                                          NULL),
     814        1010 :                         innerrel->joininfo);
     815             : 
     816             :         /* Test whether the innerrel is unique for those clauses. */
     817        1010 :         if (!innerrel_is_unique(root,
     818             :                                 joinrelids, sjinfo->min_lefthand, innerrel,
     819             :                                 JOIN_SEMI, restrictlist, true))
     820         756 :             continue;
     821             : 
     822             :         /* OK, remove the SpecialJoinInfo from the list. */
     823         254 :         root->join_info_list = foreach_delete_current(root->join_info_list, lc);
     824             :     }
     825      276574 : }
     826             : 
     827             : 
     828             : /*
     829             :  * rel_supports_distinctness
     830             :  *      Could the relation possibly be proven distinct on some set of columns?
     831             :  *
     832             :  * This is effectively a pre-checking function for rel_is_distinct_for().
     833             :  * It must return true if rel_is_distinct_for() could possibly return true
     834             :  * with this rel, but it should not expend a lot of cycles.  The idea is
     835             :  * that callers can avoid doing possibly-expensive processing to compute
     836             :  * rel_is_distinct_for()'s argument lists if the call could not possibly
     837             :  * succeed.
     838             :  */
     839             : static bool
     840      495198 : rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
     841             : {
     842             :     /* We only know about baserels ... */
     843      495198 :     if (rel->reloptkind != RELOPT_BASEREL)
     844      162222 :         return false;
     845      332976 :     if (rel->rtekind == RTE_RELATION)
     846             :     {
     847             :         /*
     848             :          * For a plain relation, we only know how to prove uniqueness by
     849             :          * reference to unique indexes.  Make sure there's at least one
     850             :          * suitable unique index.  It must be immediately enforced, and not a
     851             :          * partial index. (Keep these conditions in sync with
     852             :          * relation_has_unique_index_for!)
     853             :          */
     854             :         ListCell   *lc;
     855             : 
     856      442198 :         foreach(lc, rel->indexlist)
     857             :         {
     858      401528 :             IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
     859             : 
     860      401528 :             if (ind->unique && ind->immediate && ind->indpred == NIL)
     861      268804 :                 return true;
     862             :         }
     863             :     }
     864       23502 :     else if (rel->rtekind == RTE_SUBQUERY)
     865             :     {
     866        3914 :         Query      *subquery = root->simple_rte_array[rel->relid]->subquery;
     867             : 
     868             :         /* Check if the subquery has any qualities that support distinctness */
     869        3914 :         if (query_supports_distinctness(subquery))
     870        2526 :             return true;
     871             :     }
     872             :     /* We have no proof rules for any other rtekinds. */
     873       61646 :     return false;
     874             : }
     875             : 
     876             : /*
     877             :  * rel_is_distinct_for
     878             :  *      Does the relation return only distinct rows according to clause_list?
     879             :  *
     880             :  * clause_list is a list of join restriction clauses involving this rel and
     881             :  * some other one.  Return true if no two rows emitted by this rel could
     882             :  * possibly join to the same row of the other rel.
     883             :  *
     884             :  * The caller must have already determined that each condition is a
     885             :  * mergejoinable equality with an expression in this relation on one side, and
     886             :  * an expression not involving this relation on the other.  The transient
     887             :  * outer_is_left flag is used to identify which side references this relation:
     888             :  * left side if outer_is_left is false, right side if it is true.
     889             :  *
     890             :  * Note that the passed-in clause_list may be destructively modified!  This
     891             :  * is OK for current uses, because the clause_list is built by the caller for
     892             :  * the sole purpose of passing to this function.
     893             :  *
     894             :  * outer_exprs contains the right sides of baserestrictinfo clauses looking
     895             :  * like x = const if distinctness is derived from such clauses, not joininfo
     896             :  * clause.  Pass NULL to the outer_exprs, if its value is not needed.
     897             :  */
     898             : static bool
     899      168648 : rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list,
     900             :                     List **extra_clauses)
     901             : {
     902             :     /*
     903             :      * We could skip a couple of tests here if we assume all callers checked
     904             :      * rel_supports_distinctness first, but it doesn't seem worth taking any
     905             :      * risk for.
     906             :      */
     907      168648 :     if (rel->reloptkind != RELOPT_BASEREL)
     908           0 :         return false;
     909      168648 :     if (rel->rtekind == RTE_RELATION)
     910             :     {
     911             :         /*
     912             :          * Examine the indexes to see if we have a matching unique index.
     913             :          * relation_has_unique_index_ext automatically adds any usable
     914             :          * restriction clauses for the rel, so we needn't do that here.
     915             :          */
     916      166946 :         if (relation_has_unique_index_ext(root, rel, clause_list, NIL, NIL,
     917             :                                           extra_clauses))
     918      102018 :             return true;
     919             :     }
     920        1702 :     else if (rel->rtekind == RTE_SUBQUERY)
     921             :     {
     922        1702 :         Index       relid = rel->relid;
     923        1702 :         Query      *subquery = root->simple_rte_array[relid]->subquery;
     924        1702 :         List       *colnos = NIL;
     925        1702 :         List       *opids = NIL;
     926             :         ListCell   *l;
     927             : 
     928             :         /*
     929             :          * Build the argument lists for query_is_distinct_for: a list of
     930             :          * output column numbers that the query needs to be distinct over, and
     931             :          * a list of equality operators that the output columns need to be
     932             :          * distinct according to.
     933             :          *
     934             :          * (XXX we are not considering restriction clauses attached to the
     935             :          * subquery; is that worth doing?)
     936             :          */
     937        3380 :         foreach(l, clause_list)
     938             :         {
     939        1678 :             RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
     940             :             Oid         op;
     941             :             Var        *var;
     942             : 
     943             :             /*
     944             :              * Get the equality operator we need uniqueness according to.
     945             :              * (This might be a cross-type operator and thus not exactly the
     946             :              * same operator the subquery would consider; that's all right
     947             :              * since query_is_distinct_for can resolve such cases.)  The
     948             :              * caller's mergejoinability test should have selected only
     949             :              * OpExprs.
     950             :              */
     951        1678 :             op = castNode(OpExpr, rinfo->clause)->opno;
     952             : 
     953             :             /* caller identified the inner side for us */
     954        1678 :             if (rinfo->outer_is_left)
     955        1364 :                 var = (Var *) get_rightop(rinfo->clause);
     956             :             else
     957         314 :                 var = (Var *) get_leftop(rinfo->clause);
     958             : 
     959             :             /*
     960             :              * We may ignore any RelabelType node above the operand.  (There
     961             :              * won't be more than one, since eval_const_expressions() has been
     962             :              * applied already.)
     963             :              */
     964        1678 :             if (var && IsA(var, RelabelType))
     965         600 :                 var = (Var *) ((RelabelType *) var)->arg;
     966             : 
     967             :             /*
     968             :              * If inner side isn't a Var referencing a subquery output column,
     969             :              * this clause doesn't help us.
     970             :              */
     971        1678 :             if (!var || !IsA(var, Var) ||
     972        1666 :                 var->varno != relid || var->varlevelsup != 0)
     973          12 :                 continue;
     974             : 
     975        1666 :             colnos = lappend_int(colnos, var->varattno);
     976        1666 :             opids = lappend_oid(opids, op);
     977             :         }
     978             : 
     979        1702 :         if (query_is_distinct_for(subquery, colnos, opids))
     980         188 :             return true;
     981             :     }
     982       66442 :     return false;
     983             : }
     984             : 
     985             : 
     986             : /*
     987             :  * query_supports_distinctness - could the query possibly be proven distinct
     988             :  *      on some set of output columns?
     989             :  *
     990             :  * This is effectively a pre-checking function for query_is_distinct_for().
     991             :  * It must return true if query_is_distinct_for() could possibly return true
     992             :  * with this query, but it should not expend a lot of cycles.  The idea is
     993             :  * that callers can avoid doing possibly-expensive processing to compute
     994             :  * query_is_distinct_for()'s argument lists if the call could not possibly
     995             :  * succeed.
     996             :  */
     997             : bool
     998        4560 : query_supports_distinctness(Query *query)
     999             : {
    1000             :     /* SRFs break distinctness except with DISTINCT, see below */
    1001        4560 :     if (query->hasTargetSRFs && query->distinctClause == NIL)
    1002         908 :         return false;
    1003             : 
    1004             :     /* check for features we can prove distinctness with */
    1005        3652 :     if (query->distinctClause != NIL ||
    1006        3508 :         query->groupClause != NIL ||
    1007        3302 :         query->groupingSets != NIL ||
    1008        3302 :         query->hasAggs ||
    1009        3126 :         query->havingQual ||
    1010        3126 :         query->setOperations)
    1011        3136 :         return true;
    1012             : 
    1013         516 :     return false;
    1014             : }
    1015             : 
    1016             : /*
    1017             :  * query_is_distinct_for - does query never return duplicates of the
    1018             :  *      specified columns?
    1019             :  *
    1020             :  * query is a not-yet-planned subquery (in current usage, it's always from
    1021             :  * a subquery RTE, which the planner avoids scribbling on).
    1022             :  *
    1023             :  * colnos is an integer list of output column numbers (resno's).  We are
    1024             :  * interested in whether rows consisting of just these columns are certain
    1025             :  * to be distinct.  "Distinctness" is defined according to whether the
    1026             :  * corresponding upper-level equality operators listed in opids would think
    1027             :  * the values are distinct.  (Note: the opids entries could be cross-type
    1028             :  * operators, and thus not exactly the equality operators that the subquery
    1029             :  * would use itself.  We use equality_ops_are_compatible() to check
    1030             :  * compatibility.  That looks at btree or hash opfamily membership, and so
    1031             :  * should give trustworthy answers for all operators that we might need
    1032             :  * to deal with here.)
    1033             :  */
    1034             : bool
    1035        1824 : query_is_distinct_for(Query *query, List *colnos, List *opids)
    1036             : {
    1037             :     ListCell   *l;
    1038             :     Oid         opid;
    1039             : 
    1040             :     Assert(list_length(colnos) == list_length(opids));
    1041             : 
    1042             :     /*
    1043             :      * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
    1044             :      * columns in the DISTINCT clause appear in colnos and operator semantics
    1045             :      * match.  This is true even if there are SRFs in the DISTINCT columns or
    1046             :      * elsewhere in the tlist.
    1047             :      */
    1048        1824 :     if (query->distinctClause)
    1049             :     {
    1050         150 :         foreach(l, query->distinctClause)
    1051             :         {
    1052         120 :             SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
    1053         120 :             TargetEntry *tle = get_sortgroupclause_tle(sgc,
    1054             :                                                        query->targetList);
    1055             : 
    1056         120 :             opid = distinct_col_search(tle->resno, colnos, opids);
    1057         120 :             if (!OidIsValid(opid) ||
    1058          48 :                 !equality_ops_are_compatible(opid, sgc->eqop))
    1059             :                 break;          /* exit early if no match */
    1060             :         }
    1061         102 :         if (l == NULL)          /* had matches for all? */
    1062          30 :             return true;
    1063             :     }
    1064             : 
    1065             :     /*
    1066             :      * Otherwise, a set-returning function in the query's targetlist can
    1067             :      * result in returning duplicate rows, despite any grouping that might
    1068             :      * occur before tlist evaluation.  (If all tlist SRFs are within GROUP BY
    1069             :      * columns, it would be safe because they'd be expanded before grouping.
    1070             :      * But it doesn't currently seem worth the effort to check for that.)
    1071             :      */
    1072        1794 :     if (query->hasTargetSRFs)
    1073           0 :         return false;
    1074             : 
    1075             :     /*
    1076             :      * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
    1077             :      * the grouped columns appear in colnos and operator semantics match.
    1078             :      */
    1079        1794 :     if (query->groupClause && !query->groupingSets)
    1080             :     {
    1081         258 :         foreach(l, query->groupClause)
    1082             :         {
    1083         200 :             SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
    1084         200 :             TargetEntry *tle = get_sortgroupclause_tle(sgc,
    1085             :                                                        query->targetList);
    1086             : 
    1087         200 :             opid = distinct_col_search(tle->resno, colnos, opids);
    1088         200 :             if (!OidIsValid(opid) ||
    1089         124 :                 !equality_ops_are_compatible(opid, sgc->eqop))
    1090             :                 break;          /* exit early if no match */
    1091             :         }
    1092         134 :         if (l == NULL)          /* had matches for all? */
    1093          58 :             return true;
    1094             :     }
    1095        1660 :     else if (query->groupingSets)
    1096             :     {
    1097             :         /*
    1098             :          * If we have grouping sets with expressions, we probably don't have
    1099             :          * uniqueness and analysis would be hard. Punt.
    1100             :          */
    1101           0 :         if (query->groupClause)
    1102           0 :             return false;
    1103             : 
    1104             :         /*
    1105             :          * If we have no groupClause (therefore no grouping expressions), we
    1106             :          * might have one or many empty grouping sets. If there's just one,
    1107             :          * then we're returning only one row and are certainly unique. But
    1108             :          * otherwise, we know we're certainly not unique.
    1109             :          */
    1110           0 :         if (list_length(query->groupingSets) == 1 &&
    1111           0 :             ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
    1112           0 :             return true;
    1113             :         else
    1114           0 :             return false;
    1115             :     }
    1116             :     else
    1117             :     {
    1118             :         /*
    1119             :          * If we have no GROUP BY, but do have aggregates or HAVING, then the
    1120             :          * result is at most one row so it's surely unique, for any operators.
    1121             :          */
    1122        1660 :         if (query->hasAggs || query->havingQual)
    1123          88 :             return true;
    1124             :     }
    1125             : 
    1126             :     /*
    1127             :      * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
    1128             :      * except with ALL.
    1129             :      */
    1130        1648 :     if (query->setOperations)
    1131             :     {
    1132        1500 :         SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations);
    1133             : 
    1134             :         Assert(topop->op != SETOP_NONE);
    1135             : 
    1136        1500 :         if (!topop->all)
    1137             :         {
    1138             :             ListCell   *lg;
    1139             : 
    1140             :             /* We're good if all the nonjunk output columns are in colnos */
    1141          72 :             lg = list_head(topop->groupClauses);
    1142          90 :             foreach(l, query->targetList)
    1143             :             {
    1144          78 :                 TargetEntry *tle = (TargetEntry *) lfirst(l);
    1145             :                 SortGroupClause *sgc;
    1146             : 
    1147          78 :                 if (tle->resjunk)
    1148           0 :                     continue;   /* ignore resjunk columns */
    1149             : 
    1150             :                 /* non-resjunk columns should have grouping clauses */
    1151             :                 Assert(lg != NULL);
    1152          78 :                 sgc = (SortGroupClause *) lfirst(lg);
    1153          78 :                 lg = lnext(topop->groupClauses, lg);
    1154             : 
    1155          78 :                 opid = distinct_col_search(tle->resno, colnos, opids);
    1156          78 :                 if (!OidIsValid(opid) ||
    1157          18 :                     !equality_ops_are_compatible(opid, sgc->eqop))
    1158             :                     break;      /* exit early if no match */
    1159             :             }
    1160          72 :             if (l == NULL)      /* had matches for all? */
    1161          12 :                 return true;
    1162             :         }
    1163             :     }
    1164             : 
    1165             :     /*
    1166             :      * XXX Are there any other cases in which we can easily see the result
    1167             :      * must be distinct?
    1168             :      *
    1169             :      * If you do add more smarts to this function, be sure to update
    1170             :      * query_supports_distinctness() to match.
    1171             :      */
    1172             : 
    1173        1636 :     return false;
    1174             : }
    1175             : 
    1176             : /*
    1177             :  * distinct_col_search - subroutine for query_is_distinct_for
    1178             :  *
    1179             :  * If colno is in colnos, return the corresponding element of opids,
    1180             :  * else return InvalidOid.  (Ordinarily colnos would not contain duplicates,
    1181             :  * but if it does, we arbitrarily select the first match.)
    1182             :  */
    1183             : static Oid
    1184         398 : distinct_col_search(int colno, List *colnos, List *opids)
    1185             : {
    1186             :     ListCell   *lc1,
    1187             :                *lc2;
    1188             : 
    1189         634 :     forboth(lc1, colnos, lc2, opids)
    1190             :     {
    1191         426 :         if (colno == lfirst_int(lc1))
    1192         190 :             return lfirst_oid(lc2);
    1193             :     }
    1194         208 :     return InvalidOid;
    1195             : }
    1196             : 
    1197             : 
    1198             : /*
    1199             :  * innerrel_is_unique
    1200             :  *    Check if the innerrel provably contains at most one tuple matching any
    1201             :  *    tuple from the outerrel, based on join clauses in the 'restrictlist'.
    1202             :  *
    1203             :  * We need an actual RelOptInfo for the innerrel, but it's sufficient to
    1204             :  * identify the outerrel by its Relids.  This asymmetry supports use of this
    1205             :  * function before joinrels have been built.  (The caller is expected to
    1206             :  * also supply the joinrelids, just to save recalculating that.)
    1207             :  *
    1208             :  * The proof must be made based only on clauses that will be "joinquals"
    1209             :  * rather than "otherquals" at execution.  For an inner join there's no
    1210             :  * difference; but if the join is outer, we must ignore pushed-down quals,
    1211             :  * as those will become "otherquals".  Note that this means the answer might
    1212             :  * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
    1213             :  * answer without regard to that, callers must take care not to call this
    1214             :  * with jointypes that would be classified differently by IS_OUTER_JOIN().
    1215             :  *
    1216             :  * The actual proof is undertaken by is_innerrel_unique_for(); this function
    1217             :  * is a frontend that is mainly concerned with caching the answers.
    1218             :  * In particular, the force_cache argument allows overriding the internal
    1219             :  * heuristic about whether to cache negative answers; it should be "true"
    1220             :  * if making an inquiry that is not part of the normal bottom-up join search
    1221             :  * sequence.
    1222             :  */
    1223             : bool
    1224      547080 : innerrel_is_unique(PlannerInfo *root,
    1225             :                    Relids joinrelids,
    1226             :                    Relids outerrelids,
    1227             :                    RelOptInfo *innerrel,
    1228             :                    JoinType jointype,
    1229             :                    List *restrictlist,
    1230             :                    bool force_cache)
    1231             : {
    1232      547080 :     return innerrel_is_unique_ext(root, joinrelids, outerrelids, innerrel,
    1233             :                                   jointype, restrictlist, force_cache, NULL);
    1234             : }
    1235             : 
    1236             : /*
    1237             :  * innerrel_is_unique_ext
    1238             :  *    Do the same as innerrel_is_unique(), but also set to '*extra_clauses'
    1239             :  *    additional clauses from a baserestrictinfo list that were used to prove
    1240             :  *    uniqueness.  A non NULL 'extra_clauses' indicates that we're checking
    1241             :  *    for self-join and correspondingly dealing with filtered clauses.
    1242             :  */
    1243             : bool
    1244      552718 : innerrel_is_unique_ext(PlannerInfo *root,
    1245             :                        Relids joinrelids,
    1246             :                        Relids outerrelids,
    1247             :                        RelOptInfo *innerrel,
    1248             :                        JoinType jointype,
    1249             :                        List *restrictlist,
    1250             :                        bool force_cache,
    1251             :                        List **extra_clauses)
    1252             : {
    1253             :     MemoryContext old_context;
    1254             :     ListCell   *lc;
    1255             :     UniqueRelInfo *uniqueRelInfo;
    1256      552718 :     List       *outer_exprs = NIL;
    1257      552718 :     bool        self_join = (extra_clauses != NULL);
    1258             : 
    1259             :     /* Certainly can't prove uniqueness when there are no joinclauses */
    1260      552718 :     if (restrictlist == NIL)
    1261       98636 :         return false;
    1262             : 
    1263             :     /*
    1264             :      * Make a quick check to eliminate cases in which we will surely be unable
    1265             :      * to prove uniqueness of the innerrel.
    1266             :      */
    1267      454082 :     if (!rel_supports_distinctness(root, innerrel))
    1268      220344 :         return false;
    1269             : 
    1270             :     /*
    1271             :      * Query the cache to see if we've managed to prove that innerrel is
    1272             :      * unique for any subset of this outerrel.  For non self-join search, we
    1273             :      * don't need an exact match, as extra outerrels can't make the innerrel
    1274             :      * any less unique (or more formally, the restrictlist for a join to a
    1275             :      * superset outerrel must be a superset of the conditions we successfully
    1276             :      * used before). For self-join search, we require an exact match of
    1277             :      * outerrels, because we need extra clauses to be valid for our case.
    1278             :      * Also, for self-join checking we've filtered the clauses list.  Thus,
    1279             :      * for a self-join search, we can match only the result cached for another
    1280             :      * self-join check.
    1281             :      */
    1282      256240 :     foreach(lc, innerrel->unique_for_rels)
    1283             :     {
    1284       96464 :         uniqueRelInfo = (UniqueRelInfo *) lfirst(lc);
    1285             : 
    1286       96464 :         if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) ||
    1287          74 :             (self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) &&
    1288          56 :              uniqueRelInfo->self_join))
    1289             :         {
    1290       73962 :             if (extra_clauses)
    1291          12 :                 *extra_clauses = uniqueRelInfo->extra_clauses;
    1292       73962 :             return true;        /* Success! */
    1293             :         }
    1294             :     }
    1295             : 
    1296             :     /*
    1297             :      * Conversely, we may have already determined that this outerrel, or some
    1298             :      * superset thereof, cannot prove this innerrel to be unique.
    1299             :      */
    1300      160736 :     foreach(lc, innerrel->non_unique_for_rels)
    1301             :     {
    1302        1278 :         Relids      unique_for_rels = (Relids) lfirst(lc);
    1303             : 
    1304        1278 :         if (bms_is_subset(outerrelids, unique_for_rels))
    1305         318 :             return false;
    1306             :     }
    1307             : 
    1308             :     /* No cached information, so try to make the proof. */
    1309      159458 :     if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
    1310             :                                jointype, restrictlist,
    1311             :                                self_join ? &outer_exprs : NULL))
    1312             :     {
    1313             :         /*
    1314             :          * Cache the positive result for future probes, being sure to keep it
    1315             :          * in the planner_cxt even if we are working in GEQO.
    1316             :          *
    1317             :          * Note: one might consider trying to isolate the minimal subset of
    1318             :          * the outerrels that proved the innerrel unique.  But it's not worth
    1319             :          * the trouble, because the planner builds up joinrels incrementally
    1320             :          * and so we'll see the minimally sufficient outerrels before any
    1321             :          * supersets of them anyway.
    1322             :          */
    1323       93112 :         old_context = MemoryContextSwitchTo(root->planner_cxt);
    1324       93112 :         uniqueRelInfo = makeNode(UniqueRelInfo);
    1325       93112 :         uniqueRelInfo->outerrelids = bms_copy(outerrelids);
    1326       93112 :         uniqueRelInfo->self_join = self_join;
    1327       93112 :         uniqueRelInfo->extra_clauses = outer_exprs;
    1328       93112 :         innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
    1329             :                                             uniqueRelInfo);
    1330       93112 :         MemoryContextSwitchTo(old_context);
    1331             : 
    1332       93112 :         if (extra_clauses)
    1333         694 :             *extra_clauses = outer_exprs;
    1334       93112 :         return true;            /* Success! */
    1335             :     }
    1336             :     else
    1337             :     {
    1338             :         /*
    1339             :          * None of the join conditions for outerrel proved innerrel unique, so
    1340             :          * we can safely reject this outerrel or any subset of it in future
    1341             :          * checks.
    1342             :          *
    1343             :          * However, in normal planning mode, caching this knowledge is totally
    1344             :          * pointless; it won't be queried again, because we build up joinrels
    1345             :          * from smaller to larger.  It is useful in GEQO mode, where the
    1346             :          * knowledge can be carried across successive planning attempts; and
    1347             :          * it's likely to be useful when using join-search plugins, too. Hence
    1348             :          * cache when join_search_private is non-NULL.  (Yeah, that's a hack,
    1349             :          * but it seems reasonable.)
    1350             :          *
    1351             :          * Also, allow callers to override that heuristic and force caching;
    1352             :          * that's useful for reduce_unique_semijoins, which calls here before
    1353             :          * the normal join search starts.
    1354             :          */
    1355       66346 :         if (force_cache || root->join_search_private)
    1356             :         {
    1357        1600 :             old_context = MemoryContextSwitchTo(root->planner_cxt);
    1358        1600 :             innerrel->non_unique_for_rels =
    1359        1600 :                 lappend(innerrel->non_unique_for_rels,
    1360        1600 :                         bms_copy(outerrelids));
    1361        1600 :             MemoryContextSwitchTo(old_context);
    1362             :         }
    1363             : 
    1364       66346 :         return false;
    1365             :     }
    1366             : }
    1367             : 
    1368             : /*
    1369             :  * is_innerrel_unique_for
    1370             :  *    Check if the innerrel provably contains at most one tuple matching any
    1371             :  *    tuple from the outerrel, based on join clauses in the 'restrictlist'.
    1372             :  */
    1373             : static bool
    1374      159458 : is_innerrel_unique_for(PlannerInfo *root,
    1375             :                        Relids joinrelids,
    1376             :                        Relids outerrelids,
    1377             :                        RelOptInfo *innerrel,
    1378             :                        JoinType jointype,
    1379             :                        List *restrictlist,
    1380             :                        List **extra_clauses)
    1381             : {
    1382      159458 :     List       *clause_list = NIL;
    1383             :     ListCell   *lc;
    1384             : 
    1385             :     /*
    1386             :      * Search for mergejoinable clauses that constrain the inner rel against
    1387             :      * the outer rel.  If an operator is mergejoinable then it behaves like
    1388             :      * equality for some btree opclass, so it's what we want.  The
    1389             :      * mergejoinability test also eliminates clauses containing volatile
    1390             :      * functions, which we couldn't depend on.
    1391             :      */
    1392      348732 :     foreach(lc, restrictlist)
    1393             :     {
    1394      189274 :         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
    1395             : 
    1396             :         /*
    1397             :          * As noted above, if it's a pushed-down clause and we're at an outer
    1398             :          * join, we can't use it.
    1399             :          */
    1400      189274 :         if (IS_OUTER_JOIN(jointype) &&
    1401       94314 :             RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
    1402        3358 :             continue;
    1403             : 
    1404             :         /* Ignore if it's not a mergejoinable clause */
    1405      185916 :         if (!restrictinfo->can_join ||
    1406      171934 :             restrictinfo->mergeopfamilies == NIL)
    1407       16008 :             continue;           /* not mergejoinable */
    1408             : 
    1409             :         /*
    1410             :          * Check if the clause has the form "outer op inner" or "inner op
    1411             :          * outer", and if so mark which side is inner.
    1412             :          */
    1413      169908 :         if (!clause_sides_match_join(restrictinfo, outerrelids,
    1414             :                                      innerrel->relids))
    1415          14 :             continue;           /* no good for these input relations */
    1416             : 
    1417             :         /* OK, add to the list */
    1418      169894 :         clause_list = lappend(clause_list, restrictinfo);
    1419             :     }
    1420             : 
    1421             :     /* Let rel_is_distinct_for() do the hard work */
    1422      159458 :     return rel_is_distinct_for(root, innerrel, clause_list, extra_clauses);
    1423             : }
    1424             : 
    1425             : /*
    1426             :  * replace_varno - find in the given tree any Vars, PlaceHolderVar, and Relids
    1427             :  * that reference the removing relid, and change them to the reference to
    1428             :  * the replacement relid.
    1429             :  *
    1430             :  * NOTE: although this has the form of a walker, we cheat and modify the
    1431             :  * nodes in-place.
    1432             :  */
    1433             : 
    1434             : typedef struct
    1435             : {
    1436             :     int         from;
    1437             :     int         to;
    1438             :     int         sublevels_up;
    1439             : } ReplaceVarnoContext;
    1440             : 
    1441             : static bool
    1442       61084 : replace_varno_walker(Node *node, ReplaceVarnoContext *ctx)
    1443             : {
    1444       61084 :     if (node == NULL)
    1445       18160 :         return false;
    1446       42924 :     if (IsA(node, Var))
    1447             :     {
    1448       19960 :         Var        *var = (Var *) node;
    1449             : 
    1450       19960 :         if (var->varno == ctx->from &&
    1451        8284 :             var->varlevelsup == ctx->sublevels_up)
    1452             :         {
    1453        8272 :             var->varno = ctx->to;
    1454        8272 :             var->varnosyn = ctx->to;
    1455             :         }
    1456       19960 :         return false;
    1457             :     }
    1458       22964 :     else if (IsA(node, PlaceHolderVar))
    1459             :     {
    1460          66 :         PlaceHolderVar *phv = (PlaceHolderVar *) node;
    1461             : 
    1462          66 :         if (phv->phlevelsup == ctx->sublevels_up)
    1463             :         {
    1464          66 :             phv->phrels =
    1465          66 :                 replace_relid(phv->phrels, ctx->from, ctx->to);
    1466          66 :             phv->phnullingrels =
    1467          66 :                 replace_relid(phv->phnullingrels, ctx->from, ctx->to);
    1468             :         }
    1469             : 
    1470             :         /* fall through to recurse into the placeholder's expression */
    1471             :     }
    1472       22898 :     else if (IsA(node, Query))
    1473             :     {
    1474             :         /* Recurse into subselects */
    1475             :         bool        result;
    1476             : 
    1477          30 :         ctx->sublevels_up++;
    1478          30 :         result = query_tree_walker((Query *) node,
    1479             :                                    replace_varno_walker,
    1480             :                                    (void *) ctx,
    1481             :                                    QTW_EXAMINE_SORTGROUP);
    1482          30 :         ctx->sublevels_up--;
    1483          30 :         return result;
    1484             :     }
    1485       22868 :     else if (IsA(node, RestrictInfo))
    1486             :     {
    1487        1852 :         RestrictInfo *rinfo = (RestrictInfo *) node;
    1488        1852 :         int         relid = -1;
    1489        1852 :         bool        is_req_equal =
    1490        1852 :             (rinfo->required_relids == rinfo->clause_relids);
    1491             : 
    1492        1852 :         if (bms_is_member(ctx->from, rinfo->clause_relids))
    1493             :         {
    1494        1798 :             replace_varno((Node *) rinfo->clause, ctx->from, ctx->to);
    1495        1798 :             replace_varno((Node *) rinfo->orclause, ctx->from, ctx->to);
    1496        1798 :             rinfo->clause_relids =
    1497        1798 :                 replace_relid(rinfo->clause_relids, ctx->from, ctx->to);
    1498        1798 :             rinfo->left_relids =
    1499        1798 :                 replace_relid(rinfo->left_relids, ctx->from, ctx->to);
    1500        1798 :             rinfo->right_relids =
    1501        1798 :                 replace_relid(rinfo->right_relids, ctx->from, ctx->to);
    1502             :         }
    1503             : 
    1504        1852 :         if (is_req_equal)
    1505          12 :             rinfo->required_relids = rinfo->clause_relids;
    1506             :         else
    1507        1840 :             rinfo->required_relids =
    1508        1840 :                 replace_relid(rinfo->required_relids, ctx->from, ctx->to);
    1509             : 
    1510        1852 :         rinfo->outer_relids =
    1511        1852 :             replace_relid(rinfo->outer_relids, ctx->from, ctx->to);
    1512        1852 :         rinfo->incompatible_relids =
    1513        1852 :             replace_relid(rinfo->incompatible_relids, ctx->from, ctx->to);
    1514             : 
    1515        3518 :         if (rinfo->mergeopfamilies &&
    1516        1666 :             bms_get_singleton_member(rinfo->clause_relids, &relid) &&
    1517        1550 :             relid == ctx->to && IsA(rinfo->clause, OpExpr))
    1518             :         {
    1519             :             Expr       *leftOp;
    1520             :             Expr       *rightOp;
    1521             : 
    1522        1550 :             leftOp = (Expr *) get_leftop(rinfo->clause);
    1523        1550 :             rightOp = (Expr *) get_rightop(rinfo->clause);
    1524             : 
    1525        1550 :             if (leftOp != NULL && equal(leftOp, rightOp))
    1526             :             {
    1527        1242 :                 NullTest   *ntest = makeNode(NullTest);
    1528             : 
    1529        1242 :                 ntest->arg = leftOp;
    1530        1242 :                 ntest->nulltesttype = IS_NOT_NULL;
    1531        1242 :                 ntest->argisrow = false;
    1532        1242 :                 ntest->location = -1;
    1533        1242 :                 rinfo->clause = (Expr *) ntest;
    1534        1242 :                 rinfo->mergeopfamilies = NIL;
    1535             :             }
    1536             :             Assert(rinfo->orclause == NULL);
    1537             :         }
    1538             : 
    1539        1852 :         return false;
    1540             :     }
    1541             : 
    1542       21082 :     return expression_tree_walker(node, replace_varno_walker,
    1543             :                                   (void *) ctx);
    1544             : }
    1545             : 
    1546             : static void
    1547       41800 : replace_varno(Node *node, int from, int to)
    1548             : {
    1549             :     ReplaceVarnoContext ctx;
    1550             : 
    1551       41800 :     if (to <= 0)
    1552       28426 :         return;
    1553             : 
    1554       13374 :     ctx.from = from;
    1555       13374 :     ctx.to = to;
    1556       13374 :     ctx.sublevels_up = 0;
    1557             : 
    1558             :     /*
    1559             :      * Must be prepared to start with a Query or a bare expression tree.
    1560             :      */
    1561       13374 :     query_or_expression_tree_walker(node,
    1562             :                                     replace_varno_walker,
    1563             :                                     (void *) &ctx,
    1564             :                                     QTW_EXAMINE_SORTGROUP);
    1565             : }
    1566             : 
    1567             : /*
    1568             :  * Substitute newId by oldId in relids.
    1569             :  *
    1570             :  * We must make a copy of the original Bitmapset before making any
    1571             :  * modifications, because the same pointer to it might be shared among
    1572             :  * different places.
    1573             :  */
    1574             : static Bitmapset *
    1575      944992 : replace_relid(Relids relids, int oldId, int newId)
    1576             : {
    1577      944992 :     if (oldId < 0)
    1578       12066 :         return relids;
    1579             : 
    1580             :     /* Delete relid without substitution. */
    1581      932926 :     if (newId < 0)
    1582      898506 :         return bms_del_member(bms_copy(relids), oldId);
    1583             : 
    1584             :     /* Substitute newId for oldId. */
    1585       34420 :     if (bms_is_member(oldId, relids))
    1586       10928 :         return bms_add_member(bms_del_member(bms_copy(relids), oldId), newId);
    1587             : 
    1588       23492 :     return relids;
    1589             : }
    1590             : 
    1591             : /*
    1592             :  * Update EC members to point to the remaining relation instead of the removed
    1593             :  * one, removing duplicates.
    1594             :  *
    1595             :  * Restriction clauses for base relations are already distributed to
    1596             :  * the respective baserestrictinfo lists (see
    1597             :  * generate_implied_equalities_for_column). The above code has already processed
    1598             :  * this list, and updated these clauses to reference the remaining
    1599             :  * relation, so we can skip them here based on their relids.
    1600             :  *
    1601             :  * Likewise, we have already processed the join clauses that join the
    1602             :  * removed relation to the remaining one.
    1603             :  *
    1604             :  * Finally, there are join clauses that join the removed relation to
    1605             :  * some third relation. We can't just delete the source clauses and
    1606             :  * regenerate them from the EC because the corresponding equality
    1607             :  * operators might be missing (see the handling of ec_broken).
    1608             :  * Therefore, we will update the references in the source clauses.
    1609             :  *
    1610             :  * Derived clauses can be generated again, so it is simpler to just
    1611             :  * delete them.
    1612             :  */
    1613             : static void
    1614         932 : update_eclasses(EquivalenceClass *ec, int from, int to)
    1615             : {
    1616         932 :     List       *new_members = NIL;
    1617         932 :     List       *new_sources = NIL;
    1618             :     ListCell   *lc;
    1619             :     ListCell   *lc1;
    1620             : 
    1621        2894 :     foreach(lc, ec->ec_members)
    1622             :     {
    1623        1962 :         EquivalenceMember *em = lfirst_node(EquivalenceMember, lc);
    1624        1962 :         bool        is_redundant = false;
    1625             : 
    1626        1962 :         if (!bms_is_member(from, em->em_relids))
    1627             :         {
    1628        1012 :             new_members = lappend(new_members, em);
    1629        1012 :             continue;
    1630             :         }
    1631             : 
    1632         950 :         em->em_relids = replace_relid(em->em_relids, from, to);
    1633         950 :         em->em_jdomain->jd_relids = replace_relid(em->em_jdomain->jd_relids, from, to);
    1634             : 
    1635             :         /* We only process inner joins */
    1636         950 :         replace_varno((Node *) em->em_expr, from, to);
    1637             : 
    1638         974 :         foreach(lc1, new_members)
    1639             :         {
    1640         324 :             EquivalenceMember *other = lfirst_node(EquivalenceMember, lc1);
    1641             : 
    1642         324 :             if (!equal(em->em_relids, other->em_relids))
    1643          24 :                 continue;
    1644             : 
    1645         300 :             if (equal(em->em_expr, other->em_expr))
    1646             :             {
    1647         300 :                 is_redundant = true;
    1648         300 :                 break;
    1649             :             }
    1650             :         }
    1651             : 
    1652         950 :         if (!is_redundant)
    1653         650 :             new_members = lappend(new_members, em);
    1654             :     }
    1655             : 
    1656         932 :     list_free(ec->ec_members);
    1657         932 :     ec->ec_members = new_members;
    1658             : 
    1659         932 :     list_free(ec->ec_derives);
    1660         932 :     ec->ec_derives = NULL;
    1661             : 
    1662             :     /* Update EC source expressions */
    1663        1962 :     foreach(lc, ec->ec_sources)
    1664             :     {
    1665        1030 :         RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
    1666        1030 :         bool        is_redundant = false;
    1667             : 
    1668        1030 :         if (!bms_is_member(from, rinfo->required_relids))
    1669             :         {
    1670         144 :             new_sources = lappend(new_sources, rinfo);
    1671         144 :             continue;
    1672             :         }
    1673             : 
    1674         886 :         replace_varno((Node *) rinfo, from, to);
    1675             : 
    1676             :         /*
    1677             :          * After switching the clause to the remaining relation, check it for
    1678             :          * redundancy with existing ones. We don't have to check for
    1679             :          * redundancy with derived clauses, because we've just deleted them.
    1680             :          */
    1681         952 :         foreach(lc1, new_sources)
    1682             :         {
    1683         104 :             RestrictInfo *other = lfirst_node(RestrictInfo, lc1);
    1684             : 
    1685         104 :             if (!equal(rinfo->clause_relids, other->clause_relids))
    1686          24 :                 continue;
    1687             : 
    1688          80 :             if (equal(rinfo->clause, other->clause))
    1689             :             {
    1690          38 :                 is_redundant = true;
    1691          38 :                 break;
    1692             :             }
    1693             :         }
    1694             : 
    1695         886 :         if (!is_redundant)
    1696         848 :             new_sources = lappend(new_sources, rinfo);
    1697             :     }
    1698             : 
    1699         932 :     list_free(ec->ec_sources);
    1700         932 :     ec->ec_sources = new_sources;
    1701         932 :     ec->ec_relids = replace_relid(ec->ec_relids, from, to);
    1702         932 : }
    1703             : 
    1704             : /*
    1705             :  * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field,
    1706             :  * which makes almost every RestrictInfo unique.  This type of comparison is
    1707             :  * useful when removing duplicates while moving RestrictInfo's from removed
    1708             :  * relation to remaining relation during self-join elimination.
    1709             :  *
    1710             :  * XXX: In the future, we might remove the 'rinfo_serial' field completely and
    1711             :  * get rid of this function.
    1712             :  */
    1713             : static bool
    1714         556 : restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b)
    1715             : {
    1716         556 :     int         saved_rinfo_serial = a->rinfo_serial;
    1717             :     bool        result;
    1718             : 
    1719         556 :     a->rinfo_serial = b->rinfo_serial;
    1720         556 :     result = equal(a, b);
    1721         556 :     a->rinfo_serial = saved_rinfo_serial;
    1722             : 
    1723         556 :     return result;
    1724             : }
    1725             : 
    1726             : /*
    1727             :  * Remove a relation after we have proven that it participates only in an
    1728             :  * unneeded unique self join.
    1729             :  *
    1730             :  * Replace any links in planner info structures.
    1731             :  *
    1732             :  * Transfer join and restriction clauses from the removed relation to the
    1733             :  * remaining one. We change the Vars of the clause to point to the
    1734             :  * remaining relation instead of the removed one. The clauses that require
    1735             :  * a subset of joinrelids become restriction clauses of the remaining
    1736             :  * relation, and others remain join clauses. We append them to
    1737             :  * baserestrictinfo and joininfo respectively, trying not to introduce
    1738             :  * duplicates.
    1739             :  *
    1740             :  * We also have to process the 'joinclauses' list here, because it
    1741             :  * contains EC-derived join clauses which must become filter clauses. It
    1742             :  * is not enough to just correct the ECs because the EC-derived
    1743             :  * restrictions are generated before join removal (see
    1744             :  * generate_base_implied_equalities).
    1745             :  */
    1746             : static void
    1747         626 : remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark,
    1748             :                      RelOptInfo *toKeep, RelOptInfo *toRemove,
    1749             :                      List *restrictlist)
    1750             : {
    1751             :     List       *joininfos;
    1752             :     ListCell   *lc;
    1753             :     int         i;
    1754         626 :     List       *jinfo_candidates = NIL;
    1755         626 :     List       *binfo_candidates = NIL;
    1756             : 
    1757             :     Assert(toKeep->relid != -1);
    1758             : 
    1759             :     /*
    1760             :      * Replace the index of the removing table with the keeping one. The
    1761             :      * technique of removing/distributing restrictinfo is used here to attach
    1762             :      * just appeared (for keeping relation) join clauses and avoid adding
    1763             :      * duplicates of those that already exist in the joininfo list.
    1764             :      */
    1765         626 :     joininfos = list_copy(toRemove->joininfo);
    1766         716 :     foreach(lc, joininfos)
    1767             :     {
    1768          90 :         RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
    1769             : 
    1770          90 :         remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
    1771          90 :         replace_varno((Node *) rinfo, toRemove->relid, toKeep->relid);
    1772             : 
    1773          90 :         if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
    1774          60 :             jinfo_candidates = lappend(jinfo_candidates, rinfo);
    1775             :         else
    1776          30 :             binfo_candidates = lappend(binfo_candidates, rinfo);
    1777             :     }
    1778             : 
    1779             :     /*
    1780             :      * Concatenate restrictlist to the list of base restrictions of the
    1781             :      * removing table just to simplify the replacement procedure: all of them
    1782             :      * weren't connected to any keeping relations and need to be added to some
    1783             :      * rels.
    1784             :      */
    1785         626 :     toRemove->baserestrictinfo = list_concat(toRemove->baserestrictinfo,
    1786             :                                              restrictlist);
    1787        1490 :     foreach(lc, toRemove->baserestrictinfo)
    1788             :     {
    1789         864 :         RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
    1790             : 
    1791         864 :         replace_varno((Node *) rinfo, toRemove->relid, toKeep->relid);
    1792             : 
    1793         864 :         if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
    1794           0 :             jinfo_candidates = lappend(jinfo_candidates, rinfo);
    1795             :         else
    1796         864 :             binfo_candidates = lappend(binfo_candidates, rinfo);
    1797             :     }
    1798             : 
    1799             :     /*
    1800             :      * Now, add all non-redundant clauses to the keeping relation.
    1801             :      * Contradictory operation. On the one side, we reduce the length of
    1802             :      * restrict lists that can impact planning or executing time.
    1803             :      * Additionally, we improve the accuracy of cardinality estimation. On the
    1804             :      * other side, it is one more place that can make planning time much
    1805             :      * longer in specific cases. It would have been better to avoid calling
    1806             :      * the equal() function here, but it's the only way to detect duplicated
    1807             :      * inequality expressions.
    1808             :      */
    1809        1520 :     foreach(lc, binfo_candidates)
    1810             :     {
    1811         894 :         RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
    1812         894 :         ListCell   *olc = NULL;
    1813         894 :         bool        is_redundant = false;
    1814             : 
    1815             :         Assert(!bms_is_member(toRemove->relid, rinfo->required_relids));
    1816             : 
    1817        1258 :         foreach(olc, toKeep->baserestrictinfo)
    1818             :         {
    1819         562 :             RestrictInfo *src = lfirst_node(RestrictInfo, olc);
    1820             : 
    1821         562 :             if (!bms_equal(src->clause_relids, rinfo->clause_relids))
    1822             :                 /* Const and non-const expressions can't be equal */
    1823          18 :                 continue;
    1824             : 
    1825         544 :             if (src == rinfo ||
    1826         544 :                 (rinfo->parent_ec != NULL
    1827         304 :                  && src->parent_ec == rinfo->parent_ec)
    1828         538 :                 || restrict_infos_logically_equal(rinfo, src))
    1829             :             {
    1830         198 :                 is_redundant = true;
    1831         198 :                 break;
    1832             :             }
    1833             :         }
    1834         894 :         if (!is_redundant)
    1835         696 :             distribute_restrictinfo_to_rels(root, rinfo);
    1836             :     }
    1837         686 :     foreach(lc, jinfo_candidates)
    1838             :     {
    1839          60 :         RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
    1840          60 :         ListCell   *olc = NULL;
    1841          60 :         bool        is_redundant = false;
    1842             : 
    1843             :         Assert(!bms_is_member(toRemove->relid, rinfo->required_relids));
    1844             : 
    1845          78 :         foreach(olc, toKeep->joininfo)
    1846             :         {
    1847          18 :             RestrictInfo *src = lfirst_node(RestrictInfo, olc);
    1848             : 
    1849          18 :             if (!bms_equal(src->clause_relids, rinfo->clause_relids))
    1850             :                 /* Can't compare trivially different clauses */
    1851           0 :                 continue;
    1852             : 
    1853          18 :             if (src == rinfo ||
    1854          18 :                 (rinfo->parent_ec != NULL
    1855           0 :                  && src->parent_ec == rinfo->parent_ec)
    1856          18 :                 || restrict_infos_logically_equal(rinfo, src))
    1857             :             {
    1858           0 :                 is_redundant = true;
    1859           0 :                 break;
    1860             :             }
    1861             :         }
    1862          60 :         if (!is_redundant)
    1863          60 :             distribute_restrictinfo_to_rels(root, rinfo);
    1864             :     }
    1865         626 :     list_free(binfo_candidates);
    1866         626 :     list_free(jinfo_candidates);
    1867             : 
    1868             :     /*
    1869             :      * Arrange equivalence classes, mentioned removing a table, with the
    1870             :      * keeping one: varno of removing table should be replaced in members and
    1871             :      * sources lists. Also, remove duplicated elements if this replacement
    1872             :      * procedure created them.
    1873             :      */
    1874         626 :     i = -1;
    1875        1558 :     while ((i = bms_next_member(toRemove->eclass_indexes, i)) >= 0)
    1876             :     {
    1877         932 :         EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
    1878             : 
    1879         932 :         update_eclasses(ec, toRemove->relid, toKeep->relid);
    1880         932 :         toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i);
    1881             :     }
    1882             : 
    1883             :     /*
    1884             :      * Transfer the targetlist and attr_needed flags.
    1885             :      */
    1886             : 
    1887        2484 :     foreach(lc, toRemove->reltarget->exprs)
    1888             :     {
    1889        1858 :         Node       *node = lfirst(lc);
    1890             : 
    1891        1858 :         replace_varno(node, toRemove->relid, toKeep->relid);
    1892        1858 :         if (!list_member(toKeep->reltarget->exprs, node))
    1893         226 :             toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node);
    1894             :     }
    1895             : 
    1896        8186 :     for (i = toKeep->min_attr; i <= toKeep->max_attr; i++)
    1897             :     {
    1898        7560 :         int         attno = i - toKeep->min_attr;
    1899             : 
    1900       15120 :         toRemove->attr_needed[attno] = replace_relid(toRemove->attr_needed[attno],
    1901        7560 :                                                      toRemove->relid, toKeep->relid);
    1902        7560 :         toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno],
    1903        7560 :                                                      toRemove->attr_needed[attno]);
    1904             :     }
    1905             : 
    1906             :     /*
    1907             :      * If the removed relation has a row mark, transfer it to the remaining
    1908             :      * one.
    1909             :      *
    1910             :      * If both rels have row marks, just keep the one corresponding to the
    1911             :      * remaining relation, because we verified earlier that they have the same
    1912             :      * strength.
    1913             :      */
    1914         626 :     if (rmark)
    1915             :     {
    1916          80 :         if (kmark)
    1917             :         {
    1918             :             Assert(kmark->markType == rmark->markType);
    1919             : 
    1920          80 :             root->rowMarks = list_delete_ptr(root->rowMarks, rmark);
    1921             :         }
    1922             :         else
    1923             :         {
    1924             :             /* Shouldn't have inheritance children here. */
    1925             :             Assert(rmark->rti == rmark->prti);
    1926             : 
    1927           0 :             rmark->rti = rmark->prti = toKeep->relid;
    1928             :         }
    1929             :     }
    1930             : 
    1931             :     /* Replace varno in all the query structures */
    1932         626 :     replace_varno((Node *) root->parse, toRemove->relid, toKeep->relid);
    1933             : 
    1934             :     /* See remove_self_joins_one_group() */
    1935             :     Assert(root->parse->resultRelation != toRemove->relid);
    1936             :     Assert(root->parse->resultRelation != toKeep->relid);
    1937             : 
    1938             :     /* Replace links in the planner info */
    1939         626 :     remove_rel_from_query(root, toRemove, toKeep->relid, NULL, NULL);
    1940             : 
    1941             :     /* At last, replace varno in root targetlist and HAVING clause */
    1942         626 :     replace_varno((Node *) root->processed_tlist,
    1943         626 :                   toRemove->relid, toKeep->relid);
    1944         626 :     replace_varno((Node *) root->processed_groupClause,
    1945         626 :                   toRemove->relid, toKeep->relid);
    1946         626 :     replace_relid(root->all_result_relids, toRemove->relid, toKeep->relid);
    1947         626 :     replace_relid(root->leaf_result_relids, toRemove->relid, toKeep->relid);
    1948             : 
    1949             : 
    1950             :     /*
    1951             :      * There may be references to the rel in root->fkey_list, but if so,
    1952             :      * match_foreign_keys_to_quals() will get rid of them.
    1953             :      */
    1954             : 
    1955             :     /*
    1956             :      * Finally, remove the rel from the baserel array to prevent it from being
    1957             :      * referenced again.  (We can't do this earlier because
    1958             :      * remove_join_clause_from_rels will touch it.)
    1959             :      */
    1960         626 :     root->simple_rel_array[toRemove->relid] = NULL;
    1961             : 
    1962             :     /* And nuke the RelOptInfo, just in case there's another access path */
    1963         626 :     pfree(toRemove);
    1964         626 : }
    1965             : 
    1966             : /*
    1967             :  * split_selfjoin_quals
    1968             :  *      Processes 'joinquals' by building two lists: one containing the quals
    1969             :  *      where the columns/exprs are on either side of the join match, and
    1970             :  *      another one containing the remaining quals.
    1971             :  *
    1972             :  * 'joinquals' must only contain quals for a RTE_RELATION being joined to
    1973             :  * itself.
    1974             :  */
    1975             : static void
    1976        5638 : split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals,
    1977             :                      List **otherjoinquals, int from, int to)
    1978             : {
    1979             :     ListCell   *lc;
    1980        5638 :     List       *sjoinquals = NIL;
    1981        5638 :     List       *ojoinquals = NIL;
    1982             : 
    1983        7646 :     foreach(lc, joinquals)
    1984             :     {
    1985        2008 :         RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
    1986             :         OpExpr     *expr;
    1987             :         Node       *leftexpr;
    1988             :         Node       *rightexpr;
    1989             : 
    1990             :         /* In general, clause looks like F(arg1) = G(arg2) */
    1991        4016 :         if (!rinfo->mergeopfamilies ||
    1992        4016 :             bms_num_members(rinfo->clause_relids) != 2 ||
    1993        4016 :             bms_membership(rinfo->left_relids) != BMS_SINGLETON ||
    1994        2008 :             bms_membership(rinfo->right_relids) != BMS_SINGLETON)
    1995             :         {
    1996           0 :             ojoinquals = lappend(ojoinquals, rinfo);
    1997           0 :             continue;
    1998             :         }
    1999             : 
    2000        2008 :         expr = (OpExpr *) rinfo->clause;
    2001             : 
    2002        2008 :         if (!IsA(expr, OpExpr) || list_length(expr->args) != 2)
    2003             :         {
    2004           0 :             ojoinquals = lappend(ojoinquals, rinfo);
    2005           0 :             continue;
    2006             :         }
    2007             : 
    2008        2008 :         leftexpr = get_leftop(rinfo->clause);
    2009        2008 :         rightexpr = copyObject(get_rightop(rinfo->clause));
    2010             : 
    2011        2008 :         if (leftexpr && IsA(leftexpr, RelabelType))
    2012          12 :             leftexpr = (Node *) ((RelabelType *) leftexpr)->arg;
    2013        2008 :         if (rightexpr && IsA(rightexpr, RelabelType))
    2014           6 :             rightexpr = (Node *) ((RelabelType *) rightexpr)->arg;
    2015             : 
    2016             :         /*
    2017             :          * Quite an expensive operation, narrowing the use case. For example,
    2018             :          * when we have cast of the same var to different (but compatible)
    2019             :          * types.
    2020             :          */
    2021        2008 :         replace_varno(rightexpr, bms_singleton_member(rinfo->right_relids),
    2022        2008 :                       bms_singleton_member(rinfo->left_relids));
    2023             : 
    2024        2008 :         if (equal(leftexpr, rightexpr))
    2025        1546 :             sjoinquals = lappend(sjoinquals, rinfo);
    2026             :         else
    2027         462 :             ojoinquals = lappend(ojoinquals, rinfo);
    2028             :     }
    2029             : 
    2030        5638 :     *selfjoinquals = sjoinquals;
    2031        5638 :     *otherjoinquals = ojoinquals;
    2032        5638 : }
    2033             : 
    2034             : /*
    2035             :  * Check for a case when uniqueness is at least partly derived from a
    2036             :  * baserestrictinfo clause. In this case, we have a chance to return only
    2037             :  * one row (if such clauses on both sides of SJ are equal) or nothing (if they
    2038             :  * are different).
    2039             :  */
    2040             : static bool
    2041         706 : match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses,
    2042             :                      Index relid)
    2043             : {
    2044             :     ListCell   *lc;
    2045             : 
    2046         834 :     foreach(lc, uclauses)
    2047             :     {
    2048         208 :         RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
    2049             :         Expr       *clause;
    2050             :         Node       *iclause;
    2051             :         Node       *c1;
    2052         208 :         bool        matched = false;
    2053             :         ListCell   *olc;
    2054             : 
    2055             :         Assert(outer->relid > 0 && relid > 0);
    2056             : 
    2057             :         /* Only filters like f(R.x1,...,R.xN) == expr we should consider. */
    2058             :         Assert(bms_is_empty(rinfo->left_relids) ^
    2059             :                bms_is_empty(rinfo->right_relids));
    2060             : 
    2061         208 :         clause = (Expr *) copyObject(rinfo->clause);
    2062         208 :         replace_varno((Node *) clause, relid, outer->relid);
    2063             : 
    2064         208 :         iclause = bms_is_empty(rinfo->left_relids) ? get_rightop(clause) :
    2065         202 :             get_leftop(clause);
    2066         208 :         c1 = bms_is_empty(rinfo->left_relids) ? get_leftop(clause) :
    2067         202 :             get_rightop(clause);
    2068             : 
    2069             :         /*
    2070             :          * Compare these left and right sides with the corresponding sides of
    2071             :          * the outer's filters. If no one is detected - return immediately.
    2072             :          */
    2073         318 :         foreach(olc, outer->baserestrictinfo)
    2074             :         {
    2075         238 :             RestrictInfo *orinfo = lfirst_node(RestrictInfo, olc);
    2076             :             Node       *oclause;
    2077             :             Node       *c2;
    2078             : 
    2079         238 :             if (orinfo->mergeopfamilies == NIL)
    2080             :                 /* Don't consider clauses which aren't similar to 'F(X)=G(Y)' */
    2081          60 :                 continue;
    2082             : 
    2083             :             Assert(is_opclause(orinfo->clause));
    2084             : 
    2085         356 :             oclause = bms_is_empty(orinfo->left_relids) ?
    2086         178 :                 get_rightop(orinfo->clause) : get_leftop(orinfo->clause);
    2087         356 :             c2 = (bms_is_empty(orinfo->left_relids) ?
    2088         178 :                   get_leftop(orinfo->clause) : get_rightop(orinfo->clause));
    2089             : 
    2090         178 :             if (equal(iclause, oclause) && equal(c1, c2))
    2091             :             {
    2092         128 :                 matched = true;
    2093         128 :                 break;
    2094             :             }
    2095             :         }
    2096             : 
    2097         208 :         if (!matched)
    2098          80 :             return false;
    2099             :     }
    2100             : 
    2101         626 :     return true;
    2102             : }
    2103             : 
    2104             : /*
    2105             :  * Find and remove unique self joins in a group of base relations that have
    2106             :  * the same Oid.
    2107             :  *
    2108             :  * Returns a set of relids that were removed.
    2109             :  */
    2110             : static Relids
    2111       12614 : remove_self_joins_one_group(PlannerInfo *root, Relids relids)
    2112             : {
    2113       12614 :     Relids      result = NULL;
    2114             :     int         k;              /* Index of kept relation */
    2115       12614 :     int         r = -1;         /* Index of removed relation */
    2116             : 
    2117       41062 :     while ((r = bms_next_member(relids, r)) > 0)
    2118             :     {
    2119       28448 :         RelOptInfo *inner = root->simple_rel_array[r];
    2120             : 
    2121             :         /*
    2122             :          * We don't accept result relation as either source or target relation
    2123             :          * of SJE, because result relation has different behavior in
    2124             :          * EvalPlanQual() and RETURNING clause.
    2125             :          */
    2126       28448 :         if (root->parse->resultRelation == r)
    2127         252 :             continue;
    2128             : 
    2129       28196 :         k = r;
    2130             : 
    2131       46564 :         while ((k = bms_next_member(relids, k)) > 0)
    2132             :         {
    2133       18994 :             Relids      joinrelids = NULL;
    2134       18994 :             RelOptInfo *outer = root->simple_rel_array[k];
    2135             :             List       *restrictlist;
    2136             :             List       *selfjoinquals;
    2137             :             List       *otherjoinquals;
    2138             :             ListCell   *lc;
    2139       18994 :             bool        jinfo_check = true;
    2140       18994 :             PlanRowMark *omark = NULL;
    2141       18994 :             PlanRowMark *imark = NULL;
    2142       18994 :             List       *uclauses = NIL;
    2143             : 
    2144       18994 :             if (root->parse->resultRelation == k)
    2145       18368 :                 continue;
    2146             : 
    2147             :             /* A sanity check: the relations have the same Oid. */
    2148             :             Assert(root->simple_rte_array[k]->relid ==
    2149             :                    root->simple_rte_array[r]->relid);
    2150             : 
    2151             :             /*
    2152             :              * It is impossible to eliminate join of two relations if they
    2153             :              * belong to different rules of order. Otherwise planner can't be
    2154             :              * able to find any variants of correct query plan.
    2155             :              */
    2156       24336 :             foreach(lc, root->join_info_list)
    2157             :             {
    2158       18646 :                 SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc);
    2159             : 
    2160       37292 :                 if ((bms_is_member(k, info->syn_lefthand) ^
    2161       27590 :                      bms_is_member(r, info->syn_lefthand)) ||
    2162        8944 :                     (bms_is_member(k, info->syn_righthand) ^
    2163        8944 :                      bms_is_member(r, info->syn_righthand)))
    2164             :                 {
    2165       13286 :                     jinfo_check = false;
    2166       13286 :                     break;
    2167             :                 }
    2168             :             }
    2169       18976 :             if (!jinfo_check)
    2170       13286 :                 continue;
    2171             : 
    2172             :             /*
    2173             :              * Check Row Marks equivalence.  We can't remove the join if the
    2174             :              * relations have row marks of different strength (e.g. one is
    2175             :              * locked FOR UPDATE and another just has ROW_MARK_REFERENCE for
    2176             :              * EvalPlanQual rechecking).
    2177             :              */
    2178        5874 :             foreach(lc, root->rowMarks)
    2179             :             {
    2180         344 :                 PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc);
    2181             : 
    2182         344 :                 if (rowMark->rti == k)
    2183             :                 {
    2184             :                     Assert(imark == NULL);
    2185         160 :                     imark = rowMark;
    2186             :                 }
    2187         184 :                 else if (rowMark->rti == r)
    2188             :                 {
    2189             :                     Assert(omark == NULL);
    2190         160 :                     omark = rowMark;
    2191             :                 }
    2192             : 
    2193         344 :                 if (omark && imark)
    2194         160 :                     break;
    2195             :             }
    2196        5690 :             if (omark && imark && omark->markType != imark->markType)
    2197          52 :                 continue;
    2198             : 
    2199             :             /*
    2200             :              * We only deal with base rels here, so their relids bitset
    2201             :              * contains only one member -- their relid.
    2202             :              */
    2203        5638 :             joinrelids = bms_add_member(joinrelids, r);
    2204        5638 :             joinrelids = bms_add_member(joinrelids, k);
    2205             : 
    2206             :             /*
    2207             :              * PHVs should not impose any constraints on removing self joins.
    2208             :              */
    2209             : 
    2210             :             /*
    2211             :              * At this stage, joininfo lists of inner and outer can contain
    2212             :              * only clauses, required for a superior outer join that can't
    2213             :              * influence this optimization. So, we can avoid to call the
    2214             :              * build_joinrel_restrictlist() routine.
    2215             :              */
    2216        5638 :             restrictlist = generate_join_implied_equalities(root, joinrelids,
    2217             :                                                             inner->relids,
    2218             :                                                             outer, NULL);
    2219             : 
    2220             :             /*
    2221             :              * Process restrictlist to separate the self join quals out of the
    2222             :              * other quals. e.g x = x goes to selfjoinquals and a = b to
    2223             :              * otherjoinquals.
    2224             :              */
    2225        5638 :             split_selfjoin_quals(root, restrictlist, &selfjoinquals,
    2226        5638 :                                  &otherjoinquals, inner->relid, outer->relid);
    2227             : 
    2228             :             /*
    2229             :              * To enable SJE for the only degenerate case without any self
    2230             :              * join clauses at all, add baserestrictinfo to this list. The
    2231             :              * degenerate case works only if both sides have the same clause.
    2232             :              * So doesn't matter which side to add.
    2233             :              */
    2234        5638 :             selfjoinquals = list_concat(selfjoinquals, outer->baserestrictinfo);
    2235             : 
    2236             :             /*
    2237             :              * Determine if the inner table can duplicate outer rows.  We must
    2238             :              * bypass the unique rel cache here since we're possibly using a
    2239             :              * subset of join quals. We can use 'force_cache' == true when all
    2240             :              * join quals are self-join quals.  Otherwise, we could end up
    2241             :              * putting false negatives in the cache.
    2242             :              */
    2243        5638 :             if (!innerrel_is_unique_ext(root, joinrelids, inner->relids,
    2244             :                                         outer, JOIN_INNER, selfjoinquals,
    2245        5638 :                                         list_length(otherjoinquals) == 0,
    2246             :                                         &uclauses))
    2247        4932 :                 continue;
    2248             : 
    2249             :             /*
    2250             :              * We have proven that for both relations, the same unique index
    2251             :              * guarantees that there is at most one row where columns equal
    2252             :              * given values. These values must be the same for both relations,
    2253             :              * or else we won't match the same row on each side of the join.
    2254             :              */
    2255         706 :             if (!match_unique_clauses(root, inner, uclauses, outer->relid))
    2256          80 :                 continue;
    2257             : 
    2258             :             /*
    2259             :              * We can remove either relation, so remove the inner one in order
    2260             :              * to simplify this loop.
    2261             :              */
    2262         626 :             remove_self_join_rel(root, omark, imark, outer, inner, restrictlist);
    2263             : 
    2264         626 :             result = bms_add_member(result, r);
    2265             : 
    2266             :             /* We have removed the outer relation, try the next one. */
    2267         626 :             break;
    2268             :         }
    2269             :     }
    2270             : 
    2271       12614 :     return result;
    2272             : }
    2273             : 
    2274             : /*
    2275             :  * Gather indexes of base relations from the joinlist and try to eliminate self
    2276             :  * joins.
    2277             :  */
    2278             : static Relids
    2279       85044 : remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove)
    2280             : {
    2281             :     ListCell   *jl;
    2282       85044 :     Relids      relids = NULL;
    2283       85044 :     SelfJoinCandidate *candidates = NULL;
    2284             :     int         i;
    2285             :     int         j;
    2286             :     int         numRels;
    2287             : 
    2288             :     /* Collect indexes of base relations of the join tree */
    2289      284068 :     foreach(jl, joinlist)
    2290             :     {
    2291      199024 :         Node       *jlnode = (Node *) lfirst(jl);
    2292             : 
    2293      199024 :         if (IsA(jlnode, RangeTblRef))
    2294             :         {
    2295      195742 :             RangeTblRef *ref = (RangeTblRef *) jlnode;
    2296      195742 :             RangeTblEntry *rte = root->simple_rte_array[ref->rtindex];
    2297             : 
    2298             :             /*
    2299             :              * We only care about base relations from which we select
    2300             :              * something.
    2301             :              */
    2302      195742 :             if (rte->rtekind == RTE_RELATION &&
    2303      172232 :                 rte->relkind == RELKIND_RELATION &&
    2304      167474 :                 root->simple_rel_array[ref->rtindex] != NULL)
    2305             :             {
    2306             :                 Assert(!bms_is_member(ref->rtindex, relids));
    2307      167474 :                 relids = bms_add_member(relids, ref->rtindex);
    2308             :             }
    2309             :         }
    2310        3282 :         else if (IsA(jlnode, List))
    2311             :             /* Recursively go inside the sub-joinlist */
    2312        3282 :             toRemove = remove_self_joins_recurse(root, (List *) jlnode,
    2313             :                                                  toRemove);
    2314             :         else
    2315           0 :             elog(ERROR, "unrecognized joinlist node type: %d",
    2316             :                  (int) nodeTag(jlnode));
    2317             :     }
    2318             : 
    2319       85044 :     numRels = bms_num_members(relids);
    2320             : 
    2321             :     /* Need at least two relations for the join */
    2322       85044 :     if (numRels < 2)
    2323       23498 :         return toRemove;
    2324             : 
    2325             :     /*
    2326             :      * In order to find relations with the same oid we first build an array of
    2327             :      * candidates and then sort it by oid.
    2328             :      */
    2329       61546 :     candidates = (SelfJoinCandidate *) palloc(sizeof(SelfJoinCandidate) *
    2330             :                                               numRels);
    2331       61546 :     i = -1;
    2332       61546 :     j = 0;
    2333      212484 :     while ((i = bms_next_member(relids, i)) >= 0)
    2334             :     {
    2335      150938 :         candidates[j].relid = i;
    2336      150938 :         candidates[j].reloid = root->simple_rte_array[i]->relid;
    2337      150938 :         j++;
    2338             :     }
    2339             : 
    2340       61546 :     qsort(candidates, numRels, sizeof(SelfJoinCandidate),
    2341             :           self_join_candidates_cmp);
    2342             : 
    2343             :     /*
    2344             :      * Iteratively form a group of relation indexes with the same oid and
    2345             :      * launch the routine that detects self-joins in this group and removes
    2346             :      * excessive range table entries.
    2347             :      *
    2348             :      * At the end of the iteration, exclude the group from the overall relids
    2349             :      * list. So each next iteration of the cycle will involve less and less
    2350             :      * value of relids.
    2351             :      */
    2352       61546 :     i = 0;
    2353      212484 :     for (j = 1; j < numRels + 1; j++)
    2354             :     {
    2355      150938 :         if (j == numRels || candidates[j].reloid != candidates[i].reloid)
    2356             :         {
    2357      135170 :             if (j - i >= 2)
    2358             :             {
    2359             :                 /* Create a group of relation indexes with the same oid */
    2360       12554 :                 Relids      group = NULL;
    2361             :                 Relids      removed;
    2362             : 
    2363       40876 :                 while (i < j)
    2364             :                 {
    2365       28322 :                     group = bms_add_member(group, candidates[i].relid);
    2366       28322 :                     i++;
    2367             :                 }
    2368             : 
    2369       12554 :                 relids = bms_del_members(relids, group);
    2370             : 
    2371             :                 /*
    2372             :                  * Try to remove self-joins from a group of identical entries.
    2373             :                  * Make the next attempt iteratively - if something is deleted
    2374             :                  * from a group, changes in clauses and equivalence classes
    2375             :                  * can give us a chance to find more candidates.
    2376             :                  */
    2377             :                 do
    2378             :                 {
    2379             :                     Assert(!bms_overlap(group, toRemove));
    2380       12614 :                     removed = remove_self_joins_one_group(root, group);
    2381       12614 :                     toRemove = bms_add_members(toRemove, removed);
    2382       12614 :                     group = bms_del_members(group, removed);
    2383         602 :                 } while (!bms_is_empty(removed) &&
    2384       12614 :                          bms_membership(group) == BMS_MULTIPLE);
    2385       12554 :                 bms_free(removed);
    2386       12554 :                 bms_free(group);
    2387             :             }
    2388             :             else
    2389             :             {
    2390             :                 /* Single relation, just remove it from the set */
    2391      122616 :                 relids = bms_del_member(relids, candidates[i].relid);
    2392      122616 :                 i = j;
    2393             :             }
    2394             :         }
    2395             :     }
    2396             : 
    2397             :     Assert(bms_is_empty(relids));
    2398             : 
    2399       61546 :     return toRemove;
    2400             : }
    2401             : 
    2402             : /*
    2403             :  * Compare self-join candidates by their oids.
    2404             :  */
    2405             : static int
    2406      112738 : self_join_candidates_cmp(const void *a, const void *b)
    2407             : {
    2408      112738 :     const SelfJoinCandidate *ca = (const SelfJoinCandidate *) a;
    2409      112738 :     const SelfJoinCandidate *cb = (const SelfJoinCandidate *) b;
    2410             : 
    2411      112738 :     if (ca->reloid != cb->reloid)
    2412       96910 :         return (ca->reloid < cb->reloid ? -1 : 1);
    2413             :     else
    2414       15828 :         return 0;
    2415             : }
    2416             : 
    2417             : /*
    2418             :  * Find and remove useless self joins.
    2419             :  *
    2420             :  * Search for joins where a relation is joined to itself. If the join clause
    2421             :  * for each tuple from one side of the join is proven to match the same
    2422             :  * physical row (or nothing) on the other side, that self-join can be
    2423             :  * eliminated from the query.  Suitable join clauses are assumed to be in the
    2424             :  * form of X = X, and can be replaced with NOT NULL clauses.
    2425             :  *
    2426             :  * For the sake of simplicity, we don't apply this optimization to special
    2427             :  * joins. Here is a list of what we could do in some particular cases:
    2428             :  * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins,
    2429             :  * and then removed normally.
    2430             :  * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND
    2431             :  * (IS NULL on join columns OR NOT inner quals)'.
    2432             :  * 'a a1 left join a a2': could simplify to a scan like inner but without
    2433             :  * NOT NULL conditions on join columns.
    2434             :  * 'a a1 left join (a a2 join b)': can't simplify this, because join to b
    2435             :  * can both remove rows and introduce duplicates.
    2436             :  *
    2437             :  * To search for removable joins, we order all the relations on their Oid,
    2438             :  * go over each set with the same Oid, and consider each pair of relations
    2439             :  * in this set.
    2440             :  *
    2441             :  * To remove the join, we mark one of the participating relations as dead
    2442             :  * and rewrite all references to it to point to the remaining relation.
    2443             :  * This includes modifying RestrictInfos, EquivalenceClasses, and
    2444             :  * EquivalenceMembers. We also have to modify the row marks. The join clauses
    2445             :  * of the removed relation become either restriction or join clauses, based on
    2446             :  * whether they reference any relations not participating in the removed join.
    2447             :  *
    2448             :  * 'targetlist' is the top-level targetlist of the query. If it has any
    2449             :  * references to the removed relations, we update them to point to the
    2450             :  * remaining ones.
    2451             :  */
    2452             : List *
    2453      276574 : remove_useless_self_joins(PlannerInfo *root, List *joinlist)
    2454             : {
    2455      276574 :     Relids      toRemove = NULL;
    2456      276574 :     int         relid = -1;
    2457             : 
    2458      553148 :     if (!enable_self_join_removal || joinlist == NIL ||
    2459      472242 :         (list_length(joinlist) == 1 && !IsA(linitial(joinlist), List)))
    2460      194812 :         return joinlist;
    2461             : 
    2462             :     /*
    2463             :      * Merge pairs of relations participated in self-join. Remove unnecessary
    2464             :      * range table entries.
    2465             :      */
    2466       81762 :     toRemove = remove_self_joins_recurse(root, joinlist, toRemove);
    2467             : 
    2468       81762 :     if (unlikely(toRemove != NULL))
    2469             :     {
    2470         590 :         int         nremoved = 0;
    2471             : 
    2472             :         /* At the end, remove orphaned relation links */
    2473        1216 :         while ((relid = bms_next_member(toRemove, relid)) >= 0)
    2474         626 :             joinlist = remove_rel_from_joinlist(joinlist, relid, &nremoved);
    2475             :     }
    2476             : 
    2477       81762 :     return joinlist;
    2478             : }

Generated by: LCOV version 1.14