LCOV - code coverage report
Current view: top level - src/backend/optimizer/plan - analyzejoins.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 97.5 % 730 712
Test Date: 2026-05-07 22:16:27 Functions: 100.0 % 27 27
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * analyzejoins.c
       4              :  *    Routines for simplifying joins after initial query analysis
       5              :  *
       6              :  * While we do a great deal of join simplification in prep/prepjointree.c,
       7              :  * certain optimizations cannot be performed at that stage for lack of
       8              :  * detailed information about the query.  The routines here are invoked
       9              :  * after initsplan.c has done its work, and can do additional join removal
      10              :  * and simplification steps based on the information extracted.  The penalty
      11              :  * is that we have to work harder to clean up after ourselves when we modify
      12              :  * the query, since the derived data structures have to be updated too.
      13              :  *
      14              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      15              :  * Portions Copyright (c) 1994, Regents of the University of California
      16              :  *
      17              :  *
      18              :  * IDENTIFICATION
      19              :  *    src/backend/optimizer/plan/analyzejoins.c
      20              :  *
      21              :  *-------------------------------------------------------------------------
      22              :  */
      23              : #include "postgres.h"
      24              : 
      25              : #include "catalog/pg_class.h"
      26              : #include "nodes/nodeFuncs.h"
      27              : #include "optimizer/joininfo.h"
      28              : #include "optimizer/optimizer.h"
      29              : #include "optimizer/pathnode.h"
      30              : #include "optimizer/paths.h"
      31              : #include "optimizer/placeholder.h"
      32              : #include "optimizer/planmain.h"
      33              : #include "optimizer/restrictinfo.h"
      34              : #include "parser/parse_agg.h"
      35              : #include "rewrite/rewriteManip.h"
      36              : #include "utils/lsyscache.h"
      37              : 
      38              : /*
      39              :  * Utility structure.  A sorting procedure is needed to simplify the search
      40              :  * of SJE-candidate baserels referencing the same database relation.  Having
      41              :  * collected all baserels from the query jointree, the planner sorts them
      42              :  * according to the reloid value, groups them with the next pass and attempts
      43              :  * to remove self-joins.
      44              :  *
      45              :  * Preliminary sorting prevents quadratic behavior that can be harmful in the
      46              :  * case of numerous joins.
      47              :  */
      48              : typedef struct
      49              : {
      50              :     int         relid;
      51              :     Oid         reloid;
      52              : } SelfJoinCandidate;
      53              : 
      54              : bool        enable_self_join_elimination;
      55              : 
      56              : /* local functions */
      57              : static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
      58              : static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
      59              :                                           SpecialJoinInfo *sjinfo);
      60              : static void remove_rel_from_query(PlannerInfo *root, int relid,
      61              :                                   int subst, SpecialJoinInfo *sjinfo,
      62              :                                   Relids joinrelids);
      63              : static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
      64              :                                          int relid, int ojrelid);
      65              : static void remove_rel_from_eclass(EquivalenceClass *ec,
      66              :                                    int relid, int ojrelid);
      67              : static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
      68              : static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
      69              : static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
      70              :                                 List *clause_list, List **extra_clauses);
      71              : static DistinctColInfo *distinct_col_search(int colno, List *distinct_cols);
      72              : static bool is_innerrel_unique_for(PlannerInfo *root,
      73              :                                    Relids joinrelids,
      74              :                                    Relids outerrelids,
      75              :                                    RelOptInfo *innerrel,
      76              :                                    JoinType jointype,
      77              :                                    List *restrictlist,
      78              :                                    List **extra_clauses);
      79              : static int  self_join_candidates_cmp(const void *a, const void *b);
      80              : static bool replace_relid_callback(Node *node,
      81              :                                    ChangeVarNodes_context *context);
      82              : 
      83              : 
      84              : /*
      85              :  * remove_useless_joins
      86              :  *      Check for relations that don't actually need to be joined at all,
      87              :  *      and remove them from the query.
      88              :  *
      89              :  * We are passed the current joinlist and return the updated list.  Other
      90              :  * data structures that have to be updated are accessible via "root".
      91              :  */
      92              : List *
      93       253567 : remove_useless_joins(PlannerInfo *root, List *joinlist)
      94              : {
      95              :     ListCell   *lc;
      96              : 
      97              :     /*
      98              :      * We are only interested in relations that are left-joined to, so we can
      99              :      * scan the join_info_list to find them easily.
     100              :      */
     101       262173 : restart:
     102       297637 :     foreach(lc, root->join_info_list)
     103              :     {
     104        44070 :         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
     105              :         int         innerrelid;
     106              :         int         nremoved;
     107              : 
     108              :         /* Skip if not removable */
     109        44070 :         if (!join_is_removable(root, sjinfo))
     110        35464 :             continue;
     111              : 
     112              :         /*
     113              :          * Currently, join_is_removable can only succeed when the sjinfo's
     114              :          * righthand is a single baserel.  Remove that rel from the query and
     115              :          * joinlist.
     116              :          */
     117         8606 :         innerrelid = bms_singleton_member(sjinfo->min_righthand);
     118              : 
     119         8606 :         remove_leftjoinrel_from_query(root, innerrelid, sjinfo);
     120              : 
     121              :         /* We verify that exactly one reference gets removed from joinlist */
     122         8606 :         nremoved = 0;
     123         8606 :         joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
     124         8606 :         if (nremoved != 1)
     125            0 :             elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
     126              : 
     127              :         /*
     128              :          * We can delete this SpecialJoinInfo from the list too, since it's no
     129              :          * longer of interest.  (Since we'll restart the foreach loop
     130              :          * immediately, we don't bother with foreach_delete_current.)
     131              :          */
     132         8606 :         root->join_info_list = list_delete_cell(root->join_info_list, lc);
     133              : 
     134              :         /*
     135              :          * Restart the scan.  This is necessary to ensure we find all
     136              :          * removable joins independently of ordering of the join_info_list
     137              :          * (note that removal of attr_needed bits may make a join appear
     138              :          * removable that did not before).
     139              :          */
     140         8606 :         goto restart;
     141              :     }
     142              : 
     143       253567 :     return joinlist;
     144              : }
     145              : 
     146              : /*
     147              :  * join_is_removable
     148              :  *    Check whether we need not perform this special join at all, because
     149              :  *    it will just duplicate its left input.
     150              :  *
     151              :  * This is true for a left join for which the join condition cannot match
     152              :  * more than one inner-side row.  (There are other possibly interesting
     153              :  * cases, but we don't have the infrastructure to prove them.)  We also
     154              :  * have to check that the inner side doesn't generate any variables needed
     155              :  * above the join.
     156              :  */
     157              : static bool
     158        44070 : join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
     159              : {
     160              :     int         innerrelid;
     161              :     RelOptInfo *innerrel;
     162              :     Relids      inputrelids;
     163              :     Relids      joinrelids;
     164        44070 :     List       *clause_list = NIL;
     165              :     ListCell   *l;
     166              :     int         attroff;
     167              : 
     168              :     /*
     169              :      * Must be a left join to a single baserel, else we aren't going to be
     170              :      * able to do anything with it.
     171              :      */
     172        44070 :     if (sjinfo->jointype != JOIN_LEFT)
     173        10696 :         return false;
     174              : 
     175        33374 :     if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
     176         1058 :         return false;
     177              : 
     178              :     /*
     179              :      * Never try to eliminate a left join to the query result rel.  Although
     180              :      * the case is syntactically impossible in standard SQL, MERGE will build
     181              :      * a join tree that looks exactly like that.
     182              :      */
     183        32316 :     if (innerrelid == root->parse->resultRelation)
     184          636 :         return false;
     185              : 
     186        31680 :     innerrel = find_base_rel(root, innerrelid);
     187              : 
     188              :     /*
     189              :      * Before we go to the effort of checking whether any innerrel variables
     190              :      * are needed above the join, make a quick check to eliminate cases in
     191              :      * which we will surely be unable to prove uniqueness of the innerrel.
     192              :      */
     193        31680 :     if (!rel_supports_distinctness(root, innerrel))
     194         2509 :         return false;
     195              : 
     196              :     /* Compute the relid set for the join we are considering */
     197        29171 :     inputrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
     198              :     Assert(sjinfo->ojrelid != 0);
     199        29171 :     joinrelids = bms_copy(inputrelids);
     200        29171 :     joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
     201              : 
     202              :     /*
     203              :      * We can't remove the join if any inner-rel attributes are used above the
     204              :      * join.  Here, "above" the join includes pushed-down conditions, so we
     205              :      * should reject if attr_needed includes the OJ's own relid; therefore,
     206              :      * compare to inputrelids not joinrelids.
     207              :      *
     208              :      * As a micro-optimization, it seems better to start with max_attr and
     209              :      * count down rather than starting with min_attr and counting up, on the
     210              :      * theory that the system attributes are somewhat less likely to be wanted
     211              :      * and should be tested last.
     212              :      */
     213        29171 :     for (attroff = innerrel->max_attr - innerrel->min_attr;
     214       274543 :          attroff >= 0;
     215       245372 :          attroff--)
     216              :     {
     217       265670 :         if (!bms_is_subset(innerrel->attr_needed[attroff], inputrelids))
     218        20298 :             return false;
     219              :     }
     220              : 
     221              :     /*
     222              :      * Similarly check that the inner rel isn't needed by any PlaceHolderVars
     223              :      * that will be used above the join.  The PHV case is a little bit more
     224              :      * complicated, because PHVs may have been assigned a ph_eval_at location
     225              :      * that includes the innerrel, yet their contained expression might not
     226              :      * actually reference the innerrel (it could be just a constant, for
     227              :      * instance).  If such a PHV is due to be evaluated above the join then it
     228              :      * needn't prevent join removal.
     229              :      */
     230         9027 :     foreach(l, root->placeholder_list)
     231              :     {
     232          184 :         PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
     233              : 
     234          184 :         if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
     235           30 :             return false;       /* it references innerrel laterally */
     236          184 :         if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
     237           51 :             continue;           /* it definitely doesn't reference innerrel */
     238          133 :         if (bms_is_subset(phinfo->ph_needed, inputrelids))
     239            5 :             continue;           /* PHV is not used above the join */
     240          128 :         if (!bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
     241           25 :             return false;       /* it has to be evaluated below the join */
     242              : 
     243              :         /*
     244              :          * We need to be sure there will still be a place to evaluate the PHV
     245              :          * if we remove the join, ie that ph_eval_at wouldn't become empty.
     246              :          */
     247          103 :         if (!bms_overlap(sjinfo->min_lefthand, phinfo->ph_eval_at))
     248            5 :             return false;       /* there isn't any other place to eval PHV */
     249              :         /* Check contained expression last, since this is a bit expensive */
     250           98 :         if (bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
     251           98 :                         innerrel->relids))
     252            0 :             return false;       /* contained expression references innerrel */
     253              :     }
     254              : 
     255              :     /*
     256              :      * Search for mergejoinable clauses that constrain the inner rel against
     257              :      * either the outer rel or a pseudoconstant.  If an operator is
     258              :      * mergejoinable then it behaves like equality for some btree opclass, so
     259              :      * it's what we want.  The mergejoinability test also eliminates clauses
     260              :      * containing volatile functions, which we couldn't depend on.
     261              :      */
     262        17849 :     foreach(l, innerrel->joininfo)
     263              :     {
     264         9006 :         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
     265              : 
     266              :         /*
     267              :          * If the current join commutes with some other outer join(s) via
     268              :          * outer join identity 3, there will be multiple clones of its join
     269              :          * clauses in the joininfo list.  We want to consider only the
     270              :          * has_clone form of such clauses.  Processing more than one form
     271              :          * would be wasteful, and also some of the others would confuse the
     272              :          * RINFO_IS_PUSHED_DOWN test below.
     273              :          */
     274         9006 :         if (restrictinfo->is_clone)
     275           82 :             continue;           /* ignore it */
     276              : 
     277              :         /*
     278              :          * If it's not a join clause for this outer join, we can't use it.
     279              :          * Note that if the clause is pushed-down, then it is logically from
     280              :          * above the outer join, even if it references no other rels (it might
     281              :          * be from WHERE, for example).
     282              :          */
     283         8924 :         if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
     284           99 :             continue;           /* ignore; not useful here */
     285              : 
     286              :         /* Ignore if it's not a mergejoinable clause */
     287         8825 :         if (!restrictinfo->can_join ||
     288         8679 :             restrictinfo->mergeopfamilies == NIL)
     289          146 :             continue;           /* not mergejoinable */
     290              : 
     291              :         /*
     292              :          * Check if the clause has the form "outer op inner" or "inner op
     293              :          * outer", and if so mark which side is inner.
     294              :          */
     295         8679 :         if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
     296              :                                      innerrel->relids))
     297            5 :             continue;           /* no good for these input relations */
     298              : 
     299              :         /* OK, add to list */
     300         8674 :         clause_list = lappend(clause_list, restrictinfo);
     301              :     }
     302              : 
     303              :     /*
     304              :      * Now that we have the relevant equality join clauses, try to prove the
     305              :      * innerrel distinct.
     306              :      */
     307         8843 :     if (rel_is_distinct_for(root, innerrel, clause_list, NULL))
     308         8606 :         return true;
     309              : 
     310              :     /*
     311              :      * Some day it would be nice to check for other methods of establishing
     312              :      * distinctness.
     313              :      */
     314          237 :     return false;
     315              : }
     316              : 
     317              : /*
     318              :  * Remove the target relid and references to the target join from the
     319              :  * planner's data structures, having determined that there is no need
     320              :  * to include them in the query.
     321              :  *
     322              :  * We are not terribly thorough here.  We only bother to update parts of
     323              :  * the planner's data structures that will actually be consulted later.
     324              :  */
     325              : static void
     326         8606 : remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
     327              :                               SpecialJoinInfo *sjinfo)
     328              : {
     329         8606 :     RelOptInfo *rel = find_base_rel(root, relid);
     330         8606 :     int         ojrelid = sjinfo->ojrelid;
     331              :     Relids      joinrelids;
     332              :     Relids      join_plus_commute;
     333              :     List       *joininfos;
     334              :     ListCell   *l;
     335              : 
     336              :     /* Compute the relid set for the join we are considering */
     337         8606 :     joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
     338              :     Assert(ojrelid != 0);
     339         8606 :     joinrelids = bms_add_member(joinrelids, ojrelid);
     340              : 
     341         8606 :     remove_rel_from_query(root, relid, -1, sjinfo, joinrelids);
     342              : 
     343              :     /*
     344              :      * Remove any joinquals referencing the rel from the joininfo lists.
     345              :      *
     346              :      * In some cases, a joinqual has to be put back after deleting its
     347              :      * reference to the target rel.  This can occur for pseudoconstant and
     348              :      * outerjoin-delayed quals, which can get marked as requiring the rel in
     349              :      * order to force them to be evaluated at or above the join.  We can't
     350              :      * just discard them, though.  Only quals that logically belonged to the
     351              :      * outer join being discarded should be removed from the query.
     352              :      *
     353              :      * We might encounter a qual that is a clone of a deletable qual with some
     354              :      * outer-join relids added (see deconstruct_distribute_oj_quals).  To
     355              :      * ensure we get rid of such clones as well, add the relids of all OJs
     356              :      * commutable with this one to the set we test against for
     357              :      * pushed-down-ness.
     358              :      */
     359         8606 :     join_plus_commute = bms_union(joinrelids,
     360         8606 :                                   sjinfo->commute_above_r);
     361         8606 :     join_plus_commute = bms_add_members(join_plus_commute,
     362         8606 :                                         sjinfo->commute_below_l);
     363              : 
     364              :     /*
     365              :      * We must make a copy of the rel's old joininfo list before starting the
     366              :      * loop, because otherwise remove_join_clause_from_rels would destroy the
     367              :      * list while we're scanning it.
     368              :      */
     369         8606 :     joininfos = list_copy(rel->joininfo);
     370        17391 :     foreach(l, joininfos)
     371              :     {
     372         8785 :         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
     373              : 
     374         8785 :         remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
     375              : 
     376         8785 :         if (RINFO_IS_PUSHED_DOWN(rinfo, join_plus_commute))
     377              :         {
     378              :             /*
     379              :              * There might be references to relid or ojrelid in the
     380              :              * RestrictInfo's relid sets, as a consequence of PHVs having had
     381              :              * ph_eval_at sets that include those.  We already checked above
     382              :              * that any such PHV is safe (and updated its ph_eval_at), so we
     383              :              * can just drop those references.
     384              :              */
     385           99 :             remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
     386              : 
     387              :             /*
     388              :              * Cross-check that the clause itself does not reference the
     389              :              * target rel or join.
     390              :              */
     391              : #ifdef USE_ASSERT_CHECKING
     392              :             {
     393              :                 Relids      clause_varnos = pull_varnos(root,
     394              :                                                         (Node *) rinfo->clause);
     395              : 
     396              :                 Assert(!bms_is_member(relid, clause_varnos));
     397              :                 Assert(!bms_is_member(ojrelid, clause_varnos));
     398              :             }
     399              : #endif
     400              :             /* Now throw it back into the joininfo lists */
     401           99 :             distribute_restrictinfo_to_rels(root, rinfo);
     402              :         }
     403              :     }
     404              : 
     405              :     /*
     406              :      * There may be references to the rel in root->fkey_list, but if so,
     407              :      * match_foreign_keys_to_quals() will get rid of them.
     408              :      */
     409              : 
     410              :     /*
     411              :      * Now remove the rel from the baserel array to prevent it from being
     412              :      * referenced again.  (We can't do this earlier because
     413              :      * remove_join_clause_from_rels will touch it.)
     414              :      */
     415         8606 :     root->simple_rel_array[relid] = NULL;
     416         8606 :     root->simple_rte_array[relid] = NULL;
     417              : 
     418              :     /* And nuke the RelOptInfo, just in case there's another access path */
     419         8606 :     pfree(rel);
     420              : 
     421              :     /*
     422              :      * Now repeat construction of attr_needed bits coming from all other
     423              :      * sources.
     424              :      */
     425         8606 :     rebuild_placeholder_attr_needed(root);
     426         8606 :     rebuild_joinclause_attr_needed(root);
     427         8606 :     rebuild_eclass_attr_needed(root);
     428         8606 :     rebuild_lateral_attr_needed(root);
     429         8606 : }
     430              : 
     431              : /*
     432              :  * Remove the target relid and references to the target join from the
     433              :  * planner's data structures, having determined that there is no need
     434              :  * to include them in the query.  Optionally replace references to the
     435              :  * removed relid with subst if this is a self-join removal.
     436              :  *
     437              :  * This function serves as the common infrastructure for left-join removal
     438              :  * and self-join elimination.  It is intentionally scoped to update only the
     439              :  * shared planner data structures that are universally affected by relation
     440              :  * removal.  Each specific caller remains responsible for updating any
     441              :  * remaining data structures required by its unique removal logic.
     442              :  *
     443              :  * The specific type of removal being performed is dictated by the combination
     444              :  * of the sjinfo and subst parameters.  A non-NULL sjinfo indicates left-join
     445              :  * removal.  When sjinfo is NULL, a positive subst value indicates self-join
     446              :  * elimination (where references are replaced with subst).
     447              :  */
     448              : static void
     449         9074 : remove_rel_from_query(PlannerInfo *root, int relid,
     450              :                       int subst, SpecialJoinInfo *sjinfo,
     451              :                       Relids joinrelids)
     452              : {
     453         9074 :     int         ojrelid = sjinfo ? sjinfo->ojrelid : 0;
     454              :     Index       rti;
     455              :     ListCell   *l;
     456         9074 :     bool        is_outer_join = (sjinfo != NULL);
     457         9074 :     bool        is_self_join = (!is_outer_join && subst > 0);
     458              : 
     459              :     Assert(is_outer_join || is_self_join);
     460              :     Assert(!is_outer_join || ojrelid > 0);
     461              :     Assert(!is_outer_join || joinrelids != NULL);
     462              : 
     463              :     /*
     464              :      * Update all_baserels and related relid sets.
     465              :      */
     466         9074 :     root->all_baserels = adjust_relid_set(root->all_baserels, relid, subst);
     467         9074 :     root->all_query_rels = adjust_relid_set(root->all_query_rels, relid, subst);
     468              : 
     469         9074 :     if (is_outer_join)
     470              :     {
     471         8606 :         root->outer_join_rels = bms_del_member(root->outer_join_rels, ojrelid);
     472         8606 :         root->all_query_rels = bms_del_member(root->all_query_rels, ojrelid);
     473              :     }
     474              : 
     475              :     /*
     476              :      * Likewise remove references from SpecialJoinInfo data structures.
     477              :      *
     478              :      * This is relevant in case the relation we're deleting is part of the
     479              :      * relid sets of special joins: those sets have to be adjusted.  If we are
     480              :      * removing an outer join, the RHS of the target outer join will be made
     481              :      * empty here, but that's OK since the caller will delete that
     482              :      * SpecialJoinInfo entirely.
     483              :      */
     484        21444 :     foreach(l, root->join_info_list)
     485              :     {
     486        12370 :         SpecialJoinInfo *sjinf = (SpecialJoinInfo *) lfirst(l);
     487              : 
     488              :         /*
     489              :          * initsplan.c is fairly cavalier about allowing SpecialJoinInfos'
     490              :          * lefthand/righthand relid sets to be shared with other data
     491              :          * structures.  Ensure that we don't modify the original relid sets.
     492              :          * (The commute_xxx sets are always per-SpecialJoinInfo though.)
     493              :          */
     494        12370 :         sjinf->min_lefthand = bms_copy(sjinf->min_lefthand);
     495        12370 :         sjinf->min_righthand = bms_copy(sjinf->min_righthand);
     496        12370 :         sjinf->syn_lefthand = bms_copy(sjinf->syn_lefthand);
     497        12370 :         sjinf->syn_righthand = bms_copy(sjinf->syn_righthand);
     498              : 
     499              :         /* Now adjust relid bit in the sets: */
     500        12370 :         sjinf->min_lefthand = adjust_relid_set(sjinf->min_lefthand, relid, subst);
     501        12370 :         sjinf->min_righthand = adjust_relid_set(sjinf->min_righthand, relid, subst);
     502        12370 :         sjinf->syn_lefthand = adjust_relid_set(sjinf->syn_lefthand, relid, subst);
     503        12370 :         sjinf->syn_righthand = adjust_relid_set(sjinf->syn_righthand, relid, subst);
     504              : 
     505        12370 :         if (is_outer_join)
     506              :         {
     507              :             /* Remove ojrelid bit from the sets: */
     508        12277 :             sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, ojrelid);
     509        12277 :             sjinf->min_righthand = bms_del_member(sjinf->min_righthand, ojrelid);
     510        12277 :             sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, ojrelid);
     511        12277 :             sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, ojrelid);
     512              :             /* relid cannot appear in these fields, but ojrelid can: */
     513        12277 :             sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l, ojrelid);
     514        12277 :             sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r, ojrelid);
     515        12277 :             sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l, ojrelid);
     516        12277 :             sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r, ojrelid);
     517              :         }
     518              :         else
     519              :         {
     520              :             /*
     521              :              * For self-join removal, replace relid references in
     522              :              * semi_rhs_exprs.
     523              :              */
     524           93 :             ChangeVarNodesExtended((Node *) sjinf->semi_rhs_exprs, relid, subst,
     525              :                                    0, replace_relid_callback);
     526              :         }
     527              :     }
     528              : 
     529              :     /*
     530              :      * Likewise remove references from PlaceHolderVar data structures,
     531              :      * removing any no-longer-needed placeholders entirely.  We only remove
     532              :      * PHVs for left-join removal.  With self-join elimination, PHVs already
     533              :      * get moved to the remaining relation, where they might still be needed.
     534              :      * It might also happen that we skip the removal of some PHVs that could
     535              :      * be removed.  However, the overhead of extra PHVs is small compared to
     536              :      * the complexity of analysis needed to remove them.
     537              :      *
     538              :      * Removal is a bit trickier than it might seem: we can remove PHVs that
     539              :      * are used at the target rel and/or in the join qual, but not those that
     540              :      * are used at join partner rels or above the join.  It's not that easy to
     541              :      * distinguish PHVs used at partner rels from those used in the join qual,
     542              :      * since they will both have ph_needed sets that are subsets of
     543              :      * joinrelids.  However, a PHV used at a partner rel could not have the
     544              :      * target rel in ph_eval_at, so we check that while deciding whether to
     545              :      * remove or just update the PHV.  There is no corresponding test in
     546              :      * join_is_removable because it doesn't need to distinguish those cases.
     547              :      */
     548         9248 :     foreach(l, root->placeholder_list)
     549              :     {
     550          174 :         PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
     551              : 
     552              :         Assert(!is_outer_join || !bms_is_member(relid, phinfo->ph_lateral));
     553              : 
     554          313 :         if (is_outer_join &&
     555          164 :             bms_is_subset(phinfo->ph_needed, joinrelids) &&
     556           25 :             bms_is_member(relid, phinfo->ph_eval_at) &&
     557           10 :             !bms_is_member(ojrelid, phinfo->ph_eval_at))
     558              :         {
     559            5 :             root->placeholder_list = foreach_delete_current(root->placeholder_list,
     560              :                                                             l);
     561            5 :             root->placeholder_array[phinfo->phid] = NULL;
     562              :         }
     563              :         else
     564              :         {
     565          169 :             PlaceHolderVar *phv = phinfo->ph_var;
     566              : 
     567          169 :             phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at, relid, subst);
     568          169 :             if (is_outer_join)
     569          134 :                 phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, ojrelid);
     570              :             Assert(!bms_is_empty(phinfo->ph_eval_at));   /* checked previously */
     571              : 
     572              :             /* Reduce ph_needed to contain only "relation 0"; see below */
     573          169 :             if (bms_is_member(0, phinfo->ph_needed))
     574           92 :                 phinfo->ph_needed = bms_make_singleton(0);
     575              :             else
     576           77 :                 phinfo->ph_needed = NULL;
     577              : 
     578          169 :             phv->phrels = adjust_relid_set(phv->phrels, relid, subst);
     579          169 :             if (is_outer_join)
     580          134 :                 phv->phrels = bms_del_member(phv->phrels, ojrelid);
     581              :             Assert(!bms_is_empty(phv->phrels));
     582              : 
     583              :             /*
     584              :              * For self-join removal, update Var nodes within the PHV's
     585              :              * expression to reference the replacement relid, and adjust
     586              :              * ph_lateral for the relid substitution.  (For left-join removal,
     587              :              * we're removing rather than replacing, and any surviving PHV
     588              :              * shouldn't reference the removed rel in its expression.  Also,
     589              :              * relid can't appear in ph_lateral for outer joins.)
     590              :              */
     591          169 :             if (is_self_join)
     592              :             {
     593           35 :                 ChangeVarNodesExtended((Node *) phv->phexpr, relid, subst, 0,
     594              :                                        replace_relid_callback);
     595           35 :                 phinfo->ph_lateral = adjust_relid_set(phinfo->ph_lateral, relid, subst);
     596              : 
     597              :                 /*
     598              :                  * ph_lateral might contain rels mentioned in ph_eval_at after
     599              :                  * the replacement, remove them.
     600              :                  */
     601           35 :                 phinfo->ph_lateral = bms_difference(phinfo->ph_lateral, phinfo->ph_eval_at);
     602              :                 /* ph_lateral might or might not be empty */
     603              :             }
     604              : 
     605              :             Assert(phv->phnullingrels == NULL); /* no need to adjust */
     606              :         }
     607              :     }
     608              : 
     609              :     /*
     610              :      * Likewise remove references from EquivalenceClasses.
     611              :      *
     612              :      * For self-join removal, the caller has already updated the
     613              :      * EquivalenceClasses, so we can skip this step.
     614              :      */
     615         9074 :     if (is_outer_join)
     616              :     {
     617        49528 :         foreach(l, root->eq_classes)
     618              :         {
     619        40922 :             EquivalenceClass *ec = (EquivalenceClass *) lfirst(l);
     620              : 
     621        69692 :             if (bms_is_member(relid, ec->ec_relids) ||
     622        28770 :                 bms_is_member(ojrelid, ec->ec_relids))
     623        12152 :                 remove_rel_from_eclass(ec, relid, ojrelid);
     624              :         }
     625              :     }
     626              : 
     627              :     /*
     628              :      * Finally, we must prepare for the caller to recompute per-Var
     629              :      * attr_needed and per-PlaceHolderVar ph_needed relid sets.  These have to
     630              :      * be known accurately, else we may fail to remove other now-removable
     631              :      * joins.  Because the caller removes the join clause(s) associated with
     632              :      * the removed join, Vars that were formerly needed may no longer be.
     633              :      *
     634              :      * The actual reconstruction of these relid sets is performed by the
     635              :      * specific caller.  Here, we simply clear out the existing attr_needed
     636              :      * sets (we already did this above for ph_needed) to ensure they are
     637              :      * rebuilt from scratch.  We can cheat to one small extent: we can avoid
     638              :      * re-examining the targetlist and HAVING qual by preserving "relation 0"
     639              :      * bits from the existing relid sets.  This is safe because we'd never
     640              :      * remove such references.
     641              :      *
     642              :      * Additionally, if we are performing self-join elimination, we must
     643              :      * replace references to the removed relid with subst within the
     644              :      * lateral_vars lists.
     645              :      */
     646        59880 :     for (rti = 1; rti < root->simple_rel_array_size; rti++)
     647              :     {
     648        50806 :         RelOptInfo *otherrel = root->simple_rel_array[rti];
     649              :         int         attroff;
     650              : 
     651              :         /* there may be empty slots corresponding to non-baserel RTEs */
     652        50806 :         if (otherrel == NULL)
     653        24693 :             continue;
     654              : 
     655              :         Assert(otherrel->relid == rti); /* sanity check on array */
     656              : 
     657        26113 :         for (attroff = otherrel->max_attr - otherrel->min_attr;
     658       569116 :              attroff >= 0;
     659       543003 :              attroff--)
     660              :         {
     661       543003 :             if (bms_is_member(0, otherrel->attr_needed[attroff]))
     662        37981 :                 otherrel->attr_needed[attroff] = bms_make_singleton(0);
     663              :             else
     664       505022 :                 otherrel->attr_needed[attroff] = NULL;
     665              :         }
     666              : 
     667        26113 :         if (is_self_join)
     668         1209 :             ChangeVarNodesExtended((Node *) otherrel->lateral_vars, relid,
     669              :                                    subst, 0, replace_relid_callback);
     670              :     }
     671         9074 : }
     672              : 
     673              : /*
     674              :  * Remove any references to relid or ojrelid from the RestrictInfo.
     675              :  *
     676              :  * We only bother to clean out bits in the RestrictInfo's various relid sets,
     677              :  * not nullingrel bits in contained Vars and PHVs.  (This might have to be
     678              :  * improved sometime.)  However, if the RestrictInfo contains an OR clause
     679              :  * we have to also clean up the sub-clauses.
     680              :  */
     681              : static void
     682         3733 : remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid)
     683              : {
     684              :     /*
     685              :      * initsplan.c is fairly cavalier about allowing RestrictInfos to share
     686              :      * relid sets with other RestrictInfos, and SpecialJoinInfos too.  Make
     687              :      * sure this RestrictInfo has its own relid sets before we modify them.
     688              :      * (In present usage, clause_relids is probably not shared, but
     689              :      * required_relids could be; let's not assume anything.)
     690              :      */
     691         3733 :     rinfo->clause_relids = bms_copy(rinfo->clause_relids);
     692         3733 :     rinfo->clause_relids = bms_del_member(rinfo->clause_relids, relid);
     693         3733 :     rinfo->clause_relids = bms_del_member(rinfo->clause_relids, ojrelid);
     694              :     /* Likewise for required_relids */
     695         3733 :     rinfo->required_relids = bms_copy(rinfo->required_relids);
     696         3733 :     rinfo->required_relids = bms_del_member(rinfo->required_relids, relid);
     697         3733 :     rinfo->required_relids = bms_del_member(rinfo->required_relids, ojrelid);
     698              :     /* Likewise for incompatible_relids */
     699         3733 :     rinfo->incompatible_relids = bms_copy(rinfo->incompatible_relids);
     700         3733 :     rinfo->incompatible_relids = bms_del_member(rinfo->incompatible_relids, relid);
     701         3733 :     rinfo->incompatible_relids = bms_del_member(rinfo->incompatible_relids, ojrelid);
     702              :     /* Likewise for outer_relids */
     703         3733 :     rinfo->outer_relids = bms_copy(rinfo->outer_relids);
     704         3733 :     rinfo->outer_relids = bms_del_member(rinfo->outer_relids, relid);
     705         3733 :     rinfo->outer_relids = bms_del_member(rinfo->outer_relids, ojrelid);
     706              :     /* Likewise for left_relids */
     707         3733 :     rinfo->left_relids = bms_copy(rinfo->left_relids);
     708         3733 :     rinfo->left_relids = bms_del_member(rinfo->left_relids, relid);
     709         3733 :     rinfo->left_relids = bms_del_member(rinfo->left_relids, ojrelid);
     710              :     /* Likewise for right_relids */
     711         3733 :     rinfo->right_relids = bms_copy(rinfo->right_relids);
     712         3733 :     rinfo->right_relids = bms_del_member(rinfo->right_relids, relid);
     713         3733 :     rinfo->right_relids = bms_del_member(rinfo->right_relids, ojrelid);
     714              : 
     715              :     /* If it's an OR, recurse to clean up sub-clauses */
     716         3733 :     if (restriction_is_or_clause(rinfo))
     717              :     {
     718              :         ListCell   *lc;
     719              : 
     720              :         Assert(is_orclause(rinfo->orclause));
     721           15 :         foreach(lc, ((BoolExpr *) rinfo->orclause)->args)
     722              :         {
     723           10 :             Node       *orarg = (Node *) lfirst(lc);
     724              : 
     725              :             /* OR arguments should be ANDs or sub-RestrictInfos */
     726           10 :             if (is_andclause(orarg))
     727              :             {
     728            0 :                 List       *andargs = ((BoolExpr *) orarg)->args;
     729              :                 ListCell   *lc2;
     730              : 
     731            0 :                 foreach(lc2, andargs)
     732              :                 {
     733            0 :                     RestrictInfo *rinfo2 = lfirst_node(RestrictInfo, lc2);
     734              : 
     735            0 :                     remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
     736              :                 }
     737              :             }
     738              :             else
     739              :             {
     740           10 :                 RestrictInfo *rinfo2 = castNode(RestrictInfo, orarg);
     741              : 
     742           10 :                 remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
     743              :             }
     744              :         }
     745              :     }
     746         3733 : }
     747              : 
     748              : /*
     749              :  * Remove any references to relid or ojrelid from the EquivalenceClass.
     750              :  *
     751              :  * Like remove_rel_from_restrictinfo, we don't worry about cleaning out
     752              :  * any nullingrel bits in contained Vars and PHVs.  (This might have to be
     753              :  * improved sometime.)  We do need to fix the EC and EM relid sets to ensure
     754              :  * that implied join equalities will be generated at the appropriate join
     755              :  * level(s).
     756              :  */
     757              : static void
     758        12152 : remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid)
     759              : {
     760              :     ListCell   *lc;
     761              : 
     762              :     /* Fix up the EC's overall relids */
     763        12152 :     ec->ec_relids = bms_del_member(ec->ec_relids, relid);
     764        12152 :     ec->ec_relids = bms_del_member(ec->ec_relids, ojrelid);
     765              : 
     766              :     /*
     767              :      * We don't expect any EC child members to exist at this point.  Ensure
     768              :      * that's the case, otherwise, we might be getting asked to do something
     769              :      * this function hasn't been coded for.
     770              :      */
     771              :     Assert(ec->ec_childmembers == NULL);
     772              : 
     773              :     /*
     774              :      * Fix up the member expressions.  Any non-const member that ends with
     775              :      * empty em_relids must be a Var or PHV of the removed relation.  We don't
     776              :      * need it anymore, so we can drop it.
     777              :      */
     778        27928 :     foreach(lc, ec->ec_members)
     779              :     {
     780        15776 :         EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
     781              : 
     782        19400 :         if (bms_is_member(relid, cur_em->em_relids) ||
     783         3624 :             bms_is_member(ojrelid, cur_em->em_relids))
     784              :         {
     785              :             Assert(!cur_em->em_is_const);
     786              :             /* em_relids is likely to be shared with some RestrictInfo */
     787        12152 :             cur_em->em_relids = bms_copy(cur_em->em_relids);
     788        12152 :             cur_em->em_relids = bms_del_member(cur_em->em_relids, relid);
     789        12152 :             cur_em->em_relids = bms_del_member(cur_em->em_relids, ojrelid);
     790        12152 :             if (bms_is_empty(cur_em->em_relids))
     791        12132 :                 ec->ec_members = foreach_delete_current(ec->ec_members, lc);
     792              :         }
     793              :     }
     794              : 
     795              :     /* Fix up the source clauses, in case we can re-use them later */
     796        15776 :     foreach(lc, ec->ec_sources)
     797              :     {
     798         3624 :         RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
     799              : 
     800         3624 :         remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
     801              :     }
     802              : 
     803              :     /*
     804              :      * Rather than expend code on fixing up any already-derived clauses, just
     805              :      * drop them.  (At this point, any such clauses would be base restriction
     806              :      * clauses, which we'd not need anymore anyway.)
     807              :      */
     808        12152 :     ec_clear_derived_clauses(ec);
     809        12152 : }
     810              : 
     811              : /*
     812              :  * Remove any occurrences of the target relid from a joinlist structure.
     813              :  *
     814              :  * It's easiest to build a whole new list structure, so we handle it that
     815              :  * way.  Efficiency is not a big deal here.
     816              :  *
     817              :  * *nremoved is incremented by the number of occurrences removed (there
     818              :  * should be exactly one, but the caller checks that).
     819              :  */
     820              : static List *
     821         9324 : remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
     822              : {
     823         9324 :     List       *result = NIL;
     824              :     ListCell   *jl;
     825              : 
     826        35687 :     foreach(jl, joinlist)
     827              :     {
     828        26363 :         Node       *jlnode = (Node *) lfirst(jl);
     829              : 
     830        26363 :         if (IsA(jlnode, RangeTblRef))
     831              :         {
     832        26113 :             int         varno = ((RangeTblRef *) jlnode)->rtindex;
     833              : 
     834        26113 :             if (varno == relid)
     835         9074 :                 (*nremoved)++;
     836              :             else
     837        17039 :                 result = lappend(result, jlnode);
     838              :         }
     839          250 :         else if (IsA(jlnode, List))
     840              :         {
     841              :             /* Recurse to handle subproblem */
     842              :             List       *sublist;
     843              : 
     844          250 :             sublist = remove_rel_from_joinlist((List *) jlnode,
     845              :                                                relid, nremoved);
     846              :             /* Avoid including empty sub-lists in the result */
     847          250 :             if (sublist)
     848          250 :                 result = lappend(result, sublist);
     849              :         }
     850              :         else
     851              :         {
     852            0 :             elog(ERROR, "unrecognized joinlist node type: %d",
     853              :                  (int) nodeTag(jlnode));
     854              :         }
     855              :     }
     856              : 
     857         9324 :     return result;
     858              : }
     859              : 
     860              : 
     861              : /*
     862              :  * reduce_unique_semijoins
     863              :  *      Check for semijoins that can be simplified to plain inner joins
     864              :  *      because the inner relation is provably unique for the join clauses.
     865              :  *
     866              :  * Ideally this would happen during reduce_outer_joins, but we don't have
     867              :  * enough information at that point.
     868              :  *
     869              :  * To perform the strength reduction when applicable, we need only delete
     870              :  * the semijoin's SpecialJoinInfo from root->join_info_list.  (We don't
     871              :  * bother fixing the join type attributed to it in the query jointree,
     872              :  * since that won't be consulted again.)
     873              :  */
     874              : void
     875       253567 : reduce_unique_semijoins(PlannerInfo *root)
     876              : {
     877              :     ListCell   *lc;
     878              : 
     879              :     /*
     880              :      * Scan the join_info_list to find semijoins.
     881              :      */
     882       288658 :     foreach(lc, root->join_info_list)
     883              :     {
     884        35091 :         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
     885              :         int         innerrelid;
     886              :         RelOptInfo *innerrel;
     887              :         Relids      joinrelids;
     888              :         List       *restrictlist;
     889              : 
     890              :         /*
     891              :          * Must be a semijoin to a single baserel, else we aren't going to be
     892              :          * able to do anything with it.
     893              :          */
     894        35091 :         if (sjinfo->jointype != JOIN_SEMI)
     895        34854 :             continue;
     896              : 
     897         4020 :         if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
     898          140 :             continue;
     899              : 
     900         3880 :         innerrel = find_base_rel(root, innerrelid);
     901              : 
     902              :         /*
     903              :          * Before we trouble to run generate_join_implied_equalities, make a
     904              :          * quick check to eliminate cases in which we will surely be unable to
     905              :          * prove uniqueness of the innerrel.
     906              :          */
     907         3880 :         if (!rel_supports_distinctness(root, innerrel))
     908          801 :             continue;
     909              : 
     910              :         /* Compute the relid set for the join we are considering */
     911         3079 :         joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
     912              :         Assert(sjinfo->ojrelid == 0);    /* SEMI joins don't have RT indexes */
     913              : 
     914              :         /*
     915              :          * Since we're only considering a single-rel RHS, any join clauses it
     916              :          * has must be clauses linking it to the semijoin's min_lefthand.  We
     917              :          * can also consider EC-derived join clauses.
     918              :          */
     919              :         restrictlist =
     920         3079 :             list_concat(generate_join_implied_equalities(root,
     921              :                                                          joinrelids,
     922              :                                                          sjinfo->min_lefthand,
     923              :                                                          innerrel,
     924              :                                                          NULL),
     925         3079 :                         innerrel->joininfo);
     926              : 
     927              :         /* Test whether the innerrel is unique for those clauses. */
     928         3079 :         if (!innerrel_is_unique(root,
     929              :                                 joinrelids, sjinfo->min_lefthand, innerrel,
     930              :                                 JOIN_SEMI, restrictlist, true))
     931         2842 :             continue;
     932              : 
     933              :         /* OK, remove the SpecialJoinInfo from the list. */
     934          237 :         root->join_info_list = foreach_delete_current(root->join_info_list, lc);
     935              :     }
     936       253567 : }
     937              : 
     938              : 
     939              : /*
     940              :  * rel_supports_distinctness
     941              :  *      Could the relation possibly be proven distinct on some set of columns?
     942              :  *
     943              :  * This is effectively a pre-checking function for rel_is_distinct_for().
     944              :  * It must return true if rel_is_distinct_for() could possibly return true
     945              :  * with this rel, but it should not expend a lot of cycles.  The idea is
     946              :  * that callers can avoid doing possibly-expensive processing to compute
     947              :  * rel_is_distinct_for()'s argument lists if the call could not possibly
     948              :  * succeed.
     949              :  */
     950              : static bool
     951       578189 : rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
     952              : {
     953              :     /* We only know about baserels ... */
     954       578189 :     if (rel->reloptkind != RELOPT_BASEREL)
     955       214229 :         return false;
     956       363960 :     if (rel->rtekind == RTE_RELATION)
     957              :     {
     958              :         /*
     959              :          * For a plain relation, we only know how to prove uniqueness by
     960              :          * reference to unique indexes.  Make sure there's at least one
     961              :          * suitable unique index.  It must be immediately enforced, and not a
     962              :          * partial index. (Keep these conditions in sync with
     963              :          * relation_has_unique_index_for!)
     964              :          */
     965              :         ListCell   *lc;
     966              : 
     967       450935 :         foreach(lc, rel->indexlist)
     968              :         {
     969       390472 :             IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
     970              : 
     971       390472 :             if (ind->unique && ind->immediate && ind->indpred == NIL)
     972       275899 :                 return true;
     973              :         }
     974              :     }
     975        27598 :     else if (rel->rtekind == RTE_SUBQUERY)
     976              :     {
     977         9705 :         Query      *subquery = root->simple_rte_array[rel->relid]->subquery;
     978              : 
     979              :         /* Check if the subquery has any qualities that support distinctness */
     980         9705 :         if (query_supports_distinctness(subquery))
     981         8342 :             return true;
     982              :     }
     983              :     /* We have no proof rules for any other rtekinds. */
     984        79719 :     return false;
     985              : }
     986              : 
     987              : /*
     988              :  * rel_is_distinct_for
     989              :  *      Does the relation return only distinct rows according to clause_list?
     990              :  *
     991              :  * clause_list is a list of join restriction clauses involving this rel and
     992              :  * some other one.  Return true if no two rows emitted by this rel could
     993              :  * possibly join to the same row of the other rel.
     994              :  *
     995              :  * The caller must have already determined that each condition is a
     996              :  * mergejoinable equality with an expression in this relation on one side, and
     997              :  * an expression not involving this relation on the other.  The transient
     998              :  * outer_is_left flag is used to identify which side references this relation:
     999              :  * left side if outer_is_left is false, right side if it is true.
    1000              :  *
    1001              :  * Note that the passed-in clause_list may be destructively modified!  This
    1002              :  * is OK for current uses, because the clause_list is built by the caller for
    1003              :  * the sole purpose of passing to this function.
    1004              :  *
    1005              :  * (*extra_clauses) to be set to the right sides of baserestrictinfo clauses,
    1006              :  * looking like "x = const" if distinctness is derived from such clauses, not
    1007              :  * joininfo clauses.  Pass NULL to the extra_clauses if this value is not
    1008              :  * needed.
    1009              :  */
    1010              : static bool
    1011       186736 : rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list,
    1012              :                     List **extra_clauses)
    1013              : {
    1014              :     /*
    1015              :      * We could skip a couple of tests here if we assume all callers checked
    1016              :      * rel_supports_distinctness first, but it doesn't seem worth taking any
    1017              :      * risk for.
    1018              :      */
    1019       186736 :     if (rel->reloptkind != RELOPT_BASEREL)
    1020            0 :         return false;
    1021       186736 :     if (rel->rtekind == RTE_RELATION)
    1022              :     {
    1023              :         /*
    1024              :          * Examine the indexes to see if we have a matching unique index.
    1025              :          * relation_has_unique_index_for automatically adds any usable
    1026              :          * restriction clauses for the rel, so we needn't do that here.
    1027              :          */
    1028       182354 :         if (relation_has_unique_index_for(root, rel, clause_list, extra_clauses))
    1029       107198 :             return true;
    1030              :     }
    1031         4382 :     else if (rel->rtekind == RTE_SUBQUERY)
    1032              :     {
    1033         4382 :         Index       relid = rel->relid;
    1034         4382 :         Query      *subquery = root->simple_rte_array[relid]->subquery;
    1035         4382 :         List       *distinct_cols = NIL;
    1036              :         ListCell   *l;
    1037              : 
    1038              :         /*
    1039              :          * Build the argument list for query_is_distinct_for: a list of
    1040              :          * DistinctColInfo entries, each holding an output column number that
    1041              :          * the query needs to be distinct over, the equality operator that the
    1042              :          * column needs to be distinct according to, and that operator's input
    1043              :          * collation.  The collation matters because the subquery's own
    1044              :          * DISTINCT / GROUP BY / set-op proves uniqueness under its own
    1045              :          * collation, which need not agree with the operator's.
    1046              :          *
    1047              :          * (XXX we are not considering restriction clauses attached to the
    1048              :          * subquery; is that worth doing?)
    1049              :          */
    1050         8406 :         foreach(l, clause_list)
    1051              :         {
    1052         4024 :             RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
    1053              :             OpExpr     *opexpr;
    1054              :             Var        *var;
    1055              :             DistinctColInfo *dcinfo;
    1056              : 
    1057              :             /*
    1058              :              * The caller's mergejoinability test should have selected only
    1059              :              * OpExprs.  The operator might be a cross-type operator and thus
    1060              :              * not exactly the same operator the subquery would consider;
    1061              :              * that's all right since query_is_distinct_for can resolve such
    1062              :              * cases.
    1063              :              */
    1064         4024 :             opexpr = castNode(OpExpr, rinfo->clause);
    1065              : 
    1066              :             /* caller identified the inner side for us */
    1067         4024 :             if (rinfo->outer_is_left)
    1068         3663 :                 var = (Var *) get_rightop(rinfo->clause);
    1069              :             else
    1070          361 :                 var = (Var *) get_leftop(rinfo->clause);
    1071              : 
    1072              :             /*
    1073              :              * We may ignore any RelabelType node above the operand.  (There
    1074              :              * won't be more than one, since eval_const_expressions() has been
    1075              :              * applied already.)
    1076              :              */
    1077         4024 :             if (var && IsA(var, RelabelType))
    1078         2748 :                 var = (Var *) ((RelabelType *) var)->arg;
    1079              : 
    1080              :             /*
    1081              :              * If inner side isn't a Var referencing a subquery output column,
    1082              :              * this clause doesn't help us.
    1083              :              */
    1084         4024 :             if (!var || !IsA(var, Var) ||
    1085         3999 :                 var->varno != relid || var->varlevelsup != 0)
    1086           25 :                 continue;
    1087              : 
    1088         3999 :             dcinfo = palloc(sizeof(DistinctColInfo));
    1089         3999 :             dcinfo->colno = var->varattno;
    1090         3999 :             dcinfo->opid = opexpr->opno;
    1091         3999 :             dcinfo->collid = opexpr->inputcollid;
    1092         3999 :             distinct_cols = lappend(distinct_cols, dcinfo);
    1093              :         }
    1094              : 
    1095         4382 :         if (query_is_distinct_for(subquery, distinct_cols))
    1096          563 :             return true;
    1097              :     }
    1098        78975 :     return false;
    1099              : }
    1100              : 
    1101              : 
    1102              : /*
    1103              :  * query_supports_distinctness - could the query possibly be proven distinct
    1104              :  *      on some set of output columns?
    1105              :  *
    1106              :  * This is effectively a pre-checking function for query_is_distinct_for().
    1107              :  * It must return true if query_is_distinct_for() could possibly return true
    1108              :  * with this query, but it should not expend a lot of cycles.  The idea is
    1109              :  * that callers can avoid doing possibly-expensive processing to compute
    1110              :  * query_is_distinct_for()'s argument lists if the call could not possibly
    1111              :  * succeed.
    1112              :  */
    1113              : bool
    1114         9705 : query_supports_distinctness(Query *query)
    1115              : {
    1116              :     /* SRFs break distinctness except with DISTINCT, see below */
    1117         9705 :     if (query->hasTargetSRFs && query->distinctClause == NIL)
    1118          763 :         return false;
    1119              : 
    1120              :     /* check for features we can prove distinctness with */
    1121         8942 :     if (query->distinctClause != NIL ||
    1122         8767 :         query->groupClause != NIL ||
    1123         8591 :         query->groupingSets != NIL ||
    1124         8551 :         query->hasAggs ||
    1125         7118 :         query->havingQual ||
    1126         7118 :         query->setOperations)
    1127         8342 :         return true;
    1128              : 
    1129          600 :     return false;
    1130              : }
    1131              : 
    1132              : /*
    1133              :  * query_is_distinct_for - does query never return duplicates of the
    1134              :  *      specified columns?
    1135              :  *
    1136              :  * query is a not-yet-planned subquery (in current usage, it's always from
    1137              :  * a subquery RTE, which the planner avoids scribbling on).
    1138              :  *
    1139              :  * distinct_cols is a list of DistinctColInfo, one per requested output column.
    1140              :  * Each entry names the subquery output column number we want distinct, the
    1141              :  * upper-level equality operator we'll compare values with, and that operator's
    1142              :  * input collation.  We are interested in whether rows consisting of just these
    1143              :  * columns are certain to be distinct.
    1144              :  *
    1145              :  * "Distinctness" is defined according to whether the corresponding upper-level
    1146              :  * equality operators would think the values are distinct.  (Note: each opid
    1147              :  * could be a cross-type operator, and thus not exactly the equality operator
    1148              :  * that the subquery would use itself.  We use equality_ops_are_compatible() to
    1149              :  * check compatibility.  That looks at opfamily membership for index AMs that
    1150              :  * have declared that they support consistent equality semantics within an
    1151              :  * opfamily, and so should give trustworthy answers for all operators that we
    1152              :  * might need to deal with here.)
    1153              :  *
    1154              :  * The collid must also agree on equality with the collation the subquery's own
    1155              :  * DISTINCT/GROUP BY/set-op uses to deduplicate the column, else the subquery's
    1156              :  * distinctness does not carry over to the caller's equality semantics.  Two
    1157              :  * collations agree on equality if they match or if both are deterministic (in
    1158              :  * which case both reduce equality to byte-equality; see CREATE COLLATION).
    1159              :  */
    1160              : bool
    1161         4382 : query_is_distinct_for(Query *query, List *distinct_cols)
    1162              : {
    1163              :     ListCell   *l;
    1164              :     DistinctColInfo *dcinfo;
    1165              : 
    1166              :     /*
    1167              :      * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
    1168              :      * columns in the DISTINCT clause appear in colnos and operator semantics
    1169              :      * match.  This is true even if there are SRFs in the DISTINCT columns or
    1170              :      * elsewhere in the tlist.
    1171              :      */
    1172         4382 :     if (query->distinctClause)
    1173              :     {
    1174          180 :         foreach(l, query->distinctClause)
    1175              :         {
    1176          145 :             SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
    1177          145 :             TargetEntry *tle = get_sortgroupclause_tle(sgc,
    1178              :                                                        query->targetList);
    1179              : 
    1180          145 :             dcinfo = distinct_col_search(tle->resno, distinct_cols);
    1181          145 :             if (dcinfo == NULL ||
    1182           90 :                 !equality_ops_are_compatible(dcinfo->opid, sgc->eqop) ||
    1183           90 :                 !collations_agree_on_equality(dcinfo->collid,
    1184           90 :                                               exprCollation((Node *) tle->expr)))
    1185              :                 break;          /* exit early if no match */
    1186              :         }
    1187          130 :         if (l == NULL)          /* had matches for all? */
    1188           35 :             return true;
    1189              :     }
    1190              : 
    1191              :     /*
    1192              :      * Otherwise, a set-returning function in the query's targetlist can
    1193              :      * result in returning duplicate rows, despite any grouping that might
    1194              :      * occur before tlist evaluation.  (If all tlist SRFs are within GROUP BY
    1195              :      * columns, it would be safe because they'd be expanded before grouping.
    1196              :      * But it doesn't currently seem worth the effort to check for that.)
    1197              :      */
    1198         4347 :     if (query->hasTargetSRFs)
    1199            0 :         return false;
    1200              : 
    1201              :     /*
    1202              :      * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
    1203              :      * the grouped columns appear in colnos and operator semantics match.
    1204              :      */
    1205         4347 :     if (query->groupClause && !query->groupingSets)
    1206              :     {
    1207          206 :         foreach(l, query->groupClause)
    1208              :         {
    1209          144 :             SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
    1210          144 :             TargetEntry *tle = get_sortgroupclause_tle(sgc,
    1211              :                                                        query->targetList);
    1212              : 
    1213          144 :             dcinfo = distinct_col_search(tle->resno, distinct_cols);
    1214          144 :             if (dcinfo == NULL ||
    1215          107 :                 !equality_ops_are_compatible(dcinfo->opid, sgc->eqop) ||
    1216          107 :                 !collations_agree_on_equality(dcinfo->collid,
    1217          107 :                                               exprCollation((Node *) tle->expr)))
    1218              :                 break;          /* exit early if no match */
    1219              :         }
    1220          109 :         if (l == NULL)          /* had matches for all? */
    1221           62 :             return true;
    1222              :     }
    1223         4238 :     else if (query->groupingSets)
    1224              :     {
    1225              :         List       *gsets;
    1226              : 
    1227              :         /*
    1228              :          * If we have grouping sets with expressions, we probably don't have
    1229              :          * uniqueness and analysis would be hard. Punt.
    1230              :          */
    1231           50 :         if (query->groupClause)
    1232           10 :             return false;
    1233              : 
    1234              :         /*
    1235              :          * If we have no groupClause (therefore no grouping expressions), we
    1236              :          * might have one or many empty grouping sets.  If there's just one,
    1237              :          * or if the DISTINCT clause is used on the GROUP BY, then we're
    1238              :          * returning only one row and are certainly unique.  But otherwise, we
    1239              :          * know we're certainly not unique.
    1240              :          */
    1241           40 :         if (query->groupDistinct)
    1242            5 :             return true;
    1243              : 
    1244           35 :         gsets = expand_grouping_sets(query->groupingSets, false, -1);
    1245              : 
    1246           35 :         return (list_length(gsets) == 1);
    1247              :     }
    1248              :     else
    1249              :     {
    1250              :         /*
    1251              :          * If we have no GROUP BY, but do have aggregates or HAVING, then the
    1252              :          * result is at most one row so it's surely unique, for any operators.
    1253              :          */
    1254         4188 :         if (query->hasAggs || query->havingQual)
    1255          396 :             return true;
    1256              :     }
    1257              : 
    1258              :     /*
    1259              :      * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
    1260              :      * except with ALL.
    1261              :      */
    1262         3839 :     if (query->setOperations)
    1263              :     {
    1264         3697 :         SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations);
    1265              : 
    1266              :         Assert(topop->op != SETOP_NONE);
    1267              : 
    1268         3697 :         if (!topop->all)
    1269              :         {
    1270              :             ListCell   *lg;
    1271              : 
    1272              :             /* We're good if all the nonjunk output columns are in colnos */
    1273           93 :             lg = list_head(topop->groupClauses);
    1274          148 :             foreach(l, query->targetList)
    1275              :             {
    1276           98 :                 TargetEntry *tle = (TargetEntry *) lfirst(l);
    1277              :                 SortGroupClause *sgc;
    1278              : 
    1279           98 :                 if (tle->resjunk)
    1280            0 :                     continue;   /* ignore resjunk columns */
    1281              : 
    1282              :                 /* non-resjunk columns should have grouping clauses */
    1283              :                 Assert(lg != NULL);
    1284           98 :                 sgc = (SortGroupClause *) lfirst(lg);
    1285           98 :                 lg = lnext(topop->groupClauses, lg);
    1286              : 
    1287           98 :                 dcinfo = distinct_col_search(tle->resno, distinct_cols);
    1288           98 :                 if (dcinfo == NULL ||
    1289           65 :                     !equality_ops_are_compatible(dcinfo->opid, sgc->eqop) ||
    1290           65 :                     !collations_agree_on_equality(dcinfo->collid,
    1291           65 :                                                   exprCollation((Node *) tle->expr)))
    1292              :                     break;      /* exit early if no match */
    1293              :             }
    1294           93 :             if (l == NULL)      /* had matches for all? */
    1295           50 :                 return true;
    1296              :         }
    1297              :     }
    1298              : 
    1299              :     /*
    1300              :      * XXX Are there any other cases in which we can easily see the result
    1301              :      * must be distinct?
    1302              :      *
    1303              :      * If you do add more smarts to this function, be sure to update
    1304              :      * query_supports_distinctness() to match.
    1305              :      */
    1306              : 
    1307         3789 :     return false;
    1308              : }
    1309              : 
    1310              : /*
    1311              :  * distinct_col_search - subroutine for query_is_distinct_for
    1312              :  *
    1313              :  * If colno matches the colno field of an entry in distinct_cols, return a
    1314              :  * pointer to that entry; else return NULL.  (Ordinarily distinct_cols would
    1315              :  * not contain duplicate colnos, but if it does, we arbitrarily select the
    1316              :  * first match.)
    1317              :  */
    1318              : static DistinctColInfo *
    1319          387 : distinct_col_search(int colno, List *distinct_cols)
    1320              : {
    1321          655 :     foreach_ptr(DistinctColInfo, dcinfo, distinct_cols)
    1322              :     {
    1323          405 :         if (dcinfo->colno == colno)
    1324          262 :             return dcinfo;
    1325              :     }
    1326              : 
    1327          125 :     return NULL;
    1328              : }
    1329              : 
    1330              : 
    1331              : /*
    1332              :  * innerrel_is_unique
    1333              :  *    Check if the innerrel provably contains at most one tuple matching any
    1334              :  *    tuple from the outerrel, based on join clauses in the 'restrictlist'.
    1335              :  *
    1336              :  * We need an actual RelOptInfo for the innerrel, but it's sufficient to
    1337              :  * identify the outerrel by its Relids.  This asymmetry supports use of this
    1338              :  * function before joinrels have been built.  (The caller is expected to
    1339              :  * also supply the joinrelids, just to save recalculating that.)
    1340              :  *
    1341              :  * The proof must be made based only on clauses that will be "joinquals"
    1342              :  * rather than "otherquals" at execution.  For an inner join there's no
    1343              :  * difference; but if the join is outer, we must ignore pushed-down quals,
    1344              :  * as those will become "otherquals".  Note that this means the answer might
    1345              :  * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
    1346              :  * answer without regard to that, callers must take care not to call this
    1347              :  * with jointypes that would be classified differently by IS_OUTER_JOIN().
    1348              :  *
    1349              :  * The actual proof is undertaken by is_innerrel_unique_for(); this function
    1350              :  * is a frontend that is mainly concerned with caching the answers.
    1351              :  * In particular, the force_cache argument allows overriding the internal
    1352              :  * heuristic about whether to cache negative answers; it should be "true"
    1353              :  * if making an inquiry that is not part of the normal bottom-up join search
    1354              :  * sequence.
    1355              :  */
    1356              : bool
    1357       620173 : innerrel_is_unique(PlannerInfo *root,
    1358              :                    Relids joinrelids,
    1359              :                    Relids outerrelids,
    1360              :                    RelOptInfo *innerrel,
    1361              :                    JoinType jointype,
    1362              :                    List *restrictlist,
    1363              :                    bool force_cache)
    1364              : {
    1365       620173 :     return innerrel_is_unique_ext(root, joinrelids, outerrelids, innerrel,
    1366              :                                   jointype, restrictlist, force_cache, NULL);
    1367              : }
    1368              : 
    1369              : /*
    1370              :  * innerrel_is_unique_ext
    1371              :  *    Do the same as innerrel_is_unique(), but also set to (*extra_clauses)
    1372              :  *    additional clauses from a baserestrictinfo list used to prove the
    1373              :  *    uniqueness.
    1374              :  *
    1375              :  * A non-NULL extra_clauses indicates that we're checking for self-join and
    1376              :  * correspondingly dealing with filtered clauses.
    1377              :  */
    1378              : bool
    1379       621964 : innerrel_is_unique_ext(PlannerInfo *root,
    1380              :                        Relids joinrelids,
    1381              :                        Relids outerrelids,
    1382              :                        RelOptInfo *innerrel,
    1383              :                        JoinType jointype,
    1384              :                        List *restrictlist,
    1385              :                        bool force_cache,
    1386              :                        List **extra_clauses)
    1387              : {
    1388              :     MemoryContext old_context;
    1389              :     ListCell   *lc;
    1390              :     UniqueRelInfo *uniqueRelInfo;
    1391       621964 :     List       *outer_exprs = NIL;
    1392       621964 :     bool        self_join = (extra_clauses != NULL);
    1393              : 
    1394              :     /* Certainly can't prove uniqueness when there are no joinclauses */
    1395       621964 :     if (restrictlist == NIL)
    1396        79335 :         return false;
    1397              : 
    1398              :     /*
    1399              :      * Make a quick check to eliminate cases in which we will surely be unable
    1400              :      * to prove uniqueness of the innerrel.
    1401              :      */
    1402       542629 :     if (!rel_supports_distinctness(root, innerrel))
    1403       290638 :         return false;
    1404              : 
    1405              :     /*
    1406              :      * Query the cache to see if we've managed to prove that innerrel is
    1407              :      * unique for any subset of this outerrel.  For non-self-join search, we
    1408              :      * don't need an exact match, as extra outerrels can't make the innerrel
    1409              :      * any less unique (or more formally, the restrictlist for a join to a
    1410              :      * superset outerrel must be a superset of the conditions we successfully
    1411              :      * used before). For self-join search, we require an exact match of
    1412              :      * outerrels because we need extra clauses to be valid for our case. Also,
    1413              :      * for self-join checking we've filtered the clauses list.  Thus, we can
    1414              :      * match only the result cached for a self-join search for another
    1415              :      * self-join check.
    1416              :      */
    1417       274974 :     foreach(lc, innerrel->unique_for_rels)
    1418              :     {
    1419        96733 :         uniqueRelInfo = (UniqueRelInfo *) lfirst(lc);
    1420              : 
    1421        96733 :         if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) ||
    1422           57 :             (self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) &&
    1423           42 :              uniqueRelInfo->self_join))
    1424              :         {
    1425        73750 :             if (extra_clauses)
    1426           10 :                 *extra_clauses = uniqueRelInfo->extra_clauses;
    1427        73750 :             return true;        /* Success! */
    1428              :         }
    1429              :     }
    1430              : 
    1431              :     /*
    1432              :      * Conversely, we may have already determined that this outerrel, or some
    1433              :      * superset thereof, cannot prove this innerrel to be unique.
    1434              :      */
    1435       178777 :     foreach(lc, innerrel->non_unique_for_rels)
    1436              :     {
    1437          884 :         Relids      unique_for_rels = (Relids) lfirst(lc);
    1438              : 
    1439          884 :         if (bms_is_subset(outerrelids, unique_for_rels))
    1440          348 :             return false;
    1441              :     }
    1442              : 
    1443              :     /* No cached information, so try to make the proof. */
    1444       177893 :     if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
    1445              :                                jointype, restrictlist,
    1446              :                                self_join ? &outer_exprs : NULL))
    1447              :     {
    1448              :         /*
    1449              :          * Cache the positive result for future probes, being sure to keep it
    1450              :          * in the planner_cxt even if we are working in GEQO.
    1451              :          *
    1452              :          * Note: one might consider trying to isolate the minimal subset of
    1453              :          * the outerrels that proved the innerrel unique.  But it's not worth
    1454              :          * the trouble, because the planner builds up joinrels incrementally
    1455              :          * and so we'll see the minimally sufficient outerrels before any
    1456              :          * supersets of them anyway.
    1457              :          */
    1458        99155 :         old_context = MemoryContextSwitchTo(root->planner_cxt);
    1459        99155 :         uniqueRelInfo = makeNode(UniqueRelInfo);
    1460        99155 :         uniqueRelInfo->outerrelids = bms_copy(outerrelids);
    1461        99155 :         uniqueRelInfo->self_join = self_join;
    1462        99155 :         uniqueRelInfo->extra_clauses = outer_exprs;
    1463        99155 :         innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
    1464              :                                             uniqueRelInfo);
    1465        99155 :         MemoryContextSwitchTo(old_context);
    1466              : 
    1467        99155 :         if (extra_clauses)
    1468          513 :             *extra_clauses = outer_exprs;
    1469        99155 :         return true;            /* Success! */
    1470              :     }
    1471              :     else
    1472              :     {
    1473              :         /*
    1474              :          * None of the join conditions for outerrel proved innerrel unique, so
    1475              :          * we can safely reject this outerrel or any subset of it in future
    1476              :          * checks.
    1477              :          *
    1478              :          * However, in normal planning mode, caching this knowledge is totally
    1479              :          * pointless; it won't be queried again, because we build up joinrels
    1480              :          * from smaller to larger.  It's only useful when using GEQO or
    1481              :          * another planner extension that attempts planning multiple times.
    1482              :          *
    1483              :          * Also, allow callers to override that heuristic and force caching;
    1484              :          * that's useful for reduce_unique_semijoins, which calls here before
    1485              :          * the normal join search starts.
    1486              :          */
    1487        78738 :         if (force_cache || root->assumeReplanning)
    1488              :         {
    1489         3190 :             old_context = MemoryContextSwitchTo(root->planner_cxt);
    1490         3190 :             innerrel->non_unique_for_rels =
    1491         3190 :                 lappend(innerrel->non_unique_for_rels,
    1492         3190 :                         bms_copy(outerrelids));
    1493         3190 :             MemoryContextSwitchTo(old_context);
    1494              :         }
    1495              : 
    1496        78738 :         return false;
    1497              :     }
    1498              : }
    1499              : 
    1500              : /*
    1501              :  * is_innerrel_unique_for
    1502              :  *    Check if the innerrel provably contains at most one tuple matching any
    1503              :  *    tuple from the outerrel, based on join clauses in the 'restrictlist'.
    1504              :  */
    1505              : static bool
    1506       177893 : is_innerrel_unique_for(PlannerInfo *root,
    1507              :                        Relids joinrelids,
    1508              :                        Relids outerrelids,
    1509              :                        RelOptInfo *innerrel,
    1510              :                        JoinType jointype,
    1511              :                        List *restrictlist,
    1512              :                        List **extra_clauses)
    1513              : {
    1514       177893 :     List       *clause_list = NIL;
    1515              :     ListCell   *lc;
    1516              : 
    1517              :     /*
    1518              :      * Search for mergejoinable clauses that constrain the inner rel against
    1519              :      * the outer rel.  If an operator is mergejoinable then it behaves like
    1520              :      * equality for some btree opclass, so it's what we want.  The
    1521              :      * mergejoinability test also eliminates clauses containing volatile
    1522              :      * functions, which we couldn't depend on.
    1523              :      */
    1524       398932 :     foreach(lc, restrictlist)
    1525              :     {
    1526       221039 :         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
    1527              : 
    1528              :         /*
    1529              :          * As noted above, if it's a pushed-down clause and we're at an outer
    1530              :          * join, we can't use it.
    1531              :          */
    1532       221039 :         if (IS_OUTER_JOIN(jointype) &&
    1533        88465 :             RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
    1534         8031 :             continue;
    1535              : 
    1536              :         /* Ignore if it's not a mergejoinable clause */
    1537       213008 :         if (!restrictinfo->can_join ||
    1538       199784 :             restrictinfo->mergeopfamilies == NIL)
    1539        13928 :             continue;           /* not mergejoinable */
    1540              : 
    1541              :         /*
    1542              :          * Check if the clause has the form "outer op inner" or "inner op
    1543              :          * outer", and if so mark which side is inner.
    1544              :          */
    1545       199080 :         if (!clause_sides_match_join(restrictinfo, outerrelids,
    1546              :                                      innerrel->relids))
    1547           20 :             continue;           /* no good for these input relations */
    1548              : 
    1549              :         /* OK, add to the list */
    1550       199060 :         clause_list = lappend(clause_list, restrictinfo);
    1551              :     }
    1552              : 
    1553              :     /* Let rel_is_distinct_for() do the hard work */
    1554       177893 :     return rel_is_distinct_for(root, innerrel, clause_list, extra_clauses);
    1555              : }
    1556              : 
    1557              : /*
    1558              :  * Update EC members to point to the remaining relation instead of the removed
    1559              :  * one, removing duplicates.
    1560              :  *
    1561              :  * Restriction clauses for base relations are already distributed to
    1562              :  * the respective baserestrictinfo lists (see
    1563              :  * generate_implied_equalities_for_column). The above code has already processed
    1564              :  * this list and updated these clauses to reference the remaining
    1565              :  * relation, so that we can skip them here based on their relids.
    1566              :  *
    1567              :  * Likewise, we have already processed the join clauses that join the
    1568              :  * removed relation to the remaining one.
    1569              :  *
    1570              :  * Finally, there might be join clauses tying the removed relation to
    1571              :  * some third relation.  We can't just delete the source clauses and
    1572              :  * regenerate them from the EC because the corresponding equality
    1573              :  * operators might be missing (see the handling of ec_broken).
    1574              :  * Therefore, we will update the references in the source clauses.
    1575              :  *
    1576              :  * Derived clauses can be generated again, so it is simpler just to
    1577              :  * delete them.
    1578              :  */
    1579              : static void
    1580          751 : update_eclasses(EquivalenceClass *ec, int from, int to)
    1581              : {
    1582          751 :     List       *new_members = NIL;
    1583          751 :     List       *new_sources = NIL;
    1584              : 
    1585              :     /*
    1586              :      * We don't expect any EC child members to exist at this point.  Ensure
    1587              :      * that's the case, otherwise, we might be getting asked to do something
    1588              :      * this function hasn't been coded for.
    1589              :      */
    1590              :     Assert(ec->ec_childmembers == NULL);
    1591              : 
    1592         3076 :     foreach_node(EquivalenceMember, em, ec->ec_members)
    1593              :     {
    1594         1574 :         bool        is_redundant = false;
    1595              : 
    1596         1574 :         if (!bms_is_member(from, em->em_relids))
    1597              :         {
    1598          808 :             new_members = lappend(new_members, em);
    1599          808 :             continue;
    1600              :         }
    1601              : 
    1602          766 :         em->em_relids = adjust_relid_set(em->em_relids, from, to);
    1603          766 :         em->em_jdomain->jd_relids = adjust_relid_set(em->em_jdomain->jd_relids, from, to);
    1604              : 
    1605              :         /* We only process inner joins */
    1606          766 :         ChangeVarNodesExtended((Node *) em->em_expr, from, to, 0,
    1607              :                                replace_relid_callback);
    1608              : 
    1609         1552 :         foreach_node(EquivalenceMember, other, new_members)
    1610              :         {
    1611          186 :             if (!equal(em->em_relids, other->em_relids))
    1612           20 :                 continue;
    1613              : 
    1614          166 :             if (equal(em->em_expr, other->em_expr))
    1615              :             {
    1616          166 :                 is_redundant = true;
    1617          166 :                 break;
    1618              :             }
    1619              :         }
    1620              : 
    1621          766 :         if (!is_redundant)
    1622          600 :             new_members = lappend(new_members, em);
    1623              :     }
    1624              : 
    1625          751 :     list_free(ec->ec_members);
    1626          751 :     ec->ec_members = new_members;
    1627              : 
    1628          751 :     ec_clear_derived_clauses(ec);
    1629              : 
    1630              :     /* Update EC source expressions */
    1631         2325 :     foreach_node(RestrictInfo, rinfo, ec->ec_sources)
    1632              :     {
    1633          823 :         bool        is_redundant = false;
    1634              : 
    1635          823 :         if (!bms_is_member(from, rinfo->required_relids))
    1636              :         {
    1637          160 :             new_sources = lappend(new_sources, rinfo);
    1638          160 :             continue;
    1639              :         }
    1640              : 
    1641          663 :         ChangeVarNodesExtended((Node *) rinfo, from, to, 0,
    1642              :                                replace_relid_callback);
    1643              : 
    1644              :         /*
    1645              :          * After switching the clause to the remaining relation, check it for
    1646              :          * redundancy with existing ones. We don't have to check for
    1647              :          * redundancy with derived clauses, because we've just deleted them.
    1648              :          */
    1649         1338 :         foreach_node(RestrictInfo, other, new_sources)
    1650              :         {
    1651           22 :             if (!equal(rinfo->clause_relids, other->clause_relids))
    1652           12 :                 continue;
    1653              : 
    1654           10 :             if (equal(rinfo->clause, other->clause))
    1655              :             {
    1656           10 :                 is_redundant = true;
    1657           10 :                 break;
    1658              :             }
    1659              :         }
    1660              : 
    1661          663 :         if (!is_redundant)
    1662          653 :             new_sources = lappend(new_sources, rinfo);
    1663              :     }
    1664              : 
    1665          751 :     list_free(ec->ec_sources);
    1666          751 :     ec->ec_sources = new_sources;
    1667          751 :     ec->ec_relids = adjust_relid_set(ec->ec_relids, from, to);
    1668          751 : }
    1669              : 
    1670              : /*
    1671              :  * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field,
    1672              :  * which makes almost every RestrictInfo unique.  This type of comparison is
    1673              :  * useful when removing duplicates while moving RestrictInfo's from removed
    1674              :  * relation to remaining relation during self-join elimination.
    1675              :  *
    1676              :  * XXX: In the future, we might remove the 'rinfo_serial' field completely and
    1677              :  * get rid of this function.
    1678              :  */
    1679              : static bool
    1680          476 : restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b)
    1681              : {
    1682          476 :     int         saved_rinfo_serial = a->rinfo_serial;
    1683              :     bool        result;
    1684              : 
    1685          476 :     a->rinfo_serial = b->rinfo_serial;
    1686          476 :     result = equal(a, b);
    1687          476 :     a->rinfo_serial = saved_rinfo_serial;
    1688              : 
    1689          476 :     return result;
    1690              : }
    1691              : 
    1692              : /*
    1693              :  * This function adds all non-redundant clauses to the keeping relation
    1694              :  * during self-join elimination.  That is a contradictory operation. On the
    1695              :  * one hand, we reduce the length of the `restrict` lists, which can
    1696              :  * impact planning or executing time.  Additionally, we improve the
    1697              :  * accuracy of cardinality estimation.  On the other hand, it is one more
    1698              :  * place that can make planning time much longer in specific cases.  It
    1699              :  * would have been better to avoid calling the equal() function here, but
    1700              :  * it's the only way to detect duplicated inequality expressions.
    1701              :  *
    1702              :  * (*keep_rinfo_list) is given by pointer because it might be altered by
    1703              :  * distribute_restrictinfo_to_rels().
    1704              :  */
    1705              : static void
    1706          936 : add_non_redundant_clauses(PlannerInfo *root,
    1707              :                           List *rinfo_candidates,
    1708              :                           List **keep_rinfo_list,
    1709              :                           Index removed_relid)
    1710              : {
    1711         2652 :     foreach_node(RestrictInfo, rinfo, rinfo_candidates)
    1712              :     {
    1713          780 :         bool        is_redundant = false;
    1714              : 
    1715              :         Assert(!bms_is_member(removed_relid, rinfo->required_relids));
    1716              : 
    1717         1939 :         foreach_node(RestrictInfo, src, (*keep_rinfo_list))
    1718              :         {
    1719          496 :             if (!bms_equal(src->clause_relids, rinfo->clause_relids))
    1720              :                 /* Can't compare trivially different clauses */
    1721           15 :                 continue;
    1722              : 
    1723          481 :             if (src == rinfo ||
    1724          481 :                 (rinfo->parent_ec != NULL &&
    1725          790 :                  src->parent_ec == rinfo->parent_ec) ||
    1726          476 :                 restrict_infos_logically_equal(rinfo, src))
    1727              :             {
    1728          117 :                 is_redundant = true;
    1729          117 :                 break;
    1730              :             }
    1731              :         }
    1732          780 :         if (!is_redundant)
    1733          663 :             distribute_restrictinfo_to_rels(root, rinfo);
    1734              :     }
    1735          936 : }
    1736              : 
    1737              : /*
    1738              :  * A custom callback for ChangeVarNodesExtended() providing Self-join
    1739              :  * elimination (SJE) related functionality
    1740              :  *
    1741              :  * SJE needs to skip the RangeTblRef node type.  During SJE's last
    1742              :  * step, remove_rel_from_joinlist() removes remaining RangeTblRefs
    1743              :  * with target relid.  If ChangeVarNodes() replaces the target relid
    1744              :  * before, remove_rel_from_joinlist() would fail to identify the nodes
    1745              :  * to delete.
    1746              :  *
    1747              :  * SJE also needs to change the relids within RestrictInfo's.
    1748              :  */
    1749              : static bool
    1750        30676 : replace_relid_callback(Node *node, ChangeVarNodes_context *context)
    1751              : {
    1752        30676 :     if (IsA(node, RangeTblRef))
    1753              :     {
    1754         1249 :         return true;
    1755              :     }
    1756        29427 :     else if (IsA(node, RestrictInfo))
    1757              :     {
    1758         1463 :         RestrictInfo *rinfo = (RestrictInfo *) node;
    1759         1463 :         int         relid = -1;
    1760         1463 :         bool        is_req_equal =
    1761         1463 :             (rinfo->required_relids == rinfo->clause_relids);
    1762         1463 :         bool        clause_relids_is_multiple =
    1763         1463 :             (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
    1764              : 
    1765              :         /*
    1766              :          * Recurse down into clauses if the target relation is present in
    1767              :          * clause_relids or required_relids.  We must check required_relids
    1768              :          * because the relation not present in clause_relids might still be
    1769              :          * present somewhere in orclause.
    1770              :          */
    1771         1511 :         if (bms_is_member(context->rt_index, rinfo->clause_relids) ||
    1772           48 :             bms_is_member(context->rt_index, rinfo->required_relids))
    1773              :         {
    1774              :             Relids      new_clause_relids;
    1775              : 
    1776         1453 :             ChangeVarNodesWalkExpression((Node *) rinfo->clause, context);
    1777         1453 :             ChangeVarNodesWalkExpression((Node *) rinfo->orclause, context);
    1778              : 
    1779         1453 :             new_clause_relids = adjust_relid_set(rinfo->clause_relids,
    1780              :                                                  context->rt_index,
    1781              :                                                  context->new_index);
    1782              : 
    1783              :             /*
    1784              :              * Incrementally adjust num_base_rels based on the change of
    1785              :              * clause_relids, which could contain both base relids and
    1786              :              * outer-join relids.  This operation is legal until we remove
    1787              :              * only baserels.
    1788              :              */
    1789         1453 :             rinfo->num_base_rels -= bms_num_members(rinfo->clause_relids) -
    1790         1453 :                 bms_num_members(new_clause_relids);
    1791              : 
    1792         1453 :             rinfo->clause_relids = new_clause_relids;
    1793         1453 :             rinfo->left_relids =
    1794         1453 :                 adjust_relid_set(rinfo->left_relids, context->rt_index, context->new_index);
    1795         1453 :             rinfo->right_relids =
    1796         1453 :                 adjust_relid_set(rinfo->right_relids, context->rt_index, context->new_index);
    1797              :         }
    1798              : 
    1799         1463 :         if (is_req_equal)
    1800           20 :             rinfo->required_relids = rinfo->clause_relids;
    1801              :         else
    1802         1443 :             rinfo->required_relids =
    1803         1443 :                 adjust_relid_set(rinfo->required_relids, context->rt_index, context->new_index);
    1804              : 
    1805         1463 :         rinfo->outer_relids =
    1806         1463 :             adjust_relid_set(rinfo->outer_relids, context->rt_index, context->new_index);
    1807         1463 :         rinfo->incompatible_relids =
    1808         1463 :             adjust_relid_set(rinfo->incompatible_relids, context->rt_index, context->new_index);
    1809              : 
    1810         2767 :         if (rinfo->mergeopfamilies &&
    1811         2490 :             bms_get_singleton_member(rinfo->clause_relids, &relid) &&
    1812          991 :             clause_relids_is_multiple &&
    1813          991 :             relid == context->new_index && IsA(rinfo->clause, OpExpr))
    1814              :         {
    1815              :             Expr       *leftOp;
    1816              :             Expr       *rightOp;
    1817              : 
    1818          991 :             leftOp = (Expr *) get_leftop(rinfo->clause);
    1819          991 :             rightOp = (Expr *) get_rightop(rinfo->clause);
    1820              : 
    1821              :             /*
    1822              :              * For self-join elimination, changing varnos could transform
    1823              :              * "t1.a = t2.a" into "t1.a = t1.a".  That is always true as long
    1824              :              * as "t1.a" is not null.  We use equal() to check for such a
    1825              :              * case, and then we replace the qual with a check for not null
    1826              :              * (NullTest).
    1827              :              */
    1828          991 :             if (leftOp != NULL && equal(leftOp, rightOp))
    1829              :             {
    1830          981 :                 NullTest   *ntest = makeNode(NullTest);
    1831              : 
    1832          981 :                 ntest->arg = leftOp;
    1833          981 :                 ntest->nulltesttype = IS_NOT_NULL;
    1834          981 :                 ntest->argisrow = false;
    1835          981 :                 ntest->location = -1;
    1836          981 :                 rinfo->clause = (Expr *) ntest;
    1837          981 :                 rinfo->mergeopfamilies = NIL;
    1838          981 :                 rinfo->left_em = NULL;
    1839          981 :                 rinfo->right_em = NULL;
    1840              :             }
    1841              :             Assert(rinfo->orclause == NULL);
    1842              :         }
    1843         1463 :         return true;
    1844              :     }
    1845              : 
    1846        27964 :     return false;
    1847              : }
    1848              : 
    1849              : /*
    1850              :  * Remove a relation after we have proven that it participates only in an
    1851              :  * unneeded unique self-join.
    1852              :  *
    1853              :  * Replace any links in planner info structures.
    1854              :  *
    1855              :  * Transfer join and restriction clauses from the removed relation to the
    1856              :  * remaining one. We change the Vars of the clause to point to the
    1857              :  * remaining relation instead of the removed one. The clauses that require
    1858              :  * a subset of joinrelids become restriction clauses of the remaining
    1859              :  * relation, and others remain join clauses. We append them to
    1860              :  * baserestrictinfo and joininfo, respectively, trying not to introduce
    1861              :  * duplicates.
    1862              :  *
    1863              :  * We also have to process the 'joinclauses' list here, because it
    1864              :  * contains EC-derived join clauses which must become filter clauses. It
    1865              :  * is not enough to just correct the ECs because the EC-derived
    1866              :  * restrictions are generated before join removal (see
    1867              :  * generate_base_implied_equalities).
    1868              :  *
    1869              :  * NOTE: Remember to keep the code in sync with PlannerInfo to be sure all
    1870              :  * cached relids and relid bitmapsets can be correctly cleaned during the
    1871              :  * self-join elimination procedure.
    1872              :  */
    1873              : static void
    1874          468 : remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark,
    1875              :                      RelOptInfo *toKeep, RelOptInfo *toRemove,
    1876              :                      List *restrictlist)
    1877              : {
    1878              :     List       *joininfos;
    1879              :     ListCell   *lc;
    1880              :     int         i;
    1881          468 :     List       *jinfo_candidates = NIL;
    1882          468 :     List       *binfo_candidates = NIL;
    1883              : 
    1884              :     Assert(toKeep->relid > 0);
    1885              :     Assert(toRemove->relid > 0);
    1886              : 
    1887              :     /*
    1888              :      * Replace the index of the removing table with the keeping one. The
    1889              :      * technique of removing/distributing restrictinfo is used here to attach
    1890              :      * just appeared (for keeping relation) join clauses and avoid adding
    1891              :      * duplicates of those that already exist in the joininfo list.
    1892              :      */
    1893          468 :     joininfos = list_copy(toRemove->joininfo);
    1894         1019 :     foreach_node(RestrictInfo, rinfo, joininfos)
    1895              :     {
    1896           83 :         remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
    1897           83 :         ChangeVarNodesExtended((Node *) rinfo, toRemove->relid, toKeep->relid,
    1898              :                                0, replace_relid_callback);
    1899              : 
    1900           83 :         if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
    1901           68 :             jinfo_candidates = lappend(jinfo_candidates, rinfo);
    1902              :         else
    1903           15 :             binfo_candidates = lappend(binfo_candidates, rinfo);
    1904              :     }
    1905              : 
    1906              :     /*
    1907              :      * Concatenate restrictlist to the list of base restrictions of the
    1908              :      * removing table just to simplify the replacement procedure: all of them
    1909              :      * weren't connected to any keeping relations and need to be added to some
    1910              :      * rels.
    1911              :      */
    1912          468 :     toRemove->baserestrictinfo = list_concat(toRemove->baserestrictinfo,
    1913              :                                              restrictlist);
    1914         1633 :     foreach_node(RestrictInfo, rinfo, toRemove->baserestrictinfo)
    1915              :     {
    1916          697 :         ChangeVarNodesExtended((Node *) rinfo, toRemove->relid, toKeep->relid,
    1917              :                                0, replace_relid_callback);
    1918              : 
    1919          697 :         if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
    1920            0 :             jinfo_candidates = lappend(jinfo_candidates, rinfo);
    1921              :         else
    1922          697 :             binfo_candidates = lappend(binfo_candidates, rinfo);
    1923              :     }
    1924              : 
    1925              :     /*
    1926              :      * Now, add all non-redundant clauses to the keeping relation.
    1927              :      */
    1928          468 :     add_non_redundant_clauses(root, binfo_candidates,
    1929              :                               &toKeep->baserestrictinfo, toRemove->relid);
    1930          468 :     add_non_redundant_clauses(root, jinfo_candidates,
    1931              :                               &toKeep->joininfo, toRemove->relid);
    1932              : 
    1933          468 :     list_free(binfo_candidates);
    1934          468 :     list_free(jinfo_candidates);
    1935              : 
    1936              :     /*
    1937              :      * Arrange equivalence classes, mentioned removing a table, with the
    1938              :      * keeping one: varno of removing table should be replaced in members and
    1939              :      * sources lists. Also, remove duplicated elements if this replacement
    1940              :      * procedure created them.
    1941              :      */
    1942          468 :     i = -1;
    1943         1219 :     while ((i = bms_next_member(toRemove->eclass_indexes, i)) >= 0)
    1944              :     {
    1945          751 :         EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
    1946              : 
    1947          751 :         update_eclasses(ec, toRemove->relid, toKeep->relid);
    1948          751 :         toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i);
    1949              :     }
    1950              : 
    1951              :     /*
    1952              :      * Transfer the targetlist and attr_needed flags.
    1953              :      */
    1954              : 
    1955         1704 :     foreach(lc, toRemove->reltarget->exprs)
    1956              :     {
    1957         1236 :         Node       *node = lfirst(lc);
    1958              : 
    1959         1236 :         ChangeVarNodesExtended(node, toRemove->relid, toKeep->relid, 0,
    1960              :                                replace_relid_callback);
    1961         1236 :         if (!list_member(toKeep->reltarget->exprs, node))
    1962          128 :             toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node);
    1963              :     }
    1964              : 
    1965         5653 :     for (i = toKeep->min_attr; i <= toKeep->max_attr; i++)
    1966              :     {
    1967         5185 :         int         attno = i - toKeep->min_attr;
    1968              : 
    1969        10370 :         toRemove->attr_needed[attno] = adjust_relid_set(toRemove->attr_needed[attno],
    1970         5185 :                                                         toRemove->relid, toKeep->relid);
    1971         5185 :         toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno],
    1972         5185 :                                                      toRemove->attr_needed[attno]);
    1973              :     }
    1974              : 
    1975              :     /*
    1976              :      * If the removed relation has a row mark, transfer it to the remaining
    1977              :      * one.
    1978              :      *
    1979              :      * If both rels have row marks, just keep the one corresponding to the
    1980              :      * remaining relation because we verified earlier that they have the same
    1981              :      * strength.
    1982              :      */
    1983          468 :     if (rmark)
    1984              :     {
    1985           41 :         if (kmark)
    1986              :         {
    1987              :             Assert(kmark->markType == rmark->markType);
    1988              : 
    1989           41 :             root->rowMarks = list_delete_ptr(root->rowMarks, rmark);
    1990              :         }
    1991              :         else
    1992              :         {
    1993              :             /* Shouldn't have inheritance children here. */
    1994              :             Assert(rmark->rti == rmark->prti);
    1995              : 
    1996            0 :             rmark->rti = rmark->prti = toKeep->relid;
    1997              :         }
    1998              :     }
    1999              : 
    2000              :     /*
    2001              :      * Replace varno in all the query structures, except nodes RangeTblRef
    2002              :      * otherwise later remove_rel_from_joinlist will yield errors.
    2003              :      */
    2004          468 :     ChangeVarNodesExtended((Node *) root->parse, toRemove->relid, toKeep->relid,
    2005              :                            0, replace_relid_callback);
    2006              : 
    2007              :     /* Replace links in the planner info */
    2008          468 :     remove_rel_from_query(root, toRemove->relid, toKeep->relid, NULL, NULL);
    2009              : 
    2010              :     /* Replace varno in the fully-processed targetlist */
    2011          468 :     ChangeVarNodesExtended((Node *) root->processed_tlist, toRemove->relid,
    2012          468 :                            toKeep->relid, 0, replace_relid_callback);
    2013              : 
    2014              :     /*
    2015              :      * No need to touch all_result_relids or leaf_result_relids: at this point
    2016              :      * those sets contain only parse->resultRelation; inheritance children
    2017              :      * have not been added yet; that happens later in add_other_rels_to_query.
    2018              :      * And remove_self_joins_recurse rejects parse->resultRelation as an SJE
    2019              :      * candidate to preserve the EPQ mechanism.  So toRemove->relid cannot be
    2020              :      * a member.
    2021              :      */
    2022              :     Assert(!bms_is_member(toRemove->relid, root->all_result_relids));
    2023              :     Assert(!bms_is_member(toRemove->relid, root->leaf_result_relids));
    2024              : 
    2025              :     /*
    2026              :      * There may be references to the rel in root->fkey_list, but if so,
    2027              :      * match_foreign_keys_to_quals() will get rid of them.
    2028              :      */
    2029              : 
    2030              :     /*
    2031              :      * Finally, remove the rel from the baserel array to prevent it from being
    2032              :      * referenced again.  (We can't do this earlier because
    2033              :      * remove_join_clause_from_rels will touch it.)
    2034              :      */
    2035          468 :     root->simple_rel_array[toRemove->relid] = NULL;
    2036          468 :     root->simple_rte_array[toRemove->relid] = NULL;
    2037              : 
    2038              :     /* And nuke the RelOptInfo, just in case there's another access path. */
    2039          468 :     pfree(toRemove);
    2040              : 
    2041              : 
    2042              :     /*
    2043              :      * Now repeat construction of attr_needed bits coming from all other
    2044              :      * sources.
    2045              :      */
    2046          468 :     rebuild_placeholder_attr_needed(root);
    2047          468 :     rebuild_joinclause_attr_needed(root);
    2048          468 :     rebuild_eclass_attr_needed(root);
    2049          468 :     rebuild_lateral_attr_needed(root);
    2050          468 : }
    2051              : 
    2052              : /*
    2053              :  * split_selfjoin_quals
    2054              :  *      Processes 'joinquals' by building two lists: one containing the quals
    2055              :  *      where the columns/exprs are on either side of the join match and
    2056              :  *      another one containing the remaining quals.
    2057              :  *
    2058              :  * 'joinquals' must only contain quals for a RTE_RELATION being joined to
    2059              :  * itself.
    2060              :  */
    2061              : static void
    2062         1791 : split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals,
    2063              :                      List **otherjoinquals, int from, int to)
    2064              : {
    2065         1791 :     List       *sjoinquals = NIL;
    2066         1791 :     List       *ojoinquals = NIL;
    2067              : 
    2068         5497 :     foreach_node(RestrictInfo, rinfo, joinquals)
    2069              :     {
    2070              :         OpExpr     *expr;
    2071              :         Node       *leftexpr;
    2072              :         Node       *rightexpr;
    2073              : 
    2074              :         /* In general, clause looks like F(arg1) = G(arg2) */
    2075         3830 :         if (!rinfo->mergeopfamilies ||
    2076         3830 :             bms_num_members(rinfo->clause_relids) != 2 ||
    2077         3830 :             bms_membership(rinfo->left_relids) != BMS_SINGLETON ||
    2078         1915 :             bms_membership(rinfo->right_relids) != BMS_SINGLETON)
    2079              :         {
    2080            0 :             ojoinquals = lappend(ojoinquals, rinfo);
    2081            0 :             continue;
    2082              :         }
    2083              : 
    2084         1915 :         expr = (OpExpr *) rinfo->clause;
    2085              : 
    2086         1915 :         if (!IsA(expr, OpExpr) || list_length(expr->args) != 2)
    2087              :         {
    2088            0 :             ojoinquals = lappend(ojoinquals, rinfo);
    2089            0 :             continue;
    2090              :         }
    2091              : 
    2092         1915 :         leftexpr = get_leftop(rinfo->clause);
    2093         1915 :         rightexpr = copyObject(get_rightop(rinfo->clause));
    2094              : 
    2095         1915 :         if (leftexpr && IsA(leftexpr, RelabelType))
    2096          130 :             leftexpr = (Node *) ((RelabelType *) leftexpr)->arg;
    2097         1915 :         if (rightexpr && IsA(rightexpr, RelabelType))
    2098          125 :             rightexpr = (Node *) ((RelabelType *) rightexpr)->arg;
    2099              : 
    2100              :         /*
    2101              :          * Quite an expensive operation, narrowing the use case. For example,
    2102              :          * when we have cast of the same var to different (but compatible)
    2103              :          * types.
    2104              :          */
    2105         1915 :         ChangeVarNodesExtended(rightexpr,
    2106         1915 :                                bms_singleton_member(rinfo->right_relids),
    2107         1915 :                                bms_singleton_member(rinfo->left_relids), 0,
    2108              :                                replace_relid_callback);
    2109              : 
    2110         1915 :         if (equal(leftexpr, rightexpr))
    2111         1492 :             sjoinquals = lappend(sjoinquals, rinfo);
    2112              :         else
    2113          423 :             ojoinquals = lappend(ojoinquals, rinfo);
    2114              :     }
    2115              : 
    2116         1791 :     *selfjoinquals = sjoinquals;
    2117         1791 :     *otherjoinquals = ojoinquals;
    2118         1791 : }
    2119              : 
    2120              : /*
    2121              :  * Check for a case when uniqueness is at least partly derived from a
    2122              :  * baserestrictinfo clause. In this case, we have a chance to return only
    2123              :  * one row (if such clauses on both sides of SJ are equal) or nothing (if they
    2124              :  * are different).
    2125              :  */
    2126              : static bool
    2127          523 : match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses,
    2128              :                      Index relid)
    2129              : {
    2130         1061 :     foreach_node(RestrictInfo, rinfo, uclauses)
    2131              :     {
    2132              :         Expr       *clause;
    2133              :         Node       *iclause;
    2134              :         Node       *c1;
    2135          125 :         bool        matched = false;
    2136              : 
    2137              :         Assert(outer->relid > 0 && relid > 0);
    2138              : 
    2139              :         /* Only filters like f(R.x1,...,R.xN) == expr we should consider. */
    2140              :         Assert(bms_is_empty(rinfo->left_relids) ^
    2141              :                bms_is_empty(rinfo->right_relids));
    2142              : 
    2143          125 :         clause = (Expr *) copyObject(rinfo->clause);
    2144          125 :         ChangeVarNodesExtended((Node *) clause, relid, outer->relid, 0,
    2145              :                                replace_relid_callback);
    2146              : 
    2147          125 :         iclause = bms_is_empty(rinfo->left_relids) ? get_rightop(clause) :
    2148          120 :             get_leftop(clause);
    2149          125 :         c1 = bms_is_empty(rinfo->left_relids) ? get_leftop(clause) :
    2150          120 :             get_rightop(clause);
    2151              : 
    2152              :         /*
    2153              :          * Compare these left and right sides with the corresponding sides of
    2154              :          * the outer's filters. If no one is detected - return immediately.
    2155              :          */
    2156          340 :         foreach_node(RestrictInfo, orinfo, outer->baserestrictinfo)
    2157              :         {
    2158              :             Node       *oclause;
    2159              :             Node       *c2;
    2160              : 
    2161          160 :             if (orinfo->mergeopfamilies == NIL)
    2162              :                 /* Don't consider clauses that aren't similar to 'F(X)=G(Y)' */
    2163           50 :                 continue;
    2164              : 
    2165              :             Assert(is_opclause(orinfo->clause));
    2166              : 
    2167          220 :             oclause = bms_is_empty(orinfo->left_relids) ?
    2168          110 :                 get_rightop(orinfo->clause) : get_leftop(orinfo->clause);
    2169          220 :             c2 = (bms_is_empty(orinfo->left_relids) ?
    2170          110 :                   get_leftop(orinfo->clause) : get_rightop(orinfo->clause));
    2171              : 
    2172          110 :             if (equal(iclause, oclause) && equal(c1, c2))
    2173              :             {
    2174           70 :                 matched = true;
    2175           70 :                 break;
    2176              :             }
    2177              :         }
    2178              : 
    2179          125 :         if (!matched)
    2180           55 :             return false;
    2181              :     }
    2182              : 
    2183          468 :     return true;
    2184              : }
    2185              : 
    2186              : /*
    2187              :  * Find and remove unique self-joins in a group of base relations that have
    2188              :  * the same Oid.
    2189              :  *
    2190              :  * Returns a set of relids that were removed.
    2191              :  */
    2192              : static Relids
    2193         9060 : remove_self_joins_one_group(PlannerInfo *root, Relids relids)
    2194              : {
    2195         9060 :     Relids      result = NULL;
    2196              :     int         k;              /* Index of kept relation */
    2197         9060 :     int         r = -1;         /* Index of removed relation */
    2198              : 
    2199        28294 :     while ((r = bms_next_member(relids, r)) > 0)
    2200              :     {
    2201        19234 :         RelOptInfo *rrel = root->simple_rel_array[r];
    2202              : 
    2203        19234 :         k = r;
    2204              : 
    2205        30275 :         while ((k = bms_next_member(relids, k)) > 0)
    2206              :         {
    2207        11509 :             Relids      joinrelids = NULL;
    2208        11509 :             RelOptInfo *krel = root->simple_rel_array[k];
    2209              :             List       *restrictlist;
    2210              :             List       *selfjoinquals;
    2211              :             List       *otherjoinquals;
    2212              :             ListCell   *lc;
    2213        11509 :             bool        jinfo_check = true;
    2214        11509 :             PlanRowMark *kmark = NULL;
    2215        11509 :             PlanRowMark *rmark = NULL;
    2216        11509 :             List       *uclauses = NIL;
    2217              : 
    2218              :             /* A sanity check: the relations have the same Oid. */
    2219              :             Assert(root->simple_rte_array[k]->relid ==
    2220              :                    root->simple_rte_array[r]->relid);
    2221              : 
    2222              :             /*
    2223              :              * It is impossible to eliminate the join of two relations if they
    2224              :              * belong to different rules of order. Otherwise, the planner
    2225              :              * can't find any variants of the correct query plan.
    2226              :              */
    2227        14175 :             foreach(lc, root->join_info_list)
    2228              :             {
    2229         8948 :                 SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc);
    2230              : 
    2231        17896 :                 if ((bms_is_member(k, info->syn_lefthand) ^
    2232        12665 :                      bms_is_member(r, info->syn_lefthand)) ||
    2233         3717 :                     (bms_is_member(k, info->syn_righthand) ^
    2234         3717 :                      bms_is_member(r, info->syn_righthand)))
    2235              :                 {
    2236         6282 :                     jinfo_check = false;
    2237         6282 :                     break;
    2238              :                 }
    2239              :             }
    2240        11509 :             if (!jinfo_check)
    2241        11041 :                 continue;
    2242              : 
    2243              :             /*
    2244              :              * Check Row Marks equivalence. We can't remove the join if the
    2245              :              * relations have row marks of different strength (e.g., one is
    2246              :              * locked FOR UPDATE, and another just has ROW_MARK_REFERENCE for
    2247              :              * EvalPlanQual rechecking).
    2248              :              */
    2249         5363 :             foreach(lc, root->rowMarks)
    2250              :             {
    2251          242 :                 PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc);
    2252              : 
    2253          242 :                 if (rowMark->rti == r)
    2254              :                 {
    2255              :                     Assert(rmark == NULL);
    2256          106 :                     rmark = rowMark;
    2257              :                 }
    2258          136 :                 else if (rowMark->rti == k)
    2259              :                 {
    2260              :                     Assert(kmark == NULL);
    2261          106 :                     kmark = rowMark;
    2262              :                 }
    2263              : 
    2264          242 :                 if (kmark && rmark)
    2265          106 :                     break;
    2266              :             }
    2267         5227 :             if (kmark && rmark && kmark->markType != rmark->markType)
    2268           28 :                 continue;
    2269              : 
    2270              :             /*
    2271              :              * We only deal with base rels here, so their relids bitset
    2272              :              * contains only one member -- their relid.
    2273              :              */
    2274         5199 :             joinrelids = bms_add_member(joinrelids, r);
    2275         5199 :             joinrelids = bms_add_member(joinrelids, k);
    2276              : 
    2277              :             /*
    2278              :              * PHVs should not impose any constraints on removing self-joins.
    2279              :              */
    2280              : 
    2281              :             /*
    2282              :              * At this stage, joininfo lists of inner and outer can contain
    2283              :              * only clauses required for a superior outer join that can't
    2284              :              * influence this optimization. So, we can avoid to call the
    2285              :              * build_joinrel_restrictlist() routine.
    2286              :              */
    2287         5199 :             restrictlist = generate_join_implied_equalities(root, joinrelids,
    2288              :                                                             rrel->relids,
    2289              :                                                             krel, NULL);
    2290         5199 :             if (restrictlist == NIL)
    2291         3408 :                 continue;
    2292              : 
    2293              :             /*
    2294              :              * Process restrictlist to separate the self-join quals from the
    2295              :              * other quals. e.g., "x = x" goes to selfjoinquals and "a = b" to
    2296              :              * otherjoinquals.
    2297              :              */
    2298         1791 :             split_selfjoin_quals(root, restrictlist, &selfjoinquals,
    2299         1791 :                                  &otherjoinquals, rrel->relid, krel->relid);
    2300              : 
    2301              :             Assert(list_length(restrictlist) ==
    2302              :                    (list_length(selfjoinquals) + list_length(otherjoinquals)));
    2303              : 
    2304              :             /*
    2305              :              * To enable SJE for the only degenerate case without any self
    2306              :              * join clauses at all, add baserestrictinfo to this list. The
    2307              :              * degenerate case works only if both sides have the same clause.
    2308              :              * So doesn't matter which side to add.
    2309              :              */
    2310         1791 :             selfjoinquals = list_concat(selfjoinquals, krel->baserestrictinfo);
    2311              : 
    2312              :             /*
    2313              :              * Determine if the rrel can duplicate outer rows. We must bypass
    2314              :              * the unique rel cache here since we're possibly using a subset
    2315              :              * of join quals. We can use 'force_cache' == true when all join
    2316              :              * quals are self-join quals.  Otherwise, we could end up putting
    2317              :              * false negatives in the cache.
    2318              :              */
    2319         1791 :             if (!innerrel_is_unique_ext(root, joinrelids, rrel->relids,
    2320              :                                         krel, JOIN_INNER, selfjoinquals,
    2321         1791 :                                         list_length(otherjoinquals) == 0,
    2322              :                                         &uclauses))
    2323         1268 :                 continue;
    2324              : 
    2325              :             /*
    2326              :              * 'uclauses' is the copy of outer->baserestrictinfo that are
    2327              :              * associated with an index.  We proved by matching selfjoinquals
    2328              :              * to a unique index that the outer relation has at most one
    2329              :              * matching row for each inner row.  Sometimes that is not enough.
    2330              :              * e.g. "WHERE s1.b = s2.b AND s1.a = 1 AND s2.a = 2" when the
    2331              :              * unique index is (a,b).  Having non-empty uclauses, we must
    2332              :              * validate that the inner baserestrictinfo contains the same
    2333              :              * expressions, or we won't match the same row on each side of the
    2334              :              * join.
    2335              :              */
    2336          523 :             if (!match_unique_clauses(root, rrel, uclauses, krel->relid))
    2337           55 :                 continue;
    2338              : 
    2339              :             /*
    2340              :              * Remove rrel RelOptInfo from the planner structures and the
    2341              :              * corresponding row mark.
    2342              :              */
    2343          468 :             remove_self_join_rel(root, kmark, rmark, krel, rrel, restrictlist);
    2344              : 
    2345          468 :             result = bms_add_member(result, r);
    2346              : 
    2347              :             /* We have removed the outer relation, try the next one. */
    2348          468 :             break;
    2349              :         }
    2350              :     }
    2351              : 
    2352         9060 :     return result;
    2353              : }
    2354              : 
    2355              : /*
    2356              :  * Gather indexes of base relations from the joinlist and try to eliminate self
    2357              :  * joins.
    2358              :  */
    2359              : static Relids
    2360        83667 : remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove)
    2361              : {
    2362              :     ListCell   *jl;
    2363        83667 :     Relids      relids = NULL;
    2364        83667 :     SelfJoinCandidate *candidates = NULL;
    2365              :     int         i;
    2366              :     int         j;
    2367              :     int         numRels;
    2368              : 
    2369              :     /* Collect indexes of base relations of the join tree */
    2370       279006 :     foreach(jl, joinlist)
    2371              :     {
    2372       195339 :         Node       *jlnode = (Node *) lfirst(jl);
    2373              : 
    2374       195339 :         if (IsA(jlnode, RangeTblRef))
    2375              :         {
    2376       192621 :             int         varno = ((RangeTblRef *) jlnode)->rtindex;
    2377       192621 :             RangeTblEntry *rte = root->simple_rte_array[varno];
    2378              : 
    2379              :             /*
    2380              :              * We only consider ordinary relations as candidates to be
    2381              :              * removed, and these relations should not have TABLESAMPLE
    2382              :              * clauses specified.  Removing a relation with TABLESAMPLE clause
    2383              :              * could potentially change the syntax of the query. Because of
    2384              :              * UPDATE/DELETE EPQ mechanism, currently Query->resultRelation or
    2385              :              * Query->mergeTargetRelation associated rel cannot be eliminated.
    2386              :              */
    2387       192621 :             if (rte->rtekind == RTE_RELATION &&
    2388       171331 :                 rte->relkind == RELKIND_RELATION &&
    2389       166803 :                 rte->tablesample == NULL &&
    2390       166781 :                 varno != root->parse->resultRelation &&
    2391       165258 :                 varno != root->parse->mergeTargetRelation)
    2392              :             {
    2393              :                 Assert(!bms_is_member(varno, relids));
    2394       165258 :                 relids = bms_add_member(relids, varno);
    2395              :             }
    2396              :         }
    2397         2718 :         else if (IsA(jlnode, List))
    2398              :         {
    2399              :             /* Recursively go inside the sub-joinlist */
    2400         2718 :             toRemove = remove_self_joins_recurse(root, (List *) jlnode,
    2401              :                                                  toRemove);
    2402              :         }
    2403              :         else
    2404            0 :             elog(ERROR, "unrecognized joinlist node type: %d",
    2405              :                  (int) nodeTag(jlnode));
    2406              :     }
    2407              : 
    2408        83667 :     numRels = bms_num_members(relids);
    2409              : 
    2410              :     /* Need at least two relations for the join */
    2411        83667 :     if (numRels < 2)
    2412        22151 :         return toRemove;
    2413              : 
    2414              :     /*
    2415              :      * In order to find relations with the same oid we first build an array of
    2416              :      * candidates and then sort it by oid.
    2417              :      */
    2418        61516 :     candidates = palloc_array(SelfJoinCandidate, numRels);
    2419        61516 :     i = -1;
    2420        61516 :     j = 0;
    2421       210841 :     while ((i = bms_next_member(relids, i)) >= 0)
    2422              :     {
    2423       149325 :         candidates[j].relid = i;
    2424       149325 :         candidates[j].reloid = root->simple_rte_array[i]->relid;
    2425       149325 :         j++;
    2426              :     }
    2427              : 
    2428        61516 :     qsort(candidates, numRels, sizeof(SelfJoinCandidate),
    2429              :           self_join_candidates_cmp);
    2430              : 
    2431              :     /*
    2432              :      * Iteratively form a group of relation indexes with the same oid and
    2433              :      * launch the routine that detects self-joins in this group and removes
    2434              :      * excessive range table entries.
    2435              :      *
    2436              :      * At the end of the iteration, exclude the group from the overall relids
    2437              :      * list. So each next iteration of the cycle will involve less and less
    2438              :      * value of relids.
    2439              :      */
    2440        61516 :     i = 0;
    2441       210841 :     for (j = 1; j < numRels + 1; j++)
    2442              :     {
    2443       149325 :         if (j == numRels || candidates[j].reloid != candidates[i].reloid)
    2444              :         {
    2445       139231 :             if (j - i >= 2)
    2446              :             {
    2447              :                 /* Create a group of relation indexes with the same oid */
    2448         8990 :                 Relids      group = NULL;
    2449              :                 Relids      removed;
    2450              : 
    2451        28074 :                 while (i < j)
    2452              :                 {
    2453        19084 :                     group = bms_add_member(group, candidates[i].relid);
    2454        19084 :                     i++;
    2455              :                 }
    2456         8990 :                 relids = bms_del_members(relids, group);
    2457              : 
    2458              :                 /*
    2459              :                  * Try to remove self-joins from a group of identical entries.
    2460              :                  * Make the next attempt iteratively - if something is deleted
    2461              :                  * from a group, changes in clauses and equivalence classes
    2462              :                  * can give us a chance to find more candidates.
    2463              :                  */
    2464              :                 do
    2465              :                 {
    2466              :                     Assert(!bms_overlap(group, toRemove));
    2467         9060 :                     removed = remove_self_joins_one_group(root, group);
    2468         9060 :                     toRemove = bms_add_members(toRemove, removed);
    2469         9060 :                     group = bms_del_members(group, removed);
    2470         9508 :                 } while (!bms_is_empty(removed) &&
    2471          448 :                          bms_membership(group) == BMS_MULTIPLE);
    2472         8990 :                 bms_free(removed);
    2473         8990 :                 bms_free(group);
    2474              :             }
    2475              :             else
    2476              :             {
    2477              :                 /* Single relation, just remove it from the set */
    2478       130241 :                 relids = bms_del_member(relids, candidates[i].relid);
    2479       130241 :                 i = j;
    2480              :             }
    2481              :         }
    2482              :     }
    2483              : 
    2484              :     Assert(bms_is_empty(relids));
    2485              : 
    2486        61516 :     return toRemove;
    2487              : }
    2488              : 
    2489              : /*
    2490              :  * Compare self-join candidates by their oids.
    2491              :  */
    2492              : static int
    2493       109581 : self_join_candidates_cmp(const void *a, const void *b)
    2494              : {
    2495       109581 :     const SelfJoinCandidate *ca = (const SelfJoinCandidate *) a;
    2496       109581 :     const SelfJoinCandidate *cb = (const SelfJoinCandidate *) b;
    2497              : 
    2498       109581 :     if (ca->reloid != cb->reloid)
    2499        99442 :         return (ca->reloid < cb->reloid ? -1 : 1);
    2500              :     else
    2501        10139 :         return 0;
    2502              : }
    2503              : 
    2504              : /*
    2505              :  * Find and remove useless self joins.
    2506              :  *
    2507              :  * Search for joins where a relation is joined to itself. If the join clause
    2508              :  * for each tuple from one side of the join is proven to match the same
    2509              :  * physical row (or nothing) on the other side, that self-join can be
    2510              :  * eliminated from the query.  Suitable join clauses are assumed to be in the
    2511              :  * form of X = X, and can be replaced with NOT NULL clauses.
    2512              :  *
    2513              :  * For the sake of simplicity, we don't apply this optimization to special
    2514              :  * joins. Here is a list of what we could do in some particular cases:
    2515              :  * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins,
    2516              :  * and then removed normally.
    2517              :  * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND
    2518              :  * (IS NULL on join columns OR NOT inner quals)'.
    2519              :  * 'a a1 left join a a2': could simplify to a scan like inner but without
    2520              :  * NOT NULL conditions on join columns.
    2521              :  * 'a a1 left join (a a2 join b)': can't simplify this, because join to b
    2522              :  * can both remove rows and introduce duplicates.
    2523              :  *
    2524              :  * To search for removable joins, we order all the relations on their Oid,
    2525              :  * go over each set with the same Oid, and consider each pair of relations
    2526              :  * in this set.
    2527              :  *
    2528              :  * To remove the join, we mark one of the participating relations as dead
    2529              :  * and rewrite all references to it to point to the remaining relation.
    2530              :  * This includes modifying RestrictInfos, EquivalenceClasses, and
    2531              :  * EquivalenceMembers. We also have to modify the row marks. The join clauses
    2532              :  * of the removed relation become either restriction or join clauses, based on
    2533              :  * whether they reference any relations not participating in the removed join.
    2534              :  *
    2535              :  * 'joinlist' is the top-level joinlist of the query. If it has any
    2536              :  * references to the removed relations, we update them to point to the
    2537              :  * remaining ones.
    2538              :  */
    2539              : List *
    2540       253567 : remove_useless_self_joins(PlannerInfo *root, List *joinlist)
    2541              : {
    2542       253567 :     Relids      toRemove = NULL;
    2543       253567 :     int         relid = -1;
    2544              : 
    2545       507134 :     if (!enable_self_join_elimination || joinlist == NIL ||
    2546       426883 :         (list_length(joinlist) == 1 && !IsA(linitial(joinlist), List)))
    2547       172618 :         return joinlist;
    2548              : 
    2549              :     /*
    2550              :      * Merge pairs of relations participated in self-join. Remove unnecessary
    2551              :      * range table entries.
    2552              :      */
    2553        80949 :     toRemove = remove_self_joins_recurse(root, joinlist, toRemove);
    2554              : 
    2555        80949 :     if (unlikely(toRemove != NULL))
    2556              :     {
    2557              :         /* At the end, remove orphaned relation links */
    2558          911 :         while ((relid = bms_next_member(toRemove, relid)) >= 0)
    2559              :         {
    2560          468 :             int         nremoved = 0;
    2561              : 
    2562          468 :             joinlist = remove_rel_from_joinlist(joinlist, relid, &nremoved);
    2563          468 :             if (nremoved != 1)
    2564            0 :                 elog(ERROR, "failed to find relation %d in joinlist", relid);
    2565              :         }
    2566              :     }
    2567              : 
    2568        80949 :     return joinlist;
    2569              : }
        

Generated by: LCOV version 2.0-1