LCOV - PostgreSQL 19devel - src/backend/optimizer/plan/analyzejoins.c

LCOV - code coverage report

Current view:	top level - src/backend/optimizer/plan - analyzejoins.c (source / functions)		Coverage	Total	Hit
Test:	PostgreSQL 19devel	Lines:	97.5 %	728	710
Test Date:	2026-03-16 05:14:46	Functions:	100.0 %	27	27
Legend:	Lines: hit not hit

            Line data    Source code

       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * analyzejoins.c
       4              :  *    Routines for simplifying joins after initial query analysis
       5              :  *
       6              :  * While we do a great deal of join simplification in prep/prepjointree.c,
       7              :  * certain optimizations cannot be performed at that stage for lack of
       8              :  * detailed information about the query.  The routines here are invoked
       9              :  * after initsplan.c has done its work, and can do additional join removal
      10              :  * and simplification steps based on the information extracted.  The penalty
      11              :  * is that we have to work harder to clean up after ourselves when we modify
      12              :  * the query, since the derived data structures have to be updated too.
      13              :  *
      14              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      15              :  * Portions Copyright (c) 1994, Regents of the University of California
      16              :  *
      17              :  *
      18              :  * IDENTIFICATION
      19              :  *    src/backend/optimizer/plan/analyzejoins.c
      20              :  *
      21              :  *-------------------------------------------------------------------------
      22              :  */
      23              : #include "postgres.h"
      24              : 
      25              : #include "catalog/pg_class.h"
      26              : #include "nodes/nodeFuncs.h"
      27              : #include "optimizer/joininfo.h"
      28              : #include "optimizer/optimizer.h"
      29              : #include "optimizer/pathnode.h"
      30              : #include "optimizer/paths.h"
      31              : #include "optimizer/placeholder.h"
      32              : #include "optimizer/planmain.h"
      33              : #include "optimizer/restrictinfo.h"
      34              : #include "parser/parse_agg.h"
      35              : #include "rewrite/rewriteManip.h"
      36              : #include "utils/lsyscache.h"
      37              : 
      38              : /*
      39              :  * Utility structure.  A sorting procedure is needed to simplify the search
      40              :  * of SJE-candidate baserels referencing the same database relation.  Having
      41              :  * collected all baserels from the query jointree, the planner sorts them
      42              :  * according to the reloid value, groups them with the next pass and attempts
      43              :  * to remove self-joins.
      44              :  *
      45              :  * Preliminary sorting prevents quadratic behavior that can be harmful in the
      46              :  * case of numerous joins.
      47              :  */
      48              : typedef struct
      49              : {
      50              :     int         relid;
      51              :     Oid         reloid;
      52              : } SelfJoinCandidate;
      53              : 
      54              : bool        enable_self_join_elimination;
      55              : 
      56              : /* local functions */
      57              : static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
      58              : static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
      59              :                                           SpecialJoinInfo *sjinfo);
      60              : static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
      61              :                                          int relid, int ojrelid);
      62              : static void remove_rel_from_eclass(EquivalenceClass *ec,
      63              :                                    SpecialJoinInfo *sjinfo,
      64              :                                    int relid, int subst);
      65              : static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
      66              : static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
      67              : static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
      68              :                                 List *clause_list, List **extra_clauses);
      69              : static Oid  distinct_col_search(int colno, List *colnos, List *opids);
      70              : static bool is_innerrel_unique_for(PlannerInfo *root,
      71              :                                    Relids joinrelids,
      72              :                                    Relids outerrelids,
      73              :                                    RelOptInfo *innerrel,
      74              :                                    JoinType jointype,
      75              :                                    List *restrictlist,
      76              :                                    List **extra_clauses);
      77              : static int  self_join_candidates_cmp(const void *a, const void *b);
      78              : static bool replace_relid_callback(Node *node,
      79              :                                    ChangeVarNodes_context *context);
      80              : 
      81              : 
      82              : /*
      83              :  * remove_useless_joins
      84              :  *      Check for relations that don't actually need to be joined at all,
      85              :  *      and remove them from the query.
      86              :  *
      87              :  * We are passed the current joinlist and return the updated list.  Other
      88              :  * data structures that have to be updated are accessible via "root".
      89              :  */
      90              : List *
      91       183569 : remove_useless_joins(PlannerInfo *root, List *joinlist)
      92              : {
      93              :     ListCell   *lc;
      94              : 
      95              :     /*
      96              :      * We are only interested in relations that are left-joined to, so we can
      97              :      * scan the join_info_list to find them easily.
      98              :      */
      99       189948 : restart:
     100       215250 :     foreach(lc, root->join_info_list)
     101              :     {
     102        31681 :         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
     103              :         int         innerrelid;
     104              :         int         nremoved;
     105              : 
     106              :         /* Skip if not removable */
     107        31681 :         if (!join_is_removable(root, sjinfo))
     108        25302 :             continue;
     109              : 
     110              :         /*
     111              :          * Currently, join_is_removable can only succeed when the sjinfo's
     112              :          * righthand is a single baserel.  Remove that rel from the query and
     113              :          * joinlist.
     114              :          */
     115         6379 :         innerrelid = bms_singleton_member(sjinfo->min_righthand);
     116              : 
     117         6379 :         remove_leftjoinrel_from_query(root, innerrelid, sjinfo);
     118              : 
     119              :         /* We verify that exactly one reference gets removed from joinlist */
     120         6379 :         nremoved = 0;
     121         6379 :         joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
     122         6379 :         if (nremoved != 1)
     123            0 :             elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
     124              : 
     125              :         /*
     126              :          * We can delete this SpecialJoinInfo from the list too, since it's no
     127              :          * longer of interest.  (Since we'll restart the foreach loop
     128              :          * immediately, we don't bother with foreach_delete_current.)
     129              :          */
     130         6379 :         root->join_info_list = list_delete_cell(root->join_info_list, lc);
     131              : 
     132              :         /*
     133              :          * Restart the scan.  This is necessary to ensure we find all
     134              :          * removable joins independently of ordering of the join_info_list
     135              :          * (note that removal of attr_needed bits may make a join appear
     136              :          * removable that did not before).
     137              :          */
     138         6379 :         goto restart;
     139              :     }
     140              : 
     141       183569 :     return joinlist;
     142              : }
     143              : 
     144              : /*
     145              :  * join_is_removable
     146              :  *    Check whether we need not perform this special join at all, because
     147              :  *    it will just duplicate its left input.
     148              :  *
     149              :  * This is true for a left join for which the join condition cannot match
     150              :  * more than one inner-side row.  (There are other possibly interesting
     151              :  * cases, but we don't have the infrastructure to prove them.)  We also
     152              :  * have to check that the inner side doesn't generate any variables needed
     153              :  * above the join.
     154              :  */
     155              : static bool
     156        31681 : join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
     157              : {
     158              :     int         innerrelid;
     159              :     RelOptInfo *innerrel;
     160              :     Relids      inputrelids;
     161              :     Relids      joinrelids;
     162        31681 :     List       *clause_list = NIL;
     163              :     ListCell   *l;
     164              :     int         attroff;
     165              : 
     166              :     /*
     167              :      * Must be a left join to a single baserel, else we aren't going to be
     168              :      * able to do anything with it.
     169              :      */
     170        31681 :     if (sjinfo->jointype != JOIN_LEFT)
     171         6876 :         return false;
     172              : 
     173        24805 :     if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
     174          657 :         return false;
     175              : 
     176              :     /*
     177              :      * Never try to eliminate a left join to the query result rel.  Although
     178              :      * the case is syntactically impossible in standard SQL, MERGE will build
     179              :      * a join tree that looks exactly like that.
     180              :      */
     181        24148 :     if (innerrelid == root->parse->resultRelation)
     182          405 :         return false;
     183              : 
     184        23743 :     innerrel = find_base_rel(root, innerrelid);
     185              : 
     186              :     /*
     187              :      * Before we go to the effort of checking whether any innerrel variables
     188              :      * are needed above the join, make a quick check to eliminate cases in
     189              :      * which we will surely be unable to prove uniqueness of the innerrel.
     190              :      */
     191        23743 :     if (!rel_supports_distinctness(root, innerrel))
     192         1649 :         return false;
     193              : 
     194              :     /* Compute the relid set for the join we are considering */
     195        22094 :     inputrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
     196              :     Assert(sjinfo->ojrelid != 0);
     197        22094 :     joinrelids = bms_copy(inputrelids);
     198        22094 :     joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
     199              : 
     200              :     /*
     201              :      * We can't remove the join if any inner-rel attributes are used above the
     202              :      * join.  Here, "above" the join includes pushed-down conditions, so we
     203              :      * should reject if attr_needed includes the OJ's own relid; therefore,
     204              :      * compare to inputrelids not joinrelids.
     205              :      *
     206              :      * As a micro-optimization, it seems better to start with max_attr and
     207              :      * count down rather than starting with min_attr and counting up, on the
     208              :      * theory that the system attributes are somewhat less likely to be wanted
     209              :      * and should be tested last.
     210              :      */
     211        22094 :     for (attroff = innerrel->max_attr - innerrel->min_attr;
     212       202726 :          attroff >= 0;
     213       180632 :          attroff--)
     214              :     {
     215       196202 :         if (!bms_is_subset(innerrel->attr_needed[attroff], inputrelids))
     216        15570 :             return false;
     217              :     }
     218              : 
     219              :     /*
     220              :      * Similarly check that the inner rel isn't needed by any PlaceHolderVars
     221              :      * that will be used above the join.  The PHV case is a little bit more
     222              :      * complicated, because PHVs may have been assigned a ph_eval_at location
     223              :      * that includes the innerrel, yet their contained expression might not
     224              :      * actually reference the innerrel (it could be just a constant, for
     225              :      * instance).  If such a PHV is due to be evaluated above the join then it
     226              :      * needn't prevent join removal.
     227              :      */
     228         6626 :     foreach(l, root->placeholder_list)
     229              :     {
     230          120 :         PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
     231              : 
     232          120 :         if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
     233           18 :             return false;       /* it references innerrel laterally */
     234          120 :         if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
     235           27 :             continue;           /* it definitely doesn't reference innerrel */
     236           93 :         if (bms_is_subset(phinfo->ph_needed, inputrelids))
     237            3 :             continue;           /* PHV is not used above the join */
     238           90 :         if (!bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
     239           15 :             return false;       /* it has to be evaluated below the join */
     240              : 
     241              :         /*
     242              :          * We need to be sure there will still be a place to evaluate the PHV
     243              :          * if we remove the join, ie that ph_eval_at wouldn't become empty.
     244              :          */
     245           75 :         if (!bms_overlap(sjinfo->min_lefthand, phinfo->ph_eval_at))
     246            3 :             return false;       /* there isn't any other place to eval PHV */
     247              :         /* Check contained expression last, since this is a bit expensive */
     248           72 :         if (bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
     249           72 :                         innerrel->relids))
     250            0 :             return false;       /* contained expression references innerrel */
     251              :     }
     252              : 
     253              :     /*
     254              :      * Search for mergejoinable clauses that constrain the inner rel against
     255              :      * either the outer rel or a pseudoconstant.  If an operator is
     256              :      * mergejoinable then it behaves like equality for some btree opclass, so
     257              :      * it's what we want.  The mergejoinability test also eliminates clauses
     258              :      * containing volatile functions, which we couldn't depend on.
     259              :      */
     260        13096 :     foreach(l, innerrel->joininfo)
     261              :     {
     262         6590 :         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
     263              : 
     264              :         /*
     265              :          * If the current join commutes with some other outer join(s) via
     266              :          * outer join identity 3, there will be multiple clones of its join
     267              :          * clauses in the joininfo list.  We want to consider only the
     268              :          * has_clone form of such clauses.  Processing more than one form
     269              :          * would be wasteful, and also some of the others would confuse the
     270              :          * RINFO_IS_PUSHED_DOWN test below.
     271              :          */
     272         6590 :         if (restrictinfo->is_clone)
     273           47 :             continue;           /* ignore it */
     274              : 
     275              :         /*
     276              :          * If it's not a join clause for this outer join, we can't use it.
     277              :          * Note that if the clause is pushed-down, then it is logically from
     278              :          * above the outer join, even if it references no other rels (it might
     279              :          * be from WHERE, for example).
     280              :          */
     281         6543 :         if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
     282           63 :             continue;           /* ignore; not useful here */
     283              : 
     284              :         /* Ignore if it's not a mergejoinable clause */
     285         6480 :         if (!restrictinfo->can_join ||
     286         6388 :             restrictinfo->mergeopfamilies == NIL)
     287           92 :             continue;           /* not mergejoinable */
     288              : 
     289              :         /*
     290              :          * Check if the clause has the form "outer op inner" or "inner op
     291              :          * outer", and if so mark which side is inner.
     292              :          */
     293         6388 :         if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
     294              :                                      innerrel->relids))
     295            3 :             continue;           /* no good for these input relations */
     296              : 
     297              :         /* OK, add to list */
     298         6385 :         clause_list = lappend(clause_list, restrictinfo);
     299              :     }
     300              : 
     301              :     /*
     302              :      * Now that we have the relevant equality join clauses, try to prove the
     303              :      * innerrel distinct.
     304              :      */
     305         6506 :     if (rel_is_distinct_for(root, innerrel, clause_list, NULL))
     306         6379 :         return true;
     307              : 
     308              :     /*
     309              :      * Some day it would be nice to check for other methods of establishing
     310              :      * distinctness.
     311              :      */
     312          127 :     return false;
     313              : }
     314              : 
     315              : 
     316              : /*
     317              :  * Remove the target rel->relid and references to the target join from the
     318              :  * planner's data structures, having determined that there is no need
     319              :  * to include them in the query. Optionally replace them with subst if subst
     320              :  * is non-negative.
     321              :  *
     322              :  * This function updates only parts needed for both left-join removal and
     323              :  * self-join removal.
     324              :  */
     325              : static void
     326         6679 : remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel,
     327              :                       int subst, SpecialJoinInfo *sjinfo,
     328              :                       Relids joinrelids)
     329              : {
     330         6679 :     int         relid = rel->relid;
     331              :     Index       rti;
     332              :     ListCell   *l;
     333              : 
     334              :     /*
     335              :      * Update all_baserels and related relid sets.
     336              :      */
     337         6679 :     root->all_baserels = adjust_relid_set(root->all_baserels, relid, subst);
     338         6679 :     root->all_query_rels = adjust_relid_set(root->all_query_rels, relid, subst);
     339              : 
     340         6679 :     if (sjinfo != NULL)
     341              :     {
     342        12758 :         root->outer_join_rels = bms_del_member(root->outer_join_rels,
     343         6379 :                                                sjinfo->ojrelid);
     344         6379 :         root->all_query_rels = bms_del_member(root->all_query_rels,
     345         6379 :                                               sjinfo->ojrelid);
     346              :     }
     347              : 
     348              :     /*
     349              :      * Likewise remove references from SpecialJoinInfo data structures.
     350              :      *
     351              :      * This is relevant in case the outer join we're deleting is nested inside
     352              :      * other outer joins: the upper joins' relid sets have to be adjusted. The
     353              :      * RHS of the target outer join will be made empty here, but that's OK
     354              :      * since caller will delete that SpecialJoinInfo entirely.
     355              :      */
     356        15511 :     foreach(l, root->join_info_list)
     357              :     {
     358         8832 :         SpecialJoinInfo *sjinf = (SpecialJoinInfo *) lfirst(l);
     359              : 
     360              :         /*
     361              :          * initsplan.c is fairly cavalier about allowing SpecialJoinInfos'
     362              :          * lefthand/righthand relid sets to be shared with other data
     363              :          * structures.  Ensure that we don't modify the original relid sets.
     364              :          * (The commute_xxx sets are always per-SpecialJoinInfo though.)
     365              :          */
     366         8832 :         sjinf->min_lefthand = bms_copy(sjinf->min_lefthand);
     367         8832 :         sjinf->min_righthand = bms_copy(sjinf->min_righthand);
     368         8832 :         sjinf->syn_lefthand = bms_copy(sjinf->syn_lefthand);
     369         8832 :         sjinf->syn_righthand = bms_copy(sjinf->syn_righthand);
     370              :         /* Now remove relid from the sets: */
     371         8832 :         sjinf->min_lefthand = adjust_relid_set(sjinf->min_lefthand, relid, subst);
     372         8832 :         sjinf->min_righthand = adjust_relid_set(sjinf->min_righthand, relid, subst);
     373         8832 :         sjinf->syn_lefthand = adjust_relid_set(sjinf->syn_lefthand, relid, subst);
     374         8832 :         sjinf->syn_righthand = adjust_relid_set(sjinf->syn_righthand, relid, subst);
     375              : 
     376         8832 :         if (sjinfo != NULL)
     377              :         {
     378              :             Assert(subst <= 0);
     379              : 
     380              :             /* Remove sjinfo->ojrelid bits from the sets: */
     381        17562 :             sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand,
     382         8781 :                                                  sjinfo->ojrelid);
     383        17562 :             sjinf->min_righthand = bms_del_member(sjinf->min_righthand,
     384         8781 :                                                   sjinfo->ojrelid);
     385        17562 :             sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand,
     386         8781 :                                                  sjinfo->ojrelid);
     387        17562 :             sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand,
     388         8781 :                                                   sjinfo->ojrelid);
     389              :             /* relid cannot appear in these fields, but ojrelid can: */
     390        17562 :             sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l,
     391         8781 :                                                     sjinfo->ojrelid);
     392        17562 :             sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r,
     393         8781 :                                                     sjinfo->ojrelid);
     394        17562 :             sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l,
     395         8781 :                                                     sjinfo->ojrelid);
     396         8781 :             sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r,
     397         8781 :                                                     sjinfo->ojrelid);
     398              :         }
     399              :         else
     400              :         {
     401              :             Assert(subst > 0);
     402              : 
     403           51 :             ChangeVarNodesExtended((Node *) sjinf->semi_rhs_exprs, relid, subst,
     404              :                                    0, replace_relid_callback);
     405              :         }
     406              :     }
     407              : 
     408              :     /*
     409              :      * Likewise remove references from PlaceHolderVar data structures,
     410              :      * removing any no-longer-needed placeholders entirely.  We remove PHV
     411              :      * only for left-join removal.  With self-join elimination, PHVs already
     412              :      * get moved to the remaining relation, where they might still be needed.
     413              :      * It might also happen that we skip the removal of some PHVs that could
     414              :      * be removed.  However, the overhead of extra PHVs is small compared to
     415              :      * the complexity of analysis needed to remove them.
     416              :      *
     417              :      * Removal is a bit trickier than it might seem: we can remove PHVs that
     418              :      * are used at the target rel and/or in the join qual, but not those that
     419              :      * are used at join partner rels or above the join.  It's not that easy to
     420              :      * distinguish PHVs used at partner rels from those used in the join qual,
     421              :      * since they will both have ph_needed sets that are subsets of
     422              :      * joinrelids.  However, a PHV used at a partner rel could not have the
     423              :      * target rel in ph_eval_at, so we check that while deciding whether to
     424              :      * remove or just update the PHV.  There is no corresponding test in
     425              :      * join_is_removable because it doesn't need to distinguish those cases.
     426              :      */
     427         6793 :     foreach(l, root->placeholder_list)
     428              :     {
     429          114 :         PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
     430              : 
     431              :         Assert(sjinfo == NULL || !bms_is_member(relid, phinfo->ph_lateral));
     432          207 :         if (sjinfo != NULL &&
     433          108 :             bms_is_subset(phinfo->ph_needed, joinrelids) &&
     434           15 :             bms_is_member(relid, phinfo->ph_eval_at) &&
     435            6 :             !bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
     436              :         {
     437              :             /*
     438              :              * This code shouldn't be executed if one relation is substituted
     439              :              * with another: in this case, the placeholder may be employed in
     440              :              * a filter inside the scan node the SJE removes.
     441              :              */
     442            3 :             root->placeholder_list = foreach_delete_current(root->placeholder_list,
     443              :                                                             l);
     444            3 :             root->placeholder_array[phinfo->phid] = NULL;
     445              :         }
     446              :         else
     447              :         {
     448          111 :             PlaceHolderVar *phv = phinfo->ph_var;
     449              : 
     450          111 :             phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at, relid, subst);
     451          111 :             if (sjinfo != NULL)
     452           90 :                 phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at,
     453           90 :                                                       sjinfo->ojrelid, subst);
     454              :             Assert(!bms_is_empty(phinfo->ph_eval_at));   /* checked previously */
     455              :             /* Reduce ph_needed to contain only "relation 0"; see below */
     456          111 :             if (bms_is_member(0, phinfo->ph_needed))
     457           54 :                 phinfo->ph_needed = bms_make_singleton(0);
     458              :             else
     459           57 :                 phinfo->ph_needed = NULL;
     460              : 
     461          111 :             phinfo->ph_lateral = adjust_relid_set(phinfo->ph_lateral, relid, subst);
     462              : 
     463              :             /*
     464              :              * ph_lateral might contain rels mentioned in ph_eval_at after the
     465              :              * replacement, remove them.
     466              :              */
     467          111 :             phinfo->ph_lateral = bms_difference(phinfo->ph_lateral, phinfo->ph_eval_at);
     468              :             /* ph_lateral might or might not be empty */
     469              : 
     470          111 :             phv->phrels = adjust_relid_set(phv->phrels, relid, subst);
     471          111 :             if (sjinfo != NULL)
     472           90 :                 phv->phrels = adjust_relid_set(phv->phrels,
     473           90 :                                                sjinfo->ojrelid, subst);
     474              :             Assert(!bms_is_empty(phv->phrels));
     475              : 
     476          111 :             ChangeVarNodesExtended((Node *) phv->phexpr, relid, subst, 0,
     477              :                                    replace_relid_callback);
     478              : 
     479              :             Assert(phv->phnullingrels == NULL); /* no need to adjust */
     480              :         }
     481              :     }
     482              : 
     483              :     /*
     484              :      * Likewise remove references from EquivalenceClasses.
     485              :      */
     486        36555 :     foreach(l, root->eq_classes)
     487              :     {
     488        29876 :         EquivalenceClass *ec = (EquivalenceClass *) lfirst(l);
     489              : 
     490        29876 :         if (bms_is_member(relid, ec->ec_relids) ||
     491        20561 :             (sjinfo == NULL || bms_is_member(sjinfo->ojrelid, ec->ec_relids)))
     492         9315 :             remove_rel_from_eclass(ec, sjinfo, relid, subst);
     493              :     }
     494              : 
     495              :     /*
     496              :      * Finally, we must recompute per-Var attr_needed and per-PlaceHolderVar
     497              :      * ph_needed relid sets.  These have to be known accurately, else we may
     498              :      * fail to remove other now-removable outer joins.  And our removal of the
     499              :      * join clause(s) for this outer join may mean that Vars that were
     500              :      * formerly needed no longer are.  So we have to do this honestly by
     501              :      * repeating the construction of those relid sets.  We can cheat to one
     502              :      * small extent: we can avoid re-examining the targetlist and HAVING qual
     503              :      * by preserving "relation 0" bits from the existing relid sets.  This is
     504              :      * safe because we'd never remove such references.
     505              :      *
     506              :      * So, start by removing all other bits from attr_needed sets and
     507              :      * lateral_vars lists.  (We already did this above for ph_needed.)
     508              :      */
     509        44613 :     for (rti = 1; rti < root->simple_rel_array_size; rti++)
     510              :     {
     511        37934 :         RelOptInfo *otherrel = root->simple_rel_array[rti];
     512              :         int         attroff;
     513              : 
     514              :         /* there may be empty slots corresponding to non-baserel RTEs */
     515        37934 :         if (otherrel == NULL)
     516        18652 :             continue;
     517              : 
     518              :         Assert(otherrel->relid == rti); /* sanity check on array */
     519              : 
     520        19282 :         for (attroff = otherrel->max_attr - otherrel->min_attr;
     521       416617 :              attroff >= 0;
     522       397335 :              attroff--)
     523              :         {
     524       397335 :             if (bms_is_member(0, otherrel->attr_needed[attroff]))
     525        25309 :                 otherrel->attr_needed[attroff] = bms_make_singleton(0);
     526              :             else
     527       372026 :                 otherrel->attr_needed[attroff] = NULL;
     528              :         }
     529              : 
     530        19282 :         if (subst > 0)
     531          748 :             ChangeVarNodesExtended((Node *) otherrel->lateral_vars, relid,
     532              :                                    subst, 0, replace_relid_callback);
     533              :     }
     534         6679 : }
     535              : 
     536              : /*
     537              :  * Remove the target relid and references to the target join from the
     538              :  * planner's data structures, having determined that there is no need
     539              :  * to include them in the query.
     540              :  *
     541              :  * We are not terribly thorough here.  We only bother to update parts of
     542              :  * the planner's data structures that will actually be consulted later.
     543              :  */
     544              : static void
     545         6379 : remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
     546              :                               SpecialJoinInfo *sjinfo)
     547              : {
     548         6379 :     RelOptInfo *rel = find_base_rel(root, relid);
     549         6379 :     int         ojrelid = sjinfo->ojrelid;
     550              :     Relids      joinrelids;
     551              :     Relids      join_plus_commute;
     552              :     List       *joininfos;
     553              :     ListCell   *l;
     554              : 
     555              :     /* Compute the relid set for the join we are considering */
     556         6379 :     joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
     557              :     Assert(ojrelid != 0);
     558         6379 :     joinrelids = bms_add_member(joinrelids, ojrelid);
     559              : 
     560         6379 :     remove_rel_from_query(root, rel, -1, sjinfo, joinrelids);
     561              : 
     562              :     /*
     563              :      * Remove any joinquals referencing the rel from the joininfo lists.
     564              :      *
     565              :      * In some cases, a joinqual has to be put back after deleting its
     566              :      * reference to the target rel.  This can occur for pseudoconstant and
     567              :      * outerjoin-delayed quals, which can get marked as requiring the rel in
     568              :      * order to force them to be evaluated at or above the join.  We can't
     569              :      * just discard them, though.  Only quals that logically belonged to the
     570              :      * outer join being discarded should be removed from the query.
     571              :      *
     572              :      * We might encounter a qual that is a clone of a deletable qual with some
     573              :      * outer-join relids added (see deconstruct_distribute_oj_quals).  To
     574              :      * ensure we get rid of such clones as well, add the relids of all OJs
     575              :      * commutable with this one to the set we test against for
     576              :      * pushed-down-ness.
     577              :      */
     578         6379 :     join_plus_commute = bms_union(joinrelids,
     579         6379 :                                   sjinfo->commute_above_r);
     580         6379 :     join_plus_commute = bms_add_members(join_plus_commute,
     581         6379 :                                         sjinfo->commute_below_l);
     582              : 
     583              :     /*
     584              :      * We must make a copy of the rel's old joininfo list before starting the
     585              :      * loop, because otherwise remove_join_clause_from_rels would destroy the
     586              :      * list while we're scanning it.
     587              :      */
     588         6379 :     joininfos = list_copy(rel->joininfo);
     589        12854 :     foreach(l, joininfos)
     590              :     {
     591         6475 :         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
     592              : 
     593         6475 :         remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
     594              : 
     595         6475 :         if (RINFO_IS_PUSHED_DOWN(rinfo, join_plus_commute))
     596              :         {
     597              :             /*
     598              :              * There might be references to relid or ojrelid in the
     599              :              * RestrictInfo's relid sets, as a consequence of PHVs having had
     600              :              * ph_eval_at sets that include those.  We already checked above
     601              :              * that any such PHV is safe (and updated its ph_eval_at), so we
     602              :              * can just drop those references.
     603              :              */
     604           63 :             remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
     605              : 
     606              :             /*
     607              :              * Cross-check that the clause itself does not reference the
     608              :              * target rel or join.
     609              :              */
     610              : #ifdef USE_ASSERT_CHECKING
     611              :             {
     612              :                 Relids      clause_varnos = pull_varnos(root,
     613              :                                                         (Node *) rinfo->clause);
     614              : 
     615              :                 Assert(!bms_is_member(relid, clause_varnos));
     616              :                 Assert(!bms_is_member(ojrelid, clause_varnos));
     617              :             }
     618              : #endif
     619              :             /* Now throw it back into the joininfo lists */
     620           63 :             distribute_restrictinfo_to_rels(root, rinfo);
     621              :         }
     622              :     }
     623              : 
     624              :     /*
     625              :      * There may be references to the rel in root->fkey_list, but if so,
     626              :      * match_foreign_keys_to_quals() will get rid of them.
     627              :      */
     628              : 
     629              :     /*
     630              :      * Now remove the rel from the baserel array to prevent it from being
     631              :      * referenced again.  (We can't do this earlier because
     632              :      * remove_join_clause_from_rels will touch it.)
     633              :      */
     634         6379 :     root->simple_rel_array[relid] = NULL;
     635         6379 :     root->simple_rte_array[relid] = NULL;
     636              : 
     637              :     /* And nuke the RelOptInfo, just in case there's another access path */
     638         6379 :     pfree(rel);
     639              : 
     640              :     /*
     641              :      * Now repeat construction of attr_needed bits coming from all other
     642              :      * sources.
     643              :      */
     644         6379 :     rebuild_placeholder_attr_needed(root);
     645         6379 :     rebuild_joinclause_attr_needed(root);
     646         6379 :     rebuild_eclass_attr_needed(root);
     647         6379 :     rebuild_lateral_attr_needed(root);
     648         6379 : }
     649              : 
     650              : /*
     651              :  * Remove any references to relid or ojrelid from the RestrictInfo.
     652              :  *
     653              :  * We only bother to clean out bits in clause_relids and required_relids,
     654              :  * not nullingrel bits in contained Vars and PHVs.  (This might have to be
     655              :  * improved sometime.)  However, if the RestrictInfo contains an OR clause
     656              :  * we have to also clean up the sub-clauses.
     657              :  */
     658              : static void
     659         2547 : remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid)
     660              : {
     661              :     /*
     662              :      * initsplan.c is fairly cavalier about allowing RestrictInfos to share
     663              :      * relid sets with other RestrictInfos, and SpecialJoinInfos too.  Make
     664              :      * sure this RestrictInfo has its own relid sets before we modify them.
     665              :      * (In present usage, clause_relids is probably not shared, but
     666              :      * required_relids could be; let's not assume anything.)
     667              :      */
     668         2547 :     rinfo->clause_relids = bms_copy(rinfo->clause_relids);
     669         2547 :     rinfo->clause_relids = bms_del_member(rinfo->clause_relids, relid);
     670         2547 :     rinfo->clause_relids = bms_del_member(rinfo->clause_relids, ojrelid);
     671              :     /* Likewise for required_relids */
     672         2547 :     rinfo->required_relids = bms_copy(rinfo->required_relids);
     673         2547 :     rinfo->required_relids = bms_del_member(rinfo->required_relids, relid);
     674         2547 :     rinfo->required_relids = bms_del_member(rinfo->required_relids, ojrelid);
     675              : 
     676              :     /* If it's an OR, recurse to clean up sub-clauses */
     677         2547 :     if (restriction_is_or_clause(rinfo))
     678              :     {
     679              :         ListCell   *lc;
     680              : 
     681              :         Assert(is_orclause(rinfo->orclause));
     682            9 :         foreach(lc, ((BoolExpr *) rinfo->orclause)->args)
     683              :         {
     684            6 :             Node       *orarg = (Node *) lfirst(lc);
     685              : 
     686              :             /* OR arguments should be ANDs or sub-RestrictInfos */
     687            6 :             if (is_andclause(orarg))
     688              :             {
     689            0 :                 List       *andargs = ((BoolExpr *) orarg)->args;
     690              :                 ListCell   *lc2;
     691              : 
     692            0 :                 foreach(lc2, andargs)
     693              :                 {
     694            0 :                     RestrictInfo *rinfo2 = lfirst_node(RestrictInfo, lc2);
     695              : 
     696            0 :                     remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
     697              :                 }
     698              :             }
     699              :             else
     700              :             {
     701            6 :                 RestrictInfo *rinfo2 = castNode(RestrictInfo, orarg);
     702              : 
     703            6 :                 remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
     704              :             }
     705              :         }
     706              :     }
     707         2547 : }
     708              : 
     709              : /*
     710              :  * Remove any references to relid or sjinfo->ojrelid (if sjinfo != NULL)
     711              :  * from the EquivalenceClass.
     712              :  *
     713              :  * Like remove_rel_from_restrictinfo, we don't worry about cleaning out
     714              :  * any nullingrel bits in contained Vars and PHVs.  (This might have to be
     715              :  * improved sometime.)  We do need to fix the EC and EM relid sets to ensure
     716              :  * that implied join equalities will be generated at the appropriate join
     717              :  * level(s).
     718              :  */
     719              : static void
     720         9315 : remove_rel_from_eclass(EquivalenceClass *ec, SpecialJoinInfo *sjinfo,
     721              :                        int relid, int subst)
     722              : {
     723              :     ListCell   *lc;
     724              : 
     725              :     /* Fix up the EC's overall relids */
     726         9315 :     ec->ec_relids = adjust_relid_set(ec->ec_relids, relid, subst);
     727         9315 :     if (sjinfo != NULL)
     728         8781 :         ec->ec_relids = adjust_relid_set(ec->ec_relids,
     729         8781 :                                          sjinfo->ojrelid, subst);
     730              : 
     731              :     /*
     732              :      * We don't expect any EC child members to exist at this point.  Ensure
     733              :      * that's the case, otherwise, we might be getting asked to do something
     734              :      * this function hasn't been coded for.
     735              :      */
     736              :     Assert(ec->ec_childmembers == NULL);
     737              : 
     738              :     /*
     739              :      * Fix up the member expressions.  Any non-const member that ends with
     740              :      * empty em_relids must be a Var or PHV of the removed relation.  We don't
     741              :      * need it anymore, so we can drop it.
     742              :      */
     743        21499 :     foreach(lc, ec->ec_members)
     744              :     {
     745        12184 :         EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
     746              : 
     747        12184 :         if (bms_is_member(relid, cur_em->em_relids) ||
     748         2478 :             (sjinfo != NULL && bms_is_member(sjinfo->ojrelid,
     749         2478 :                                              cur_em->em_relids)))
     750              :         {
     751              :             Assert(!cur_em->em_is_const);
     752         8781 :             cur_em->em_relids = adjust_relid_set(cur_em->em_relids, relid, subst);
     753         8781 :             if (sjinfo != NULL)
     754         8781 :                 cur_em->em_relids = adjust_relid_set(cur_em->em_relids,
     755         8781 :                                                      sjinfo->ojrelid, subst);
     756         8781 :             if (bms_is_empty(cur_em->em_relids))
     757         8775 :                 ec->ec_members = foreach_delete_current(ec->ec_members, lc);
     758              :         }
     759              :     }
     760              : 
     761              :     /* Fix up the source clauses, in case we can re-use them later */
     762        12321 :     foreach(lc, ec->ec_sources)
     763              :     {
     764         3006 :         RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
     765              : 
     766         3006 :         if (sjinfo == NULL)
     767          528 :             ChangeVarNodesExtended((Node *) rinfo, relid, subst, 0,
     768              :                                    replace_relid_callback);
     769              :         else
     770         2478 :             remove_rel_from_restrictinfo(rinfo, relid, sjinfo->ojrelid);
     771              :     }
     772              : 
     773              :     /*
     774              :      * Rather than expend code on fixing up any already-derived clauses, just
     775              :      * drop them.  (At this point, any such clauses would be base restriction
     776              :      * clauses, which we'd not need anymore anyway.)
     777              :      */
     778         9315 :     ec_clear_derived_clauses(ec);
     779         9315 : }
     780              : 
     781              : /*
     782              :  * Remove any occurrences of the target relid from a joinlist structure.
     783              :  *
     784              :  * It's easiest to build a whole new list structure, so we handle it that
     785              :  * way.  Efficiency is not a big deal here.
     786              :  *
     787              :  * *nremoved is incremented by the number of occurrences removed (there
     788              :  * should be exactly one, but the caller checks that).
     789              :  */
     790              : static List *
     791         6811 : remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
     792              : {
     793         6811 :     List       *result = NIL;
     794              :     ListCell   *jl;
     795              : 
     796        26225 :     foreach(jl, joinlist)
     797              :     {
     798        19414 :         Node       *jlnode = (Node *) lfirst(jl);
     799              : 
     800        19414 :         if (IsA(jlnode, RangeTblRef))
     801              :         {
     802        19282 :             int         varno = ((RangeTblRef *) jlnode)->rtindex;
     803              : 
     804        19282 :             if (varno == relid)
     805         6679 :                 (*nremoved)++;
     806              :             else
     807        12603 :                 result = lappend(result, jlnode);
     808              :         }
     809          132 :         else if (IsA(jlnode, List))
     810              :         {
     811              :             /* Recurse to handle subproblem */
     812              :             List       *sublist;
     813              : 
     814          132 :             sublist = remove_rel_from_joinlist((List *) jlnode,
     815              :                                                relid, nremoved);
     816              :             /* Avoid including empty sub-lists in the result */
     817          132 :             if (sublist)
     818          132 :                 result = lappend(result, sublist);
     819              :         }
     820              :         else
     821              :         {
     822            0 :             elog(ERROR, "unrecognized joinlist node type: %d",
     823              :                  (int) nodeTag(jlnode));
     824              :         }
     825              :     }
     826              : 
     827         6811 :     return result;
     828              : }
     829              : 
     830              : 
     831              : /*
     832              :  * reduce_unique_semijoins
     833              :  *      Check for semijoins that can be simplified to plain inner joins
     834              :  *      because the inner relation is provably unique for the join clauses.
     835              :  *
     836              :  * Ideally this would happen during reduce_outer_joins, but we don't have
     837              :  * enough information at that point.
     838              :  *
     839              :  * To perform the strength reduction when applicable, we need only delete
     840              :  * the semijoin's SpecialJoinInfo from root->join_info_list.  (We don't
     841              :  * bother fixing the join type attributed to it in the query jointree,
     842              :  * since that won't be consulted again.)
     843              :  */
     844              : void
     845       183569 : reduce_unique_semijoins(PlannerInfo *root)
     846              : {
     847              :     ListCell   *lc;
     848              : 
     849              :     /*
     850              :      * Scan the join_info_list to find semijoins.
     851              :      */
     852       208616 :     foreach(lc, root->join_info_list)
     853              :     {
     854        25047 :         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
     855              :         int         innerrelid;
     856              :         RelOptInfo *innerrel;
     857              :         Relids      joinrelids;
     858              :         List       *restrictlist;
     859              : 
     860              :         /*
     861              :          * Must be a semijoin to a single baserel, else we aren't going to be
     862              :          * able to do anything with it.
     863              :          */
     864        25047 :         if (sjinfo->jointype != JOIN_SEMI)
     865        24854 :             continue;
     866              : 
     867         2597 :         if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
     868           91 :             continue;
     869              : 
     870         2506 :         innerrel = find_base_rel(root, innerrelid);
     871              : 
     872              :         /*
     873              :          * Before we trouble to run generate_join_implied_equalities, make a
     874              :          * quick check to eliminate cases in which we will surely be unable to
     875              :          * prove uniqueness of the innerrel.
     876              :          */
     877         2506 :         if (!rel_supports_distinctness(root, innerrel))
     878          541 :             continue;
     879              : 
     880              :         /* Compute the relid set for the join we are considering */
     881         1965 :         joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
     882              :         Assert(sjinfo->ojrelid == 0);    /* SEMI joins don't have RT indexes */
     883              : 
     884              :         /*
     885              :          * Since we're only considering a single-rel RHS, any join clauses it
     886              :          * has must be clauses linking it to the semijoin's min_lefthand.  We
     887              :          * can also consider EC-derived join clauses.
     888              :          */
     889              :         restrictlist =
     890         1965 :             list_concat(generate_join_implied_equalities(root,
     891              :                                                          joinrelids,
     892              :                                                          sjinfo->min_lefthand,
     893              :                                                          innerrel,
     894              :                                                          NULL),
     895         1965 :                         innerrel->joininfo);
     896              : 
     897              :         /* Test whether the innerrel is unique for those clauses. */
     898         1965 :         if (!innerrel_is_unique(root,
     899              :                                 joinrelids, sjinfo->min_lefthand, innerrel,
     900              :                                 JOIN_SEMI, restrictlist, true))
     901         1772 :             continue;
     902              : 
     903              :         /* OK, remove the SpecialJoinInfo from the list. */
     904          193 :         root->join_info_list = foreach_delete_current(root->join_info_list, lc);
     905              :     }
     906       183569 : }
     907              : 
     908              : 
     909              : /*
     910              :  * rel_supports_distinctness
     911              :  *      Could the relation possibly be proven distinct on some set of columns?
     912              :  *
     913              :  * This is effectively a pre-checking function for rel_is_distinct_for().
     914              :  * It must return true if rel_is_distinct_for() could possibly return true
     915              :  * with this rel, but it should not expend a lot of cycles.  The idea is
     916              :  * that callers can avoid doing possibly-expensive processing to compute
     917              :  * rel_is_distinct_for()'s argument lists if the call could not possibly
     918              :  * succeed.
     919              :  */
     920              : static bool
     921       413966 : rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
     922              : {
     923              :     /* We only know about baserels ... */
     924       413966 :     if (rel->reloptkind != RELOPT_BASEREL)
     925       151569 :         return false;
     926       262397 :     if (rel->rtekind == RTE_RELATION)
     927              :     {
     928              :         /*
     929              :          * For a plain relation, we only know how to prove uniqueness by
     930              :          * reference to unique indexes.  Make sure there's at least one
     931              :          * suitable unique index.  It must be immediately enforced, and not a
     932              :          * partial index. (Keep these conditions in sync with
     933              :          * relation_has_unique_index_for!)
     934              :          */
     935              :         ListCell   *lc;
     936              : 
     937       327664 :         foreach(lc, rel->indexlist)
     938              :         {
     939       291300 :             IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
     940              : 
     941       291300 :             if (ind->unique && ind->immediate && ind->indpred == NIL)
     942       203872 :                 return true;
     943              :         }
     944              :     }
     945        22161 :     else if (rel->rtekind == RTE_SUBQUERY)
     946              :     {
     947         6450 :         Query      *subquery = root->simple_rte_array[rel->relid]->subquery;
     948              : 
     949              :         /* Check if the subquery has any qualities that support distinctness */
     950         6450 :         if (query_supports_distinctness(subquery))
     951         5524 :             return true;
     952              :     }
     953              :     /* We have no proof rules for any other rtekinds. */
     954        53001 :     return false;
     955              : }
     956              : 
     957              : /*
     958              :  * rel_is_distinct_for
     959              :  *      Does the relation return only distinct rows according to clause_list?
     960              :  *
     961              :  * clause_list is a list of join restriction clauses involving this rel and
     962              :  * some other one.  Return true if no two rows emitted by this rel could
     963              :  * possibly join to the same row of the other rel.
     964              :  *
     965              :  * The caller must have already determined that each condition is a
     966              :  * mergejoinable equality with an expression in this relation on one side, and
     967              :  * an expression not involving this relation on the other.  The transient
     968              :  * outer_is_left flag is used to identify which side references this relation:
     969              :  * left side if outer_is_left is false, right side if it is true.
     970              :  *
     971              :  * Note that the passed-in clause_list may be destructively modified!  This
     972              :  * is OK for current uses, because the clause_list is built by the caller for
     973              :  * the sole purpose of passing to this function.
     974              :  *
     975              :  * (*extra_clauses) to be set to the right sides of baserestrictinfo clauses,
     976              :  * looking like "x = const" if distinctness is derived from such clauses, not
     977              :  * joininfo clauses.  Pass NULL to the extra_clauses if this value is not
     978              :  * needed.
     979              :  */
     980              : static bool
     981       135416 : rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list,
     982              :                     List **extra_clauses)
     983              : {
     984              :     /*
     985              :      * We could skip a couple of tests here if we assume all callers checked
     986              :      * rel_supports_distinctness first, but it doesn't seem worth taking any
     987              :      * risk for.
     988              :      */
     989       135416 :     if (rel->reloptkind != RELOPT_BASEREL)
     990            0 :         return false;
     991       135416 :     if (rel->rtekind == RTE_RELATION)
     992              :     {
     993              :         /*
     994              :          * Examine the indexes to see if we have a matching unique index.
     995              :          * relation_has_unique_index_for automatically adds any usable
     996              :          * restriction clauses for the rel, so we needn't do that here.
     997              :          */
     998       132600 :         if (relation_has_unique_index_for(root, rel, clause_list, extra_clauses))
     999        75753 :             return true;
    1000              :     }
    1001         2816 :     else if (rel->rtekind == RTE_SUBQUERY)
    1002              :     {
    1003         2816 :         Index       relid = rel->relid;
    1004         2816 :         Query      *subquery = root->simple_rte_array[relid]->subquery;
    1005         2816 :         List       *colnos = NIL;
    1006         2816 :         List       *opids = NIL;
    1007              :         ListCell   *l;
    1008              : 
    1009              :         /*
    1010              :          * Build the argument lists for query_is_distinct_for: a list of
    1011              :          * output column numbers that the query needs to be distinct over, and
    1012              :          * a list of equality operators that the output columns need to be
    1013              :          * distinct according to.
    1014              :          *
    1015              :          * (XXX we are not considering restriction clauses attached to the
    1016              :          * subquery; is that worth doing?)
    1017              :          */
    1018         5354 :         foreach(l, clause_list)
    1019              :         {
    1020         2538 :             RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
    1021              :             Oid         op;
    1022              :             Var        *var;
    1023              : 
    1024              :             /*
    1025              :              * Get the equality operator we need uniqueness according to.
    1026              :              * (This might be a cross-type operator and thus not exactly the
    1027              :              * same operator the subquery would consider; that's all right
    1028              :              * since query_is_distinct_for can resolve such cases.)  The
    1029              :              * caller's mergejoinability test should have selected only
    1030              :              * OpExprs.
    1031              :              */
    1032         2538 :             op = castNode(OpExpr, rinfo->clause)->opno;
    1033              : 
    1034              :             /* caller identified the inner side for us */
    1035         2538 :             if (rinfo->outer_is_left)
    1036         2363 :                 var = (Var *) get_rightop(rinfo->clause);
    1037              :             else
    1038          175 :                 var = (Var *) get_leftop(rinfo->clause);
    1039              : 
    1040              :             /*
    1041              :              * We may ignore any RelabelType node above the operand.  (There
    1042              :              * won't be more than one, since eval_const_expressions() has been
    1043              :              * applied already.)
    1044              :              */
    1045         2538 :             if (var && IsA(var, RelabelType))
    1046         1630 :                 var = (Var *) ((RelabelType *) var)->arg;
    1047              : 
    1048              :             /*
    1049              :              * If inner side isn't a Var referencing a subquery output column,
    1050              :              * this clause doesn't help us.
    1051              :              */
    1052         2538 :             if (!var || !IsA(var, Var) ||
    1053         2532 :                 var->varno != relid || var->varlevelsup != 0)
    1054            6 :                 continue;
    1055              : 
    1056         2532 :             colnos = lappend_int(colnos, var->varattno);
    1057         2532 :             opids = lappend_oid(opids, op);
    1058              :         }
    1059              : 
    1060         2816 :         if (query_is_distinct_for(subquery, colnos, opids))
    1061          425 :             return true;
    1062              :     }
    1063        59238 :     return false;
    1064              : }
    1065              : 
    1066              : 
    1067              : /*
    1068              :  * query_supports_distinctness - could the query possibly be proven distinct
    1069              :  *      on some set of output columns?
    1070              :  *
    1071              :  * This is effectively a pre-checking function for query_is_distinct_for().
    1072              :  * It must return true if query_is_distinct_for() could possibly return true
    1073              :  * with this query, but it should not expend a lot of cycles.  The idea is
    1074              :  * that callers can avoid doing possibly-expensive processing to compute
    1075              :  * query_is_distinct_for()'s argument lists if the call could not possibly
    1076              :  * succeed.
    1077              :  */
    1078              : bool
    1079         6450 : query_supports_distinctness(Query *query)
    1080              : {
    1081              :     /* SRFs break distinctness except with DISTINCT, see below */
    1082         6450 :     if (query->hasTargetSRFs && query->distinctClause == NIL)
    1083          528 :         return false;
    1084              : 
    1085              :     /* check for features we can prove distinctness with */
    1086         5922 :     if (query->distinctClause != NIL ||
    1087         5847 :         query->groupClause != NIL ||
    1088         5745 :         query->groupingSets != NIL ||
    1089         5721 :         query->hasAggs ||
    1090         4526 :         query->havingQual ||
    1091         4526 :         query->setOperations)
    1092         5524 :         return true;
    1093              : 
    1094          398 :     return false;
    1095              : }
    1096              : 
    1097              : /*
    1098              :  * query_is_distinct_for - does query never return duplicates of the
    1099              :  *      specified columns?
    1100              :  *
    1101              :  * query is a not-yet-planned subquery (in current usage, it's always from
    1102              :  * a subquery RTE, which the planner avoids scribbling on).
    1103              :  *
    1104              :  * colnos is an integer list of output column numbers (resno's).  We are
    1105              :  * interested in whether rows consisting of just these columns are certain
    1106              :  * to be distinct.  "Distinctness" is defined according to whether the
    1107              :  * corresponding upper-level equality operators listed in opids would think
    1108              :  * the values are distinct.  (Note: the opids entries could be cross-type
    1109              :  * operators, and thus not exactly the equality operators that the subquery
    1110              :  * would use itself.  We use equality_ops_are_compatible() to check
    1111              :  * compatibility.  That looks at opfamily membership for index AMs that have
    1112              :  * declared that they support consistent equality semantics within an
    1113              :  * opfamily, and so should give trustworthy answers for all operators that we
    1114              :  * might need to deal with here.)
    1115              :  */
    1116              : bool
    1117         2816 : query_is_distinct_for(Query *query, List *colnos, List *opids)
    1118              : {
    1119              :     ListCell   *l;
    1120              :     Oid         opid;
    1121              : 
    1122              :     Assert(list_length(colnos) == list_length(opids));
    1123              : 
    1124              :     /*
    1125              :      * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
    1126              :      * columns in the DISTINCT clause appear in colnos and operator semantics
    1127              :      * match.  This is true even if there are SRFs in the DISTINCT columns or
    1128              :      * elsewhere in the tlist.
    1129              :      */
    1130         2816 :     if (query->distinctClause)
    1131              :     {
    1132           84 :         foreach(l, query->distinctClause)
    1133              :         {
    1134           63 :             SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
    1135           63 :             TargetEntry *tle = get_sortgroupclause_tle(sgc,
    1136              :                                                        query->targetList);
    1137              : 
    1138           63 :             opid = distinct_col_search(tle->resno, colnos, opids);
    1139           63 :             if (!OidIsValid(opid) ||
    1140           30 :                 !equality_ops_are_compatible(opid, sgc->eqop))
    1141              :                 break;          /* exit early if no match */
    1142              :         }
    1143           54 :         if (l == NULL)          /* had matches for all? */
    1144           21 :             return true;
    1145              :     }
    1146              : 
    1147              :     /*
    1148              :      * Otherwise, a set-returning function in the query's targetlist can
    1149              :      * result in returning duplicate rows, despite any grouping that might
    1150              :      * occur before tlist evaluation.  (If all tlist SRFs are within GROUP BY
    1151              :      * columns, it would be safe because they'd be expanded before grouping.
    1152              :      * But it doesn't currently seem worth the effort to check for that.)
    1153              :      */
    1154         2795 :     if (query->hasTargetSRFs)
    1155            0 :         return false;
    1156              : 
    1157              :     /*
    1158              :      * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
    1159              :      * the grouped columns appear in colnos and operator semantics match.
    1160              :      */
    1161         2795 :     if (query->groupClause && !query->groupingSets)
    1162              :     {
    1163          120 :         foreach(l, query->groupClause)
    1164              :         {
    1165           82 :             SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
    1166           82 :             TargetEntry *tle = get_sortgroupclause_tle(sgc,
    1167              :                                                        query->targetList);
    1168              : 
    1169           82 :             opid = distinct_col_search(tle->resno, colnos, opids);
    1170           82 :             if (!OidIsValid(opid) ||
    1171           59 :                 !equality_ops_are_compatible(opid, sgc->eqop))
    1172              :                 break;          /* exit early if no match */
    1173              :         }
    1174           61 :         if (l == NULL)          /* had matches for all? */
    1175           38 :             return true;
    1176              :     }
    1177         2734 :     else if (query->groupingSets)
    1178              :     {
    1179              :         List       *gsets;
    1180              : 
    1181              :         /*
    1182              :          * If we have grouping sets with expressions, we probably don't have
    1183              :          * uniqueness and analysis would be hard. Punt.
    1184              :          */
    1185           30 :         if (query->groupClause)
    1186            6 :             return false;
    1187              : 
    1188              :         /*
    1189              :          * If we have no groupClause (therefore no grouping expressions), we
    1190              :          * might have one or many empty grouping sets.  If there's just one,
    1191              :          * or if the DISTINCT clause is used on the GROUP BY, then we're
    1192              :          * returning only one row and are certainly unique.  But otherwise, we
    1193              :          * know we're certainly not unique.
    1194              :          */
    1195           24 :         if (query->groupDistinct)
    1196            3 :             return true;
    1197              : 
    1198           21 :         gsets = expand_grouping_sets(query->groupingSets, false, -1);
    1199              : 
    1200           21 :         return (list_length(gsets) == 1);
    1201              :     }
    1202              :     else
    1203              :     {
    1204              :         /*
    1205              :          * If we have no GROUP BY, but do have aggregates or HAVING, then the
    1206              :          * result is at most one row so it's surely unique, for any operators.
    1207              :          */
    1208         2704 :         if (query->hasAggs || query->havingQual)
    1209          310 :             return true;
    1210              :     }
    1211              : 
    1212              :     /*
    1213              :      * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
    1214              :      * except with ALL.
    1215              :      */
    1216         2417 :     if (query->setOperations)
    1217              :     {
    1218         2361 :         SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations);
    1219              : 
    1220              :         Assert(topop->op != SETOP_NONE);
    1221              : 
    1222         2361 :         if (!topop->all)
    1223              :         {
    1224              :             ListCell   *lg;
    1225              : 
    1226              :             /* We're good if all the nonjunk output columns are in colnos */
    1227           71 :             lg = list_head(topop->groupClauses);
    1228          118 :             foreach(l, query->targetList)
    1229              :             {
    1230           74 :                 TargetEntry *tle = (TargetEntry *) lfirst(l);
    1231              :                 SortGroupClause *sgc;
    1232              : 
    1233           74 :                 if (tle->resjunk)
    1234            0 :                     continue;   /* ignore resjunk columns */
    1235              : 
    1236              :                 /* non-resjunk columns should have grouping clauses */
    1237              :                 Assert(lg != NULL);
    1238           74 :                 sgc = (SortGroupClause *) lfirst(lg);
    1239           74 :                 lg = lnext(topop->groupClauses, lg);
    1240              : 
    1241           74 :                 opid = distinct_col_search(tle->resno, colnos, opids);
    1242           74 :                 if (!OidIsValid(opid) ||
    1243           47 :                     !equality_ops_are_compatible(opid, sgc->eqop))
    1244              :                     break;      /* exit early if no match */
    1245              :             }
    1246           71 :             if (l == NULL)      /* had matches for all? */
    1247           44 :                 return true;
    1248              :         }
    1249              :     }
    1250              : 
    1251              :     /*
    1252              :      * XXX Are there any other cases in which we can easily see the result
    1253              :      * must be distinct?
    1254              :      *
    1255              :      * If you do add more smarts to this function, be sure to update
    1256              :      * query_supports_distinctness() to match.
    1257              :      */
    1258              : 
    1259         2373 :     return false;
    1260              : }
    1261              : 
    1262              : /*
    1263              :  * distinct_col_search - subroutine for query_is_distinct_for
    1264              :  *
    1265              :  * If colno is in colnos, return the corresponding element of opids,
    1266              :  * else return InvalidOid.  (Ordinarily colnos would not contain duplicates,
    1267              :  * but if it does, we arbitrarily select the first match.)
    1268              :  */
    1269              : static Oid
    1270          219 : distinct_col_search(int colno, List *colnos, List *opids)
    1271              : {
    1272              :     ListCell   *lc1,
    1273              :                *lc2;
    1274              : 
    1275          316 :     forboth(lc1, colnos, lc2, opids)
    1276              :     {
    1277          233 :         if (colno == lfirst_int(lc1))
    1278          136 :             return lfirst_oid(lc2);
    1279              :     }
    1280           83 :     return InvalidOid;
    1281              : }
    1282              : 
    1283              : 
    1284              : /*
    1285              :  * innerrel_is_unique
    1286              :  *    Check if the innerrel provably contains at most one tuple matching any
    1287              :  *    tuple from the outerrel, based on join clauses in the 'restrictlist'.
    1288              :  *
    1289              :  * We need an actual RelOptInfo for the innerrel, but it's sufficient to
    1290              :  * identify the outerrel by its Relids.  This asymmetry supports use of this
    1291              :  * function before joinrels have been built.  (The caller is expected to
    1292              :  * also supply the joinrelids, just to save recalculating that.)
    1293              :  *
    1294              :  * The proof must be made based only on clauses that will be "joinquals"
    1295              :  * rather than "otherquals" at execution.  For an inner join there's no
    1296              :  * difference; but if the join is outer, we must ignore pushed-down quals,
    1297              :  * as those will become "otherquals".  Note that this means the answer might
    1298              :  * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
    1299              :  * answer without regard to that, callers must take care not to call this
    1300              :  * with jointypes that would be classified differently by IS_OUTER_JOIN().
    1301              :  *
    1302              :  * The actual proof is undertaken by is_innerrel_unique_for(); this function
    1303              :  * is a frontend that is mainly concerned with caching the answers.
    1304              :  * In particular, the force_cache argument allows overriding the internal
    1305              :  * heuristic about whether to cache negative answers; it should be "true"
    1306              :  * if making an inquiry that is not part of the normal bottom-up join search
    1307              :  * sequence.
    1308              :  */
    1309              : bool
    1310       445175 : innerrel_is_unique(PlannerInfo *root,
    1311              :                    Relids joinrelids,
    1312              :                    Relids outerrelids,
    1313              :                    RelOptInfo *innerrel,
    1314              :                    JoinType jointype,
    1315              :                    List *restrictlist,
    1316              :                    bool force_cache)
    1317              : {
    1318       445175 :     return innerrel_is_unique_ext(root, joinrelids, outerrelids, innerrel,
    1319              :                                   jointype, restrictlist, force_cache, NULL);
    1320              : }
    1321              : 
    1322              : /*
    1323              :  * innerrel_is_unique_ext
    1324              :  *    Do the same as innerrel_is_unique(), but also set to (*extra_clauses)
    1325              :  *    additional clauses from a baserestrictinfo list used to prove the
    1326              :  *    uniqueness.
    1327              :  *
    1328              :  * A non-NULL extra_clauses indicates that we're checking for self-join and
    1329              :  * correspondingly dealing with filtered clauses.
    1330              :  */
    1331              : bool
    1332       446217 : innerrel_is_unique_ext(PlannerInfo *root,
    1333              :                        Relids joinrelids,
    1334              :                        Relids outerrelids,
    1335              :                        RelOptInfo *innerrel,
    1336              :                        JoinType jointype,
    1337              :                        List *restrictlist,
    1338              :                        bool force_cache,
    1339              :                        List **extra_clauses)
    1340              : {
    1341              :     MemoryContext old_context;
    1342              :     ListCell   *lc;
    1343              :     UniqueRelInfo *uniqueRelInfo;
    1344       446217 :     List       *outer_exprs = NIL;
    1345       446217 :     bool        self_join = (extra_clauses != NULL);
    1346              : 
    1347              :     /* Certainly can't prove uniqueness when there are no joinclauses */
    1348       446217 :     if (restrictlist == NIL)
    1349        58500 :         return false;
    1350              : 
    1351              :     /*
    1352              :      * Make a quick check to eliminate cases in which we will surely be unable
    1353              :      * to prove uniqueness of the innerrel.
    1354              :      */
    1355       387717 :     if (!rel_supports_distinctness(root, innerrel))
    1356       202380 :         return false;
    1357              : 
    1358              :     /*
    1359              :      * Query the cache to see if we've managed to prove that innerrel is
    1360              :      * unique for any subset of this outerrel.  For non-self-join search, we
    1361              :      * don't need an exact match, as extra outerrels can't make the innerrel
    1362              :      * any less unique (or more formally, the restrictlist for a join to a
    1363              :      * superset outerrel must be a superset of the conditions we successfully
    1364              :      * used before). For self-join search, we require an exact match of
    1365              :      * outerrels because we need extra clauses to be valid for our case. Also,
    1366              :      * for self-join checking we've filtered the clauses list.  Thus, we can
    1367              :      * match only the result cached for a self-join search for another
    1368              :      * self-join check.
    1369              :      */
    1370       204187 :     foreach(lc, innerrel->unique_for_rels)
    1371              :     {
    1372        75097 :         uniqueRelInfo = (UniqueRelInfo *) lfirst(lc);
    1373              : 
    1374        75097 :         if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) ||
    1375           37 :             (self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) &&
    1376           28 :              uniqueRelInfo->self_join))
    1377              :         {
    1378        56247 :             if (extra_clauses)
    1379            6 :                 *extra_clauses = uniqueRelInfo->extra_clauses;
    1380        56247 :             return true;        /* Success! */
    1381              :         }
    1382              :     }
    1383              : 
    1384              :     /*
    1385              :      * Conversely, we may have already determined that this outerrel, or some
    1386              :      * superset thereof, cannot prove this innerrel to be unique.
    1387              :      */
    1388       129368 :     foreach(lc, innerrel->non_unique_for_rels)
    1389              :     {
    1390          458 :         Relids      unique_for_rels = (Relids) lfirst(lc);
    1391              : 
    1392          458 :         if (bms_is_subset(outerrelids, unique_for_rels))
    1393          180 :             return false;
    1394              :     }
    1395              : 
    1396              :     /* No cached information, so try to make the proof. */
    1397       128910 :     if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
    1398              :                                jointype, restrictlist,
    1399              :                                self_join ? &outer_exprs : NULL))
    1400              :     {
    1401              :         /*
    1402              :          * Cache the positive result for future probes, being sure to keep it
    1403              :          * in the planner_cxt even if we are working in GEQO.
    1404              :          *
    1405              :          * Note: one might consider trying to isolate the minimal subset of
    1406              :          * the outerrels that proved the innerrel unique.  But it's not worth
    1407              :          * the trouble, because the planner builds up joinrels incrementally
    1408              :          * and so we'll see the minimally sufficient outerrels before any
    1409              :          * supersets of them anyway.
    1410              :          */
    1411        69799 :         old_context = MemoryContextSwitchTo(root->planner_cxt);
    1412        69799 :         uniqueRelInfo = makeNode(UniqueRelInfo);
    1413        69799 :         uniqueRelInfo->outerrelids = bms_copy(outerrelids);
    1414        69799 :         uniqueRelInfo->self_join = self_join;
    1415        69799 :         uniqueRelInfo->extra_clauses = outer_exprs;
    1416        69799 :         innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
    1417              :                                             uniqueRelInfo);
    1418        69799 :         MemoryContextSwitchTo(old_context);
    1419              : 
    1420        69799 :         if (extra_clauses)
    1421          327 :             *extra_clauses = outer_exprs;
    1422        69799 :         return true;            /* Success! */
    1423              :     }
    1424              :     else
    1425              :     {
    1426              :         /*
    1427              :          * None of the join conditions for outerrel proved innerrel unique, so
    1428              :          * we can safely reject this outerrel or any subset of it in future
    1429              :          * checks.
    1430              :          *
    1431              :          * However, in normal planning mode, caching this knowledge is totally
    1432              :          * pointless; it won't be queried again, because we build up joinrels
    1433              :          * from smaller to larger.  It's only useful when using GEQO or
    1434              :          * another planner extension that attempts planning multiple times.
    1435              :          *
    1436              :          * Also, allow callers to override that heuristic and force caching;
    1437              :          * that's useful for reduce_unique_semijoins, which calls here before
    1438              :          * the normal join search starts.
    1439              :          */
    1440        59111 :         if (force_cache || root->assumeReplanning)
    1441              :         {
    1442         1952 :             old_context = MemoryContextSwitchTo(root->planner_cxt);
    1443         1952 :             innerrel->non_unique_for_rels =
    1444         1952 :                 lappend(innerrel->non_unique_for_rels,
    1445         1952 :                         bms_copy(outerrelids));
    1446         1952 :             MemoryContextSwitchTo(old_context);
    1447              :         }
    1448              : 
    1449        59111 :         return false;
    1450              :     }
    1451              : }
    1452              : 
    1453              : /*
    1454              :  * is_innerrel_unique_for
    1455              :  *    Check if the innerrel provably contains at most one tuple matching any
    1456              :  *    tuple from the outerrel, based on join clauses in the 'restrictlist'.
    1457              :  */
    1458              : static bool
    1459       128910 : is_innerrel_unique_for(PlannerInfo *root,
    1460              :                        Relids joinrelids,
    1461              :                        Relids outerrelids,
    1462              :                        RelOptInfo *innerrel,
    1463              :                        JoinType jointype,
    1464              :                        List *restrictlist,
    1465              :                        List **extra_clauses)
    1466              : {
    1467       128910 :     List       *clause_list = NIL;
    1468              :     ListCell   *lc;
    1469              : 
    1470              :     /*
    1471              :      * Search for mergejoinable clauses that constrain the inner rel against
    1472              :      * the outer rel.  If an operator is mergejoinable then it behaves like
    1473              :      * equality for some btree opclass, so it's what we want.  The
    1474              :      * mergejoinability test also eliminates clauses containing volatile
    1475              :      * functions, which we couldn't depend on.
    1476              :      */
    1477       292866 :     foreach(lc, restrictlist)
    1478              :     {
    1479       163956 :         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
    1480              : 
    1481              :         /*
    1482              :          * As noted above, if it's a pushed-down clause and we're at an outer
    1483              :          * join, we can't use it.
    1484              :          */
    1485       163956 :         if (IS_OUTER_JOIN(jointype) &&
    1486        69418 :             RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
    1487         5835 :             continue;
    1488              : 
    1489              :         /* Ignore if it's not a mergejoinable clause */
    1490       158121 :         if (!restrictinfo->can_join ||
    1491       148715 :             restrictinfo->mergeopfamilies == NIL)
    1492         9901 :             continue;           /* not mergejoinable */
    1493              : 
    1494              :         /*
    1495              :          * Check if the clause has the form "outer op inner" or "inner op
    1496              :          * outer", and if so mark which side is inner.
    1497              :          */
    1498       148220 :         if (!clause_sides_match_join(restrictinfo, outerrelids,
    1499              :                                      innerrel->relids))
    1500           20 :             continue;           /* no good for these input relations */
    1501              : 
    1502              :         /* OK, add to the list */
    1503       148200 :         clause_list = lappend(clause_list, restrictinfo);
    1504              :     }
    1505              : 
    1506              :     /* Let rel_is_distinct_for() do the hard work */
    1507       128910 :     return rel_is_distinct_for(root, innerrel, clause_list, extra_clauses);
    1508              : }
    1509              : 
    1510              : /*
    1511              :  * Update EC members to point to the remaining relation instead of the removed
    1512              :  * one, removing duplicates.
    1513              :  *
    1514              :  * Restriction clauses for base relations are already distributed to
    1515              :  * the respective baserestrictinfo lists (see
    1516              :  * generate_implied_equalities_for_column). The above code has already processed
    1517              :  * this list and updated these clauses to reference the remaining
    1518              :  * relation, so that we can skip them here based on their relids.
    1519              :  *
    1520              :  * Likewise, we have already processed the join clauses that join the
    1521              :  * removed relation to the remaining one.
    1522              :  *
    1523              :  * Finally, there might be join clauses tying the removed relation to
    1524              :  * some third relation.  We can't just delete the source clauses and
    1525              :  * regenerate them from the EC because the corresponding equality
    1526              :  * operators might be missing (see the handling of ec_broken).
    1527              :  * Therefore, we will update the references in the source clauses.
    1528              :  *
    1529              :  * Derived clauses can be generated again, so it is simpler just to
    1530              :  * delete them.
    1531              :  */
    1532              : static void
    1533          462 : update_eclasses(EquivalenceClass *ec, int from, int to)
    1534              : {
    1535          462 :     List       *new_members = NIL;
    1536          462 :     List       *new_sources = NIL;
    1537              : 
    1538              :     /*
    1539              :      * We don't expect any EC child members to exist at this point.  Ensure
    1540              :      * that's the case, otherwise, we might be getting asked to do something
    1541              :      * this function hasn't been coded for.
    1542              :      */
    1543              :     Assert(ec->ec_childmembers == NULL);
    1544              : 
    1545         1863 :     foreach_node(EquivalenceMember, em, ec->ec_members)
    1546              :     {
    1547          939 :         bool        is_redundant = false;
    1548              : 
    1549          939 :         if (!bms_is_member(from, em->em_relids))
    1550              :         {
    1551          468 :             new_members = lappend(new_members, em);
    1552          468 :             continue;
    1553              :         }
    1554              : 
    1555          471 :         em->em_relids = adjust_relid_set(em->em_relids, from, to);
    1556          471 :         em->em_jdomain->jd_relids = adjust_relid_set(em->em_jdomain->jd_relids, from, to);
    1557              : 
    1558              :         /* We only process inner joins */
    1559          471 :         ChangeVarNodesExtended((Node *) em->em_expr, from, to, 0,
    1560              :                                replace_relid_callback);
    1561              : 
    1562          954 :         foreach_node(EquivalenceMember, other, new_members)
    1563              :         {
    1564          152 :             if (!equal(em->em_relids, other->em_relids))
    1565           12 :                 continue;
    1566              : 
    1567          140 :             if (equal(em->em_expr, other->em_expr))
    1568              :             {
    1569          140 :                 is_redundant = true;
    1570          140 :                 break;
    1571              :             }
    1572              :         }
    1573              : 
    1574          471 :         if (!is_redundant)
    1575          331 :             new_members = lappend(new_members, em);
    1576              :     }
    1577              : 
    1578          462 :     list_free(ec->ec_members);
    1579          462 :     ec->ec_members = new_members;
    1580              : 
    1581          462 :     ec_clear_derived_clauses(ec);
    1582              : 
    1583              :     /* Update EC source expressions */
    1584         1401 :     foreach_node(RestrictInfo, rinfo, ec->ec_sources)
    1585              :     {
    1586          477 :         bool        is_redundant = false;
    1587              : 
    1588          477 :         if (!bms_is_member(from, rinfo->required_relids))
    1589              :         {
    1590           59 :             new_sources = lappend(new_sources, rinfo);
    1591           59 :             continue;
    1592              :         }
    1593              : 
    1594          418 :         ChangeVarNodesExtended((Node *) rinfo, from, to, 0,
    1595              :                                replace_relid_callback);
    1596              : 
    1597              :         /*
    1598              :          * After switching the clause to the remaining relation, check it for
    1599              :          * redundancy with existing ones. We don't have to check for
    1600              :          * redundancy with derived clauses, because we've just deleted them.
    1601              :          */
    1602          848 :         foreach_node(RestrictInfo, other, new_sources)
    1603              :         {
    1604           18 :             if (!equal(rinfo->clause_relids, other->clause_relids))
    1605           12 :                 continue;
    1606              : 
    1607            6 :             if (equal(rinfo->clause, other->clause))
    1608              :             {
    1609            6 :                 is_redundant = true;
    1610            6 :                 break;
    1611              :             }
    1612              :         }
    1613              : 
    1614          418 :         if (!is_redundant)
    1615          412 :             new_sources = lappend(new_sources, rinfo);
    1616              :     }
    1617              : 
    1618          462 :     list_free(ec->ec_sources);
    1619          462 :     ec->ec_sources = new_sources;
    1620          462 :     ec->ec_relids = adjust_relid_set(ec->ec_relids, from, to);
    1621          462 : }
    1622              : 
    1623              : /*
    1624              :  * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field,
    1625              :  * which makes almost every RestrictInfo unique.  This type of comparison is
    1626              :  * useful when removing duplicates while moving RestrictInfo's from removed
    1627              :  * relation to remaining relation during self-join elimination.
    1628              :  *
    1629              :  * XXX: In the future, we might remove the 'rinfo_serial' field completely and
    1630              :  * get rid of this function.
    1631              :  */
    1632              : static bool
    1633          256 : restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b)
    1634              : {
    1635          256 :     int         saved_rinfo_serial = a->rinfo_serial;
    1636              :     bool        result;
    1637              : 
    1638          256 :     a->rinfo_serial = b->rinfo_serial;
    1639          256 :     result = equal(a, b);
    1640          256 :     a->rinfo_serial = saved_rinfo_serial;
    1641              : 
    1642          256 :     return result;
    1643              : }
    1644              : 
    1645              : /*
    1646              :  * This function adds all non-redundant clauses to the keeping relation
    1647              :  * during self-join elimination.  That is a contradictory operation. On the
    1648              :  * one hand, we reduce the length of the `restrict` lists, which can
    1649              :  * impact planning or executing time.  Additionally, we improve the
    1650              :  * accuracy of cardinality estimation.  On the other hand, it is one more
    1651              :  * place that can make planning time much longer in specific cases.  It
    1652              :  * would have been better to avoid calling the equal() function here, but
    1653              :  * it's the only way to detect duplicated inequality expressions.
    1654              :  *
    1655              :  * (*keep_rinfo_list) is given by pointer because it might be altered by
    1656              :  * distribute_restrictinfo_to_rels().
    1657              :  */
    1658              : static void
    1659          600 : add_non_redundant_clauses(PlannerInfo *root,
    1660              :                           List *rinfo_candidates,
    1661              :                           List **keep_rinfo_list,
    1662              :                           Index removed_relid)
    1663              : {
    1664         1664 :     foreach_node(RestrictInfo, rinfo, rinfo_candidates)
    1665              :     {
    1666          464 :         bool        is_redundant = false;
    1667              : 
    1668              :         Assert(!bms_is_member(removed_relid, rinfo->required_relids));
    1669              : 
    1670         1116 :         foreach_node(RestrictInfo, src, (*keep_rinfo_list))
    1671              :         {
    1672          265 :             if (!bms_equal(src->clause_relids, rinfo->clause_relids))
    1673              :                 /* Can't compare trivially different clauses */
    1674            6 :                 continue;
    1675              : 
    1676          259 :             if (src == rinfo ||
    1677          259 :                 (rinfo->parent_ec != NULL &&
    1678          408 :                  src->parent_ec == rinfo->parent_ec) ||
    1679          256 :                 restrict_infos_logically_equal(rinfo, src))
    1680              :             {
    1681           77 :                 is_redundant = true;
    1682           77 :                 break;
    1683              :             }
    1684              :         }
    1685          464 :         if (!is_redundant)
    1686          387 :             distribute_restrictinfo_to_rels(root, rinfo);
    1687              :     }
    1688          600 : }
    1689              : 
    1690              : /*
    1691              :  * A custom callback for ChangeVarNodesExtended() providing Self-join
    1692              :  * elimination (SJE) related functionality
    1693              :  *
    1694              :  * SJE needs to skip the RangeTblRef node type.  During SJE's last
    1695              :  * step, remove_rel_from_joinlist() removes remaining RangeTblRefs
    1696              :  * with target relid.  If ChangeVarNodes() replaces the target relid
    1697              :  * before, remove_rel_from_joinlist() would fail to identify the nodes
    1698              :  * to delete.
    1699              :  *
    1700              :  * SJE also needs to change the relids within RestrictInfo's.
    1701              :  */
    1702              : static bool
    1703        20551 : replace_relid_callback(Node *node, ChangeVarNodes_context *context)
    1704              : {
    1705        20551 :     if (IsA(node, RangeTblRef))
    1706              :     {
    1707          772 :         return true;
    1708              :     }
    1709        19779 :     else if (IsA(node, RestrictInfo))
    1710              :     {
    1711         1422 :         RestrictInfo *rinfo = (RestrictInfo *) node;
    1712         1422 :         int         relid = -1;
    1713         1422 :         bool        is_req_equal =
    1714         1422 :             (rinfo->required_relids == rinfo->clause_relids);
    1715         1422 :         bool        clause_relids_is_multiple =
    1716         1422 :             (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
    1717              : 
    1718              :         /*
    1719              :          * Recurse down into clauses if the target relation is present in
    1720              :          * clause_relids or required_relids.  We must check required_relids
    1721              :          * because the relation not present in clause_relids might still be
    1722              :          * present somewhere in orclause.
    1723              :          */
    1724         1980 :         if (bms_is_member(context->rt_index, rinfo->clause_relids) ||
    1725          558 :             bms_is_member(context->rt_index, rinfo->required_relids))
    1726              :         {
    1727              :             Relids      new_clause_relids;
    1728              : 
    1729          888 :             ChangeVarNodesWalkExpression((Node *) rinfo->clause, context);
    1730          888 :             ChangeVarNodesWalkExpression((Node *) rinfo->orclause, context);
    1731              : 
    1732          888 :             new_clause_relids = adjust_relid_set(rinfo->clause_relids,
    1733              :                                                  context->rt_index,
    1734              :                                                  context->new_index);
    1735              : 
    1736              :             /*
    1737              :              * Incrementally adjust num_base_rels based on the change of
    1738              :              * clause_relids, which could contain both base relids and
    1739              :              * outer-join relids.  This operation is legal until we remove
    1740              :              * only baserels.
    1741              :              */
    1742          888 :             rinfo->num_base_rels -= bms_num_members(rinfo->clause_relids) -
    1743          888 :                 bms_num_members(new_clause_relids);
    1744              : 
    1745          888 :             rinfo->clause_relids = new_clause_relids;
    1746          888 :             rinfo->left_relids =
    1747          888 :                 adjust_relid_set(rinfo->left_relids, context->rt_index, context->new_index);
    1748          888 :             rinfo->right_relids =
    1749          888 :                 adjust_relid_set(rinfo->right_relids, context->rt_index, context->new_index);
    1750              :         }
    1751              : 
    1752         1422 :         if (is_req_equal)
    1753           12 :             rinfo->required_relids = rinfo->clause_relids;
    1754              :         else
    1755         1410 :             rinfo->required_relids =
    1756         1410 :                 adjust_relid_set(rinfo->required_relids, context->rt_index, context->new_index);
    1757              : 
    1758         1422 :         rinfo->outer_relids =
    1759         1422 :             adjust_relid_set(rinfo->outer_relids, context->rt_index, context->new_index);
    1760         1422 :         rinfo->incompatible_relids =
    1761         1422 :             adjust_relid_set(rinfo->incompatible_relids, context->rt_index, context->new_index);
    1762              : 
    1763         2418 :         if (rinfo->mergeopfamilies &&
    1764         1852 :             bms_get_singleton_member(rinfo->clause_relids, &relid) &&
    1765          636 :             clause_relids_is_multiple &&
    1766          636 :             relid == context->new_index && IsA(rinfo->clause, OpExpr))
    1767              :         {
    1768              :             Expr       *leftOp;
    1769              :             Expr       *rightOp;
    1770              : 
    1771          636 :             leftOp = (Expr *) get_leftop(rinfo->clause);
    1772          636 :             rightOp = (Expr *) get_rightop(rinfo->clause);
    1773              : 
    1774              :             /*
    1775              :              * For self-join elimination, changing varnos could transform
    1776              :              * "t1.a = t2.a" into "t1.a = t1.a".  That is always true as long
    1777              :              * as "t1.a" is not null.  We use equal() to check for such a
    1778              :              * case, and then we replace the qual with a check for not null
    1779              :              * (NullTest).
    1780              :              */
    1781          636 :             if (leftOp != NULL && equal(leftOp, rightOp))
    1782              :             {
    1783          630 :                 NullTest   *ntest = makeNode(NullTest);
    1784              : 
    1785          630 :                 ntest->arg = leftOp;
    1786          630 :                 ntest->nulltesttype = IS_NOT_NULL;
    1787          630 :                 ntest->argisrow = false;
    1788          630 :                 ntest->location = -1;
    1789          630 :                 rinfo->clause = (Expr *) ntest;
    1790          630 :                 rinfo->mergeopfamilies = NIL;
    1791          630 :                 rinfo->left_em = NULL;
    1792          630 :                 rinfo->right_em = NULL;
    1793              :             }
    1794              :             Assert(rinfo->orclause == NULL);
    1795              :         }
    1796         1422 :         return true;
    1797              :     }
    1798              : 
    1799        18357 :     return false;
    1800              : }
    1801              : 
    1802              : /*
    1803              :  * Remove a relation after we have proven that it participates only in an
    1804              :  * unneeded unique self-join.
    1805              :  *
    1806              :  * Replace any links in planner info structures.
    1807              :  *
    1808              :  * Transfer join and restriction clauses from the removed relation to the
    1809              :  * remaining one. We change the Vars of the clause to point to the
    1810              :  * remaining relation instead of the removed one. The clauses that require
    1811              :  * a subset of joinrelids become restriction clauses of the remaining
    1812              :  * relation, and others remain join clauses. We append them to
    1813              :  * baserestrictinfo and joininfo, respectively, trying not to introduce
    1814              :  * duplicates.
    1815              :  *
    1816              :  * We also have to process the 'joinclauses' list here, because it
    1817              :  * contains EC-derived join clauses which must become filter clauses. It
    1818              :  * is not enough to just correct the ECs because the EC-derived
    1819              :  * restrictions are generated before join removal (see
    1820              :  * generate_base_implied_equalities).
    1821              :  *
    1822              :  * NOTE: Remember to keep the code in sync with PlannerInfo to be sure all
    1823              :  * cached relids and relid bitmapsets can be correctly cleaned during the
    1824              :  * self-join elimination procedure.
    1825              :  */
    1826              : static void
    1827          300 : remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark,
    1828              :                      RelOptInfo *toKeep, RelOptInfo *toRemove,
    1829              :                      List *restrictlist)
    1830              : {
    1831              :     List       *joininfos;
    1832              :     ListCell   *lc;
    1833              :     int         i;
    1834          300 :     List       *jinfo_candidates = NIL;
    1835          300 :     List       *binfo_candidates = NIL;
    1836              : 
    1837              :     Assert(toKeep->relid > 0);
    1838              :     Assert(toRemove->relid > 0);
    1839              : 
    1840              :     /*
    1841              :      * Replace the index of the removing table with the keeping one. The
    1842              :      * technique of removing/distributing restrictinfo is used here to attach
    1843              :      * just appeared (for keeping relation) join clauses and avoid adding
    1844              :      * duplicates of those that already exist in the joininfo list.
    1845              :      */
    1846          300 :     joininfos = list_copy(toRemove->joininfo);
    1847          645 :     foreach_node(RestrictInfo, rinfo, joininfos)
    1848              :     {
    1849           45 :         remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
    1850           45 :         ChangeVarNodesExtended((Node *) rinfo, toRemove->relid, toKeep->relid,
    1851              :                                0, replace_relid_callback);
    1852              : 
    1853           45 :         if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
    1854           36 :             jinfo_candidates = lappend(jinfo_candidates, rinfo);
    1855              :         else
    1856            9 :             binfo_candidates = lappend(binfo_candidates, rinfo);
    1857              :     }
    1858              : 
    1859              :     /*
    1860              :      * Concatenate restrictlist to the list of base restrictions of the
    1861              :      * removing table just to simplify the replacement procedure: all of them
    1862              :      * weren't connected to any keeping relations and need to be added to some
    1863              :      * rels.
    1864              :      */
    1865          300 :     toRemove->baserestrictinfo = list_concat(toRemove->baserestrictinfo,
    1866              :                                              restrictlist);
    1867         1019 :     foreach_node(RestrictInfo, rinfo, toRemove->baserestrictinfo)
    1868              :     {
    1869          419 :         ChangeVarNodesExtended((Node *) rinfo, toRemove->relid, toKeep->relid,
    1870              :                                0, replace_relid_callback);
    1871              : 
    1872          419 :         if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
    1873            0 :             jinfo_candidates = lappend(jinfo_candidates, rinfo);
    1874              :         else
    1875          419 :             binfo_candidates = lappend(binfo_candidates, rinfo);
    1876              :     }
    1877              : 
    1878              :     /*
    1879              :      * Now, add all non-redundant clauses to the keeping relation.
    1880              :      */
    1881          300 :     add_non_redundant_clauses(root, binfo_candidates,
    1882              :                               &toKeep->baserestrictinfo, toRemove->relid);
    1883          300 :     add_non_redundant_clauses(root, jinfo_candidates,
    1884              :                               &toKeep->joininfo, toRemove->relid);
    1885              : 
    1886          300 :     list_free(binfo_candidates);
    1887          300 :     list_free(jinfo_candidates);
    1888              : 
    1889              :     /*
    1890              :      * Arrange equivalence classes, mentioned removing a table, with the
    1891              :      * keeping one: varno of removing table should be replaced in members and
    1892              :      * sources lists. Also, remove duplicated elements if this replacement
    1893              :      * procedure created them.
    1894              :      */
    1895          300 :     i = -1;
    1896          762 :     while ((i = bms_next_member(toRemove->eclass_indexes, i)) >= 0)
    1897              :     {
    1898          462 :         EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
    1899              : 
    1900          462 :         update_eclasses(ec, toRemove->relid, toKeep->relid);
    1901          462 :         toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i);
    1902              :     }
    1903              : 
    1904              :     /*
    1905              :      * Transfer the targetlist and attr_needed flags.
    1906              :      */
    1907              : 
    1908         1198 :     foreach(lc, toRemove->reltarget->exprs)
    1909              :     {
    1910          898 :         Node       *node = lfirst(lc);
    1911              : 
    1912          898 :         ChangeVarNodesExtended(node, toRemove->relid, toKeep->relid, 0,
    1913              :                                replace_relid_callback);
    1914          898 :         if (!list_member(toKeep->reltarget->exprs, node))
    1915           90 :             toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node);
    1916              :     }
    1917              : 
    1918         3795 :     for (i = toKeep->min_attr; i <= toKeep->max_attr; i++)
    1919              :     {
    1920         3495 :         int         attno = i - toKeep->min_attr;
    1921              : 
    1922         6990 :         toRemove->attr_needed[attno] = adjust_relid_set(toRemove->attr_needed[attno],
    1923         3495 :                                                         toRemove->relid, toKeep->relid);
    1924         3495 :         toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno],
    1925         3495 :                                                      toRemove->attr_needed[attno]);
    1926              :     }
    1927              : 
    1928              :     /*
    1929              :      * If the removed relation has a row mark, transfer it to the remaining
    1930              :      * one.
    1931              :      *
    1932              :      * If both rels have row marks, just keep the one corresponding to the
    1933              :      * remaining relation because we verified earlier that they have the same
    1934              :      * strength.
    1935              :      */
    1936          300 :     if (rmark)
    1937              :     {
    1938           37 :         if (kmark)
    1939              :         {
    1940              :             Assert(kmark->markType == rmark->markType);
    1941              : 
    1942           37 :             root->rowMarks = list_delete_ptr(root->rowMarks, rmark);
    1943              :         }
    1944              :         else
    1945              :         {
    1946              :             /* Shouldn't have inheritance children here. */
    1947              :             Assert(rmark->rti == rmark->prti);
    1948              : 
    1949            0 :             rmark->rti = rmark->prti = toKeep->relid;
    1950              :         }
    1951              :     }
    1952              : 
    1953              :     /*
    1954              :      * Replace varno in all the query structures, except nodes RangeTblRef
    1955              :      * otherwise later remove_rel_from_joinlist will yield errors.
    1956              :      */
    1957          300 :     ChangeVarNodesExtended((Node *) root->parse, toRemove->relid, toKeep->relid,
    1958              :                            0, replace_relid_callback);
    1959              : 
    1960              :     /* Replace links in the planner info */
    1961          300 :     remove_rel_from_query(root, toRemove, toKeep->relid, NULL, NULL);
    1962              : 
    1963              :     /* At last, replace varno in root targetlist and HAVING clause */
    1964          300 :     ChangeVarNodesExtended((Node *) root->processed_tlist, toRemove->relid,
    1965          300 :                            toKeep->relid, 0, replace_relid_callback);
    1966          300 :     ChangeVarNodesExtended((Node *) root->processed_groupClause,
    1967          300 :                            toRemove->relid, toKeep->relid, 0,
    1968              :                            replace_relid_callback);
    1969              : 
    1970          300 :     adjust_relid_set(root->all_result_relids, toRemove->relid, toKeep->relid);
    1971          300 :     adjust_relid_set(root->leaf_result_relids, toRemove->relid, toKeep->relid);
    1972              : 
    1973              :     /*
    1974              :      * There may be references to the rel in root->fkey_list, but if so,
    1975              :      * match_foreign_keys_to_quals() will get rid of them.
    1976              :      */
    1977              : 
    1978              :     /*
    1979              :      * Finally, remove the rel from the baserel array to prevent it from being
    1980              :      * referenced again.  (We can't do this earlier because
    1981              :      * remove_join_clause_from_rels will touch it.)
    1982              :      */
    1983          300 :     root->simple_rel_array[toRemove->relid] = NULL;
    1984          300 :     root->simple_rte_array[toRemove->relid] = NULL;
    1985              : 
    1986              :     /* And nuke the RelOptInfo, just in case there's another access path. */
    1987          300 :     pfree(toRemove);
    1988              : 
    1989              : 
    1990              :     /*
    1991              :      * Now repeat construction of attr_needed bits coming from all other
    1992              :      * sources.
    1993              :      */
    1994          300 :     rebuild_placeholder_attr_needed(root);
    1995          300 :     rebuild_joinclause_attr_needed(root);
    1996          300 :     rebuild_eclass_attr_needed(root);
    1997          300 :     rebuild_lateral_attr_needed(root);
    1998          300 : }
    1999              : 
    2000              : /*
    2001              :  * split_selfjoin_quals
    2002              :  *      Processes 'joinquals' by building two lists: one containing the quals
    2003              :  *      where the columns/exprs are on either side of the join match and
    2004              :  *      another one containing the remaining quals.
    2005              :  *
    2006              :  * 'joinquals' must only contain quals for a RTE_RELATION being joined to
    2007              :  * itself.
    2008              :  */
    2009              : static void
    2010         1042 : split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals,
    2011              :                      List **otherjoinquals, int from, int to)
    2012              : {
    2013         1042 :     List       *sjoinquals = NIL;
    2014         1042 :     List       *ojoinquals = NIL;
    2015              : 
    2016         3193 :     foreach_node(RestrictInfo, rinfo, joinquals)
    2017              :     {
    2018              :         OpExpr     *expr;
    2019              :         Node       *leftexpr;
    2020              :         Node       *rightexpr;
    2021              : 
    2022              :         /* In general, clause looks like F(arg1) = G(arg2) */
    2023         2218 :         if (!rinfo->mergeopfamilies ||
    2024         2218 :             bms_num_members(rinfo->clause_relids) != 2 ||
    2025         2218 :             bms_membership(rinfo->left_relids) != BMS_SINGLETON ||
    2026         1109 :             bms_membership(rinfo->right_relids) != BMS_SINGLETON)
    2027              :         {
    2028            0 :             ojoinquals = lappend(ojoinquals, rinfo);
    2029            0 :             continue;
    2030              :         }
    2031              : 
    2032         1109 :         expr = (OpExpr *) rinfo->clause;
    2033              : 
    2034         1109 :         if (!IsA(expr, OpExpr) || list_length(expr->args) != 2)
    2035              :         {
    2036            0 :             ojoinquals = lappend(ojoinquals, rinfo);
    2037            0 :             continue;
    2038              :         }
    2039              : 
    2040         1109 :         leftexpr = get_leftop(rinfo->clause);
    2041         1109 :         rightexpr = copyObject(get_rightop(rinfo->clause));
    2042              : 
    2043         1109 :         if (leftexpr && IsA(leftexpr, RelabelType))
    2044            6 :             leftexpr = (Node *) ((RelabelType *) leftexpr)->arg;
    2045         1109 :         if (rightexpr && IsA(rightexpr, RelabelType))
    2046            3 :             rightexpr = (Node *) ((RelabelType *) rightexpr)->arg;
    2047              : 
    2048              :         /*
    2049              :          * Quite an expensive operation, narrowing the use case. For example,
    2050              :          * when we have cast of the same var to different (but compatible)
    2051              :          * types.
    2052              :          */
    2053         1109 :         ChangeVarNodesExtended(rightexpr,
    2054         1109 :                                bms_singleton_member(rinfo->right_relids),
    2055         1109 :                                bms_singleton_member(rinfo->left_relids), 0,
    2056              :                                replace_relid_callback);
    2057              : 
    2058         1109 :         if (equal(leftexpr, rightexpr))
    2059          857 :             sjoinquals = lappend(sjoinquals, rinfo);
    2060              :         else
    2061          252 :             ojoinquals = lappend(ojoinquals, rinfo);
    2062              :     }
    2063              : 
    2064         1042 :     *selfjoinquals = sjoinquals;
    2065         1042 :     *otherjoinquals = ojoinquals;
    2066         1042 : }
    2067              : 
    2068              : /*
    2069              :  * Check for a case when uniqueness is at least partly derived from a
    2070              :  * baserestrictinfo clause. In this case, we have a chance to return only
    2071              :  * one row (if such clauses on both sides of SJ are equal) or nothing (if they
    2072              :  * are different).
    2073              :  */
    2074              : static bool
    2075          333 : match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses,
    2076              :                      Index relid)
    2077              : {
    2078          675 :     foreach_node(RestrictInfo, rinfo, uclauses)
    2079              :     {
    2080              :         Expr       *clause;
    2081              :         Node       *iclause;
    2082              :         Node       *c1;
    2083           75 :         bool        matched = false;
    2084              : 
    2085              :         Assert(outer->relid > 0 && relid > 0);
    2086              : 
    2087              :         /* Only filters like f(R.x1,...,R.xN) == expr we should consider. */
    2088              :         Assert(bms_is_empty(rinfo->left_relids) ^
    2089              :                bms_is_empty(rinfo->right_relids));
    2090              : 
    2091           75 :         clause = (Expr *) copyObject(rinfo->clause);
    2092           75 :         ChangeVarNodesExtended((Node *) clause, relid, outer->relid, 0,
    2093              :                                replace_relid_callback);
    2094              : 
    2095           75 :         iclause = bms_is_empty(rinfo->left_relids) ? get_rightop(clause) :
    2096           72 :             get_leftop(clause);
    2097           75 :         c1 = bms_is_empty(rinfo->left_relids) ? get_leftop(clause) :
    2098           72 :             get_rightop(clause);
    2099              : 
    2100              :         /*
    2101              :          * Compare these left and right sides with the corresponding sides of
    2102              :          * the outer's filters. If no one is detected - return immediately.
    2103              :          */
    2104          204 :         foreach_node(RestrictInfo, orinfo, outer->baserestrictinfo)
    2105              :         {
    2106              :             Node       *oclause;
    2107              :             Node       *c2;
    2108              : 
    2109           96 :             if (orinfo->mergeopfamilies == NIL)
    2110              :                 /* Don't consider clauses that aren't similar to 'F(X)=G(Y)' */
    2111           30 :                 continue;
    2112              : 
    2113              :             Assert(is_opclause(orinfo->clause));
    2114              : 
    2115          132 :             oclause = bms_is_empty(orinfo->left_relids) ?
    2116           66 :                 get_rightop(orinfo->clause) : get_leftop(orinfo->clause);
    2117          132 :             c2 = (bms_is_empty(orinfo->left_relids) ?
    2118           66 :                   get_leftop(orinfo->clause) : get_rightop(orinfo->clause));
    2119              : 
    2120           66 :             if (equal(iclause, oclause) && equal(c1, c2))
    2121              :             {
    2122           42 :                 matched = true;
    2123           42 :                 break;
    2124              :             }
    2125              :         }
    2126              : 
    2127           75 :         if (!matched)
    2128           33 :             return false;
    2129              :     }
    2130              : 
    2131          300 :     return true;
    2132              : }
    2133              : 
    2134              : /*
    2135              :  * Find and remove unique self-joins in a group of base relations that have
    2136              :  * the same Oid.
    2137              :  *
    2138              :  * Returns a set of relids that were removed.
    2139              :  */
    2140              : static Relids
    2141         6016 : remove_self_joins_one_group(PlannerInfo *root, Relids relids)
    2142              : {
    2143         6016 :     Relids      result = NULL;
    2144              :     int         k;              /* Index of kept relation */
    2145         6016 :     int         r = -1;         /* Index of removed relation */
    2146              : 
    2147        18726 :     while ((r = bms_next_member(relids, r)) > 0)
    2148              :     {
    2149        12710 :         RelOptInfo *rrel = root->simple_rel_array[r];
    2150              : 
    2151        12710 :         k = r;
    2152              : 
    2153        19915 :         while ((k = bms_next_member(relids, k)) > 0)
    2154              :         {
    2155         7505 :             Relids      joinrelids = NULL;
    2156         7505 :             RelOptInfo *krel = root->simple_rel_array[k];
    2157              :             List       *restrictlist;
    2158              :             List       *selfjoinquals;
    2159              :             List       *otherjoinquals;
    2160              :             ListCell   *lc;
    2161         7505 :             bool        jinfo_check = true;
    2162         7505 :             PlanRowMark *kmark = NULL;
    2163         7505 :             PlanRowMark *rmark = NULL;
    2164         7505 :             List       *uclauses = NIL;
    2165              : 
    2166              :             /* A sanity check: the relations have the same Oid. */
    2167              :             Assert(root->simple_rte_array[k]->relid ==
    2168              :                    root->simple_rte_array[r]->relid);
    2169              : 
    2170              :             /*
    2171              :              * It is impossible to eliminate the join of two relations if they
    2172              :              * belong to different rules of order. Otherwise, the planner
    2173              :              * can't find any variants of the correct query plan.
    2174              :              */
    2175         9340 :             foreach(lc, root->join_info_list)
    2176              :             {
    2177         5952 :                 SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc);
    2178              : 
    2179        11904 :                 if ((bms_is_member(k, info->syn_lefthand) ^
    2180         8482 :                      bms_is_member(r, info->syn_lefthand)) ||
    2181         2530 :                     (bms_is_member(k, info->syn_righthand) ^
    2182         2530 :                      bms_is_member(r, info->syn_righthand)))
    2183              :                 {
    2184         4117 :                     jinfo_check = false;
    2185         4117 :                     break;
    2186              :                 }
    2187              :             }
    2188         7505 :             if (!jinfo_check)
    2189         7205 :                 continue;
    2190              : 
    2191              :             /*
    2192              :              * Check Row Marks equivalence. We can't remove the join if the
    2193              :              * relations have row marks of different strength (e.g., one is
    2194              :              * locked FOR UPDATE, and another just has ROW_MARK_REFERENCE for
    2195              :              * EvalPlanQual rechecking).
    2196              :              */
    2197         3492 :             foreach(lc, root->rowMarks)
    2198              :             {
    2199          190 :                 PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc);
    2200              : 
    2201          190 :                 if (rowMark->rti == r)
    2202              :                 {
    2203              :                     Assert(rmark == NULL);
    2204           86 :                     rmark = rowMark;
    2205              :                 }
    2206          104 :                 else if (rowMark->rti == k)
    2207              :                 {
    2208              :                     Assert(kmark == NULL);
    2209           86 :                     kmark = rowMark;
    2210              :                 }
    2211              : 
    2212          190 :                 if (kmark && rmark)
    2213           86 :                     break;
    2214              :             }
    2215         3388 :             if (kmark && rmark && kmark->markType != rmark->markType)
    2216           26 :                 continue;
    2217              : 
    2218              :             /*
    2219              :              * We only deal with base rels here, so their relids bitset
    2220              :              * contains only one member -- their relid.
    2221              :              */
    2222         3362 :             joinrelids = bms_add_member(joinrelids, r);
    2223         3362 :             joinrelids = bms_add_member(joinrelids, k);
    2224              : 
    2225              :             /*
    2226              :              * PHVs should not impose any constraints on removing self-joins.
    2227              :              */
    2228              : 
    2229              :             /*
    2230              :              * At this stage, joininfo lists of inner and outer can contain
    2231              :              * only clauses required for a superior outer join that can't
    2232              :              * influence this optimization. So, we can avoid to call the
    2233              :              * build_joinrel_restrictlist() routine.
    2234              :              */
    2235         3362 :             restrictlist = generate_join_implied_equalities(root, joinrelids,
    2236              :                                                             rrel->relids,
    2237              :                                                             krel, NULL);
    2238         3362 :             if (restrictlist == NIL)
    2239         2320 :                 continue;
    2240              : 
    2241              :             /*
    2242              :              * Process restrictlist to separate the self-join quals from the
    2243              :              * other quals. e.g., "x = x" goes to selfjoinquals and "a = b" to
    2244              :              * otherjoinquals.
    2245              :              */
    2246         1042 :             split_selfjoin_quals(root, restrictlist, &selfjoinquals,
    2247         1042 :                                  &otherjoinquals, rrel->relid, krel->relid);
    2248              : 
    2249              :             Assert(list_length(restrictlist) ==
    2250              :                    (list_length(selfjoinquals) + list_length(otherjoinquals)));
    2251              : 
    2252              :             /*
    2253              :              * To enable SJE for the only degenerate case without any self
    2254              :              * join clauses at all, add baserestrictinfo to this list. The
    2255              :              * degenerate case works only if both sides have the same clause.
    2256              :              * So doesn't matter which side to add.
    2257              :              */
    2258         1042 :             selfjoinquals = list_concat(selfjoinquals, krel->baserestrictinfo);
    2259              : 
    2260              :             /*
    2261              :              * Determine if the rrel can duplicate outer rows. We must bypass
    2262              :              * the unique rel cache here since we're possibly using a subset
    2263              :              * of join quals. We can use 'force_cache' == true when all join
    2264              :              * quals are self-join quals.  Otherwise, we could end up putting
    2265              :              * false negatives in the cache.
    2266              :              */
    2267         1042 :             if (!innerrel_is_unique_ext(root, joinrelids, rrel->relids,
    2268              :                                         krel, JOIN_INNER, selfjoinquals,
    2269         1042 :                                         list_length(otherjoinquals) == 0,
    2270              :                                         &uclauses))
    2271          709 :                 continue;
    2272              : 
    2273              :             /*
    2274              :              * 'uclauses' is the copy of outer->baserestrictinfo that are
    2275              :              * associated with an index.  We proved by matching selfjoinquals
    2276              :              * to a unique index that the outer relation has at most one
    2277              :              * matching row for each inner row.  Sometimes that is not enough.
    2278              :              * e.g. "WHERE s1.b = s2.b AND s1.a = 1 AND s2.a = 2" when the
    2279              :              * unique index is (a,b).  Having non-empty uclauses, we must
    2280              :              * validate that the inner baserestrictinfo contains the same
    2281              :              * expressions, or we won't match the same row on each side of the
    2282              :              * join.
    2283              :              */
    2284          333 :             if (!match_unique_clauses(root, rrel, uclauses, krel->relid))
    2285           33 :                 continue;
    2286              : 
    2287              :             /*
    2288              :              * Remove rrel RelOptInfo from the planner structures and the
    2289              :              * corresponding row mark.
    2290              :              */
    2291          300 :             remove_self_join_rel(root, kmark, rmark, krel, rrel, restrictlist);
    2292              : 
    2293          300 :             result = bms_add_member(result, r);
    2294              : 
    2295              :             /* We have removed the outer relation, try the next one. */
    2296          300 :             break;
    2297              :         }
    2298              :     }
    2299              : 
    2300         6016 :     return result;
    2301              : }
    2302              : 
    2303              : /*
    2304              :  * Gather indexes of base relations from the joinlist and try to eliminate self
    2305              :  * joins.
    2306              :  */
    2307              : static Relids
    2308        60012 : remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove)
    2309              : {
    2310              :     ListCell   *jl;
    2311        60012 :     Relids      relids = NULL;
    2312        60012 :     SelfJoinCandidate *candidates = NULL;
    2313              :     int         i;
    2314              :     int         j;
    2315              :     int         numRels;
    2316              : 
    2317              :     /* Collect indexes of base relations of the join tree */
    2318       200676 :     foreach(jl, joinlist)
    2319              :     {
    2320       140664 :         Node       *jlnode = (Node *) lfirst(jl);
    2321              : 
    2322       140664 :         if (IsA(jlnode, RangeTblRef))
    2323              :         {
    2324       138918 :             int         varno = ((RangeTblRef *) jlnode)->rtindex;
    2325       138918 :             RangeTblEntry *rte = root->simple_rte_array[varno];
    2326              : 
    2327              :             /*
    2328              :              * We only consider ordinary relations as candidates to be
    2329              :              * removed, and these relations should not have TABLESAMPLE
    2330              :              * clauses specified.  Removing a relation with TABLESAMPLE clause
    2331              :              * could potentially change the syntax of the query. Because of
    2332              :              * UPDATE/DELETE EPQ mechanism, currently Query->resultRelation or
    2333              :              * Query->mergeTargetRelation associated rel cannot be eliminated.
    2334              :              */
    2335       138918 :             if (rte->rtekind == RTE_RELATION &&
    2336       121756 :                 rte->relkind == RELKIND_RELATION &&
    2337       118886 :                 rte->tablesample == NULL &&
    2338       118874 :                 varno != root->parse->resultRelation &&
    2339       117925 :                 varno != root->parse->mergeTargetRelation)
    2340              :             {
    2341              :                 Assert(!bms_is_member(varno, relids));
    2342       117925 :                 relids = bms_add_member(relids, varno);
    2343              :             }
    2344              :         }
    2345         1746 :         else if (IsA(jlnode, List))
    2346              :         {
    2347              :             /* Recursively go inside the sub-joinlist */
    2348         1746 :             toRemove = remove_self_joins_recurse(root, (List *) jlnode,
    2349              :                                                  toRemove);
    2350              :         }
    2351              :         else
    2352            0 :             elog(ERROR, "unrecognized joinlist node type: %d",
    2353              :                  (int) nodeTag(jlnode));
    2354              :     }
    2355              : 
    2356        60012 :     numRels = bms_num_members(relids);
    2357              : 
    2358              :     /* Need at least two relations for the join */
    2359        60012 :     if (numRels < 2)
    2360        16286 :         return toRemove;
    2361              : 
    2362              :     /*
    2363              :      * In order to find relations with the same oid we first build an array of
    2364              :      * candidates and then sort it by oid.
    2365              :      */
    2366        43726 :     candidates = palloc_array(SelfJoinCandidate, numRels);
    2367        43726 :     i = -1;
    2368        43726 :     j = 0;
    2369       149870 :     while ((i = bms_next_member(relids, i)) >= 0)
    2370              :     {
    2371       106144 :         candidates[j].relid = i;
    2372       106144 :         candidates[j].reloid = root->simple_rte_array[i]->relid;
    2373       106144 :         j++;
    2374              :     }
    2375              : 
    2376        43726 :     qsort(candidates, numRels, sizeof(SelfJoinCandidate),
    2377              :           self_join_candidates_cmp);
    2378              : 
    2379              :     /*
    2380              :      * Iteratively form a group of relation indexes with the same oid and
    2381              :      * launch the routine that detects self-joins in this group and removes
    2382              :      * excessive range table entries.
    2383              :      *
    2384              :      * At the end of the iteration, exclude the group from the overall relids
    2385              :      * list. So each next iteration of the cycle will involve less and less
    2386              :      * value of relids.
    2387              :      */
    2388        43726 :     i = 0;
    2389       149870 :     for (j = 1; j < numRels + 1; j++)
    2390              :     {
    2391       106144 :         if (j == numRels || candidates[j].reloid != candidates[i].reloid)
    2392              :         {
    2393        99492 :             if (j - i >= 2)
    2394              :             {
    2395              :                 /* Create a group of relation indexes with the same oid */
    2396         5977 :                 Relids      group = NULL;
    2397              :                 Relids      removed;
    2398              : 
    2399        18606 :                 while (i < j)
    2400              :                 {
    2401        12629 :                     group = bms_add_member(group, candidates[i].relid);
    2402        12629 :                     i++;
    2403              :                 }
    2404         5977 :                 relids = bms_del_members(relids, group);
    2405              : 
    2406              :                 /*
    2407              :                  * Try to remove self-joins from a group of identical entries.
    2408              :                  * Make the next attempt iteratively - if something is deleted
    2409              :                  * from a group, changes in clauses and equivalence classes
    2410              :                  * can give us a chance to find more candidates.
    2411              :                  */
    2412              :                 do
    2413              :                 {
    2414              :                     Assert(!bms_overlap(group, toRemove));
    2415         6016 :                     removed = remove_self_joins_one_group(root, group);
    2416         6016 :                     toRemove = bms_add_members(toRemove, removed);
    2417         6016 :                     group = bms_del_members(group, removed);
    2418         6304 :                 } while (!bms_is_empty(removed) &&
    2419          288 :                          bms_membership(group) == BMS_MULTIPLE);
    2420         5977 :                 bms_free(removed);
    2421         5977 :                 bms_free(group);
    2422              :             }
    2423              :             else
    2424              :             {
    2425              :                 /* Single relation, just remove it from the set */
    2426        93515 :                 relids = bms_del_member(relids, candidates[i].relid);
    2427        93515 :                 i = j;
    2428              :             }
    2429              :         }
    2430              :     }
    2431              : 
    2432              :     Assert(bms_is_empty(relids));
    2433              : 
    2434        43726 :     return toRemove;
    2435              : }
    2436              : 
    2437              : /*
    2438              :  * Compare self-join candidates by their oids.
    2439              :  */
    2440              : static int
    2441        77686 : self_join_candidates_cmp(const void *a, const void *b)
    2442              : {
    2443        77686 :     const SelfJoinCandidate *ca = (const SelfJoinCandidate *) a;
    2444        77686 :     const SelfJoinCandidate *cb = (const SelfJoinCandidate *) b;
    2445              : 
    2446        77686 :     if (ca->reloid != cb->reloid)
    2447        71007 :         return (ca->reloid < cb->reloid ? -1 : 1);
    2448              :     else
    2449         6679 :         return 0;
    2450              : }
    2451              : 
    2452              : /*
    2453              :  * Find and remove useless self joins.
    2454              :  *
    2455              :  * Search for joins where a relation is joined to itself. If the join clause
    2456              :  * for each tuple from one side of the join is proven to match the same
    2457              :  * physical row (or nothing) on the other side, that self-join can be
    2458              :  * eliminated from the query.  Suitable join clauses are assumed to be in the
    2459              :  * form of X = X, and can be replaced with NOT NULL clauses.
    2460              :  *
    2461              :  * For the sake of simplicity, we don't apply this optimization to special
    2462              :  * joins. Here is a list of what we could do in some particular cases:
    2463              :  * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins,
    2464              :  * and then removed normally.
    2465              :  * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND
    2466              :  * (IS NULL on join columns OR NOT inner quals)'.
    2467              :  * 'a a1 left join a a2': could simplify to a scan like inner but without
    2468              :  * NOT NULL conditions on join columns.
    2469              :  * 'a a1 left join (a a2 join b)': can't simplify this, because join to b
    2470              :  * can both remove rows and introduce duplicates.
    2471              :  *
    2472              :  * To search for removable joins, we order all the relations on their Oid,
    2473              :  * go over each set with the same Oid, and consider each pair of relations
    2474              :  * in this set.
    2475              :  *
    2476              :  * To remove the join, we mark one of the participating relations as dead
    2477              :  * and rewrite all references to it to point to the remaining relation.
    2478              :  * This includes modifying RestrictInfos, EquivalenceClasses, and
    2479              :  * EquivalenceMembers. We also have to modify the row marks. The join clauses
    2480              :  * of the removed relation become either restriction or join clauses, based on
    2481              :  * whether they reference any relations not participating in the removed join.
    2482              :  *
    2483              :  * 'joinlist' is the top-level joinlist of the query. If it has any
    2484              :  * references to the removed relations, we update them to point to the
    2485              :  * remaining ones.
    2486              :  */
    2487              : List *
    2488       183569 : remove_useless_self_joins(PlannerInfo *root, List *joinlist)
    2489              : {
    2490       183569 :     Relids      toRemove = NULL;
    2491       183569 :     int         relid = -1;
    2492              : 
    2493       367138 :     if (!enable_self_join_elimination || joinlist == NIL ||
    2494       309322 :         (list_length(joinlist) == 1 && !IsA(linitial(joinlist), List)))
    2495       125303 :         return joinlist;
    2496              : 
    2497              :     /*
    2498              :      * Merge pairs of relations participated in self-join. Remove unnecessary
    2499              :      * range table entries.
    2500              :      */
    2501        58266 :     toRemove = remove_self_joins_recurse(root, joinlist, toRemove);
    2502              : 
    2503        58266 :     if (unlikely(toRemove != NULL))
    2504              :     {
    2505              :         /* At the end, remove orphaned relation links */
    2506          585 :         while ((relid = bms_next_member(toRemove, relid)) >= 0)
    2507              :         {
    2508          300 :             int         nremoved = 0;
    2509              : 
    2510          300 :             joinlist = remove_rel_from_joinlist(joinlist, relid, &nremoved);
    2511          300 :             if (nremoved != 1)
    2512            0 :                 elog(ERROR, "failed to find relation %d in joinlist", relid);
    2513              :         }
    2514              :     }
    2515              : 
    2516        58266 :     return joinlist;
    2517              : }

Generated by: LCOV version 2.0-1