LCOV - code coverage report
Current view: top level - src/backend/optimizer/path - clausesel.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13beta1 Lines: 215 230 93.5 %
Date: 2020-06-03 10:06:28 Functions: 7 7 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * clausesel.c
       4             :  *    Routines to compute clause selectivities
       5             :  *
       6             :  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/optimizer/path/clausesel.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "nodes/makefuncs.h"
      18             : #include "nodes/nodeFuncs.h"
      19             : #include "optimizer/clauses.h"
      20             : #include "optimizer/cost.h"
      21             : #include "optimizer/optimizer.h"
      22             : #include "optimizer/pathnode.h"
      23             : #include "optimizer/plancat.h"
      24             : #include "statistics/statistics.h"
      25             : #include "utils/fmgroids.h"
      26             : #include "utils/lsyscache.h"
      27             : #include "utils/selfuncs.h"
      28             : 
      29             : /*
      30             :  * Data structure for accumulating info about possible range-query
      31             :  * clause pairs in clauselist_selectivity.
      32             :  */
      33             : typedef struct RangeQueryClause
      34             : {
      35             :     struct RangeQueryClause *next;  /* next in linked list */
      36             :     Node       *var;            /* The common variable of the clauses */
      37             :     bool        have_lobound;   /* found a low-bound clause yet? */
      38             :     bool        have_hibound;   /* found a high-bound clause yet? */
      39             :     Selectivity lobound;        /* Selectivity of a var > something clause */
      40             :     Selectivity hibound;        /* Selectivity of a var < something clause */
      41             : } RangeQueryClause;
      42             : 
      43             : static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
      44             :                            bool varonleft, bool isLTsel, Selectivity s2);
      45             : static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
      46             :                                                List *clauses);
      47             : 
      48             : /****************************************************************************
      49             :  *      ROUTINES TO COMPUTE SELECTIVITIES
      50             :  ****************************************************************************/
      51             : 
      52             : /*
      53             :  * clauselist_selectivity -
      54             :  *    Compute the selectivity of an implicitly-ANDed list of boolean
      55             :  *    expression clauses.  The list can be empty, in which case 1.0
      56             :  *    must be returned.  List elements may be either RestrictInfos
      57             :  *    or bare expression clauses --- the former is preferred since
      58             :  *    it allows caching of results.
      59             :  *
      60             :  * See clause_selectivity() for the meaning of the additional parameters.
      61             :  *
      62             :  * The basic approach is to apply extended statistics first, on as many
      63             :  * clauses as possible, in order to capture cross-column dependencies etc.
      64             :  * The remaining clauses are then estimated using regular statistics tracked
      65             :  * for individual columns.  This is done by simply passing the clauses to
      66             :  * clauselist_selectivity_simple.
      67             :  */
      68             : Selectivity
      69     1444208 : clauselist_selectivity(PlannerInfo *root,
      70             :                        List *clauses,
      71             :                        int varRelid,
      72             :                        JoinType jointype,
      73             :                        SpecialJoinInfo *sjinfo)
      74             : {
      75     1444208 :     Selectivity s1 = 1.0;
      76             :     RelOptInfo *rel;
      77     1444208 :     Bitmapset  *estimatedclauses = NULL;
      78             : 
      79             :     /*
      80             :      * Determine if these clauses reference a single relation.  If so, and if
      81             :      * it has extended statistics, try to apply those.
      82             :      */
      83     1444208 :     rel = find_single_rel_for_clauses(root, clauses);
      84     1444208 :     if (rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
      85             :     {
      86             :         /*
      87             :          * Estimate as many clauses as possible using extended statistics.
      88             :          *
      89             :          * 'estimatedclauses' tracks the 0-based list position index of
      90             :          * clauses that we've estimated using extended statistics, and that
      91             :          * should be ignored.
      92             :          */
      93        1656 :         s1 *= statext_clauselist_selectivity(root, clauses, varRelid,
      94             :                                              jointype, sjinfo, rel,
      95             :                                              &estimatedclauses);
      96             :     }
      97             : 
      98             :     /*
      99             :      * Apply normal selectivity estimates for the remaining clauses, passing
     100             :      * 'estimatedclauses' so that it skips already estimated ones.
     101             :      */
     102     1444208 :     return s1 * clauselist_selectivity_simple(root, clauses, varRelid,
     103             :                                               jointype, sjinfo,
     104             :                                               estimatedclauses);
     105             : }
     106             : 
     107             : /*
     108             :  * clauselist_selectivity_simple -
     109             :  *    Compute the selectivity of an implicitly-ANDed list of boolean
     110             :  *    expression clauses.  The list can be empty, in which case 1.0
     111             :  *    must be returned.  List elements may be either RestrictInfos
     112             :  *    or bare expression clauses --- the former is preferred since
     113             :  *    it allows caching of results.  The estimatedclauses bitmap tracks
     114             :  *    clauses that have already been estimated by other means.
     115             :  *
     116             :  * See clause_selectivity() for the meaning of the additional parameters.
     117             :  *
     118             :  * Our basic approach is to take the product of the selectivities of the
     119             :  * subclauses.  However, that's only right if the subclauses have independent
     120             :  * probabilities, and in reality they are often NOT independent.  So,
     121             :  * we want to be smarter where we can.
     122             :  *
     123             :  * We also recognize "range queries", such as "x > 34 AND x < 42".  Clauses
     124             :  * are recognized as possible range query components if they are restriction
     125             :  * opclauses whose operators have scalarltsel or a related function as their
     126             :  * restriction selectivity estimator.  We pair up clauses of this form that
     127             :  * refer to the same variable.  An unpairable clause of this kind is simply
     128             :  * multiplied into the selectivity product in the normal way.  But when we
     129             :  * find a pair, we know that the selectivities represent the relative
     130             :  * positions of the low and high bounds within the column's range, so instead
     131             :  * of figuring the selectivity as hisel * losel, we can figure it as hisel +
     132             :  * losel - 1.  (To visualize this, see that hisel is the fraction of the range
     133             :  * below the high bound, while losel is the fraction above the low bound; so
     134             :  * hisel can be interpreted directly as a 0..1 value but we need to convert
     135             :  * losel to 1-losel before interpreting it as a value.  Then the available
     136             :  * range is 1-losel to hisel.  However, this calculation double-excludes
     137             :  * nulls, so really we need hisel + losel + null_frac - 1.)
     138             :  *
     139             :  * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
     140             :  * and instead use DEFAULT_RANGE_INEQ_SEL.  The same applies if the equation
     141             :  * yields an impossible (negative) result.
     142             :  *
     143             :  * A free side-effect is that we can recognize redundant inequalities such
     144             :  * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
     145             :  *
     146             :  * Of course this is all very dependent on the behavior of the inequality
     147             :  * selectivity functions; perhaps some day we can generalize the approach.
     148             :  */
     149             : Selectivity
     150     1446008 : clauselist_selectivity_simple(PlannerInfo *root,
     151             :                               List *clauses,
     152             :                               int varRelid,
     153             :                               JoinType jointype,
     154             :                               SpecialJoinInfo *sjinfo,
     155             :                               Bitmapset *estimatedclauses)
     156             : {
     157     1446008 :     Selectivity s1 = 1.0;
     158     1446008 :     RangeQueryClause *rqlist = NULL;
     159             :     ListCell   *l;
     160             :     int         listidx;
     161             : 
     162             :     /*
     163             :      * If there's exactly one clause (and it was not estimated yet), just go
     164             :      * directly to clause_selectivity(). None of what we might do below is
     165             :      * relevant.
     166             :      */
     167     2254238 :     if ((list_length(clauses) == 1) &&
     168      808230 :         bms_num_members(estimatedclauses) == 0)
     169      808218 :         return clause_selectivity(root, (Node *) linitial(clauses),
     170             :                                   varRelid, jointype, sjinfo);
     171             : 
     172             :     /*
     173             :      * Anything that doesn't look like a potential rangequery clause gets
     174             :      * multiplied into s1 and forgotten. Anything that does gets inserted into
     175             :      * an rqlist entry.
     176             :      */
     177      637790 :     listidx = -1;
     178     1056774 :     foreach(l, clauses)
     179             :     {
     180      418984 :         Node       *clause = (Node *) lfirst(l);
     181             :         RestrictInfo *rinfo;
     182             :         Selectivity s2;
     183             : 
     184      418984 :         listidx++;
     185             : 
     186             :         /*
     187             :          * Skip this clause if it's already been estimated by some other
     188             :          * statistics above.
     189             :          */
     190      418984 :         if (bms_is_member(listidx, estimatedclauses))
     191        2020 :             continue;
     192             : 
     193             :         /* Always compute the selectivity using clause_selectivity */
     194      416964 :         s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo);
     195             : 
     196             :         /*
     197             :          * Check for being passed a RestrictInfo.
     198             :          *
     199             :          * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
     200             :          * 0.0; just use that rather than looking for range pairs.
     201             :          */
     202      416964 :         if (IsA(clause, RestrictInfo))
     203             :         {
     204      416084 :             rinfo = (RestrictInfo *) clause;
     205      416084 :             if (rinfo->pseudoconstant)
     206             :             {
     207        2860 :                 s1 = s1 * s2;
     208        2860 :                 continue;
     209             :             }
     210      413224 :             clause = (Node *) rinfo->clause;
     211             :         }
     212             :         else
     213         880 :             rinfo = NULL;
     214             : 
     215             :         /*
     216             :          * See if it looks like a restriction clause with a pseudoconstant on
     217             :          * one side.  (Anything more complicated than that might not behave in
     218             :          * the simple way we are expecting.)  Most of the tests here can be
     219             :          * done more efficiently with rinfo than without.
     220             :          */
     221      414104 :         if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
     222             :         {
     223      376098 :             OpExpr     *expr = (OpExpr *) clause;
     224      376098 :             bool        varonleft = true;
     225             :             bool        ok;
     226             : 
     227      376098 :             if (rinfo)
     228             :             {
     229      629958 :                 ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
     230      249330 :                     (is_pseudo_constant_clause_relids(lsecond(expr->args),
     231        5394 :                                                       rinfo->right_relids) ||
     232        5394 :                      (varonleft = false,
     233        5394 :                       is_pseudo_constant_clause_relids(linitial(expr->args),
     234             :                                                        rinfo->left_relids)));
     235             :             }
     236             :             else
     237             :             {
     238        1728 :                 ok = (NumRelids(clause) == 1) &&
     239         864 :                     (is_pseudo_constant_clause(lsecond(expr->args)) ||
     240           0 :                      (varonleft = false,
     241           0 :                       is_pseudo_constant_clause(linitial(expr->args))));
     242             :             }
     243             : 
     244      376098 :             if (ok)
     245             :             {
     246             :                 /*
     247             :                  * If it's not a "<"/"<="/">"/">=" operator, just merge the
     248             :                  * selectivity in generically.  But if it's the right oprrest,
     249             :                  * add the clause to rqlist for later processing.
     250             :                  */
     251      249936 :                 switch (get_oprrest(expr->opno))
     252             :                 {
     253        8614 :                     case F_SCALARLTSEL:
     254             :                     case F_SCALARLESEL:
     255        8614 :                         addRangeClause(&rqlist, clause,
     256             :                                        varonleft, true, s2);
     257        8614 :                         break;
     258       46372 :                     case F_SCALARGTSEL:
     259             :                     case F_SCALARGESEL:
     260       46372 :                         addRangeClause(&rqlist, clause,
     261             :                                        varonleft, false, s2);
     262       46372 :                         break;
     263      194950 :                     default:
     264             :                         /* Just merge the selectivity in generically */
     265      194950 :                         s1 = s1 * s2;
     266      194950 :                         break;
     267             :                 }
     268      249936 :                 continue;       /* drop to loop bottom */
     269             :             }
     270             :         }
     271             : 
     272             :         /* Not the right form, so treat it generically. */
     273      164168 :         s1 = s1 * s2;
     274             :     }
     275             : 
     276             :     /*
     277             :      * Now scan the rangequery pair list.
     278             :      */
     279      687040 :     while (rqlist != NULL)
     280             :     {
     281             :         RangeQueryClause *rqnext;
     282             : 
     283       49250 :         if (rqlist->have_lobound && rqlist->have_hibound)
     284        5528 :         {
     285             :             /* Successfully matched a pair of range clauses */
     286             :             Selectivity s2;
     287             : 
     288             :             /*
     289             :              * Exact equality to the default value probably means the
     290             :              * selectivity function punted.  This is not airtight but should
     291             :              * be good enough.
     292             :              */
     293        5528 :             if (rqlist->hibound == DEFAULT_INEQ_SEL ||
     294        3614 :                 rqlist->lobound == DEFAULT_INEQ_SEL)
     295             :             {
     296        1914 :                 s2 = DEFAULT_RANGE_INEQ_SEL;
     297             :             }
     298             :             else
     299             :             {
     300        3614 :                 s2 = rqlist->hibound + rqlist->lobound - 1.0;
     301             : 
     302             :                 /* Adjust for double-exclusion of NULLs */
     303        3614 :                 s2 += nulltestsel(root, IS_NULL, rqlist->var,
     304             :                                   varRelid, jointype, sjinfo);
     305             : 
     306             :                 /*
     307             :                  * A zero or slightly negative s2 should be converted into a
     308             :                  * small positive value; we probably are dealing with a very
     309             :                  * tight range and got a bogus result due to roundoff errors.
     310             :                  * However, if s2 is very negative, then we probably have
     311             :                  * default selectivity estimates on one or both sides of the
     312             :                  * range that we failed to recognize above for some reason.
     313             :                  */
     314        3614 :                 if (s2 <= 0.0)
     315             :                 {
     316         468 :                     if (s2 < -0.01)
     317             :                     {
     318             :                         /*
     319             :                          * No data available --- use a default estimate that
     320             :                          * is small, but not real small.
     321             :                          */
     322           0 :                         s2 = DEFAULT_RANGE_INEQ_SEL;
     323             :                     }
     324             :                     else
     325             :                     {
     326             :                         /*
     327             :                          * It's just roundoff error; use a small positive
     328             :                          * value
     329             :                          */
     330         468 :                         s2 = 1.0e-10;
     331             :                     }
     332             :                 }
     333             :             }
     334             :             /* Merge in the selectivity of the pair of clauses */
     335        5528 :             s1 *= s2;
     336             :         }
     337             :         else
     338             :         {
     339             :             /* Only found one of a pair, merge it in generically */
     340       43722 :             if (rqlist->have_lobound)
     341       40668 :                 s1 *= rqlist->lobound;
     342             :             else
     343        3054 :                 s1 *= rqlist->hibound;
     344             :         }
     345             :         /* release storage and advance */
     346       49250 :         rqnext = rqlist->next;
     347       49250 :         pfree(rqlist);
     348       49250 :         rqlist = rqnext;
     349             :     }
     350             : 
     351      637790 :     return s1;
     352             : }
     353             : 
     354             : /*
     355             :  * addRangeClause --- add a new range clause for clauselist_selectivity
     356             :  *
     357             :  * Here is where we try to match up pairs of range-query clauses
     358             :  */
     359             : static void
     360       54986 : addRangeClause(RangeQueryClause **rqlist, Node *clause,
     361             :                bool varonleft, bool isLTsel, Selectivity s2)
     362             : {
     363             :     RangeQueryClause *rqelem;
     364             :     Node       *var;
     365             :     bool        is_lobound;
     366             : 
     367       54986 :     if (varonleft)
     368             :     {
     369       54902 :         var = get_leftop((Expr *) clause);
     370       54902 :         is_lobound = !isLTsel;  /* x < something is high bound */
     371             :     }
     372             :     else
     373             :     {
     374          84 :         var = get_rightop((Expr *) clause);
     375          84 :         is_lobound = isLTsel;   /* something < x is low bound */
     376             :     }
     377             : 
     378       55854 :     for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
     379             :     {
     380             :         /*
     381             :          * We use full equal() here because the "var" might be a function of
     382             :          * one or more attributes of the same relation...
     383             :          */
     384        6604 :         if (!equal(var, rqelem->var))
     385         868 :             continue;
     386             :         /* Found the right group to put this clause in */
     387        5736 :         if (is_lobound)
     388             :         {
     389         184 :             if (!rqelem->have_lobound)
     390             :             {
     391          68 :                 rqelem->have_lobound = true;
     392          68 :                 rqelem->lobound = s2;
     393             :             }
     394             :             else
     395             :             {
     396             : 
     397             :                 /*------
     398             :                  * We have found two similar clauses, such as
     399             :                  * x < y AND x <= z.
     400             :                  * Keep only the more restrictive one.
     401             :                  *------
     402             :                  */
     403         116 :                 if (rqelem->lobound > s2)
     404           0 :                     rqelem->lobound = s2;
     405             :             }
     406             :         }
     407             :         else
     408             :         {
     409        5552 :             if (!rqelem->have_hibound)
     410             :             {
     411        5460 :                 rqelem->have_hibound = true;
     412        5460 :                 rqelem->hibound = s2;
     413             :             }
     414             :             else
     415             :             {
     416             : 
     417             :                 /*------
     418             :                  * We have found two similar clauses, such as
     419             :                  * x > y AND x >= z.
     420             :                  * Keep only the more restrictive one.
     421             :                  *------
     422             :                  */
     423          92 :                 if (rqelem->hibound > s2)
     424          24 :                     rqelem->hibound = s2;
     425             :             }
     426             :         }
     427        5736 :         return;
     428             :     }
     429             : 
     430             :     /* No matching var found, so make a new clause-pair data structure */
     431       49250 :     rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
     432       49250 :     rqelem->var = var;
     433       49250 :     if (is_lobound)
     434             :     {
     435       46128 :         rqelem->have_lobound = true;
     436       46128 :         rqelem->have_hibound = false;
     437       46128 :         rqelem->lobound = s2;
     438             :     }
     439             :     else
     440             :     {
     441        3122 :         rqelem->have_lobound = false;
     442        3122 :         rqelem->have_hibound = true;
     443        3122 :         rqelem->hibound = s2;
     444             :     }
     445       49250 :     rqelem->next = *rqlist;
     446       49250 :     *rqlist = rqelem;
     447             : }
     448             : 
     449             : /*
     450             :  * find_single_rel_for_clauses
     451             :  *      Examine each clause in 'clauses' and determine if all clauses
     452             :  *      reference only a single relation.  If so return that relation,
     453             :  *      otherwise return NULL.
     454             :  */
     455             : static RelOptInfo *
     456     1444208 : find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
     457             : {
     458     1444208 :     int         lastrelid = 0;
     459             :     ListCell   *l;
     460             : 
     461     2095506 :     foreach(l, clauses)
     462             :     {
     463     1132272 :         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
     464             :         int         relid;
     465             : 
     466             :         /*
     467             :          * If we have a list of bare clauses rather than RestrictInfos, we
     468             :          * could pull out their relids the hard way with pull_varnos().
     469             :          * However, currently the extended-stats machinery won't do anything
     470             :          * with non-RestrictInfo clauses anyway, so there's no point in
     471             :          * spending extra cycles; just fail if that's what we have.
     472             :          */
     473     1132272 :         if (!IsA(rinfo, RestrictInfo))
     474      480974 :             return NULL;
     475             : 
     476     1128550 :         if (bms_is_empty(rinfo->clause_relids))
     477        3224 :             continue;           /* we can ignore variable-free clauses */
     478     1125326 :         if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
     479      476834 :             return NULL;        /* multiple relations in this clause */
     480      648492 :         if (lastrelid == 0)
     481      539070 :             lastrelid = relid;  /* first clause referencing a relation */
     482      109422 :         else if (relid != lastrelid)
     483         418 :             return NULL;        /* relation not same as last one */
     484             :     }
     485             : 
     486      963234 :     if (lastrelid != 0)
     487      515902 :         return find_base_rel(root, lastrelid);
     488             : 
     489      447332 :     return NULL;                /* no clauses */
     490             : }
     491             : 
     492             : /*
     493             :  * bms_is_subset_singleton
     494             :  *
     495             :  * Same result as bms_is_subset(s, bms_make_singleton(x)),
     496             :  * but a little faster and doesn't leak memory.
     497             :  *
     498             :  * Is this of use anywhere else?  If so move to bitmapset.c ...
     499             :  */
     500             : static bool
     501      631874 : bms_is_subset_singleton(const Bitmapset *s, int x)
     502             : {
     503      631874 :     switch (bms_membership(s))
     504             :     {
     505           0 :         case BMS_EMPTY_SET:
     506           0 :             return true;
     507      479348 :         case BMS_SINGLETON:
     508      479348 :             return bms_is_member(x, s);
     509      152526 :         case BMS_MULTIPLE:
     510      152526 :             return false;
     511             :     }
     512             :     /* can't get here... */
     513           0 :     return false;
     514             : }
     515             : 
     516             : /*
     517             :  * treat_as_join_clause -
     518             :  *    Decide whether an operator clause is to be handled by the
     519             :  *    restriction or join estimator.  Subroutine for clause_selectivity().
     520             :  */
     521             : static inline bool
     522      495018 : treat_as_join_clause(Node *clause, RestrictInfo *rinfo,
     523             :                      int varRelid, SpecialJoinInfo *sjinfo)
     524             : {
     525      495018 :     if (varRelid != 0)
     526             :     {
     527             :         /*
     528             :          * Caller is forcing restriction mode (eg, because we are examining an
     529             :          * inner indexscan qual).
     530             :          */
     531      158754 :         return false;
     532             :     }
     533      336264 :     else if (sjinfo == NULL)
     534             :     {
     535             :         /*
     536             :          * It must be a restriction clause, since it's being evaluated at a
     537             :          * scan node.
     538             :          */
     539      198832 :         return false;
     540             :     }
     541             :     else
     542             :     {
     543             :         /*
     544             :          * Otherwise, it's a join if there's more than one relation used. We
     545             :          * can optimize this calculation if an rinfo was passed.
     546             :          *
     547             :          * XXX  Since we know the clause is being evaluated at a join, the
     548             :          * only way it could be single-relation is if it was delayed by outer
     549             :          * joins.  Although we can make use of the restriction qual estimators
     550             :          * anyway, it seems likely that we ought to account for the
     551             :          * probability of injected nulls somehow.
     552             :          */
     553      137432 :         if (rinfo)
     554      137116 :             return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
     555             :         else
     556         316 :             return (NumRelids(clause) > 1);
     557             :     }
     558             : }
     559             : 
     560             : 
     561             : /*
     562             :  * clause_selectivity -
     563             :  *    Compute the selectivity of a general boolean expression clause.
     564             :  *
     565             :  * The clause can be either a RestrictInfo or a plain expression.  If it's
     566             :  * a RestrictInfo, we try to cache the selectivity for possible re-use,
     567             :  * so passing RestrictInfos is preferred.
     568             :  *
     569             :  * varRelid is either 0 or a rangetable index.
     570             :  *
     571             :  * When varRelid is not 0, only variables belonging to that relation are
     572             :  * considered in computing selectivity; other vars are treated as constants
     573             :  * of unknown values.  This is appropriate for estimating the selectivity of
     574             :  * a join clause that is being used as a restriction clause in a scan of a
     575             :  * nestloop join's inner relation --- varRelid should then be the ID of the
     576             :  * inner relation.
     577             :  *
     578             :  * When varRelid is 0, all variables are treated as variables.  This
     579             :  * is appropriate for ordinary join clauses and restriction clauses.
     580             :  *
     581             :  * jointype is the join type, if the clause is a join clause.  Pass JOIN_INNER
     582             :  * if the clause isn't a join clause.
     583             :  *
     584             :  * sjinfo is NULL for a non-join clause, otherwise it provides additional
     585             :  * context information about the join being performed.  There are some
     586             :  * special cases:
     587             :  *  1. For a special (not INNER) join, sjinfo is always a member of
     588             :  *     root->join_info_list.
     589             :  *  2. For an INNER join, sjinfo is just a transient struct, and only the
     590             :  *     relids and jointype fields in it can be trusted.
     591             :  * It is possible for jointype to be different from sjinfo->jointype.
     592             :  * This indicates we are considering a variant join: either with
     593             :  * the LHS and RHS switched, or with one input unique-ified.
     594             :  *
     595             :  * Note: when passing nonzero varRelid, it's normally appropriate to set
     596             :  * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
     597             :  * join clause; because we aren't treating it as a join clause.
     598             :  */
     599             : Selectivity
     600     1523842 : clause_selectivity(PlannerInfo *root,
     601             :                    Node *clause,
     602             :                    int varRelid,
     603             :                    JoinType jointype,
     604             :                    SpecialJoinInfo *sjinfo)
     605             : {
     606     1523842 :     Selectivity s1 = 0.5;       /* default for any unhandled clause type */
     607     1523842 :     RestrictInfo *rinfo = NULL;
     608     1523842 :     bool        cacheable = false;
     609             : 
     610     1523842 :     if (clause == NULL)         /* can this still happen? */
     611           0 :         return s1;
     612             : 
     613     1523842 :     if (IsA(clause, RestrictInfo))
     614             :     {
     615     1512918 :         rinfo = (RestrictInfo *) clause;
     616             : 
     617             :         /*
     618             :          * If the clause is marked pseudoconstant, then it will be used as a
     619             :          * gating qual and should not affect selectivity estimates; hence
     620             :          * return 1.0.  The only exception is that a constant FALSE may be
     621             :          * taken as having selectivity 0.0, since it will surely mean no rows
     622             :          * out of the plan.  This case is simple enough that we need not
     623             :          * bother caching the result.
     624             :          */
     625     1512918 :         if (rinfo->pseudoconstant)
     626             :         {
     627        3012 :             if (!IsA(rinfo->clause, Const))
     628        2920 :                 return (Selectivity) 1.0;
     629             :         }
     630             : 
     631             :         /*
     632             :          * If the clause is marked redundant, always return 1.0.
     633             :          */
     634     1509998 :         if (rinfo->norm_selec > 1)
     635       47684 :             return (Selectivity) 1.0;
     636             : 
     637             :         /*
     638             :          * If possible, cache the result of the selectivity calculation for
     639             :          * the clause.  We can cache if varRelid is zero or the clause
     640             :          * contains only vars of that relid --- otherwise varRelid will affect
     641             :          * the result, so mustn't cache.  Outer join quals might be examined
     642             :          * with either their join's actual jointype or JOIN_INNER, so we need
     643             :          * two cache variables to remember both cases.  Note: we assume the
     644             :          * result won't change if we are switching the input relations or
     645             :          * considering a unique-ified case, so we only need one cache variable
     646             :          * for all non-JOIN_INNER cases.
     647             :          */
     648     2094188 :         if (varRelid == 0 ||
     649      631874 :             bms_is_subset_singleton(rinfo->clause_relids, varRelid))
     650             :         {
     651             :             /* Cacheable --- do we already have the result? */
     652     1308968 :             if (jointype == JOIN_INNER)
     653             :             {
     654     1113556 :                 if (rinfo->norm_selec >= 0)
     655      814578 :                     return rinfo->norm_selec;
     656             :             }
     657             :             else
     658             :             {
     659      195412 :                 if (rinfo->outer_selec >= 0)
     660      121746 :                     return rinfo->outer_selec;
     661             :             }
     662      372644 :             cacheable = true;
     663             :         }
     664             : 
     665             :         /*
     666             :          * Proceed with examination of contained clause.  If the clause is an
     667             :          * OR-clause, we want to look at the variant with sub-RestrictInfos,
     668             :          * so that per-subclause selectivities can be cached.
     669             :          */
     670      525990 :         if (rinfo->orclause)
     671        9164 :             clause = (Node *) rinfo->orclause;
     672             :         else
     673      516826 :             clause = (Node *) rinfo->clause;
     674             :     }
     675             : 
     676      536914 :     if (IsA(clause, Var))
     677             :     {
     678       14044 :         Var        *var = (Var *) clause;
     679             : 
     680             :         /*
     681             :          * We probably shouldn't ever see an uplevel Var here, but if we do,
     682             :          * return the default selectivity...
     683             :          */
     684       14044 :         if (var->varlevelsup == 0 &&
     685          24 :             (varRelid == 0 || varRelid == (int) var->varno))
     686             :         {
     687             :             /* Use the restriction selectivity function for a bool Var */
     688       14040 :             s1 = boolvarsel(root, (Node *) var, varRelid);
     689             :         }
     690             :     }
     691      522870 :     else if (IsA(clause, Const))
     692             :     {
     693             :         /* bool constant is pretty easy... */
     694         132 :         Const      *con = (Const *) clause;
     695             : 
     696         264 :         s1 = con->constisnull ? 0.0 :
     697         132 :             DatumGetBool(con->constvalue) ? 1.0 : 0.0;
     698             :     }
     699      522738 :     else if (IsA(clause, Param))
     700             :     {
     701             :         /* see if we can replace the Param */
     702          16 :         Node       *subst = estimate_expression_value(root, clause);
     703             : 
     704          16 :         if (IsA(subst, Const))
     705             :         {
     706             :             /* bool constant is pretty easy... */
     707           0 :             Const      *con = (Const *) subst;
     708             : 
     709           0 :             s1 = con->constisnull ? 0.0 :
     710           0 :                 DatumGetBool(con->constvalue) ? 1.0 : 0.0;
     711             :         }
     712             :         else
     713             :         {
     714             :             /* XXX any way to do better than default? */
     715             :         }
     716             :     }
     717      522722 :     else if (is_notclause(clause))
     718             :     {
     719             :         /* inverse of the selectivity of the underlying clause */
     720        5476 :         s1 = 1.0 - clause_selectivity(root,
     721        5476 :                                       (Node *) get_notclausearg((Expr *) clause),
     722             :                                       varRelid,
     723             :                                       jointype,
     724             :                                       sjinfo);
     725             :     }
     726      517246 :     else if (is_andclause(clause))
     727             :     {
     728             :         /* share code with clauselist_selectivity() */
     729        1438 :         s1 = clauselist_selectivity(root,
     730             :                                     ((BoolExpr *) clause)->args,
     731             :                                     varRelid,
     732             :                                     jointype,
     733             :                                     sjinfo);
     734             :     }
     735      515808 :     else if (is_orclause(clause))
     736             :     {
     737             :         /*
     738             :          * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to
     739             :          * account for the probable overlap of selected tuple sets.
     740             :          *
     741             :          * XXX is this too conservative?
     742             :          */
     743             :         ListCell   *arg;
     744             : 
     745        9200 :         s1 = 0.0;
     746       30036 :         foreach(arg, ((BoolExpr *) clause)->args)
     747             :         {
     748       20836 :             Selectivity s2 = clause_selectivity(root,
     749       20836 :                                                 (Node *) lfirst(arg),
     750             :                                                 varRelid,
     751             :                                                 jointype,
     752             :                                                 sjinfo);
     753             : 
     754       20836 :             s1 = s1 + s2 - s1 * s2;
     755             :         }
     756             :     }
     757      506608 :     else if (is_opclause(clause) || IsA(clause, DistinctExpr))
     758      474688 :     {
     759      474688 :         OpExpr     *opclause = (OpExpr *) clause;
     760      474688 :         Oid         opno = opclause->opno;
     761             : 
     762      474688 :         if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
     763             :         {
     764             :             /* Estimate selectivity for a join clause. */
     765      127704 :             s1 = join_selectivity(root, opno,
     766             :                                   opclause->args,
     767             :                                   opclause->inputcollid,
     768             :                                   jointype,
     769             :                                   sjinfo);
     770             :         }
     771             :         else
     772             :         {
     773             :             /* Estimate selectivity for a restriction clause. */
     774      346984 :             s1 = restriction_selectivity(root, opno,
     775             :                                          opclause->args,
     776             :                                          opclause->inputcollid,
     777             :                                          varRelid);
     778             :         }
     779             : 
     780             :         /*
     781             :          * DistinctExpr has the same representation as OpExpr, but the
     782             :          * contained operator is "=" not "<>", so we must negate the result.
     783             :          * This estimation method doesn't give the right behavior for nulls,
     784             :          * but it's better than doing nothing.
     785             :          */
     786      474688 :         if (IsA(clause, DistinctExpr))
     787         418 :             s1 = 1.0 - s1;
     788             :     }
     789       31920 :     else if (is_funcclause(clause))
     790             :     {
     791        5800 :         FuncExpr   *funcclause = (FuncExpr *) clause;
     792             : 
     793             :         /* Try to get an estimate from the support function, if any */
     794        5800 :         s1 = function_selectivity(root,
     795             :                                   funcclause->funcid,
     796             :                                   funcclause->args,
     797             :                                   funcclause->inputcollid,
     798        5800 :                                   treat_as_join_clause(clause, rinfo,
     799             :                                                        varRelid, sjinfo),
     800             :                                   varRelid,
     801             :                                   jointype,
     802             :                                   sjinfo);
     803             :     }
     804       26120 :     else if (IsA(clause, ScalarArrayOpExpr))
     805             :     {
     806             :         /* Use node specific selectivity calculation function */
     807       14530 :         s1 = scalararraysel(root,
     808             :                             (ScalarArrayOpExpr *) clause,
     809       14530 :                             treat_as_join_clause(clause, rinfo,
     810             :                                                  varRelid, sjinfo),
     811             :                             varRelid,
     812             :                             jointype,
     813             :                             sjinfo);
     814             :     }
     815       11590 :     else if (IsA(clause, RowCompareExpr))
     816             :     {
     817             :         /* Use node specific selectivity calculation function */
     818         104 :         s1 = rowcomparesel(root,
     819             :                            (RowCompareExpr *) clause,
     820             :                            varRelid,
     821             :                            jointype,
     822             :                            sjinfo);
     823             :     }
     824       11486 :     else if (IsA(clause, NullTest))
     825             :     {
     826             :         /* Use node specific selectivity calculation function */
     827        9698 :         s1 = nulltestsel(root,
     828             :                          ((NullTest *) clause)->nulltesttype,
     829        9698 :                          (Node *) ((NullTest *) clause)->arg,
     830             :                          varRelid,
     831             :                          jointype,
     832             :                          sjinfo);
     833             :     }
     834        1788 :     else if (IsA(clause, BooleanTest))
     835             :     {
     836             :         /* Use node specific selectivity calculation function */
     837         132 :         s1 = booltestsel(root,
     838             :                          ((BooleanTest *) clause)->booltesttype,
     839         132 :                          (Node *) ((BooleanTest *) clause)->arg,
     840             :                          varRelid,
     841             :                          jointype,
     842             :                          sjinfo);
     843             :     }
     844        1656 :     else if (IsA(clause, CurrentOfExpr))
     845             :     {
     846             :         /* CURRENT OF selects at most one row of its table */
     847         384 :         CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
     848         384 :         RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
     849             : 
     850         384 :         if (crel->tuples > 0)
     851         364 :             s1 = 1.0 / crel->tuples;
     852             :     }
     853        1272 :     else if (IsA(clause, RelabelType))
     854             :     {
     855             :         /* Not sure this case is needed, but it can't hurt */
     856           0 :         s1 = clause_selectivity(root,
     857           0 :                                 (Node *) ((RelabelType *) clause)->arg,
     858             :                                 varRelid,
     859             :                                 jointype,
     860             :                                 sjinfo);
     861             :     }
     862        1272 :     else if (IsA(clause, CoerceToDomain))
     863             :     {
     864             :         /* Not sure this case is needed, but it can't hurt */
     865           0 :         s1 = clause_selectivity(root,
     866           0 :                                 (Node *) ((CoerceToDomain *) clause)->arg,
     867             :                                 varRelid,
     868             :                                 jointype,
     869             :                                 sjinfo);
     870             :     }
     871             :     else
     872             :     {
     873             :         /*
     874             :          * For anything else, see if we can consider it as a boolean variable.
     875             :          * This only works if it's an immutable expression in Vars of a single
     876             :          * relation; but there's no point in us checking that here because
     877             :          * boolvarsel() will do it internally, and return a suitable default
     878             :          * selectivity if not.
     879             :          */
     880        1272 :         s1 = boolvarsel(root, clause, varRelid);
     881             :     }
     882             : 
     883             :     /* Cache the result if possible */
     884      536914 :     if (cacheable)
     885             :     {
     886      372644 :         if (jointype == JOIN_INNER)
     887      298978 :             rinfo->norm_selec = s1;
     888             :         else
     889       73666 :             rinfo->outer_selec = s1;
     890             :     }
     891             : 
     892             : #ifdef SELECTIVITY_DEBUG
     893             :     elog(DEBUG4, "clause_selectivity: s1 %f", s1);
     894             : #endif                          /* SELECTIVITY_DEBUG */
     895             : 
     896      536914 :     return s1;
     897             : }

Generated by: LCOV version 1.13