LCOV - code coverage report
Current view: top level - src/backend/optimizer/path - clausesel.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 210 224 93.8 %
Date: 2019-09-19 16:06:56 Functions: 7 7 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * clausesel.c
       4             :  *    Routines to compute clause selectivities
       5             :  *
       6             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/optimizer/path/clausesel.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "nodes/makefuncs.h"
      18             : #include "nodes/nodeFuncs.h"
      19             : #include "optimizer/clauses.h"
      20             : #include "optimizer/cost.h"
      21             : #include "optimizer/optimizer.h"
      22             : #include "optimizer/pathnode.h"
      23             : #include "optimizer/plancat.h"
      24             : #include "utils/fmgroids.h"
      25             : #include "utils/lsyscache.h"
      26             : #include "utils/selfuncs.h"
      27             : #include "statistics/statistics.h"
      28             : 
      29             : 
      30             : /*
      31             :  * Data structure for accumulating info about possible range-query
      32             :  * clause pairs in clauselist_selectivity.
      33             :  */
      34             : typedef struct RangeQueryClause
      35             : {
      36             :     struct RangeQueryClause *next;  /* next in linked list */
      37             :     Node       *var;            /* The common variable of the clauses */
      38             :     bool        have_lobound;   /* found a low-bound clause yet? */
      39             :     bool        have_hibound;   /* found a high-bound clause yet? */
      40             :     Selectivity lobound;        /* Selectivity of a var > something clause */
      41             :     Selectivity hibound;        /* Selectivity of a var < something clause */
      42             : } RangeQueryClause;
      43             : 
      44             : static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
      45             :                            bool varonleft, bool isLTsel, Selectivity s2);
      46             : static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
      47             :                                                List *clauses);
      48             : 
      49             : /****************************************************************************
      50             :  *      ROUTINES TO COMPUTE SELECTIVITIES
      51             :  ****************************************************************************/
      52             : 
      53             : /*
      54             :  * clauselist_selectivity -
      55             :  *    Compute the selectivity of an implicitly-ANDed list of boolean
      56             :  *    expression clauses.  The list can be empty, in which case 1.0
      57             :  *    must be returned.  List elements may be either RestrictInfos
      58             :  *    or bare expression clauses --- the former is preferred since
      59             :  *    it allows caching of results.
      60             :  *
      61             :  * See clause_selectivity() for the meaning of the additional parameters.
      62             :  *
      63             :  * The basic approach is to apply extended statistics first, on as many
      64             :  * clauses as possible, in order to capture cross-column dependencies etc.
      65             :  * The remaining clauses are then estimated using regular statistics tracked
      66             :  * for individual columns.  This is done by simply passing the clauses to
      67             :  * clauselist_selectivity_simple.
      68             :  */
      69             : Selectivity
      70     1320164 : clauselist_selectivity(PlannerInfo *root,
      71             :                        List *clauses,
      72             :                        int varRelid,
      73             :                        JoinType jointype,
      74             :                        SpecialJoinInfo *sjinfo)
      75             : {
      76     1320164 :     Selectivity s1 = 1.0;
      77             :     RelOptInfo *rel;
      78     1320164 :     Bitmapset  *estimatedclauses = NULL;
      79             : 
      80             :     /*
      81             :      * Determine if these clauses reference a single relation.  If so, and if
      82             :      * it has extended statistics, try to apply those.
      83             :      */
      84     1320164 :     rel = find_single_rel_for_clauses(root, clauses);
      85     1320164 :     if (rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
      86             :     {
      87             :         /*
      88             :          * Estimate as many clauses as possible using extended statistics.
      89             :          *
      90             :          * 'estimatedclauses' tracks the 0-based list position index of
      91             :          * clauses that we've estimated using extended statistics, and that
      92             :          * should be ignored.
      93             :          */
      94         228 :         s1 *= statext_clauselist_selectivity(root, clauses, varRelid,
      95             :                                              jointype, sjinfo, rel,
      96             :                                              &estimatedclauses);
      97             :     }
      98             : 
      99             :     /*
     100             :      * Apply normal selectivity estimates for the remaining clauses, passing
     101             :      * 'estimatedclauses' so that it skips already estimated ones.
     102             :      */
     103     1320164 :     return s1 * clauselist_selectivity_simple(root, clauses, varRelid,
     104             :                                               jointype, sjinfo,
     105             :                                               estimatedclauses);
     106             : }
     107             : 
     108             : /*
     109             :  * clauselist_selectivity_simple -
     110             :  *    Compute the selectivity of an implicitly-ANDed list of boolean
     111             :  *    expression clauses.  The list can be empty, in which case 1.0
     112             :  *    must be returned.  List elements may be either RestrictInfos
     113             :  *    or bare expression clauses --- the former is preferred since
     114             :  *    it allows caching of results.  The estimatedclauses bitmap tracks
     115             :  *    clauses that have already been estimated by other means.
     116             :  *
     117             :  * See clause_selectivity() for the meaning of the additional parameters.
     118             :  *
     119             :  * Our basic approach is to take the product of the selectivities of the
     120             :  * subclauses.  However, that's only right if the subclauses have independent
     121             :  * probabilities, and in reality they are often NOT independent.  So,
     122             :  * we want to be smarter where we can.
     123             :  *
     124             :  * We also recognize "range queries", such as "x > 34 AND x < 42".  Clauses
     125             :  * are recognized as possible range query components if they are restriction
     126             :  * opclauses whose operators have scalarltsel or a related function as their
     127             :  * restriction selectivity estimator.  We pair up clauses of this form that
     128             :  * refer to the same variable.  An unpairable clause of this kind is simply
     129             :  * multiplied into the selectivity product in the normal way.  But when we
     130             :  * find a pair, we know that the selectivities represent the relative
     131             :  * positions of the low and high bounds within the column's range, so instead
     132             :  * of figuring the selectivity as hisel * losel, we can figure it as hisel +
     133             :  * losel - 1.  (To visualize this, see that hisel is the fraction of the range
     134             :  * below the high bound, while losel is the fraction above the low bound; so
     135             :  * hisel can be interpreted directly as a 0..1 value but we need to convert
     136             :  * losel to 1-losel before interpreting it as a value.  Then the available
     137             :  * range is 1-losel to hisel.  However, this calculation double-excludes
     138             :  * nulls, so really we need hisel + losel + null_frac - 1.)
     139             :  *
     140             :  * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
     141             :  * and instead use DEFAULT_RANGE_INEQ_SEL.  The same applies if the equation
     142             :  * yields an impossible (negative) result.
     143             :  *
     144             :  * A free side-effect is that we can recognize redundant inequalities such
     145             :  * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
     146             :  *
     147             :  * Of course this is all very dependent on the behavior of the inequality
     148             :  * selectivity functions; perhaps some day we can generalize the approach.
     149             :  */
     150             : Selectivity
     151     1320228 : clauselist_selectivity_simple(PlannerInfo *root,
     152             :                               List *clauses,
     153             :                               int varRelid,
     154             :                               JoinType jointype,
     155             :                               SpecialJoinInfo *sjinfo,
     156             :                               Bitmapset *estimatedclauses)
     157             : {
     158     1320228 :     Selectivity s1 = 1.0;
     159     1320228 :     RangeQueryClause *rqlist = NULL;
     160             :     ListCell   *l;
     161             :     int         listidx;
     162             : 
     163             :     /*
     164             :      * If there's exactly one clause (and it was not estimated yet), just go
     165             :      * directly to clause_selectivity(). None of what we might do below is
     166             :      * relevant.
     167             :      */
     168     2062352 :     if ((list_length(clauses) == 1) &&
     169      742124 :         bms_num_members(estimatedclauses) == 0)
     170      742120 :         return clause_selectivity(root, (Node *) linitial(clauses),
     171             :                                   varRelid, jointype, sjinfo);
     172             : 
     173             :     /*
     174             :      * Anything that doesn't look like a potential rangequery clause gets
     175             :      * multiplied into s1 and forgotten. Anything that does gets inserted into
     176             :      * an rqlist entry.
     177             :      */
     178      578108 :     listidx = -1;
     179      960708 :     foreach(l, clauses)
     180             :     {
     181      382600 :         Node       *clause = (Node *) lfirst(l);
     182             :         RestrictInfo *rinfo;
     183             :         Selectivity s2;
     184             : 
     185      382600 :         listidx++;
     186             : 
     187             :         /*
     188             :          * Skip this clause if it's already been estimated by some other
     189             :          * statistics above.
     190             :          */
     191      382600 :         if (bms_is_member(listidx, estimatedclauses))
     192         364 :             continue;
     193             : 
     194             :         /* Always compute the selectivity using clause_selectivity */
     195      382236 :         s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo);
     196             : 
     197             :         /*
     198             :          * Check for being passed a RestrictInfo.
     199             :          *
     200             :          * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
     201             :          * 0.0; just use that rather than looking for range pairs.
     202             :          */
     203      382236 :         if (IsA(clause, RestrictInfo))
     204             :         {
     205      381484 :             rinfo = (RestrictInfo *) clause;
     206      381484 :             if (rinfo->pseudoconstant)
     207             :             {
     208        2480 :                 s1 = s1 * s2;
     209        2480 :                 continue;
     210             :             }
     211      379004 :             clause = (Node *) rinfo->clause;
     212             :         }
     213             :         else
     214         752 :             rinfo = NULL;
     215             : 
     216             :         /*
     217             :          * See if it looks like a restriction clause with a pseudoconstant on
     218             :          * one side.  (Anything more complicated than that might not behave in
     219             :          * the simple way we are expecting.)  Most of the tests here can be
     220             :          * done more efficiently with rinfo than without.
     221             :          */
     222      379756 :         if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
     223             :         {
     224      346024 :             OpExpr     *expr = (OpExpr *) clause;
     225      346024 :             bool        varonleft = true;
     226             :             bool        ok;
     227             : 
     228      346024 :             if (rinfo)
     229             :             {
     230      810916 :                 ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
     231      232938 :                     (is_pseudo_constant_clause_relids(lsecond(expr->args),
     232        4856 :                                                       rinfo->right_relids) ||
     233        4856 :                      (varonleft = false,
     234        4856 :                       is_pseudo_constant_clause_relids(linitial(expr->args),
     235             :                                                        rinfo->left_relids)));
     236             :             }
     237             :             else
     238             :             {
     239        2208 :                 ok = (NumRelids(clause) == 1) &&
     240         736 :                     (is_pseudo_constant_clause(lsecond(expr->args)) ||
     241           0 :                      (varonleft = false,
     242           0 :                       is_pseudo_constant_clause(linitial(expr->args))));
     243             :             }
     244             : 
     245      346024 :             if (ok)
     246             :             {
     247             :                 /*
     248             :                  * If it's not a "<"/"<="/">"/">=" operator, just merge the
     249             :                  * selectivity in generically.  But if it's the right oprrest,
     250             :                  * add the clause to rqlist for later processing.
     251             :                  */
     252      233426 :                 switch (get_oprrest(expr->opno))
     253             :                 {
     254             :                     case F_SCALARLTSEL:
     255             :                     case F_SCALARLESEL:
     256        9470 :                         addRangeClause(&rqlist, clause,
     257             :                                        varonleft, true, s2);
     258        9470 :                         break;
     259             :                     case F_SCALARGTSEL:
     260             :                     case F_SCALARGESEL:
     261       43932 :                         addRangeClause(&rqlist, clause,
     262             :                                        varonleft, false, s2);
     263       43932 :                         break;
     264             :                     default:
     265             :                         /* Just merge the selectivity in generically */
     266      180024 :                         s1 = s1 * s2;
     267      180024 :                         break;
     268             :                 }
     269      233426 :                 continue;       /* drop to loop bottom */
     270             :             }
     271             :         }
     272             : 
     273             :         /* Not the right form, so treat it generically. */
     274      146330 :         s1 = s1 * s2;
     275             :     }
     276             : 
     277             :     /*
     278             :      * Now scan the rangequery pair list.
     279             :      */
     280     1203662 :     while (rqlist != NULL)
     281             :     {
     282             :         RangeQueryClause *rqnext;
     283             : 
     284       47446 :         if (rqlist->have_lobound && rqlist->have_hibound)
     285        5748 :         {
     286             :             /* Successfully matched a pair of range clauses */
     287             :             Selectivity s2;
     288             : 
     289             :             /*
     290             :              * Exact equality to the default value probably means the
     291             :              * selectivity function punted.  This is not airtight but should
     292             :              * be good enough.
     293             :              */
     294        8818 :             if (rqlist->hibound == DEFAULT_INEQ_SEL ||
     295        3070 :                 rqlist->lobound == DEFAULT_INEQ_SEL)
     296             :             {
     297        2678 :                 s2 = DEFAULT_RANGE_INEQ_SEL;
     298             :             }
     299             :             else
     300             :             {
     301        3070 :                 s2 = rqlist->hibound + rqlist->lobound - 1.0;
     302             : 
     303             :                 /* Adjust for double-exclusion of NULLs */
     304        3070 :                 s2 += nulltestsel(root, IS_NULL, rqlist->var,
     305             :                                   varRelid, jointype, sjinfo);
     306             : 
     307             :                 /*
     308             :                  * A zero or slightly negative s2 should be converted into a
     309             :                  * small positive value; we probably are dealing with a very
     310             :                  * tight range and got a bogus result due to roundoff errors.
     311             :                  * However, if s2 is very negative, then we probably have
     312             :                  * default selectivity estimates on one or both sides of the
     313             :                  * range that we failed to recognize above for some reason.
     314             :                  */
     315        3070 :                 if (s2 <= 0.0)
     316             :                 {
     317         388 :                     if (s2 < -0.01)
     318             :                     {
     319             :                         /*
     320             :                          * No data available --- use a default estimate that
     321             :                          * is small, but not real small.
     322             :                          */
     323           0 :                         s2 = DEFAULT_RANGE_INEQ_SEL;
     324             :                     }
     325             :                     else
     326             :                     {
     327             :                         /*
     328             :                          * It's just roundoff error; use a small positive
     329             :                          * value
     330             :                          */
     331         388 :                         s2 = 1.0e-10;
     332             :                     }
     333             :                 }
     334             :             }
     335             :             /* Merge in the selectivity of the pair of clauses */
     336        5748 :             s1 *= s2;
     337             :         }
     338             :         else
     339             :         {
     340             :             /* Only found one of a pair, merge it in generically */
     341       41698 :             if (rqlist->have_lobound)
     342       38080 :                 s1 *= rqlist->lobound;
     343             :             else
     344        3618 :                 s1 *= rqlist->hibound;
     345             :         }
     346             :         /* release storage and advance */
     347       47446 :         rqnext = rqlist->next;
     348       47446 :         pfree(rqlist);
     349       47446 :         rqlist = rqnext;
     350             :     }
     351             : 
     352      578108 :     return s1;
     353             : }
     354             : 
     355             : /*
     356             :  * addRangeClause --- add a new range clause for clauselist_selectivity
     357             :  *
     358             :  * Here is where we try to match up pairs of range-query clauses
     359             :  */
     360             : static void
     361       53402 : addRangeClause(RangeQueryClause **rqlist, Node *clause,
     362             :                bool varonleft, bool isLTsel, Selectivity s2)
     363             : {
     364             :     RangeQueryClause *rqelem;
     365             :     Node       *var;
     366             :     bool        is_lobound;
     367             : 
     368       53402 :     if (varonleft)
     369             :     {
     370       53390 :         var = get_leftop((Expr *) clause);
     371       53390 :         is_lobound = !isLTsel;  /* x < something is high bound */
     372             :     }
     373             :     else
     374             :     {
     375          12 :         var = get_rightop((Expr *) clause);
     376          12 :         is_lobound = isLTsel;   /* something < x is low bound */
     377             :     }
     378             : 
     379      109836 :     for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
     380             :     {
     381             :         /*
     382             :          * We use full equal() here because the "var" might be a function of
     383             :          * one or more attributes of the same relation...
     384             :          */
     385        7472 :         if (!equal(var, rqelem->var))
     386        1516 :             continue;
     387             :         /* Found the right group to put this clause in */
     388        5956 :         if (is_lobound)
     389             :         {
     390         184 :             if (!rqelem->have_lobound)
     391             :             {
     392          68 :                 rqelem->have_lobound = true;
     393          68 :                 rqelem->lobound = s2;
     394             :             }
     395             :             else
     396             :             {
     397             : 
     398             :                 /*------
     399             :                  * We have found two similar clauses, such as
     400             :                  * x < y AND x <= z.
     401             :                  * Keep only the more restrictive one.
     402             :                  *------
     403             :                  */
     404         116 :                 if (rqelem->lobound > s2)
     405           0 :                     rqelem->lobound = s2;
     406             :             }
     407             :         }
     408             :         else
     409             :         {
     410        5772 :             if (!rqelem->have_hibound)
     411             :             {
     412        5680 :                 rqelem->have_hibound = true;
     413        5680 :                 rqelem->hibound = s2;
     414             :             }
     415             :             else
     416             :             {
     417             : 
     418             :                 /*------
     419             :                  * We have found two similar clauses, such as
     420             :                  * x > y AND x >= z.
     421             :                  * Keep only the more restrictive one.
     422             :                  *------
     423             :                  */
     424          92 :                 if (rqelem->hibound > s2)
     425          24 :                     rqelem->hibound = s2;
     426             :             }
     427             :         }
     428        5956 :         return;
     429             :     }
     430             : 
     431             :     /* No matching var found, so make a new clause-pair data structure */
     432       47446 :     rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
     433       47446 :     rqelem->var = var;
     434       47446 :     if (is_lobound)
     435             :     {
     436       43760 :         rqelem->have_lobound = true;
     437       43760 :         rqelem->have_hibound = false;
     438       43760 :         rqelem->lobound = s2;
     439             :     }
     440             :     else
     441             :     {
     442        3686 :         rqelem->have_lobound = false;
     443        3686 :         rqelem->have_hibound = true;
     444        3686 :         rqelem->hibound = s2;
     445             :     }
     446       47446 :     rqelem->next = *rqlist;
     447       47446 :     *rqlist = rqelem;
     448             : }
     449             : 
     450             : /*
     451             :  * find_single_rel_for_clauses
     452             :  *      Examine each clause in 'clauses' and determine if all clauses
     453             :  *      reference only a single relation.  If so return that relation,
     454             :  *      otherwise return NULL.
     455             :  */
     456             : static RelOptInfo *
     457     1320164 : find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
     458             : {
     459     1320164 :     int         lastrelid = 0;
     460             :     ListCell   *l;
     461             : 
     462     1917692 :     foreach(l, clauses)
     463             :     {
     464     1041008 :         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
     465             :         int         relid;
     466             : 
     467             :         /*
     468             :          * If we have a list of bare clauses rather than RestrictInfos, we
     469             :          * could pull out their relids the hard way with pull_varnos().
     470             :          * However, currently the extended-stats machinery won't do anything
     471             :          * with non-RestrictInfo clauses anyway, so there's no point in
     472             :          * spending extra cycles; just fail if that's what we have.
     473             :          */
     474     1041008 :         if (!IsA(rinfo, RestrictInfo))
     475      446978 :             return NULL;
     476             : 
     477     1037510 :         if (bms_is_empty(rinfo->clause_relids))
     478        2844 :             continue;           /* we can ignore variable-free clauses */
     479     1034666 :         if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
     480      439692 :             return NULL;        /* multiple relations in this clause */
     481      594974 :         if (lastrelid == 0)
     482      493178 :             lastrelid = relid;  /* first clause referencing a relation */
     483      101796 :         else if (relid != lastrelid)
     484         290 :             return NULL;        /* relation not same as last one */
     485             :     }
     486             : 
     487      876684 :     if (lastrelid != 0)
     488      471946 :         return find_base_rel(root, lastrelid);
     489             : 
     490      404738 :     return NULL;                /* no clauses */
     491             : }
     492             : 
     493             : /*
     494             :  * bms_is_subset_singleton
     495             :  *
     496             :  * Same result as bms_is_subset(s, bms_make_singleton(x)),
     497             :  * but a little faster and doesn't leak memory.
     498             :  *
     499             :  * Is this of use anywhere else?  If so move to bitmapset.c ...
     500             :  */
     501             : static bool
     502      577280 : bms_is_subset_singleton(const Bitmapset *s, int x)
     503             : {
     504      577280 :     switch (bms_membership(s))
     505             :     {
     506             :         case BMS_EMPTY_SET:
     507           0 :             return true;
     508             :         case BMS_SINGLETON:
     509      437136 :             return bms_is_member(x, s);
     510             :         case BMS_MULTIPLE:
     511      140144 :             return false;
     512             :     }
     513             :     /* can't get here... */
     514           0 :     return false;
     515             : }
     516             : 
     517             : /*
     518             :  * treat_as_join_clause -
     519             :  *    Decide whether an operator clause is to be handled by the
     520             :  *    restriction or join estimator.  Subroutine for clause_selectivity().
     521             :  */
     522             : static inline bool
     523      453392 : treat_as_join_clause(Node *clause, RestrictInfo *rinfo,
     524             :                      int varRelid, SpecialJoinInfo *sjinfo)
     525             : {
     526      453392 :     if (varRelid != 0)
     527             :     {
     528             :         /*
     529             :          * Caller is forcing restriction mode (eg, because we are examining an
     530             :          * inner indexscan qual).
     531             :          */
     532      145664 :         return false;
     533             :     }
     534      307728 :     else if (sjinfo == NULL)
     535             :     {
     536             :         /*
     537             :          * It must be a restriction clause, since it's being evaluated at a
     538             :          * scan node.
     539             :          */
     540      184610 :         return false;
     541             :     }
     542             :     else
     543             :     {
     544             :         /*
     545             :          * Otherwise, it's a join if there's more than one relation used. We
     546             :          * can optimize this calculation if an rinfo was passed.
     547             :          *
     548             :          * XXX  Since we know the clause is being evaluated at a join, the
     549             :          * only way it could be single-relation is if it was delayed by outer
     550             :          * joins.  Although we can make use of the restriction qual estimators
     551             :          * anyway, it seems likely that we ought to account for the
     552             :          * probability of injected nulls somehow.
     553             :          */
     554      123118 :         if (rinfo)
     555      122802 :             return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
     556             :         else
     557         316 :             return (NumRelids(clause) > 1);
     558             :     }
     559             : }
     560             : 
     561             : 
     562             : /*
     563             :  * clause_selectivity -
     564             :  *    Compute the selectivity of a general boolean expression clause.
     565             :  *
     566             :  * The clause can be either a RestrictInfo or a plain expression.  If it's
     567             :  * a RestrictInfo, we try to cache the selectivity for possible re-use,
     568             :  * so passing RestrictInfos is preferred.
     569             :  *
     570             :  * varRelid is either 0 or a rangetable index.
     571             :  *
     572             :  * When varRelid is not 0, only variables belonging to that relation are
     573             :  * considered in computing selectivity; other vars are treated as constants
     574             :  * of unknown values.  This is appropriate for estimating the selectivity of
     575             :  * a join clause that is being used as a restriction clause in a scan of a
     576             :  * nestloop join's inner relation --- varRelid should then be the ID of the
     577             :  * inner relation.
     578             :  *
     579             :  * When varRelid is 0, all variables are treated as variables.  This
     580             :  * is appropriate for ordinary join clauses and restriction clauses.
     581             :  *
     582             :  * jointype is the join type, if the clause is a join clause.  Pass JOIN_INNER
     583             :  * if the clause isn't a join clause.
     584             :  *
     585             :  * sjinfo is NULL for a non-join clause, otherwise it provides additional
     586             :  * context information about the join being performed.  There are some
     587             :  * special cases:
     588             :  *  1. For a special (not INNER) join, sjinfo is always a member of
     589             :  *     root->join_info_list.
     590             :  *  2. For an INNER join, sjinfo is just a transient struct, and only the
     591             :  *     relids and jointype fields in it can be trusted.
     592             :  * It is possible for jointype to be different from sjinfo->jointype.
     593             :  * This indicates we are considering a variant join: either with
     594             :  * the LHS and RHS switched, or with one input unique-ified.
     595             :  *
     596             :  * Note: when passing nonzero varRelid, it's normally appropriate to set
     597             :  * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
     598             :  * join clause; because we aren't treating it as a join clause.
     599             :  */
     600             : Selectivity
     601     1388374 : clause_selectivity(PlannerInfo *root,
     602             :                    Node *clause,
     603             :                    int varRelid,
     604             :                    JoinType jointype,
     605             :                    SpecialJoinInfo *sjinfo)
     606             : {
     607     1388374 :     Selectivity s1 = 0.5;       /* default for any unhandled clause type */
     608     1388374 :     RestrictInfo *rinfo = NULL;
     609     1388374 :     bool        cacheable = false;
     610             : 
     611     1388374 :     if (clause == NULL)         /* can this still happen? */
     612           0 :         return s1;
     613             : 
     614     1388374 :     if (IsA(clause, RestrictInfo))
     615             :     {
     616     1378546 :         rinfo = (RestrictInfo *) clause;
     617             : 
     618             :         /*
     619             :          * If the clause is marked pseudoconstant, then it will be used as a
     620             :          * gating qual and should not affect selectivity estimates; hence
     621             :          * return 1.0.  The only exception is that a constant FALSE may be
     622             :          * taken as having selectivity 0.0, since it will surely mean no rows
     623             :          * out of the plan.  This case is simple enough that we need not
     624             :          * bother caching the result.
     625             :          */
     626     1378546 :         if (rinfo->pseudoconstant)
     627             :         {
     628        2632 :             if (!IsA(rinfo->clause, Const))
     629        2540 :                 return (Selectivity) 1.0;
     630             :         }
     631             : 
     632             :         /*
     633             :          * If the clause is marked redundant, always return 1.0.
     634             :          */
     635     1376006 :         if (rinfo->norm_selec > 1)
     636       44212 :             return (Selectivity) 1.0;
     637             : 
     638             :         /*
     639             :          * If possible, cache the result of the selectivity calculation for
     640             :          * the clause.  We can cache if varRelid is zero or the clause
     641             :          * contains only vars of that relid --- otherwise varRelid will affect
     642             :          * the result, so mustn't cache.  Outer join quals might be examined
     643             :          * with either their join's actual jointype or JOIN_INNER, so we need
     644             :          * two cache variables to remember both cases.  Note: we assume the
     645             :          * result won't change if we are switching the input relations or
     646             :          * considering a unique-ified case, so we only need one cache variable
     647             :          * for all non-JOIN_INNER cases.
     648             :          */
     649     1909074 :         if (varRelid == 0 ||
     650      577280 :             bms_is_subset_singleton(rinfo->clause_relids, varRelid))
     651             :         {
     652             :             /* Cacheable --- do we already have the result? */
     653     1190838 :             if (jointype == JOIN_INNER)
     654             :             {
     655     1011178 :                 if (rinfo->norm_selec >= 0)
     656      736054 :                     return rinfo->norm_selec;
     657             :             }
     658             :             else
     659             :             {
     660      179660 :                 if (rinfo->outer_selec >= 0)
     661      113354 :                     return rinfo->outer_selec;
     662             :             }
     663      341430 :             cacheable = true;
     664             :         }
     665             : 
     666             :         /*
     667             :          * Proceed with examination of contained clause.  If the clause is an
     668             :          * OR-clause, we want to look at the variant with sub-RestrictInfos,
     669             :          * so that per-subclause selectivities can be cached.
     670             :          */
     671      482386 :         if (rinfo->orclause)
     672        8644 :             clause = (Node *) rinfo->orclause;
     673             :         else
     674      473742 :             clause = (Node *) rinfo->clause;
     675             :     }
     676             : 
     677      492214 :     if (IsA(clause, Var))
     678             :     {
     679       12920 :         Var        *var = (Var *) clause;
     680             : 
     681             :         /*
     682             :          * We probably shouldn't ever see an uplevel Var here, but if we do,
     683             :          * return the default selectivity...
     684             :          */
     685       12920 :         if (var->varlevelsup == 0 &&
     686          24 :             (varRelid == 0 || varRelid == (int) var->varno))
     687             :         {
     688             :             /* Use the restriction selectivity function for a bool Var */
     689       12916 :             s1 = boolvarsel(root, (Node *) var, varRelid);
     690             :         }
     691             :     }
     692      479294 :     else if (IsA(clause, Const))
     693             :     {
     694             :         /* bool constant is pretty easy... */
     695         132 :         Const      *con = (Const *) clause;
     696             : 
     697         264 :         s1 = con->constisnull ? 0.0 :
     698         132 :             DatumGetBool(con->constvalue) ? 1.0 : 0.0;
     699             :     }
     700      479162 :     else if (IsA(clause, Param))
     701             :     {
     702             :         /* see if we can replace the Param */
     703          16 :         Node       *subst = estimate_expression_value(root, clause);
     704             : 
     705          16 :         if (IsA(subst, Const))
     706             :         {
     707             :             /* bool constant is pretty easy... */
     708           0 :             Const      *con = (Const *) subst;
     709             : 
     710           0 :             s1 = con->constisnull ? 0.0 :
     711           0 :                 DatumGetBool(con->constvalue) ? 1.0 : 0.0;
     712             :         }
     713             :         else
     714             :         {
     715             :             /* XXX any way to do better than default? */
     716             :         }
     717             :     }
     718      479146 :     else if (is_notclause(clause))
     719             :     {
     720             :         /* inverse of the selectivity of the underlying clause */
     721        4916 :         s1 = 1.0 - clause_selectivity(root,
     722        4916 :                                       (Node *) get_notclausearg((Expr *) clause),
     723             :                                       varRelid,
     724             :                                       jointype,
     725             :                                       sjinfo);
     726             :     }
     727      474230 :     else if (is_andclause(clause))
     728             :     {
     729             :         /* share code with clauselist_selectivity() */
     730        1126 :         s1 = clauselist_selectivity(root,
     731             :                                     ((BoolExpr *) clause)->args,
     732             :                                     varRelid,
     733             :                                     jointype,
     734             :                                     sjinfo);
     735             :     }
     736      473104 :     else if (is_orclause(clause))
     737             :     {
     738             :         /*
     739             :          * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to
     740             :          * account for the probable overlap of selected tuple sets.
     741             :          *
     742             :          * XXX is this too conservative?
     743             :          */
     744             :         ListCell   *arg;
     745             : 
     746        8680 :         s1 = 0.0;
     747       28396 :         foreach(arg, ((BoolExpr *) clause)->args)
     748             :         {
     749       19716 :             Selectivity s2 = clause_selectivity(root,
     750       19716 :                                                 (Node *) lfirst(arg),
     751             :                                                 varRelid,
     752             :                                                 jointype,
     753             :                                                 sjinfo);
     754             : 
     755       19716 :             s1 = s1 + s2 - s1 * s2;
     756             :         }
     757             :     }
     758      464424 :     else if (is_opclause(clause) || IsA(clause, DistinctExpr))
     759      434950 :     {
     760      434950 :         OpExpr     *opclause = (OpExpr *) clause;
     761      434950 :         Oid         opno = opclause->opno;
     762             : 
     763      434950 :         if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
     764             :         {
     765             :             /* Estimate selectivity for a join clause. */
     766      114146 :             s1 = join_selectivity(root, opno,
     767             :                                   opclause->args,
     768             :                                   opclause->inputcollid,
     769             :                                   jointype,
     770             :                                   sjinfo);
     771             :         }
     772             :         else
     773             :         {
     774             :             /* Estimate selectivity for a restriction clause. */
     775      320804 :             s1 = restriction_selectivity(root, opno,
     776             :                                          opclause->args,
     777             :                                          opclause->inputcollid,
     778             :                                          varRelid);
     779             :         }
     780             : 
     781             :         /*
     782             :          * DistinctExpr has the same representation as OpExpr, but the
     783             :          * contained operator is "=" not "<>", so we must negate the result.
     784             :          * This estimation method doesn't give the right behavior for nulls,
     785             :          * but it's better than doing nothing.
     786             :          */
     787      434950 :         if (IsA(clause, DistinctExpr))
     788         406 :             s1 = 1.0 - s1;
     789             :     }
     790       29474 :     else if (is_funcclause(clause))
     791             :     {
     792        5290 :         FuncExpr   *funcclause = (FuncExpr *) clause;
     793             : 
     794             :         /* Try to get an estimate from the support function, if any */
     795        5290 :         s1 = function_selectivity(root,
     796             :                                   funcclause->funcid,
     797             :                                   funcclause->args,
     798             :                                   funcclause->inputcollid,
     799        5290 :                                   treat_as_join_clause(clause, rinfo,
     800             :                                                        varRelid, sjinfo),
     801             :                                   varRelid,
     802             :                                   jointype,
     803             :                                   sjinfo);
     804             :     }
     805       24184 :     else if (IsA(clause, ScalarArrayOpExpr))
     806             :     {
     807             :         /* Use node specific selectivity calculation function */
     808       13152 :         s1 = scalararraysel(root,
     809             :                             (ScalarArrayOpExpr *) clause,
     810       13152 :                             treat_as_join_clause(clause, rinfo,
     811             :                                                  varRelid, sjinfo),
     812             :                             varRelid,
     813             :                             jointype,
     814             :                             sjinfo);
     815             :     }
     816       11032 :     else if (IsA(clause, RowCompareExpr))
     817             :     {
     818             :         /* Use node specific selectivity calculation function */
     819         104 :         s1 = rowcomparesel(root,
     820             :                            (RowCompareExpr *) clause,
     821             :                            varRelid,
     822             :                            jointype,
     823             :                            sjinfo);
     824             :     }
     825       10928 :     else if (IsA(clause, NullTest))
     826             :     {
     827             :         /* Use node specific selectivity calculation function */
     828        9188 :         s1 = nulltestsel(root,
     829             :                          ((NullTest *) clause)->nulltesttype,
     830        9188 :                          (Node *) ((NullTest *) clause)->arg,
     831             :                          varRelid,
     832             :                          jointype,
     833             :                          sjinfo);
     834             :     }
     835        1740 :     else if (IsA(clause, BooleanTest))
     836             :     {
     837             :         /* Use node specific selectivity calculation function */
     838         132 :         s1 = booltestsel(root,
     839             :                          ((BooleanTest *) clause)->booltesttype,
     840         132 :                          (Node *) ((BooleanTest *) clause)->arg,
     841             :                          varRelid,
     842             :                          jointype,
     843             :                          sjinfo);
     844             :     }
     845        1608 :     else if (IsA(clause, CurrentOfExpr))
     846             :     {
     847             :         /* CURRENT OF selects at most one row of its table */
     848         384 :         CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
     849         384 :         RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
     850             : 
     851         384 :         if (crel->tuples > 0)
     852         364 :             s1 = 1.0 / crel->tuples;
     853             :     }
     854        1224 :     else if (IsA(clause, RelabelType))
     855             :     {
     856             :         /* Not sure this case is needed, but it can't hurt */
     857           0 :         s1 = clause_selectivity(root,
     858           0 :                                 (Node *) ((RelabelType *) clause)->arg,
     859             :                                 varRelid,
     860             :                                 jointype,
     861             :                                 sjinfo);
     862             :     }
     863        1224 :     else if (IsA(clause, CoerceToDomain))
     864             :     {
     865             :         /* Not sure this case is needed, but it can't hurt */
     866           0 :         s1 = clause_selectivity(root,
     867           0 :                                 (Node *) ((CoerceToDomain *) clause)->arg,
     868             :                                 varRelid,
     869             :                                 jointype,
     870             :                                 sjinfo);
     871             :     }
     872             :     else
     873             :     {
     874             :         /*
     875             :          * For anything else, see if we can consider it as a boolean variable.
     876             :          * This only works if it's an immutable expression in Vars of a single
     877             :          * relation; but there's no point in us checking that here because
     878             :          * boolvarsel() will do it internally, and return a suitable default
     879             :          * selectivity if not.
     880             :          */
     881        1224 :         s1 = boolvarsel(root, clause, varRelid);
     882             :     }
     883             : 
     884             :     /* Cache the result if possible */
     885      492214 :     if (cacheable)
     886             :     {
     887      341430 :         if (jointype == JOIN_INNER)
     888      275124 :             rinfo->norm_selec = s1;
     889             :         else
     890       66306 :             rinfo->outer_selec = s1;
     891             :     }
     892             : 
     893             : #ifdef SELECTIVITY_DEBUG
     894             :     elog(DEBUG4, "clause_selectivity: s1 %f", s1);
     895             : #endif                          /* SELECTIVITY_DEBUG */
     896             : 
     897      492214 :     return s1;
     898             : }

Generated by: LCOV version 1.13