LCOV - code coverage report
Current view: top level - src/backend/optimizer/path - clausesel.c (source / functions) Hit Total Coverage
Test: PostgreSQL 14devel Lines: 233 249 93.6 %
Date: 2021-01-26 02:06:48 Functions: 9 9 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * clausesel.c
       4             :  *    Routines to compute clause selectivities
       5             :  *
       6             :  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/optimizer/path/clausesel.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "nodes/makefuncs.h"
      18             : #include "nodes/nodeFuncs.h"
      19             : #include "optimizer/clauses.h"
      20             : #include "optimizer/cost.h"
      21             : #include "optimizer/optimizer.h"
      22             : #include "optimizer/pathnode.h"
      23             : #include "optimizer/plancat.h"
      24             : #include "statistics/statistics.h"
      25             : #include "utils/fmgroids.h"
      26             : #include "utils/lsyscache.h"
      27             : #include "utils/selfuncs.h"
      28             : 
      29             : /*
      30             :  * Data structure for accumulating info about possible range-query
      31             :  * clause pairs in clauselist_selectivity.
      32             :  */
      33             : typedef struct RangeQueryClause
      34             : {
      35             :     struct RangeQueryClause *next;  /* next in linked list */
      36             :     Node       *var;            /* The common variable of the clauses */
      37             :     bool        have_lobound;   /* found a low-bound clause yet? */
      38             :     bool        have_hibound;   /* found a high-bound clause yet? */
      39             :     Selectivity lobound;        /* Selectivity of a var > something clause */
      40             :     Selectivity hibound;        /* Selectivity of a var < something clause */
      41             : } RangeQueryClause;
      42             : 
      43             : static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
      44             :                            bool varonleft, bool isLTsel, Selectivity s2);
      45             : static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
      46             :                                                List *clauses);
      47             : static Selectivity clauselist_selectivity_or(PlannerInfo *root,
      48             :                                              List *clauses,
      49             :                                              int varRelid,
      50             :                                              JoinType jointype,
      51             :                                              SpecialJoinInfo *sjinfo,
      52             :                                              bool use_extended_stats);
      53             : 
      54             : /****************************************************************************
      55             :  *      ROUTINES TO COMPUTE SELECTIVITIES
      56             :  ****************************************************************************/
      57             : 
      58             : /*
      59             :  * clauselist_selectivity -
      60             :  *    Compute the selectivity of an implicitly-ANDed list of boolean
      61             :  *    expression clauses.  The list can be empty, in which case 1.0
      62             :  *    must be returned.  List elements may be either RestrictInfos
      63             :  *    or bare expression clauses --- the former is preferred since
      64             :  *    it allows caching of results.
      65             :  *
      66             :  * See clause_selectivity() for the meaning of the additional parameters.
      67             :  *
      68             :  * The basic approach is to apply extended statistics first, on as many
      69             :  * clauses as possible, in order to capture cross-column dependencies etc.
      70             :  * The remaining clauses are then estimated by taking the product of their
      71             :  * selectivities, but that's only right if they have independent
      72             :  * probabilities, and in reality they are often NOT independent even if they
      73             :  * only refer to a single column.  So, we want to be smarter where we can.
      74             :  *
      75             :  * We also recognize "range queries", such as "x > 34 AND x < 42".  Clauses
      76             :  * are recognized as possible range query components if they are restriction
      77             :  * opclauses whose operators have scalarltsel or a related function as their
      78             :  * restriction selectivity estimator.  We pair up clauses of this form that
      79             :  * refer to the same variable.  An unpairable clause of this kind is simply
      80             :  * multiplied into the selectivity product in the normal way.  But when we
      81             :  * find a pair, we know that the selectivities represent the relative
      82             :  * positions of the low and high bounds within the column's range, so instead
      83             :  * of figuring the selectivity as hisel * losel, we can figure it as hisel +
      84             :  * losel - 1.  (To visualize this, see that hisel is the fraction of the range
      85             :  * below the high bound, while losel is the fraction above the low bound; so
      86             :  * hisel can be interpreted directly as a 0..1 value but we need to convert
      87             :  * losel to 1-losel before interpreting it as a value.  Then the available
      88             :  * range is 1-losel to hisel.  However, this calculation double-excludes
      89             :  * nulls, so really we need hisel + losel + null_frac - 1.)
      90             :  *
      91             :  * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
      92             :  * and instead use DEFAULT_RANGE_INEQ_SEL.  The same applies if the equation
      93             :  * yields an impossible (negative) result.
      94             :  *
      95             :  * A free side-effect is that we can recognize redundant inequalities such
      96             :  * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
      97             :  *
      98             :  * Of course this is all very dependent on the behavior of the inequality
      99             :  * selectivity functions; perhaps some day we can generalize the approach.
     100             :  */
     101             : Selectivity
     102     1437324 : clauselist_selectivity(PlannerInfo *root,
     103             :                        List *clauses,
     104             :                        int varRelid,
     105             :                        JoinType jointype,
     106             :                        SpecialJoinInfo *sjinfo)
     107             : {
     108     1437324 :     return clauselist_selectivity_ext(root, clauses, varRelid,
     109             :                                       jointype, sjinfo, true);
     110             : }
     111             : 
     112             : /*
     113             :  * clauselist_selectivity_ext -
     114             :  *    Extended version of clauselist_selectivity().  If "use_extended_stats"
     115             :  *    is false, all extended statistics will be ignored, and only per-column
     116             :  *    statistics will be used.
     117             :  */
     118             : Selectivity
     119     1440630 : clauselist_selectivity_ext(PlannerInfo *root,
     120             :                            List *clauses,
     121             :                            int varRelid,
     122             :                            JoinType jointype,
     123             :                            SpecialJoinInfo *sjinfo,
     124             :                            bool use_extended_stats)
     125             : {
     126     1440630 :     Selectivity s1 = 1.0;
     127             :     RelOptInfo *rel;
     128     1440630 :     Bitmapset  *estimatedclauses = NULL;
     129     1440630 :     RangeQueryClause *rqlist = NULL;
     130             :     ListCell   *l;
     131             :     int         listidx;
     132             : 
     133             :     /*
     134             :      * If there's exactly one clause, just go directly to
     135             :      * clause_selectivity_ext(). None of what we might do below is relevant.
     136             :      */
     137     1440630 :     if (list_length(clauses) == 1)
     138      798746 :         return clause_selectivity_ext(root, (Node *) linitial(clauses),
     139             :                                       varRelid, jointype, sjinfo,
     140             :                                       use_extended_stats);
     141             : 
     142             :     /*
     143             :      * Determine if these clauses reference a single relation.  If so, and if
     144             :      * it has extended statistics, try to apply those.
     145             :      */
     146      641884 :     rel = find_single_rel_for_clauses(root, clauses);
     147      641884 :     if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
     148             :     {
     149             :         /*
     150             :          * Estimate as many clauses as possible using extended statistics.
     151             :          *
     152             :          * 'estimatedclauses' is populated with the 0-based list position
     153             :          * index of clauses estimated here, and that should be ignored below.
     154             :          */
     155         992 :         s1 = statext_clauselist_selectivity(root, clauses, varRelid,
     156             :                                             jointype, sjinfo, rel,
     157             :                                             &estimatedclauses, false);
     158             :     }
     159             : 
     160             :     /*
     161             :      * Apply normal selectivity estimates for remaining clauses. We'll be
     162             :      * careful to skip any clauses which were already estimated above.
     163             :      *
     164             :      * Anything that doesn't look like a potential rangequery clause gets
     165             :      * multiplied into s1 and forgotten. Anything that does gets inserted into
     166             :      * an rqlist entry.
     167             :      */
     168      641884 :     listidx = -1;
     169     1089512 :     foreach(l, clauses)
     170             :     {
     171      447628 :         Node       *clause = (Node *) lfirst(l);
     172             :         RestrictInfo *rinfo;
     173             :         Selectivity s2;
     174             : 
     175      447628 :         listidx++;
     176             : 
     177             :         /*
     178             :          * Skip this clause if it's already been estimated by some other
     179             :          * statistics above.
     180             :          */
     181      447628 :         if (bms_is_member(listidx, estimatedclauses))
     182        2044 :             continue;
     183             : 
     184             :         /* Compute the selectivity of this clause in isolation */
     185      445584 :         s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
     186             :                                     use_extended_stats);
     187             : 
     188             :         /*
     189             :          * Check for being passed a RestrictInfo.
     190             :          *
     191             :          * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
     192             :          * 0.0; just use that rather than looking for range pairs.
     193             :          */
     194      445584 :         if (IsA(clause, RestrictInfo))
     195             :         {
     196      444704 :             rinfo = (RestrictInfo *) clause;
     197      444704 :             if (rinfo->pseudoconstant)
     198             :             {
     199        2924 :                 s1 = s1 * s2;
     200        2924 :                 continue;
     201             :             }
     202      441780 :             clause = (Node *) rinfo->clause;
     203             :         }
     204             :         else
     205         880 :             rinfo = NULL;
     206             : 
     207             :         /*
     208             :          * See if it looks like a restriction clause with a pseudoconstant on
     209             :          * one side.  (Anything more complicated than that might not behave in
     210             :          * the simple way we are expecting.)  Most of the tests here can be
     211             :          * done more efficiently with rinfo than without.
     212             :          */
     213      442660 :         if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
     214             :         {
     215      399606 :             OpExpr     *expr = (OpExpr *) clause;
     216      399606 :             bool        varonleft = true;
     217             :             bool        ok;
     218             : 
     219      399606 :             if (rinfo)
     220             :             {
     221      683870 :                 ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
     222      279570 :                     (is_pseudo_constant_clause_relids(lsecond(expr->args),
     223        5558 :                                                       rinfo->right_relids) ||
     224        5558 :                      (varonleft = false,
     225        5558 :                       is_pseudo_constant_clause_relids(linitial(expr->args),
     226             :                                                        rinfo->left_relids)));
     227             :             }
     228             :             else
     229             :             {
     230        1728 :                 ok = (NumRelids(root, clause) == 1) &&
     231         864 :                     (is_pseudo_constant_clause(lsecond(expr->args)) ||
     232           0 :                      (varonleft = false,
     233           0 :                       is_pseudo_constant_clause(linitial(expr->args))));
     234             :             }
     235             : 
     236      399606 :             if (ok)
     237             :             {
     238             :                 /*
     239             :                  * If it's not a "<"/"<="/">"/">=" operator, just merge the
     240             :                  * selectivity in generically.  But if it's the right oprrest,
     241             :                  * add the clause to rqlist for later processing.
     242             :                  */
     243      280176 :                 switch (get_oprrest(expr->opno))
     244             :                 {
     245        8778 :                     case F_SCALARLTSEL:
     246             :                     case F_SCALARLESEL:
     247        8778 :                         addRangeClause(&rqlist, clause,
     248             :                                        varonleft, true, s2);
     249        8778 :                         break;
     250       47236 :                     case F_SCALARGTSEL:
     251             :                     case F_SCALARGESEL:
     252       47236 :                         addRangeClause(&rqlist, clause,
     253             :                                        varonleft, false, s2);
     254       47236 :                         break;
     255      224162 :                     default:
     256             :                         /* Just merge the selectivity in generically */
     257      224162 :                         s1 = s1 * s2;
     258      224162 :                         break;
     259             :                 }
     260      280176 :                 continue;       /* drop to loop bottom */
     261             :             }
     262             :         }
     263             : 
     264             :         /* Not the right form, so treat it generically. */
     265      162484 :         s1 = s1 * s2;
     266             :     }
     267             : 
     268             :     /*
     269             :      * Now scan the rangequery pair list.
     270             :      */
     271      692034 :     while (rqlist != NULL)
     272             :     {
     273             :         RangeQueryClause *rqnext;
     274             : 
     275       50150 :         if (rqlist->have_lobound && rqlist->have_hibound)
     276        5652 :         {
     277             :             /* Successfully matched a pair of range clauses */
     278             :             Selectivity s2;
     279             : 
     280             :             /*
     281             :              * Exact equality to the default value probably means the
     282             :              * selectivity function punted.  This is not airtight but should
     283             :              * be good enough.
     284             :              */
     285        5652 :             if (rqlist->hibound == DEFAULT_INEQ_SEL ||
     286        3614 :                 rqlist->lobound == DEFAULT_INEQ_SEL)
     287             :             {
     288        2038 :                 s2 = DEFAULT_RANGE_INEQ_SEL;
     289             :             }
     290             :             else
     291             :             {
     292        3614 :                 s2 = rqlist->hibound + rqlist->lobound - 1.0;
     293             : 
     294             :                 /* Adjust for double-exclusion of NULLs */
     295        3614 :                 s2 += nulltestsel(root, IS_NULL, rqlist->var,
     296             :                                   varRelid, jointype, sjinfo);
     297             : 
     298             :                 /*
     299             :                  * A zero or slightly negative s2 should be converted into a
     300             :                  * small positive value; we probably are dealing with a very
     301             :                  * tight range and got a bogus result due to roundoff errors.
     302             :                  * However, if s2 is very negative, then we probably have
     303             :                  * default selectivity estimates on one or both sides of the
     304             :                  * range that we failed to recognize above for some reason.
     305             :                  */
     306        3614 :                 if (s2 <= 0.0)
     307             :                 {
     308         468 :                     if (s2 < -0.01)
     309             :                     {
     310             :                         /*
     311             :                          * No data available --- use a default estimate that
     312             :                          * is small, but not real small.
     313             :                          */
     314           0 :                         s2 = DEFAULT_RANGE_INEQ_SEL;
     315             :                     }
     316             :                     else
     317             :                     {
     318             :                         /*
     319             :                          * It's just roundoff error; use a small positive
     320             :                          * value
     321             :                          */
     322         468 :                         s2 = 1.0e-10;
     323             :                     }
     324             :                 }
     325             :             }
     326             :             /* Merge in the selectivity of the pair of clauses */
     327        5652 :             s1 *= s2;
     328             :         }
     329             :         else
     330             :         {
     331             :             /* Only found one of a pair, merge it in generically */
     332       44498 :             if (rqlist->have_lobound)
     333       41404 :                 s1 *= rqlist->lobound;
     334             :             else
     335        3094 :                 s1 *= rqlist->hibound;
     336             :         }
     337             :         /* release storage and advance */
     338       50150 :         rqnext = rqlist->next;
     339       50150 :         pfree(rqlist);
     340       50150 :         rqlist = rqnext;
     341             :     }
     342             : 
     343      641884 :     return s1;
     344             : }
     345             : 
     346             : /*
     347             :  * clauselist_selectivity_or -
     348             :  *    Compute the selectivity of an implicitly-ORed list of boolean
     349             :  *    expression clauses.  The list can be empty, in which case 0.0
     350             :  *    must be returned.  List elements may be either RestrictInfos
     351             :  *    or bare expression clauses --- the former is preferred since
     352             :  *    it allows caching of results.
     353             :  *
     354             :  * See clause_selectivity() for the meaning of the additional parameters.
     355             :  *
     356             :  * The basic approach is to apply extended statistics first, on as many
     357             :  * clauses as possible, in order to capture cross-column dependencies etc.
     358             :  * The remaining clauses are then estimated as if they were independent.
     359             :  */
     360             : static Selectivity
     361        9536 : clauselist_selectivity_or(PlannerInfo *root,
     362             :                           List *clauses,
     363             :                           int varRelid,
     364             :                           JoinType jointype,
     365             :                           SpecialJoinInfo *sjinfo,
     366             :                           bool use_extended_stats)
     367             : {
     368        9536 :     Selectivity s1 = 0.0;
     369             :     RelOptInfo *rel;
     370        9536 :     Bitmapset  *estimatedclauses = NULL;
     371             :     ListCell   *lc;
     372             :     int         listidx;
     373             : 
     374             :     /*
     375             :      * Determine if these clauses reference a single relation.  If so, and if
     376             :      * it has extended statistics, try to apply those.
     377             :      */
     378        9536 :     rel = find_single_rel_for_clauses(root, clauses);
     379        9536 :     if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
     380             :     {
     381             :         /*
     382             :          * Estimate as many clauses as possible using extended statistics.
     383             :          *
     384             :          * 'estimatedclauses' is populated with the 0-based list position
     385             :          * index of clauses estimated here, and that should be ignored below.
     386             :          */
     387          52 :         s1 = statext_clauselist_selectivity(root, clauses, varRelid,
     388             :                                             jointype, sjinfo, rel,
     389             :                                             &estimatedclauses, true);
     390             :     }
     391             : 
     392             :     /*
     393             :      * Estimate the remaining clauses as if they were independent.
     394             :      *
     395             :      * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
     396             :      * for the probable overlap of selected tuple sets.
     397             :      *
     398             :      * XXX is this too conservative?
     399             :      */
     400        9536 :     listidx = -1;
     401       31104 :     foreach(lc, clauses)
     402             :     {
     403             :         Selectivity s2;
     404             : 
     405       21568 :         listidx++;
     406             : 
     407             :         /*
     408             :          * Skip this clause if it's already been estimated by some other
     409             :          * statistics above.
     410             :          */
     411       21568 :         if (bms_is_member(listidx, estimatedclauses))
     412         128 :             continue;
     413             : 
     414       21440 :         s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid,
     415             :                                     jointype, sjinfo, use_extended_stats);
     416             : 
     417       21440 :         s1 = s1 + s2 - s1 * s2;
     418             :     }
     419             : 
     420        9536 :     return s1;
     421             : }
     422             : 
     423             : /*
     424             :  * addRangeClause --- add a new range clause for clauselist_selectivity
     425             :  *
     426             :  * Here is where we try to match up pairs of range-query clauses
     427             :  */
     428             : static void
     429       56014 : addRangeClause(RangeQueryClause **rqlist, Node *clause,
     430             :                bool varonleft, bool isLTsel, Selectivity s2)
     431             : {
     432             :     RangeQueryClause *rqelem;
     433             :     Node       *var;
     434             :     bool        is_lobound;
     435             : 
     436       56014 :     if (varonleft)
     437             :     {
     438       55930 :         var = get_leftop((Expr *) clause);
     439       55930 :         is_lobound = !isLTsel;  /* x < something is high bound */
     440             :     }
     441             :     else
     442             :     {
     443          84 :         var = get_rightop((Expr *) clause);
     444          84 :         is_lobound = isLTsel;   /* something < x is low bound */
     445             :     }
     446             : 
     447       56922 :     for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
     448             :     {
     449             :         /*
     450             :          * We use full equal() here because the "var" might be a function of
     451             :          * one or more attributes of the same relation...
     452             :          */
     453        6772 :         if (!equal(var, rqelem->var))
     454         908 :             continue;
     455             :         /* Found the right group to put this clause in */
     456        5864 :         if (is_lobound)
     457             :         {
     458         188 :             if (!rqelem->have_lobound)
     459             :             {
     460          68 :                 rqelem->have_lobound = true;
     461          68 :                 rqelem->lobound = s2;
     462             :             }
     463             :             else
     464             :             {
     465             : 
     466             :                 /*------
     467             :                  * We have found two similar clauses, such as
     468             :                  * x < y AND x <= z.
     469             :                  * Keep only the more restrictive one.
     470             :                  *------
     471             :                  */
     472         120 :                 if (rqelem->lobound > s2)
     473           0 :                     rqelem->lobound = s2;
     474             :             }
     475             :         }
     476             :         else
     477             :         {
     478        5676 :             if (!rqelem->have_hibound)
     479             :             {
     480        5584 :                 rqelem->have_hibound = true;
     481        5584 :                 rqelem->hibound = s2;
     482             :             }
     483             :             else
     484             :             {
     485             : 
     486             :                 /*------
     487             :                  * We have found two similar clauses, such as
     488             :                  * x > y AND x >= z.
     489             :                  * Keep only the more restrictive one.
     490             :                  *------
     491             :                  */
     492          92 :                 if (rqelem->hibound > s2)
     493          24 :                     rqelem->hibound = s2;
     494             :             }
     495             :         }
     496        5864 :         return;
     497             :     }
     498             : 
     499             :     /* No matching var found, so make a new clause-pair data structure */
     500       50150 :     rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
     501       50150 :     rqelem->var = var;
     502       50150 :     if (is_lobound)
     503             :     {
     504       46988 :         rqelem->have_lobound = true;
     505       46988 :         rqelem->have_hibound = false;
     506       46988 :         rqelem->lobound = s2;
     507             :     }
     508             :     else
     509             :     {
     510        3162 :         rqelem->have_lobound = false;
     511        3162 :         rqelem->have_hibound = true;
     512        3162 :         rqelem->hibound = s2;
     513             :     }
     514       50150 :     rqelem->next = *rqlist;
     515       50150 :     *rqlist = rqelem;
     516             : }
     517             : 
     518             : /*
     519             :  * find_single_rel_for_clauses
     520             :  *      Examine each clause in 'clauses' and determine if all clauses
     521             :  *      reference only a single relation.  If so return that relation,
     522             :  *      otherwise return NULL.
     523             :  */
     524             : static RelOptInfo *
     525      652630 : find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
     526             : {
     527      652630 :     int         lastrelid = 0;
     528             :     ListCell   *l;
     529             : 
     530      930268 :     foreach(l, clauses)
     531             :     {
     532      370286 :         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
     533             :         int         relid;
     534             : 
     535             :         /*
     536             :          * If we have a list of bare clauses rather than RestrictInfos, we
     537             :          * could pull out their relids the hard way with pull_varnos().
     538             :          * However, currently the extended-stats machinery won't do anything
     539             :          * with non-RestrictInfo clauses anyway, so there's no point in
     540             :          * spending extra cycles; just fail if that's what we have.
     541             :          *
     542             :          * An exception to that rule is if we have a bare BoolExpr AND clause.
     543             :          * We treat this as a special case because the restrictinfo machinery
     544             :          * doesn't build RestrictInfos on top of AND clauses.
     545             :          */
     546      370286 :         if (is_andclause(rinfo))
     547             :         {
     548             :             RelOptInfo *rel;
     549             : 
     550        1210 :             rel = find_single_rel_for_clauses(root,
     551             :                                               ((BoolExpr *) rinfo)->args);
     552             : 
     553        1210 :             if (rel == NULL)
     554       92648 :                 return NULL;
     555         976 :             if (lastrelid == 0)
     556         394 :                 lastrelid = rel->relid;
     557         582 :             else if (rel->relid != lastrelid)
     558           0 :                 return NULL;
     559             : 
     560        3976 :             continue;
     561             :         }
     562             : 
     563      369076 :         if (!IsA(rinfo, RestrictInfo))
     564         764 :             return NULL;
     565             : 
     566      368312 :         if (bms_is_empty(rinfo->clause_relids))
     567        3000 :             continue;           /* we can ignore variable-free clauses */
     568      365312 :         if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
     569       87418 :             return NULL;        /* multiple relations in this clause */
     570      277894 :         if (lastrelid == 0)
     571      132992 :             lastrelid = relid;  /* first clause referencing a relation */
     572      144902 :         else if (relid != lastrelid)
     573        4232 :             return NULL;        /* relation not same as last one */
     574             :     }
     575             : 
     576      559982 :     if (lastrelid != 0)
     577      112060 :         return find_base_rel(root, lastrelid);
     578             : 
     579      447922 :     return NULL;                /* no clauses */
     580             : }
     581             : 
     582             : /*
     583             :  * bms_is_subset_singleton
     584             :  *
     585             :  * Same result as bms_is_subset(s, bms_make_singleton(x)),
     586             :  * but a little faster and doesn't leak memory.
     587             :  *
     588             :  * Is this of use anywhere else?  If so move to bitmapset.c ...
     589             :  */
     590             : static bool
     591      648182 : bms_is_subset_singleton(const Bitmapset *s, int x)
     592             : {
     593      648182 :     switch (bms_membership(s))
     594             :     {
     595           0 :         case BMS_EMPTY_SET:
     596           0 :             return true;
     597      499036 :         case BMS_SINGLETON:
     598      499036 :             return bms_is_member(x, s);
     599      149146 :         case BMS_MULTIPLE:
     600      149146 :             return false;
     601             :     }
     602             :     /* can't get here... */
     603           0 :     return false;
     604             : }
     605             : 
     606             : /*
     607             :  * treat_as_join_clause -
     608             :  *    Decide whether an operator clause is to be handled by the
     609             :  *    restriction or join estimator.  Subroutine for clause_selectivity().
     610             :  */
     611             : static inline bool
     612      501422 : treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
     613             :                      int varRelid, SpecialJoinInfo *sjinfo)
     614             : {
     615      501422 :     if (varRelid != 0)
     616             :     {
     617             :         /*
     618             :          * Caller is forcing restriction mode (eg, because we are examining an
     619             :          * inner indexscan qual).
     620             :          */
     621      155554 :         return false;
     622             :     }
     623      345868 :     else if (sjinfo == NULL)
     624             :     {
     625             :         /*
     626             :          * It must be a restriction clause, since it's being evaluated at a
     627             :          * scan node.
     628             :          */
     629      212230 :         return false;
     630             :     }
     631             :     else
     632             :     {
     633             :         /*
     634             :          * Otherwise, it's a join if there's more than one relation used. We
     635             :          * can optimize this calculation if an rinfo was passed.
     636             :          *
     637             :          * XXX  Since we know the clause is being evaluated at a join, the
     638             :          * only way it could be single-relation is if it was delayed by outer
     639             :          * joins.  Although we can make use of the restriction qual estimators
     640             :          * anyway, it seems likely that we ought to account for the
     641             :          * probability of injected nulls somehow.
     642             :          */
     643      133638 :         if (rinfo)
     644      133338 :             return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
     645             :         else
     646         300 :             return (NumRelids(root, clause) > 1);
     647             :     }
     648             : }
     649             : 
     650             : 
     651             : /*
     652             :  * clause_selectivity -
     653             :  *    Compute the selectivity of a general boolean expression clause.
     654             :  *
     655             :  * The clause can be either a RestrictInfo or a plain expression.  If it's
     656             :  * a RestrictInfo, we try to cache the selectivity for possible re-use,
     657             :  * so passing RestrictInfos is preferred.
     658             :  *
     659             :  * varRelid is either 0 or a rangetable index.
     660             :  *
     661             :  * When varRelid is not 0, only variables belonging to that relation are
     662             :  * considered in computing selectivity; other vars are treated as constants
     663             :  * of unknown values.  This is appropriate for estimating the selectivity of
     664             :  * a join clause that is being used as a restriction clause in a scan of a
     665             :  * nestloop join's inner relation --- varRelid should then be the ID of the
     666             :  * inner relation.
     667             :  *
     668             :  * When varRelid is 0, all variables are treated as variables.  This
     669             :  * is appropriate for ordinary join clauses and restriction clauses.
     670             :  *
     671             :  * jointype is the join type, if the clause is a join clause.  Pass JOIN_INNER
     672             :  * if the clause isn't a join clause.
     673             :  *
     674             :  * sjinfo is NULL for a non-join clause, otherwise it provides additional
     675             :  * context information about the join being performed.  There are some
     676             :  * special cases:
     677             :  *  1. For a special (not INNER) join, sjinfo is always a member of
     678             :  *     root->join_info_list.
     679             :  *  2. For an INNER join, sjinfo is just a transient struct, and only the
     680             :  *     relids and jointype fields in it can be trusted.
     681             :  * It is possible for jointype to be different from sjinfo->jointype.
     682             :  * This indicates we are considering a variant join: either with
     683             :  * the LHS and RHS switched, or with one input unique-ified.
     684             :  *
     685             :  * Note: when passing nonzero varRelid, it's normally appropriate to set
     686             :  * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
     687             :  * join clause; because we aren't treating it as a join clause.
     688             :  */
     689             : Selectivity
     690      291482 : clause_selectivity(PlannerInfo *root,
     691             :                    Node *clause,
     692             :                    int varRelid,
     693             :                    JoinType jointype,
     694             :                    SpecialJoinInfo *sjinfo)
     695             : {
     696      291482 :     return clause_selectivity_ext(root, clause, varRelid,
     697             :                                   jointype, sjinfo, true);
     698             : }
     699             : 
     700             : /*
     701             :  * clause_selectivity_ext -
     702             :  *    Extended version of clause_selectivity().  If "use_extended_stats" is
     703             :  *    false, all extended statistics will be ignored, and only per-column
     704             :  *    statistics will be used.
     705             :  */
     706             : Selectivity
     707     1563004 : clause_selectivity_ext(PlannerInfo *root,
     708             :                        Node *clause,
     709             :                        int varRelid,
     710             :                        JoinType jointype,
     711             :                        SpecialJoinInfo *sjinfo,
     712             :                        bool use_extended_stats)
     713             : {
     714     1563004 :     Selectivity s1 = 0.5;       /* default for any unhandled clause type */
     715     1563004 :     RestrictInfo *rinfo = NULL;
     716     1563004 :     bool        cacheable = false;
     717             : 
     718     1563004 :     if (clause == NULL)         /* can this still happen? */
     719           0 :         return s1;
     720             : 
     721     1563004 :     if (IsA(clause, RestrictInfo))
     722             :     {
     723     1551772 :         rinfo = (RestrictInfo *) clause;
     724             : 
     725             :         /*
     726             :          * If the clause is marked pseudoconstant, then it will be used as a
     727             :          * gating qual and should not affect selectivity estimates; hence
     728             :          * return 1.0.  The only exception is that a constant FALSE may be
     729             :          * taken as having selectivity 0.0, since it will surely mean no rows
     730             :          * out of the plan.  This case is simple enough that we need not
     731             :          * bother caching the result.
     732             :          */
     733     1551772 :         if (rinfo->pseudoconstant)
     734             :         {
     735        3076 :             if (!IsA(rinfo->clause, Const))
     736        2984 :                 return (Selectivity) 1.0;
     737             :         }
     738             : 
     739             :         /*
     740             :          * If the clause is marked redundant, always return 1.0.
     741             :          */
     742     1548788 :         if (rinfo->norm_selec > 1)
     743       48662 :             return (Selectivity) 1.0;
     744             : 
     745             :         /*
     746             :          * If possible, cache the result of the selectivity calculation for
     747             :          * the clause.  We can cache if varRelid is zero or the clause
     748             :          * contains only vars of that relid --- otherwise varRelid will affect
     749             :          * the result, so mustn't cache.  Outer join quals might be examined
     750             :          * with either their join's actual jointype or JOIN_INNER, so we need
     751             :          * two cache variables to remember both cases.  Note: we assume the
     752             :          * result won't change if we are switching the input relations or
     753             :          * considering a unique-ified case, so we only need one cache variable
     754             :          * for all non-JOIN_INNER cases.
     755             :          */
     756     2148308 :         if (varRelid == 0 ||
     757      648182 :             bms_is_subset_singleton(rinfo->clause_relids, varRelid))
     758             :         {
     759             :             /* Cacheable --- do we already have the result? */
     760     1350140 :             if (jointype == JOIN_INNER)
     761             :             {
     762     1163248 :                 if (rinfo->norm_selec >= 0)
     763      848288 :                     return rinfo->norm_selec;
     764             :             }
     765             :             else
     766             :             {
     767      186892 :                 if (rinfo->outer_selec >= 0)
     768      115558 :                     return rinfo->outer_selec;
     769             :             }
     770      386294 :             cacheable = true;
     771             :         }
     772             : 
     773             :         /*
     774             :          * Proceed with examination of contained clause.  If the clause is an
     775             :          * OR-clause, we want to look at the variant with sub-RestrictInfos,
     776             :          * so that per-subclause selectivities can be cached.
     777             :          */
     778      536280 :         if (rinfo->orclause)
     779        9500 :             clause = (Node *) rinfo->orclause;
     780             :         else
     781      526780 :             clause = (Node *) rinfo->clause;
     782             :     }
     783             : 
     784      547512 :     if (IsA(clause, Var))
     785             :     {
     786       13858 :         Var        *var = (Var *) clause;
     787             : 
     788             :         /*
     789             :          * We probably shouldn't ever see an uplevel Var here, but if we do,
     790             :          * return the default selectivity...
     791             :          */
     792       13858 :         if (var->varlevelsup == 0 &&
     793          24 :             (varRelid == 0 || varRelid == (int) var->varno))
     794             :         {
     795             :             /* Use the restriction selectivity function for a bool Var */
     796       13854 :             s1 = boolvarsel(root, (Node *) var, varRelid);
     797             :         }
     798             :     }
     799      533654 :     else if (IsA(clause, Const))
     800             :     {
     801             :         /* bool constant is pretty easy... */
     802         132 :         Const      *con = (Const *) clause;
     803             : 
     804         264 :         s1 = con->constisnull ? 0.0 :
     805         132 :             DatumGetBool(con->constvalue) ? 1.0 : 0.0;
     806             :     }
     807      533522 :     else if (IsA(clause, Param))
     808             :     {
     809             :         /* see if we can replace the Param */
     810          16 :         Node       *subst = estimate_expression_value(root, clause);
     811             : 
     812          16 :         if (IsA(subst, Const))
     813             :         {
     814             :             /* bool constant is pretty easy... */
     815           0 :             Const      *con = (Const *) subst;
     816             : 
     817           0 :             s1 = con->constisnull ? 0.0 :
     818           0 :                 DatumGetBool(con->constvalue) ? 1.0 : 0.0;
     819             :         }
     820             :         else
     821             :         {
     822             :             /* XXX any way to do better than default? */
     823             :         }
     824             :     }
     825      533506 :     else if (is_notclause(clause))
     826             :     {
     827             :         /* inverse of the selectivity of the underlying clause */
     828        5624 :         s1 = 1.0 - clause_selectivity_ext(root,
     829        5624 :                                           (Node *) get_notclausearg((Expr *) clause),
     830             :                                           varRelid,
     831             :                                           jointype,
     832             :                                           sjinfo,
     833             :                                           use_extended_stats);
     834             :     }
     835      527882 :     else if (is_andclause(clause))
     836             :     {
     837             :         /* share code with clauselist_selectivity() */
     838        1506 :         s1 = clauselist_selectivity_ext(root,
     839             :                                         ((BoolExpr *) clause)->args,
     840             :                                         varRelid,
     841             :                                         jointype,
     842             :                                         sjinfo,
     843             :                                         use_extended_stats);
     844             :     }
     845      526376 :     else if (is_orclause(clause))
     846             :     {
     847             :         /*
     848             :          * Almost the same thing as clauselist_selectivity, but with the
     849             :          * clauses connected by OR.
     850             :          */
     851        9536 :         s1 = clauselist_selectivity_or(root,
     852             :                                        ((BoolExpr *) clause)->args,
     853             :                                        varRelid,
     854             :                                        jointype,
     855             :                                        sjinfo,
     856             :                                        use_extended_stats);
     857             :     }
     858      516840 :     else if (is_opclause(clause) || IsA(clause, DistinctExpr))
     859      480624 :     {
     860      480624 :         OpExpr     *opclause = (OpExpr *) clause;
     861      480624 :         Oid         opno = opclause->opno;
     862             : 
     863      480624 :         if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
     864             :         {
     865             :             /* Estimate selectivity for a join clause. */
     866      126586 :             s1 = join_selectivity(root, opno,
     867             :                                   opclause->args,
     868             :                                   opclause->inputcollid,
     869             :                                   jointype,
     870             :                                   sjinfo);
     871             :         }
     872             :         else
     873             :         {
     874             :             /* Estimate selectivity for a restriction clause. */
     875      354038 :             s1 = restriction_selectivity(root, opno,
     876             :                                          opclause->args,
     877             :                                          opclause->inputcollid,
     878             :                                          varRelid);
     879             :         }
     880             : 
     881             :         /*
     882             :          * DistinctExpr has the same representation as OpExpr, but the
     883             :          * contained operator is "=" not "<>", so we must negate the result.
     884             :          * This estimation method doesn't give the right behavior for nulls,
     885             :          * but it's better than doing nothing.
     886             :          */
     887      480624 :         if (IsA(clause, DistinctExpr))
     888         426 :             s1 = 1.0 - s1;
     889             :     }
     890       36216 :     else if (is_funcclause(clause))
     891             :     {
     892        5866 :         FuncExpr   *funcclause = (FuncExpr *) clause;
     893             : 
     894             :         /* Try to get an estimate from the support function, if any */
     895        5866 :         s1 = function_selectivity(root,
     896             :                                   funcclause->funcid,
     897             :                                   funcclause->args,
     898             :                                   funcclause->inputcollid,
     899        5866 :                                   treat_as_join_clause(root, clause, rinfo,
     900             :                                                        varRelid, sjinfo),
     901             :                                   varRelid,
     902             :                                   jointype,
     903             :                                   sjinfo);
     904             :     }
     905       30350 :     else if (IsA(clause, ScalarArrayOpExpr))
     906             :     {
     907             :         /* Use node specific selectivity calculation function */
     908       14932 :         s1 = scalararraysel(root,
     909             :                             (ScalarArrayOpExpr *) clause,
     910       14932 :                             treat_as_join_clause(root, clause, rinfo,
     911             :                                                  varRelid, sjinfo),
     912             :                             varRelid,
     913             :                             jointype,
     914             :                             sjinfo);
     915             :     }
     916       15418 :     else if (IsA(clause, RowCompareExpr))
     917             :     {
     918             :         /* Use node specific selectivity calculation function */
     919         104 :         s1 = rowcomparesel(root,
     920             :                            (RowCompareExpr *) clause,
     921             :                            varRelid,
     922             :                            jointype,
     923             :                            sjinfo);
     924             :     }
     925       15314 :     else if (IsA(clause, NullTest))
     926             :     {
     927             :         /* Use node specific selectivity calculation function */
     928       13450 :         s1 = nulltestsel(root,
     929             :                          ((NullTest *) clause)->nulltesttype,
     930       13450 :                          (Node *) ((NullTest *) clause)->arg,
     931             :                          varRelid,
     932             :                          jointype,
     933             :                          sjinfo);
     934             :     }
     935        1864 :     else if (IsA(clause, BooleanTest))
     936             :     {
     937             :         /* Use node specific selectivity calculation function */
     938         132 :         s1 = booltestsel(root,
     939             :                          ((BooleanTest *) clause)->booltesttype,
     940         132 :                          (Node *) ((BooleanTest *) clause)->arg,
     941             :                          varRelid,
     942             :                          jointype,
     943             :                          sjinfo);
     944             :     }
     945        1732 :     else if (IsA(clause, CurrentOfExpr))
     946             :     {
     947             :         /* CURRENT OF selects at most one row of its table */
     948         384 :         CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
     949         384 :         RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
     950             : 
     951         384 :         if (crel->tuples > 0)
     952         364 :             s1 = 1.0 / crel->tuples;
     953             :     }
     954        1348 :     else if (IsA(clause, RelabelType))
     955             :     {
     956             :         /* Not sure this case is needed, but it can't hurt */
     957           0 :         s1 = clause_selectivity_ext(root,
     958           0 :                                     (Node *) ((RelabelType *) clause)->arg,
     959             :                                     varRelid,
     960             :                                     jointype,
     961             :                                     sjinfo,
     962             :                                     use_extended_stats);
     963             :     }
     964        1348 :     else if (IsA(clause, CoerceToDomain))
     965             :     {
     966             :         /* Not sure this case is needed, but it can't hurt */
     967           0 :         s1 = clause_selectivity_ext(root,
     968           0 :                                     (Node *) ((CoerceToDomain *) clause)->arg,
     969             :                                     varRelid,
     970             :                                     jointype,
     971             :                                     sjinfo,
     972             :                                     use_extended_stats);
     973             :     }
     974             :     else
     975             :     {
     976             :         /*
     977             :          * For anything else, see if we can consider it as a boolean variable.
     978             :          * This only works if it's an immutable expression in Vars of a single
     979             :          * relation; but there's no point in us checking that here because
     980             :          * boolvarsel() will do it internally, and return a suitable default
     981             :          * selectivity if not.
     982             :          */
     983        1348 :         s1 = boolvarsel(root, clause, varRelid);
     984             :     }
     985             : 
     986             :     /* Cache the result if possible */
     987      547512 :     if (cacheable)
     988             :     {
     989      386294 :         if (jointype == JOIN_INNER)
     990      314960 :             rinfo->norm_selec = s1;
     991             :         else
     992       71334 :             rinfo->outer_selec = s1;
     993             :     }
     994             : 
     995             : #ifdef SELECTIVITY_DEBUG
     996             :     elog(DEBUG4, "clause_selectivity: s1 %f", s1);
     997             : #endif                          /* SELECTIVITY_DEBUG */
     998             : 
     999      547512 :     return s1;
    1000             : }

Generated by: LCOV version 1.13