LCOV - code coverage report
Current view: top level - src/backend/optimizer/path - clausesel.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 229 240 95.4 %
Date: 2024-11-21 08:14:44 Functions: 8 8 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * clausesel.c
       4             :  *    Routines to compute clause selectivities
       5             :  *
       6             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/optimizer/path/clausesel.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "nodes/nodeFuncs.h"
      18             : #include "optimizer/clauses.h"
      19             : #include "optimizer/optimizer.h"
      20             : #include "optimizer/pathnode.h"
      21             : #include "optimizer/plancat.h"
      22             : #include "statistics/statistics.h"
      23             : #include "utils/fmgroids.h"
      24             : #include "utils/lsyscache.h"
      25             : #include "utils/selfuncs.h"
      26             : 
      27             : /*
      28             :  * Data structure for accumulating info about possible range-query
      29             :  * clause pairs in clauselist_selectivity.
      30             :  */
      31             : typedef struct RangeQueryClause
      32             : {
      33             :     struct RangeQueryClause *next;  /* next in linked list */
      34             :     Node       *var;            /* The common variable of the clauses */
      35             :     bool        have_lobound;   /* found a low-bound clause yet? */
      36             :     bool        have_hibound;   /* found a high-bound clause yet? */
      37             :     Selectivity lobound;        /* Selectivity of a var > something clause */
      38             :     Selectivity hibound;        /* Selectivity of a var < something clause */
      39             : } RangeQueryClause;
      40             : 
      41             : static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
      42             :                            bool varonleft, bool isLTsel, Selectivity s2);
      43             : static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
      44             :                                                List *clauses);
      45             : static Selectivity clauselist_selectivity_or(PlannerInfo *root,
      46             :                                              List *clauses,
      47             :                                              int varRelid,
      48             :                                              JoinType jointype,
      49             :                                              SpecialJoinInfo *sjinfo,
      50             :                                              bool use_extended_stats);
      51             : 
      52             : /****************************************************************************
      53             :  *      ROUTINES TO COMPUTE SELECTIVITIES
      54             :  ****************************************************************************/
      55             : 
      56             : /*
      57             :  * clauselist_selectivity -
      58             :  *    Compute the selectivity of an implicitly-ANDed list of boolean
      59             :  *    expression clauses.  The list can be empty, in which case 1.0
      60             :  *    must be returned.  List elements may be either RestrictInfos
      61             :  *    or bare expression clauses --- the former is preferred since
      62             :  *    it allows caching of results.
      63             :  *
      64             :  * See clause_selectivity() for the meaning of the additional parameters.
      65             :  *
      66             :  * The basic approach is to apply extended statistics first, on as many
      67             :  * clauses as possible, in order to capture cross-column dependencies etc.
      68             :  * The remaining clauses are then estimated by taking the product of their
      69             :  * selectivities, but that's only right if they have independent
      70             :  * probabilities, and in reality they are often NOT independent even if they
      71             :  * only refer to a single column.  So, we want to be smarter where we can.
      72             :  *
      73             :  * We also recognize "range queries", such as "x > 34 AND x < 42".  Clauses
      74             :  * are recognized as possible range query components if they are restriction
      75             :  * opclauses whose operators have scalarltsel or a related function as their
      76             :  * restriction selectivity estimator.  We pair up clauses of this form that
      77             :  * refer to the same variable.  An unpairable clause of this kind is simply
      78             :  * multiplied into the selectivity product in the normal way.  But when we
      79             :  * find a pair, we know that the selectivities represent the relative
      80             :  * positions of the low and high bounds within the column's range, so instead
      81             :  * of figuring the selectivity as hisel * losel, we can figure it as hisel +
      82             :  * losel - 1.  (To visualize this, see that hisel is the fraction of the range
      83             :  * below the high bound, while losel is the fraction above the low bound; so
      84             :  * hisel can be interpreted directly as a 0..1 value but we need to convert
      85             :  * losel to 1-losel before interpreting it as a value.  Then the available
      86             :  * range is 1-losel to hisel.  However, this calculation double-excludes
      87             :  * nulls, so really we need hisel + losel + null_frac - 1.)
      88             :  *
      89             :  * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
      90             :  * and instead use DEFAULT_RANGE_INEQ_SEL.  The same applies if the equation
      91             :  * yields an impossible (negative) result.
      92             :  *
      93             :  * A free side-effect is that we can recognize redundant inequalities such
      94             :  * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
      95             :  *
      96             :  * Of course this is all very dependent on the behavior of the inequality
      97             :  * selectivity functions; perhaps some day we can generalize the approach.
      98             :  */
      99             : Selectivity
     100     2364742 : clauselist_selectivity(PlannerInfo *root,
     101             :                        List *clauses,
     102             :                        int varRelid,
     103             :                        JoinType jointype,
     104             :                        SpecialJoinInfo *sjinfo)
     105             : {
     106     2364742 :     return clauselist_selectivity_ext(root, clauses, varRelid,
     107             :                                       jointype, sjinfo, true);
     108             : }
     109             : 
     110             : /*
     111             :  * clauselist_selectivity_ext -
     112             :  *    Extended version of clauselist_selectivity().  If "use_extended_stats"
     113             :  *    is false, all extended statistics will be ignored, and only per-column
     114             :  *    statistics will be used.
     115             :  */
     116             : Selectivity
     117     2370842 : clauselist_selectivity_ext(PlannerInfo *root,
     118             :                            List *clauses,
     119             :                            int varRelid,
     120             :                            JoinType jointype,
     121             :                            SpecialJoinInfo *sjinfo,
     122             :                            bool use_extended_stats)
     123             : {
     124     2370842 :     Selectivity s1 = 1.0;
     125             :     RelOptInfo *rel;
     126     2370842 :     Bitmapset  *estimatedclauses = NULL;
     127     2370842 :     RangeQueryClause *rqlist = NULL;
     128             :     ListCell   *l;
     129             :     int         listidx;
     130             : 
     131             :     /*
     132             :      * If there's exactly one clause, just go directly to
     133             :      * clause_selectivity_ext(). None of what we might do below is relevant.
     134             :      */
     135     2370842 :     if (list_length(clauses) == 1)
     136     1205042 :         return clause_selectivity_ext(root, (Node *) linitial(clauses),
     137             :                                       varRelid, jointype, sjinfo,
     138             :                                       use_extended_stats);
     139             : 
     140             :     /*
     141             :      * Determine if these clauses reference a single relation.  If so, and if
     142             :      * it has extended statistics, try to apply those.
     143             :      */
     144     1165800 :     rel = find_single_rel_for_clauses(root, clauses);
     145     1165800 :     if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
     146             :     {
     147             :         /*
     148             :          * Estimate as many clauses as possible using extended statistics.
     149             :          *
     150             :          * 'estimatedclauses' is populated with the 0-based list position
     151             :          * index of clauses estimated here, and that should be ignored below.
     152             :          */
     153        1734 :         s1 = statext_clauselist_selectivity(root, clauses, varRelid,
     154             :                                             jointype, sjinfo, rel,
     155             :                                             &estimatedclauses, false);
     156             :     }
     157             : 
     158             :     /*
     159             :      * Apply normal selectivity estimates for remaining clauses. We'll be
     160             :      * careful to skip any clauses which were already estimated above.
     161             :      *
     162             :      * Anything that doesn't look like a potential rangequery clause gets
     163             :      * multiplied into s1 and forgotten. Anything that does gets inserted into
     164             :      * an rqlist entry.
     165             :      */
     166     1165800 :     listidx = -1;
     167     1918096 :     foreach(l, clauses)
     168             :     {
     169      752296 :         Node       *clause = (Node *) lfirst(l);
     170             :         RestrictInfo *rinfo;
     171             :         Selectivity s2;
     172             : 
     173      752296 :         listidx++;
     174             : 
     175             :         /*
     176             :          * Skip this clause if it's already been estimated by some other
     177             :          * statistics above.
     178             :          */
     179      752296 :         if (bms_is_member(listidx, estimatedclauses))
     180        3402 :             continue;
     181             : 
     182             :         /* Compute the selectivity of this clause in isolation */
     183      748894 :         s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
     184             :                                     use_extended_stats);
     185             : 
     186             :         /*
     187             :          * Check for being passed a RestrictInfo.
     188             :          *
     189             :          * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
     190             :          * 0.0; just use that rather than looking for range pairs.
     191             :          */
     192      748894 :         if (IsA(clause, RestrictInfo))
     193             :         {
     194      747594 :             rinfo = (RestrictInfo *) clause;
     195      747594 :             if (rinfo->pseudoconstant)
     196             :             {
     197       25562 :                 s1 = s1 * s2;
     198       25562 :                 continue;
     199             :             }
     200      722032 :             clause = (Node *) rinfo->clause;
     201             :         }
     202             :         else
     203        1300 :             rinfo = NULL;
     204             : 
     205             :         /*
     206             :          * See if it looks like a restriction clause with a pseudoconstant on
     207             :          * one side.  (Anything more complicated than that might not behave in
     208             :          * the simple way we are expecting.)  Most of the tests here can be
     209             :          * done more efficiently with rinfo than without.
     210             :          */
     211      723332 :         if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
     212             :         {
     213      620946 :             OpExpr     *expr = (OpExpr *) clause;
     214      620946 :             bool        varonleft = true;
     215             :             bool        ok;
     216             : 
     217      620946 :             if (rinfo)
     218             :             {
     219     1025306 :                 ok = (rinfo->num_base_rels == 1) &&
     220      399188 :                     (is_pseudo_constant_clause_relids(lsecond(expr->args),
     221        6456 :                                                       rinfo->right_relids) ||
     222        6456 :                      (varonleft = false,
     223        6456 :                       is_pseudo_constant_clause_relids(linitial(expr->args),
     224             :                                                        rinfo->left_relids)));
     225             :             }
     226             :             else
     227             :             {
     228        2496 :                 ok = (NumRelids(root, clause) == 1) &&
     229        1212 :                     (is_pseudo_constant_clause(lsecond(expr->args)) ||
     230           0 :                      (varonleft = false,
     231           0 :                       is_pseudo_constant_clause(linitial(expr->args))));
     232             :             }
     233             : 
     234      620946 :             if (ok)
     235             :             {
     236             :                 /*
     237             :                  * If it's not a "<"/"<="/">"/">=" operator, just merge the
     238             :                  * selectivity in generically.  But if it's the right oprrest,
     239             :                  * add the clause to rqlist for later processing.
     240             :                  */
     241      399808 :                 switch (get_oprrest(expr->opno))
     242             :                 {
     243       13242 :                     case F_SCALARLTSEL:
     244             :                     case F_SCALARLESEL:
     245       13242 :                         addRangeClause(&rqlist, clause,
     246             :                                        varonleft, true, s2);
     247       13242 :                         break;
     248       35326 :                     case F_SCALARGTSEL:
     249             :                     case F_SCALARGESEL:
     250       35326 :                         addRangeClause(&rqlist, clause,
     251             :                                        varonleft, false, s2);
     252       35326 :                         break;
     253      351240 :                     default:
     254             :                         /* Just merge the selectivity in generically */
     255      351240 :                         s1 = s1 * s2;
     256      351240 :                         break;
     257             :                 }
     258      399808 :                 continue;       /* drop to loop bottom */
     259             :             }
     260             :         }
     261             : 
     262             :         /* Not the right form, so treat it generically. */
     263      323524 :         s1 = s1 * s2;
     264             :     }
     265             : 
     266             :     /*
     267             :      * Now scan the rangequery pair list.
     268             :      */
     269     1205298 :     while (rqlist != NULL)
     270             :     {
     271             :         RangeQueryClause *rqnext;
     272             : 
     273       39498 :         if (rqlist->have_lobound && rqlist->have_hibound)
     274        8572 :         {
     275             :             /* Successfully matched a pair of range clauses */
     276             :             Selectivity s2;
     277             : 
     278             :             /*
     279             :              * Exact equality to the default value probably means the
     280             :              * selectivity function punted.  This is not airtight but should
     281             :              * be good enough.
     282             :              */
     283        8572 :             if (rqlist->hibound == DEFAULT_INEQ_SEL ||
     284        6426 :                 rqlist->lobound == DEFAULT_INEQ_SEL)
     285             :             {
     286        2146 :                 s2 = DEFAULT_RANGE_INEQ_SEL;
     287             :             }
     288             :             else
     289             :             {
     290        6426 :                 s2 = rqlist->hibound + rqlist->lobound - 1.0;
     291             : 
     292             :                 /* Adjust for double-exclusion of NULLs */
     293        6426 :                 s2 += nulltestsel(root, IS_NULL, rqlist->var,
     294             :                                   varRelid, jointype, sjinfo);
     295             : 
     296             :                 /*
     297             :                  * A zero or slightly negative s2 should be converted into a
     298             :                  * small positive value; we probably are dealing with a very
     299             :                  * tight range and got a bogus result due to roundoff errors.
     300             :                  * However, if s2 is very negative, then we probably have
     301             :                  * default selectivity estimates on one or both sides of the
     302             :                  * range that we failed to recognize above for some reason.
     303             :                  */
     304        6426 :                 if (s2 <= 0.0)
     305             :                 {
     306         950 :                     if (s2 < -0.01)
     307             :                     {
     308             :                         /*
     309             :                          * No data available --- use a default estimate that
     310             :                          * is small, but not real small.
     311             :                          */
     312          12 :                         s2 = DEFAULT_RANGE_INEQ_SEL;
     313             :                     }
     314             :                     else
     315             :                     {
     316             :                         /*
     317             :                          * It's just roundoff error; use a small positive
     318             :                          * value
     319             :                          */
     320         938 :                         s2 = 1.0e-10;
     321             :                     }
     322             :                 }
     323             :             }
     324             :             /* Merge in the selectivity of the pair of clauses */
     325        8572 :             s1 *= s2;
     326             :         }
     327             :         else
     328             :         {
     329             :             /* Only found one of a pair, merge it in generically */
     330       30926 :             if (rqlist->have_lobound)
     331       26352 :                 s1 *= rqlist->lobound;
     332             :             else
     333        4574 :                 s1 *= rqlist->hibound;
     334             :         }
     335             :         /* release storage and advance */
     336       39498 :         rqnext = rqlist->next;
     337       39498 :         pfree(rqlist);
     338       39498 :         rqlist = rqnext;
     339             :     }
     340             : 
     341     1165800 :     return s1;
     342             : }
     343             : 
     344             : /*
     345             :  * clauselist_selectivity_or -
     346             :  *    Compute the selectivity of an implicitly-ORed list of boolean
     347             :  *    expression clauses.  The list can be empty, in which case 0.0
     348             :  *    must be returned.  List elements may be either RestrictInfos
     349             :  *    or bare expression clauses --- the former is preferred since
     350             :  *    it allows caching of results.
     351             :  *
     352             :  * See clause_selectivity() for the meaning of the additional parameters.
     353             :  *
     354             :  * The basic approach is to apply extended statistics first, on as many
     355             :  * clauses as possible, in order to capture cross-column dependencies etc.
     356             :  * The remaining clauses are then estimated as if they were independent.
     357             :  */
     358             : static Selectivity
     359       12344 : clauselist_selectivity_or(PlannerInfo *root,
     360             :                           List *clauses,
     361             :                           int varRelid,
     362             :                           JoinType jointype,
     363             :                           SpecialJoinInfo *sjinfo,
     364             :                           bool use_extended_stats)
     365             : {
     366       12344 :     Selectivity s1 = 0.0;
     367             :     RelOptInfo *rel;
     368       12344 :     Bitmapset  *estimatedclauses = NULL;
     369             :     ListCell   *lc;
     370             :     int         listidx;
     371             : 
     372             :     /*
     373             :      * Determine if these clauses reference a single relation.  If so, and if
     374             :      * it has extended statistics, try to apply those.
     375             :      */
     376       12344 :     rel = find_single_rel_for_clauses(root, clauses);
     377       12344 :     if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
     378             :     {
     379             :         /*
     380             :          * Estimate as many clauses as possible using extended statistics.
     381             :          *
     382             :          * 'estimatedclauses' is populated with the 0-based list position
     383             :          * index of clauses estimated here, and that should be ignored below.
     384             :          */
     385         108 :         s1 = statext_clauselist_selectivity(root, clauses, varRelid,
     386             :                                             jointype, sjinfo, rel,
     387             :                                             &estimatedclauses, true);
     388             :     }
     389             : 
     390             :     /*
     391             :      * Estimate the remaining clauses as if they were independent.
     392             :      *
     393             :      * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
     394             :      * for the probable overlap of selected tuple sets.
     395             :      *
     396             :      * XXX is this too conservative?
     397             :      */
     398       12344 :     listidx = -1;
     399       40542 :     foreach(lc, clauses)
     400             :     {
     401             :         Selectivity s2;
     402             : 
     403       28198 :         listidx++;
     404             : 
     405             :         /*
     406             :          * Skip this clause if it's already been estimated by some other
     407             :          * statistics above.
     408             :          */
     409       28198 :         if (bms_is_member(listidx, estimatedclauses))
     410         240 :             continue;
     411             : 
     412       27958 :         s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid,
     413             :                                     jointype, sjinfo, use_extended_stats);
     414             : 
     415       27958 :         s1 = s1 + s2 - s1 * s2;
     416             :     }
     417             : 
     418       12344 :     return s1;
     419             : }
     420             : 
     421             : /*
     422             :  * addRangeClause --- add a new range clause for clauselist_selectivity
     423             :  *
     424             :  * Here is where we try to match up pairs of range-query clauses
     425             :  */
     426             : static void
     427       48568 : addRangeClause(RangeQueryClause **rqlist, Node *clause,
     428             :                bool varonleft, bool isLTsel, Selectivity s2)
     429             : {
     430             :     RangeQueryClause *rqelem;
     431             :     Node       *var;
     432             :     bool        is_lobound;
     433             : 
     434       48568 :     if (varonleft)
     435             :     {
     436       48346 :         var = get_leftop((Expr *) clause);
     437       48346 :         is_lobound = !isLTsel;  /* x < something is high bound */
     438             :     }
     439             :     else
     440             :     {
     441         222 :         var = get_rightop((Expr *) clause);
     442         222 :         is_lobound = isLTsel;   /* something < x is low bound */
     443             :     }
     444             : 
     445       49802 :     for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
     446             :     {
     447             :         /*
     448             :          * We use full equal() here because the "var" might be a function of
     449             :          * one or more attributes of the same relation...
     450             :          */
     451       10304 :         if (!equal(var, rqelem->var))
     452        1234 :             continue;
     453             :         /* Found the right group to put this clause in */
     454        9070 :         if (is_lobound)
     455             :         {
     456         420 :             if (!rqelem->have_lobound)
     457             :             {
     458         180 :                 rqelem->have_lobound = true;
     459         180 :                 rqelem->lobound = s2;
     460             :             }
     461             :             else
     462             :             {
     463             : 
     464             :                 /*------
     465             :                  * We have found two similar clauses, such as
     466             :                  * x < y AND x <= z.
     467             :                  * Keep only the more restrictive one.
     468             :                  *------
     469             :                  */
     470         240 :                 if (rqelem->lobound > s2)
     471          60 :                     rqelem->lobound = s2;
     472             :             }
     473             :         }
     474             :         else
     475             :         {
     476        8650 :             if (!rqelem->have_hibound)
     477             :             {
     478        8392 :                 rqelem->have_hibound = true;
     479        8392 :                 rqelem->hibound = s2;
     480             :             }
     481             :             else
     482             :             {
     483             : 
     484             :                 /*------
     485             :                  * We have found two similar clauses, such as
     486             :                  * x > y AND x >= z.
     487             :                  * Keep only the more restrictive one.
     488             :                  *------
     489             :                  */
     490         258 :                 if (rqelem->hibound > s2)
     491          96 :                     rqelem->hibound = s2;
     492             :             }
     493             :         }
     494        9070 :         return;
     495             :     }
     496             : 
     497             :     /* No matching var found, so make a new clause-pair data structure */
     498       39498 :     rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
     499       39498 :     rqelem->var = var;
     500       39498 :     if (is_lobound)
     501             :     {
     502       34744 :         rqelem->have_lobound = true;
     503       34744 :         rqelem->have_hibound = false;
     504       34744 :         rqelem->lobound = s2;
     505             :     }
     506             :     else
     507             :     {
     508        4754 :         rqelem->have_lobound = false;
     509        4754 :         rqelem->have_hibound = true;
     510        4754 :         rqelem->hibound = s2;
     511             :     }
     512       39498 :     rqelem->next = *rqlist;
     513       39498 :     *rqlist = rqelem;
     514             : }
     515             : 
     516             : /*
     517             :  * find_single_rel_for_clauses
     518             :  *      Examine each clause in 'clauses' and determine if all clauses
     519             :  *      reference only a single relation.  If so return that relation,
     520             :  *      otherwise return NULL.
     521             :  */
     522             : static RelOptInfo *
     523     1180642 : find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
     524             : {
     525     1180642 :     int         lastrelid = 0;
     526             :     ListCell   *l;
     527             : 
     528     1625406 :     foreach(l, clauses)
     529             :     {
     530      631074 :         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
     531             :         int         relid;
     532             : 
     533             :         /*
     534             :          * If we have a list of bare clauses rather than RestrictInfos, we
     535             :          * could pull out their relids the hard way with pull_varnos().
     536             :          * However, currently the extended-stats machinery won't do anything
     537             :          * with non-RestrictInfo clauses anyway, so there's no point in
     538             :          * spending extra cycles; just fail if that's what we have.
     539             :          *
     540             :          * An exception to that rule is if we have a bare BoolExpr AND clause.
     541             :          * We treat this as a special case because the restrictinfo machinery
     542             :          * doesn't build RestrictInfos on top of AND clauses.
     543             :          */
     544      631074 :         if (is_andclause(rinfo))
     545             :         {
     546             :             RelOptInfo *rel;
     547             : 
     548        2498 :             rel = find_single_rel_for_clauses(root,
     549             :                                               ((BoolExpr *) rinfo)->args);
     550             : 
     551        2498 :             if (rel == NULL)
     552      186310 :                 return NULL;
     553        1990 :             if (lastrelid == 0)
     554         990 :                 lastrelid = rel->relid;
     555        1000 :             else if (rel->relid != lastrelid)
     556           0 :                 return NULL;
     557             : 
     558       35880 :             continue;
     559             :         }
     560             : 
     561      628576 :         if (!IsA(rinfo, RestrictInfo))
     562        1106 :             return NULL;
     563             : 
     564      627470 :         if (bms_is_empty(rinfo->clause_relids))
     565       33890 :             continue;           /* we can ignore variable-free clauses */
     566      593580 :         if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
     567      181464 :             return NULL;        /* multiple relations in this clause */
     568      412116 :         if (lastrelid == 0)
     569      204448 :             lastrelid = relid;  /* first clause referencing a relation */
     570      207668 :         else if (relid != lastrelid)
     571        3232 :             return NULL;        /* relation not same as last one */
     572             :     }
     573             : 
     574      994332 :     if (lastrelid != 0)
     575      161684 :         return find_base_rel(root, lastrelid);
     576             : 
     577      832648 :     return NULL;                /* no clauses */
     578             : }
     579             : 
     580             : /*
     581             :  * treat_as_join_clause -
     582             :  *    Decide whether an operator clause is to be handled by the
     583             :  *    restriction or join estimator.  Subroutine for clause_selectivity().
     584             :  */
     585             : static inline bool
     586      846568 : treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
     587             :                      int varRelid, SpecialJoinInfo *sjinfo)
     588             : {
     589      846568 :     if (varRelid != 0)
     590             :     {
     591             :         /*
     592             :          * Caller is forcing restriction mode (eg, because we are examining an
     593             :          * inner indexscan qual).
     594             :          */
     595      310624 :         return false;
     596             :     }
     597      535944 :     else if (sjinfo == NULL)
     598             :     {
     599             :         /*
     600             :          * It must be a restriction clause, since it's being evaluated at a
     601             :          * scan node.
     602             :          */
     603      328186 :         return false;
     604             :     }
     605             :     else
     606             :     {
     607             :         /*
     608             :          * Otherwise, it's a join if there's more than one base relation used.
     609             :          * We can optimize this calculation if an rinfo was passed.
     610             :          *
     611             :          * XXX  Since we know the clause is being evaluated at a join, the
     612             :          * only way it could be single-relation is if it was delayed by outer
     613             :          * joins.  We intentionally count only baserels here, not OJs that
     614             :          * might be present in rinfo->clause_relids, so that we direct such
     615             :          * cases to the restriction qual estimators not join estimators.
     616             :          * Eventually some notice should be taken of the possibility of
     617             :          * injected nulls, but we'll likely want to do that in the restriction
     618             :          * estimators rather than starting to treat such cases as join quals.
     619             :          */
     620      207758 :         if (rinfo)
     621      207296 :             return (rinfo->num_base_rels > 1);
     622             :         else
     623         462 :             return (NumRelids(root, clause) > 1);
     624             :     }
     625             : }
     626             : 
     627             : 
     628             : /*
     629             :  * clause_selectivity -
     630             :  *    Compute the selectivity of a general boolean expression clause.
     631             :  *
     632             :  * The clause can be either a RestrictInfo or a plain expression.  If it's
     633             :  * a RestrictInfo, we try to cache the selectivity for possible re-use,
     634             :  * so passing RestrictInfos is preferred.
     635             :  *
     636             :  * varRelid is either 0 or a rangetable index.
     637             :  *
     638             :  * When varRelid is not 0, only variables belonging to that relation are
     639             :  * considered in computing selectivity; other vars are treated as constants
     640             :  * of unknown values.  This is appropriate for estimating the selectivity of
     641             :  * a join clause that is being used as a restriction clause in a scan of a
     642             :  * nestloop join's inner relation --- varRelid should then be the ID of the
     643             :  * inner relation.
     644             :  *
     645             :  * When varRelid is 0, all variables are treated as variables.  This
     646             :  * is appropriate for ordinary join clauses and restriction clauses.
     647             :  *
     648             :  * jointype is the join type, if the clause is a join clause.  Pass JOIN_INNER
     649             :  * if the clause isn't a join clause.
     650             :  *
     651             :  * sjinfo is NULL for a non-join clause, otherwise it provides additional
     652             :  * context information about the join being performed.  There are some
     653             :  * special cases:
     654             :  *  1. For a special (not INNER) join, sjinfo is always a member of
     655             :  *     root->join_info_list.
     656             :  *  2. For an INNER join, sjinfo is just a transient struct, and only the
     657             :  *     relids and jointype fields in it can be trusted.
     658             :  * It is possible for jointype to be different from sjinfo->jointype.
     659             :  * This indicates we are considering a variant join: either with
     660             :  * the LHS and RHS switched, or with one input unique-ified.
     661             :  *
     662             :  * Note: when passing nonzero varRelid, it's normally appropriate to set
     663             :  * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
     664             :  * join clause; because we aren't treating it as a join clause.
     665             :  */
     666             : Selectivity
     667      460066 : clause_selectivity(PlannerInfo *root,
     668             :                    Node *clause,
     669             :                    int varRelid,
     670             :                    JoinType jointype,
     671             :                    SpecialJoinInfo *sjinfo)
     672             : {
     673      460066 :     return clause_selectivity_ext(root, clause, varRelid,
     674             :                                   jointype, sjinfo, true);
     675             : }
     676             : 
     677             : /*
     678             :  * clause_selectivity_ext -
     679             :  *    Extended version of clause_selectivity().  If "use_extended_stats" is
     680             :  *    false, all extended statistics will be ignored, and only per-column
     681             :  *    statistics will be used.
     682             :  */
     683             : Selectivity
     684     2450336 : clause_selectivity_ext(PlannerInfo *root,
     685             :                        Node *clause,
     686             :                        int varRelid,
     687             :                        JoinType jointype,
     688             :                        SpecialJoinInfo *sjinfo,
     689             :                        bool use_extended_stats)
     690             : {
     691     2450336 :     Selectivity s1 = 0.5;       /* default for any unhandled clause type */
     692     2450336 :     RestrictInfo *rinfo = NULL;
     693     2450336 :     bool        cacheable = false;
     694             : 
     695     2450336 :     if (clause == NULL)         /* can this still happen? */
     696           0 :         return s1;
     697             : 
     698     2450336 :     if (IsA(clause, RestrictInfo))
     699             :     {
     700     2431678 :         rinfo = (RestrictInfo *) clause;
     701             : 
     702             :         /*
     703             :          * If the clause is marked pseudoconstant, then it will be used as a
     704             :          * gating qual and should not affect selectivity estimates; hence
     705             :          * return 1.0.  The only exception is that a constant FALSE may be
     706             :          * taken as having selectivity 0.0, since it will surely mean no rows
     707             :          * out of the plan.  This case is simple enough that we need not
     708             :          * bother caching the result.
     709             :          */
     710     2431678 :         if (rinfo->pseudoconstant)
     711             :         {
     712       25904 :             if (!IsA(rinfo->clause, Const))
     713       25698 :                 return (Selectivity) 1.0;
     714             :         }
     715             : 
     716             :         /*
     717             :          * If possible, cache the result of the selectivity calculation for
     718             :          * the clause.  We can cache if varRelid is zero or the clause
     719             :          * contains only vars of that relid --- otherwise varRelid will affect
     720             :          * the result, so mustn't cache.  Outer join quals might be examined
     721             :          * with either their join's actual jointype or JOIN_INNER, so we need
     722             :          * two cache variables to remember both cases.  Note: we assume the
     723             :          * result won't change if we are switching the input relations or
     724             :          * considering a unique-ified case, so we only need one cache variable
     725             :          * for all non-JOIN_INNER cases.
     726             :          */
     727     2405980 :         if (varRelid == 0 ||
     728      987700 :             rinfo->num_base_rels == 0 ||
     729     1676648 :             (rinfo->num_base_rels == 1 &&
     730      688948 :              bms_is_member(varRelid, rinfo->clause_relids)))
     731             :         {
     732             :             /* Cacheable --- do we already have the result? */
     733     2104952 :             if (jointype == JOIN_INNER)
     734             :             {
     735     1810572 :                 if (rinfo->norm_selec >= 0)
     736     1311716 :                     return rinfo->norm_selec;
     737             :             }
     738             :             else
     739             :             {
     740      294380 :                 if (rinfo->outer_selec >= 0)
     741      185594 :                     return rinfo->outer_selec;
     742             :             }
     743      607642 :             cacheable = true;
     744             :         }
     745             : 
     746             :         /*
     747             :          * Proceed with examination of contained clause.  If the clause is an
     748             :          * OR-clause, we want to look at the variant with sub-RestrictInfos,
     749             :          * so that per-subclause selectivities can be cached.
     750             :          */
     751      908670 :         if (rinfo->orclause)
     752       12304 :             clause = (Node *) rinfo->orclause;
     753             :         else
     754      896366 :             clause = (Node *) rinfo->clause;
     755             :     }
     756             : 
     757      927328 :     if (IsA(clause, Var))
     758             :     {
     759       40472 :         Var        *var = (Var *) clause;
     760             : 
     761             :         /*
     762             :          * We probably shouldn't ever see an uplevel Var here, but if we do,
     763             :          * return the default selectivity...
     764             :          */
     765       40472 :         if (var->varlevelsup == 0 &&
     766         676 :             (varRelid == 0 || varRelid == (int) var->varno))
     767             :         {
     768             :             /* Use the restriction selectivity function for a bool Var */
     769       40158 :             s1 = boolvarsel(root, (Node *) var, varRelid);
     770             :         }
     771             :     }
     772      886856 :     else if (IsA(clause, Const))
     773             :     {
     774             :         /* bool constant is pretty easy... */
     775        3588 :         Const      *con = (Const *) clause;
     776             : 
     777        7060 :         s1 = con->constisnull ? 0.0 :
     778        3472 :             DatumGetBool(con->constvalue) ? 1.0 : 0.0;
     779             :     }
     780      883268 :     else if (IsA(clause, Param))
     781             :     {
     782             :         /* see if we can replace the Param */
     783          24 :         Node       *subst = estimate_expression_value(root, clause);
     784             : 
     785          24 :         if (IsA(subst, Const))
     786             :         {
     787             :             /* bool constant is pretty easy... */
     788           0 :             Const      *con = (Const *) subst;
     789             : 
     790           0 :             s1 = con->constisnull ? 0.0 :
     791           0 :                 DatumGetBool(con->constvalue) ? 1.0 : 0.0;
     792             :         }
     793             :         else
     794             :         {
     795             :             /* XXX any way to do better than default? */
     796             :         }
     797             :     }
     798      883244 :     else if (is_notclause(clause))
     799             :     {
     800             :         /* inverse of the selectivity of the underlying clause */
     801        8136 :         s1 = 1.0 - clause_selectivity_ext(root,
     802        8136 :                                           (Node *) get_notclausearg((Expr *) clause),
     803             :                                           varRelid,
     804             :                                           jointype,
     805             :                                           sjinfo,
     806             :                                           use_extended_stats);
     807             :     }
     808      875108 :     else if (is_andclause(clause))
     809             :     {
     810             :         /* share code with clauselist_selectivity() */
     811        3226 :         s1 = clauselist_selectivity_ext(root,
     812             :                                         ((BoolExpr *) clause)->args,
     813             :                                         varRelid,
     814             :                                         jointype,
     815             :                                         sjinfo,
     816             :                                         use_extended_stats);
     817             :     }
     818      871882 :     else if (is_orclause(clause))
     819             :     {
     820             :         /*
     821             :          * Almost the same thing as clauselist_selectivity, but with the
     822             :          * clauses connected by OR.
     823             :          */
     824       12344 :         s1 = clauselist_selectivity_or(root,
     825             :                                        ((BoolExpr *) clause)->args,
     826             :                                        varRelid,
     827             :                                        jointype,
     828             :                                        sjinfo,
     829             :                                        use_extended_stats);
     830             :     }
     831      859538 :     else if (is_opclause(clause) || IsA(clause, DistinctExpr))
     832      815894 :     {
     833      815918 :         OpExpr     *opclause = (OpExpr *) clause;
     834      815918 :         Oid         opno = opclause->opno;
     835             : 
     836      815918 :         if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
     837             :         {
     838             :             /* Estimate selectivity for a join clause. */
     839      198990 :             s1 = join_selectivity(root, opno,
     840             :                                   opclause->args,
     841             :                                   opclause->inputcollid,
     842             :                                   jointype,
     843             :                                   sjinfo);
     844             :         }
     845             :         else
     846             :         {
     847             :             /* Estimate selectivity for a restriction clause. */
     848      616928 :             s1 = restriction_selectivity(root, opno,
     849             :                                          opclause->args,
     850             :                                          opclause->inputcollid,
     851             :                                          varRelid);
     852             :         }
     853             : 
     854             :         /*
     855             :          * DistinctExpr has the same representation as OpExpr, but the
     856             :          * contained operator is "=" not "<>", so we must negate the result.
     857             :          * This estimation method doesn't give the right behavior for nulls,
     858             :          * but it's better than doing nothing.
     859             :          */
     860      815894 :         if (IsA(clause, DistinctExpr))
     861         792 :             s1 = 1.0 - s1;
     862             :     }
     863       43620 :     else if (is_funcclause(clause))
     864             :     {
     865       10868 :         FuncExpr   *funcclause = (FuncExpr *) clause;
     866             : 
     867             :         /* Try to get an estimate from the support function, if any */
     868       10868 :         s1 = function_selectivity(root,
     869             :                                   funcclause->funcid,
     870             :                                   funcclause->args,
     871             :                                   funcclause->inputcollid,
     872       10868 :                                   treat_as_join_clause(root, clause, rinfo,
     873             :                                                        varRelid, sjinfo),
     874             :                                   varRelid,
     875             :                                   jointype,
     876             :                                   sjinfo);
     877             :     }
     878       32752 :     else if (IsA(clause, ScalarArrayOpExpr))
     879             :     {
     880             :         /* Use node specific selectivity calculation function */
     881       19782 :         s1 = scalararraysel(root,
     882             :                             (ScalarArrayOpExpr *) clause,
     883       19782 :                             treat_as_join_clause(root, clause, rinfo,
     884             :                                                  varRelid, sjinfo),
     885             :                             varRelid,
     886             :                             jointype,
     887             :                             sjinfo);
     888             :     }
     889       12970 :     else if (IsA(clause, RowCompareExpr))
     890             :     {
     891             :         /* Use node specific selectivity calculation function */
     892         156 :         s1 = rowcomparesel(root,
     893             :                            (RowCompareExpr *) clause,
     894             :                            varRelid,
     895             :                            jointype,
     896             :                            sjinfo);
     897             :     }
     898       12814 :     else if (IsA(clause, NullTest))
     899             :     {
     900             :         /* Use node specific selectivity calculation function */
     901        9340 :         s1 = nulltestsel(root,
     902             :                          ((NullTest *) clause)->nulltesttype,
     903        9340 :                          (Node *) ((NullTest *) clause)->arg,
     904             :                          varRelid,
     905             :                          jointype,
     906             :                          sjinfo);
     907             :     }
     908        3474 :     else if (IsA(clause, BooleanTest))
     909             :     {
     910             :         /* Use node specific selectivity calculation function */
     911         838 :         s1 = booltestsel(root,
     912             :                          ((BooleanTest *) clause)->booltesttype,
     913         838 :                          (Node *) ((BooleanTest *) clause)->arg,
     914             :                          varRelid,
     915             :                          jointype,
     916             :                          sjinfo);
     917             :     }
     918        2636 :     else if (IsA(clause, CurrentOfExpr))
     919             :     {
     920             :         /* CURRENT OF selects at most one row of its table */
     921         406 :         CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
     922         406 :         RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
     923             : 
     924         406 :         if (crel->tuples > 0)
     925         392 :             s1 = 1.0 / crel->tuples;
     926             :     }
     927        2230 :     else if (IsA(clause, RelabelType))
     928             :     {
     929             :         /* Not sure this case is needed, but it can't hurt */
     930           0 :         s1 = clause_selectivity_ext(root,
     931           0 :                                     (Node *) ((RelabelType *) clause)->arg,
     932             :                                     varRelid,
     933             :                                     jointype,
     934             :                                     sjinfo,
     935             :                                     use_extended_stats);
     936             :     }
     937        2230 :     else if (IsA(clause, CoerceToDomain))
     938             :     {
     939             :         /* Not sure this case is needed, but it can't hurt */
     940           0 :         s1 = clause_selectivity_ext(root,
     941           0 :                                     (Node *) ((CoerceToDomain *) clause)->arg,
     942             :                                     varRelid,
     943             :                                     jointype,
     944             :                                     sjinfo,
     945             :                                     use_extended_stats);
     946             :     }
     947             :     else
     948             :     {
     949             :         /*
     950             :          * For anything else, see if we can consider it as a boolean variable.
     951             :          * This only works if it's an immutable expression in Vars of a single
     952             :          * relation; but there's no point in us checking that here because
     953             :          * boolvarsel() will do it internally, and return a suitable default
     954             :          * selectivity if not.
     955             :          */
     956        2230 :         s1 = boolvarsel(root, clause, varRelid);
     957             :     }
     958             : 
     959             :     /* Cache the result if possible */
     960      927304 :     if (cacheable)
     961             :     {
     962      607618 :         if (jointype == JOIN_INNER)
     963      498832 :             rinfo->norm_selec = s1;
     964             :         else
     965      108786 :             rinfo->outer_selec = s1;
     966             :     }
     967             : 
     968             : #ifdef SELECTIVITY_DEBUG
     969             :     elog(DEBUG4, "clause_selectivity: s1 %f", s1);
     970             : #endif                          /* SELECTIVITY_DEBUG */
     971             : 
     972      927304 :     return s1;
     973             : }

Generated by: LCOV version 1.14