LCOV - code coverage report
Current view: top level - src/backend/optimizer/path - clausesel.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 228 240 95.0 %
Date: 2023-11-29 05:10:53 Functions: 8 8 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * clausesel.c
       4             :  *    Routines to compute clause selectivities
       5             :  *
       6             :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/optimizer/path/clausesel.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "nodes/makefuncs.h"
      18             : #include "nodes/nodeFuncs.h"
      19             : #include "optimizer/clauses.h"
      20             : #include "optimizer/cost.h"
      21             : #include "optimizer/optimizer.h"
      22             : #include "optimizer/pathnode.h"
      23             : #include "optimizer/plancat.h"
      24             : #include "statistics/statistics.h"
      25             : #include "utils/fmgroids.h"
      26             : #include "utils/lsyscache.h"
      27             : #include "utils/selfuncs.h"
      28             : 
      29             : /*
      30             :  * Data structure for accumulating info about possible range-query
      31             :  * clause pairs in clauselist_selectivity.
      32             :  */
      33             : typedef struct RangeQueryClause
      34             : {
      35             :     struct RangeQueryClause *next;  /* next in linked list */
      36             :     Node       *var;            /* The common variable of the clauses */
      37             :     bool        have_lobound;   /* found a low-bound clause yet? */
      38             :     bool        have_hibound;   /* found a high-bound clause yet? */
      39             :     Selectivity lobound;        /* Selectivity of a var > something clause */
      40             :     Selectivity hibound;        /* Selectivity of a var < something clause */
      41             : } RangeQueryClause;
      42             : 
      43             : static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
      44             :                            bool varonleft, bool isLTsel, Selectivity s2);
      45             : static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
      46             :                                                List *clauses);
      47             : static Selectivity clauselist_selectivity_or(PlannerInfo *root,
      48             :                                              List *clauses,
      49             :                                              int varRelid,
      50             :                                              JoinType jointype,
      51             :                                              SpecialJoinInfo *sjinfo,
      52             :                                              bool use_extended_stats);
      53             : 
      54             : /****************************************************************************
      55             :  *      ROUTINES TO COMPUTE SELECTIVITIES
      56             :  ****************************************************************************/
      57             : 
      58             : /*
      59             :  * clauselist_selectivity -
      60             :  *    Compute the selectivity of an implicitly-ANDed list of boolean
      61             :  *    expression clauses.  The list can be empty, in which case 1.0
      62             :  *    must be returned.  List elements may be either RestrictInfos
      63             :  *    or bare expression clauses --- the former is preferred since
      64             :  *    it allows caching of results.
      65             :  *
      66             :  * See clause_selectivity() for the meaning of the additional parameters.
      67             :  *
      68             :  * The basic approach is to apply extended statistics first, on as many
      69             :  * clauses as possible, in order to capture cross-column dependencies etc.
      70             :  * The remaining clauses are then estimated by taking the product of their
      71             :  * selectivities, but that's only right if they have independent
      72             :  * probabilities, and in reality they are often NOT independent even if they
      73             :  * only refer to a single column.  So, we want to be smarter where we can.
      74             :  *
      75             :  * We also recognize "range queries", such as "x > 34 AND x < 42".  Clauses
      76             :  * are recognized as possible range query components if they are restriction
      77             :  * opclauses whose operators have scalarltsel or a related function as their
      78             :  * restriction selectivity estimator.  We pair up clauses of this form that
      79             :  * refer to the same variable.  An unpairable clause of this kind is simply
      80             :  * multiplied into the selectivity product in the normal way.  But when we
      81             :  * find a pair, we know that the selectivities represent the relative
      82             :  * positions of the low and high bounds within the column's range, so instead
      83             :  * of figuring the selectivity as hisel * losel, we can figure it as hisel +
      84             :  * losel - 1.  (To visualize this, see that hisel is the fraction of the range
      85             :  * below the high bound, while losel is the fraction above the low bound; so
      86             :  * hisel can be interpreted directly as a 0..1 value but we need to convert
      87             :  * losel to 1-losel before interpreting it as a value.  Then the available
      88             :  * range is 1-losel to hisel.  However, this calculation double-excludes
      89             :  * nulls, so really we need hisel + losel + null_frac - 1.)
      90             :  *
      91             :  * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
      92             :  * and instead use DEFAULT_RANGE_INEQ_SEL.  The same applies if the equation
      93             :  * yields an impossible (negative) result.
      94             :  *
      95             :  * A free side-effect is that we can recognize redundant inequalities such
      96             :  * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
      97             :  *
      98             :  * Of course this is all very dependent on the behavior of the inequality
      99             :  * selectivity functions; perhaps some day we can generalize the approach.
     100             :  */
     101             : Selectivity
     102     2035404 : clauselist_selectivity(PlannerInfo *root,
     103             :                        List *clauses,
     104             :                        int varRelid,
     105             :                        JoinType jointype,
     106             :                        SpecialJoinInfo *sjinfo)
     107             : {
     108     2035404 :     return clauselist_selectivity_ext(root, clauses, varRelid,
     109             :                                       jointype, sjinfo, true);
     110             : }
     111             : 
     112             : /*
     113             :  * clauselist_selectivity_ext -
     114             :  *    Extended version of clauselist_selectivity().  If "use_extended_stats"
     115             :  *    is false, all extended statistics will be ignored, and only per-column
     116             :  *    statistics will be used.
     117             :  */
     118             : Selectivity
     119     2041292 : clauselist_selectivity_ext(PlannerInfo *root,
     120             :                            List *clauses,
     121             :                            int varRelid,
     122             :                            JoinType jointype,
     123             :                            SpecialJoinInfo *sjinfo,
     124             :                            bool use_extended_stats)
     125             : {
     126     2041292 :     Selectivity s1 = 1.0;
     127             :     RelOptInfo *rel;
     128     2041292 :     Bitmapset  *estimatedclauses = NULL;
     129     2041292 :     RangeQueryClause *rqlist = NULL;
     130             :     ListCell   *l;
     131             :     int         listidx;
     132             : 
     133             :     /*
     134             :      * If there's exactly one clause, just go directly to
     135             :      * clause_selectivity_ext(). None of what we might do below is relevant.
     136             :      */
     137     2041292 :     if (list_length(clauses) == 1)
     138     1068078 :         return clause_selectivity_ext(root, (Node *) linitial(clauses),
     139             :                                       varRelid, jointype, sjinfo,
     140             :                                       use_extended_stats);
     141             : 
     142             :     /*
     143             :      * Determine if these clauses reference a single relation.  If so, and if
     144             :      * it has extended statistics, try to apply those.
     145             :      */
     146      973214 :     rel = find_single_rel_for_clauses(root, clauses);
     147      973214 :     if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
     148             :     {
     149             :         /*
     150             :          * Estimate as many clauses as possible using extended statistics.
     151             :          *
     152             :          * 'estimatedclauses' is populated with the 0-based list position
     153             :          * index of clauses estimated here, and that should be ignored below.
     154             :          */
     155        1770 :         s1 = statext_clauselist_selectivity(root, clauses, varRelid,
     156             :                                             jointype, sjinfo, rel,
     157             :                                             &estimatedclauses, false);
     158             :     }
     159             : 
     160             :     /*
     161             :      * Apply normal selectivity estimates for remaining clauses. We'll be
     162             :      * careful to skip any clauses which were already estimated above.
     163             :      *
     164             :      * Anything that doesn't look like a potential rangequery clause gets
     165             :      * multiplied into s1 and forgotten. Anything that does gets inserted into
     166             :      * an rqlist entry.
     167             :      */
     168      973214 :     listidx = -1;
     169     1586280 :     foreach(l, clauses)
     170             :     {
     171      613066 :         Node       *clause = (Node *) lfirst(l);
     172             :         RestrictInfo *rinfo;
     173             :         Selectivity s2;
     174             : 
     175      613066 :         listidx++;
     176             : 
     177             :         /*
     178             :          * Skip this clause if it's already been estimated by some other
     179             :          * statistics above.
     180             :          */
     181      613066 :         if (bms_is_member(listidx, estimatedclauses))
     182        3474 :             continue;
     183             : 
     184             :         /* Compute the selectivity of this clause in isolation */
     185      609592 :         s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
     186             :                                     use_extended_stats);
     187             : 
     188             :         /*
     189             :          * Check for being passed a RestrictInfo.
     190             :          *
     191             :          * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
     192             :          * 0.0; just use that rather than looking for range pairs.
     193             :          */
     194      609592 :         if (IsA(clause, RestrictInfo))
     195             :         {
     196      608412 :             rinfo = (RestrictInfo *) clause;
     197      608412 :             if (rinfo->pseudoconstant)
     198             :             {
     199       22016 :                 s1 = s1 * s2;
     200       22016 :                 continue;
     201             :             }
     202      586396 :             clause = (Node *) rinfo->clause;
     203             :         }
     204             :         else
     205        1180 :             rinfo = NULL;
     206             : 
     207             :         /*
     208             :          * See if it looks like a restriction clause with a pseudoconstant on
     209             :          * one side.  (Anything more complicated than that might not behave in
     210             :          * the simple way we are expecting.)  Most of the tests here can be
     211             :          * done more efficiently with rinfo than without.
     212             :          */
     213      587576 :         if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
     214             :         {
     215      498002 :             OpExpr     *expr = (OpExpr *) clause;
     216      498002 :             bool        varonleft = true;
     217             :             bool        ok;
     218             : 
     219      498002 :             if (rinfo)
     220             :             {
     221      818888 :                 ok = (rinfo->num_base_rels == 1) &&
     222      317382 :                     (is_pseudo_constant_clause_relids(lsecond(expr->args),
     223        4668 :                                                       rinfo->right_relids) ||
     224        4668 :                      (varonleft = false,
     225        4668 :                       is_pseudo_constant_clause_relids(linitial(expr->args),
     226             :                                                        rinfo->left_relids)));
     227             :             }
     228             :             else
     229             :             {
     230        2328 :                 ok = (NumRelids(root, clause) == 1) &&
     231        1164 :                     (is_pseudo_constant_clause(lsecond(expr->args)) ||
     232           0 :                      (varonleft = false,
     233           0 :                       is_pseudo_constant_clause(linitial(expr->args))));
     234             :             }
     235             : 
     236      498002 :             if (ok)
     237             :             {
     238             :                 /*
     239             :                  * If it's not a "<"/"<="/">"/">=" operator, just merge the
     240             :                  * selectivity in generically.  But if it's the right oprrest,
     241             :                  * add the clause to rqlist for later processing.
     242             :                  */
     243      318176 :                 switch (get_oprrest(expr->opno))
     244             :                 {
     245       12358 :                     case F_SCALARLTSEL:
     246             :                     case F_SCALARLESEL:
     247       12358 :                         addRangeClause(&rqlist, clause,
     248             :                                        varonleft, true, s2);
     249       12358 :                         break;
     250       31542 :                     case F_SCALARGTSEL:
     251             :                     case F_SCALARGESEL:
     252       31542 :                         addRangeClause(&rqlist, clause,
     253             :                                        varonleft, false, s2);
     254       31542 :                         break;
     255      274276 :                     default:
     256             :                         /* Just merge the selectivity in generically */
     257      274276 :                         s1 = s1 * s2;
     258      274276 :                         break;
     259             :                 }
     260      318176 :                 continue;       /* drop to loop bottom */
     261             :             }
     262             :         }
     263             : 
     264             :         /* Not the right form, so treat it generically. */
     265      269400 :         s1 = s1 * s2;
     266             :     }
     267             : 
     268             :     /*
     269             :      * Now scan the rangequery pair list.
     270             :      */
     271     1008754 :     while (rqlist != NULL)
     272             :     {
     273             :         RangeQueryClause *rqnext;
     274             : 
     275       35540 :         if (rqlist->have_lobound && rqlist->have_hibound)
     276        8042 :         {
     277             :             /* Successfully matched a pair of range clauses */
     278             :             Selectivity s2;
     279             : 
     280             :             /*
     281             :              * Exact equality to the default value probably means the
     282             :              * selectivity function punted.  This is not airtight but should
     283             :              * be good enough.
     284             :              */
     285        8042 :             if (rqlist->hibound == DEFAULT_INEQ_SEL ||
     286        6020 :                 rqlist->lobound == DEFAULT_INEQ_SEL)
     287             :             {
     288        2022 :                 s2 = DEFAULT_RANGE_INEQ_SEL;
     289             :             }
     290             :             else
     291             :             {
     292        6020 :                 s2 = rqlist->hibound + rqlist->lobound - 1.0;
     293             : 
     294             :                 /* Adjust for double-exclusion of NULLs */
     295        6020 :                 s2 += nulltestsel(root, IS_NULL, rqlist->var,
     296             :                                   varRelid, jointype, sjinfo);
     297             : 
     298             :                 /*
     299             :                  * A zero or slightly negative s2 should be converted into a
     300             :                  * small positive value; we probably are dealing with a very
     301             :                  * tight range and got a bogus result due to roundoff errors.
     302             :                  * However, if s2 is very negative, then we probably have
     303             :                  * default selectivity estimates on one or both sides of the
     304             :                  * range that we failed to recognize above for some reason.
     305             :                  */
     306        6020 :                 if (s2 <= 0.0)
     307             :                 {
     308         878 :                     if (s2 < -0.01)
     309             :                     {
     310             :                         /*
     311             :                          * No data available --- use a default estimate that
     312             :                          * is small, but not real small.
     313             :                          */
     314          12 :                         s2 = DEFAULT_RANGE_INEQ_SEL;
     315             :                     }
     316             :                     else
     317             :                     {
     318             :                         /*
     319             :                          * It's just roundoff error; use a small positive
     320             :                          * value
     321             :                          */
     322         866 :                         s2 = 1.0e-10;
     323             :                     }
     324             :                 }
     325             :             }
     326             :             /* Merge in the selectivity of the pair of clauses */
     327        8042 :             s1 *= s2;
     328             :         }
     329             :         else
     330             :         {
     331             :             /* Only found one of a pair, merge it in generically */
     332       27498 :             if (rqlist->have_lobound)
     333       23152 :                 s1 *= rqlist->lobound;
     334             :             else
     335        4346 :                 s1 *= rqlist->hibound;
     336             :         }
     337             :         /* release storage and advance */
     338       35540 :         rqnext = rqlist->next;
     339       35540 :         pfree(rqlist);
     340       35540 :         rqlist = rqnext;
     341             :     }
     342             : 
     343      973214 :     return s1;
     344             : }
     345             : 
     346             : /*
     347             :  * clauselist_selectivity_or -
     348             :  *    Compute the selectivity of an implicitly-ORed list of boolean
     349             :  *    expression clauses.  The list can be empty, in which case 0.0
     350             :  *    must be returned.  List elements may be either RestrictInfos
     351             :  *    or bare expression clauses --- the former is preferred since
     352             :  *    it allows caching of results.
     353             :  *
     354             :  * See clause_selectivity() for the meaning of the additional parameters.
     355             :  *
     356             :  * The basic approach is to apply extended statistics first, on as many
     357             :  * clauses as possible, in order to capture cross-column dependencies etc.
     358             :  * The remaining clauses are then estimated as if they were independent.
     359             :  */
     360             : static Selectivity
     361       10820 : clauselist_selectivity_or(PlannerInfo *root,
     362             :                           List *clauses,
     363             :                           int varRelid,
     364             :                           JoinType jointype,
     365             :                           SpecialJoinInfo *sjinfo,
     366             :                           bool use_extended_stats)
     367             : {
     368       10820 :     Selectivity s1 = 0.0;
     369             :     RelOptInfo *rel;
     370       10820 :     Bitmapset  *estimatedclauses = NULL;
     371             :     ListCell   *lc;
     372             :     int         listidx;
     373             : 
     374             :     /*
     375             :      * Determine if these clauses reference a single relation.  If so, and if
     376             :      * it has extended statistics, try to apply those.
     377             :      */
     378       10820 :     rel = find_single_rel_for_clauses(root, clauses);
     379       10820 :     if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
     380             :     {
     381             :         /*
     382             :          * Estimate as many clauses as possible using extended statistics.
     383             :          *
     384             :          * 'estimatedclauses' is populated with the 0-based list position
     385             :          * index of clauses estimated here, and that should be ignored below.
     386             :          */
     387         108 :         s1 = statext_clauselist_selectivity(root, clauses, varRelid,
     388             :                                             jointype, sjinfo, rel,
     389             :                                             &estimatedclauses, true);
     390             :     }
     391             : 
     392             :     /*
     393             :      * Estimate the remaining clauses as if they were independent.
     394             :      *
     395             :      * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
     396             :      * for the probable overlap of selected tuple sets.
     397             :      *
     398             :      * XXX is this too conservative?
     399             :      */
     400       10820 :     listidx = -1;
     401       35826 :     foreach(lc, clauses)
     402             :     {
     403             :         Selectivity s2;
     404             : 
     405       25006 :         listidx++;
     406             : 
     407             :         /*
     408             :          * Skip this clause if it's already been estimated by some other
     409             :          * statistics above.
     410             :          */
     411       25006 :         if (bms_is_member(listidx, estimatedclauses))
     412         240 :             continue;
     413             : 
     414       24766 :         s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid,
     415             :                                     jointype, sjinfo, use_extended_stats);
     416             : 
     417       24766 :         s1 = s1 + s2 - s1 * s2;
     418             :     }
     419             : 
     420       10820 :     return s1;
     421             : }
     422             : 
     423             : /*
     424             :  * addRangeClause --- add a new range clause for clauselist_selectivity
     425             :  *
     426             :  * Here is where we try to match up pairs of range-query clauses
     427             :  */
     428             : static void
     429       43900 : addRangeClause(RangeQueryClause **rqlist, Node *clause,
     430             :                bool varonleft, bool isLTsel, Selectivity s2)
     431             : {
     432             :     RangeQueryClause *rqelem;
     433             :     Node       *var;
     434             :     bool        is_lobound;
     435             : 
     436       43900 :     if (varonleft)
     437             :     {
     438       43684 :         var = get_leftop((Expr *) clause);
     439       43684 :         is_lobound = !isLTsel;  /* x < something is high bound */
     440             :     }
     441             :     else
     442             :     {
     443         216 :         var = get_rightop((Expr *) clause);
     444         216 :         is_lobound = isLTsel;   /* something < x is low bound */
     445             :     }
     446             : 
     447       45130 :     for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
     448             :     {
     449             :         /*
     450             :          * We use full equal() here because the "var" might be a function of
     451             :          * one or more attributes of the same relation...
     452             :          */
     453        9590 :         if (!equal(var, rqelem->var))
     454        1230 :             continue;
     455             :         /* Found the right group to put this clause in */
     456        8360 :         if (is_lobound)
     457             :         {
     458         312 :             if (!rqelem->have_lobound)
     459             :             {
     460         132 :                 rqelem->have_lobound = true;
     461         132 :                 rqelem->lobound = s2;
     462             :             }
     463             :             else
     464             :             {
     465             : 
     466             :                 /*------
     467             :                  * We have found two similar clauses, such as
     468             :                  * x < y AND x <= z.
     469             :                  * Keep only the more restrictive one.
     470             :                  *------
     471             :                  */
     472         180 :                 if (rqelem->lobound > s2)
     473           0 :                     rqelem->lobound = s2;
     474             :             }
     475             :         }
     476             :         else
     477             :         {
     478        8048 :             if (!rqelem->have_hibound)
     479             :             {
     480        7910 :                 rqelem->have_hibound = true;
     481        7910 :                 rqelem->hibound = s2;
     482             :             }
     483             :             else
     484             :             {
     485             : 
     486             :                 /*------
     487             :                  * We have found two similar clauses, such as
     488             :                  * x > y AND x >= z.
     489             :                  * Keep only the more restrictive one.
     490             :                  *------
     491             :                  */
     492         138 :                 if (rqelem->hibound > s2)
     493          36 :                     rqelem->hibound = s2;
     494             :             }
     495             :         }
     496        8360 :         return;
     497             :     }
     498             : 
     499             :     /* No matching var found, so make a new clause-pair data structure */
     500       35540 :     rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
     501       35540 :     rqelem->var = var;
     502       35540 :     if (is_lobound)
     503             :     {
     504       31062 :         rqelem->have_lobound = true;
     505       31062 :         rqelem->have_hibound = false;
     506       31062 :         rqelem->lobound = s2;
     507             :     }
     508             :     else
     509             :     {
     510        4478 :         rqelem->have_lobound = false;
     511        4478 :         rqelem->have_hibound = true;
     512        4478 :         rqelem->hibound = s2;
     513             :     }
     514       35540 :     rqelem->next = *rqlist;
     515       35540 :     *rqlist = rqelem;
     516             : }
     517             : 
     518             : /*
     519             :  * find_single_rel_for_clauses
     520             :  *      Examine each clause in 'clauses' and determine if all clauses
     521             :  *      reference only a single relation.  If so return that relation,
     522             :  *      otherwise return NULL.
     523             :  */
     524             : static RelOptInfo *
     525      986332 : find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
     526             : {
     527      986332 :     int         lastrelid = 0;
     528             :     ListCell   *l;
     529             : 
     530     1354768 :     foreach(l, clauses)
     531             :     {
     532      519458 :         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
     533             :         int         relid;
     534             : 
     535             :         /*
     536             :          * If we have a list of bare clauses rather than RestrictInfos, we
     537             :          * could pull out their relids the hard way with pull_varnos().
     538             :          * However, currently the extended-stats machinery won't do anything
     539             :          * with non-RestrictInfo clauses anyway, so there's no point in
     540             :          * spending extra cycles; just fail if that's what we have.
     541             :          *
     542             :          * An exception to that rule is if we have a bare BoolExpr AND clause.
     543             :          * We treat this as a special case because the restrictinfo machinery
     544             :          * doesn't build RestrictInfos on top of AND clauses.
     545             :          */
     546      519458 :         if (is_andclause(rinfo))
     547             :         {
     548             :             RelOptInfo *rel;
     549             : 
     550        2298 :             rel = find_single_rel_for_clauses(root,
     551             :                                               ((BoolExpr *) rinfo)->args);
     552             : 
     553        2298 :             if (rel == NULL)
     554      151022 :                 return NULL;
     555        1838 :             if (lastrelid == 0)
     556         912 :                 lastrelid = rel->relid;
     557         926 :             else if (rel->relid != lastrelid)
     558           0 :                 return NULL;
     559             : 
     560       30786 :             continue;
     561             :         }
     562             : 
     563      517160 :         if (!IsA(rinfo, RestrictInfo))
     564        1022 :             return NULL;
     565             : 
     566      516138 :         if (bms_is_empty(rinfo->clause_relids))
     567       28948 :             continue;           /* we can ignore variable-free clauses */
     568      487190 :         if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
     569      146722 :             return NULL;        /* multiple relations in this clause */
     570      340468 :         if (lastrelid == 0)
     571      165506 :             lastrelid = relid;  /* first clause referencing a relation */
     572      174962 :         else if (relid != lastrelid)
     573        2818 :             return NULL;        /* relation not same as last one */
     574             :     }
     575             : 
     576      835310 :     if (lastrelid != 0)
     577      133814 :         return find_base_rel(root, lastrelid);
     578             : 
     579      701496 :     return NULL;                /* no clauses */
     580             : }
     581             : 
     582             : /*
     583             :  * treat_as_join_clause -
     584             :  *    Decide whether an operator clause is to be handled by the
     585             :  *    restriction or join estimator.  Subroutine for clause_selectivity().
     586             :  */
     587             : static inline bool
     588      732488 : treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
     589             :                      int varRelid, SpecialJoinInfo *sjinfo)
     590             : {
     591      732488 :     if (varRelid != 0)
     592             :     {
     593             :         /*
     594             :          * Caller is forcing restriction mode (eg, because we are examining an
     595             :          * inner indexscan qual).
     596             :          */
     597      264528 :         return false;
     598             :     }
     599      467960 :     else if (sjinfo == NULL)
     600             :     {
     601             :         /*
     602             :          * It must be a restriction clause, since it's being evaluated at a
     603             :          * scan node.
     604             :          */
     605      284516 :         return false;
     606             :     }
     607             :     else
     608             :     {
     609             :         /*
     610             :          * Otherwise, it's a join if there's more than one base relation used.
     611             :          * We can optimize this calculation if an rinfo was passed.
     612             :          *
     613             :          * XXX  Since we know the clause is being evaluated at a join, the
     614             :          * only way it could be single-relation is if it was delayed by outer
     615             :          * joins.  We intentionally count only baserels here, not OJs that
     616             :          * might be present in rinfo->clause_relids, so that we direct such
     617             :          * cases to the restriction qual estimators not join estimators.
     618             :          * Eventually some notice should be taken of the possibility of
     619             :          * injected nulls, but we'll likely want to do that in the restriction
     620             :          * estimators rather than starting to treat such cases as join quals.
     621             :          */
     622      183444 :         if (rinfo)
     623      182988 :             return (rinfo->num_base_rels > 1);
     624             :         else
     625         456 :             return (NumRelids(root, clause) > 1);
     626             :     }
     627             : }
     628             : 
     629             : 
     630             : /*
     631             :  * clause_selectivity -
     632             :  *    Compute the selectivity of a general boolean expression clause.
     633             :  *
     634             :  * The clause can be either a RestrictInfo or a plain expression.  If it's
     635             :  * a RestrictInfo, we try to cache the selectivity for possible re-use,
     636             :  * so passing RestrictInfos is preferred.
     637             :  *
     638             :  * varRelid is either 0 or a rangetable index.
     639             :  *
     640             :  * When varRelid is not 0, only variables belonging to that relation are
     641             :  * considered in computing selectivity; other vars are treated as constants
     642             :  * of unknown values.  This is appropriate for estimating the selectivity of
     643             :  * a join clause that is being used as a restriction clause in a scan of a
     644             :  * nestloop join's inner relation --- varRelid should then be the ID of the
     645             :  * inner relation.
     646             :  *
     647             :  * When varRelid is 0, all variables are treated as variables.  This
     648             :  * is appropriate for ordinary join clauses and restriction clauses.
     649             :  *
     650             :  * jointype is the join type, if the clause is a join clause.  Pass JOIN_INNER
     651             :  * if the clause isn't a join clause.
     652             :  *
     653             :  * sjinfo is NULL for a non-join clause, otherwise it provides additional
     654             :  * context information about the join being performed.  There are some
     655             :  * special cases:
     656             :  *  1. For a special (not INNER) join, sjinfo is always a member of
     657             :  *     root->join_info_list.
     658             :  *  2. For an INNER join, sjinfo is just a transient struct, and only the
     659             :  *     relids and jointype fields in it can be trusted.
     660             :  * It is possible for jointype to be different from sjinfo->jointype.
     661             :  * This indicates we are considering a variant join: either with
     662             :  * the LHS and RHS switched, or with one input unique-ified.
     663             :  *
     664             :  * Note: when passing nonzero varRelid, it's normally appropriate to set
     665             :  * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
     666             :  * join clause; because we aren't treating it as a join clause.
     667             :  */
     668             : Selectivity
     669      388330 : clause_selectivity(PlannerInfo *root,
     670             :                    Node *clause,
     671             :                    int varRelid,
     672             :                    JoinType jointype,
     673             :                    SpecialJoinInfo *sjinfo)
     674             : {
     675      388330 :     return clause_selectivity_ext(root, clause, varRelid,
     676             :                                   jointype, sjinfo, true);
     677             : }
     678             : 
     679             : /*
     680             :  * clause_selectivity_ext -
     681             :  *    Extended version of clause_selectivity().  If "use_extended_stats" is
     682             :  *    false, all extended statistics will be ignored, and only per-column
     683             :  *    statistics will be used.
     684             :  */
     685             : Selectivity
     686     2097686 : clause_selectivity_ext(PlannerInfo *root,
     687             :                        Node *clause,
     688             :                        int varRelid,
     689             :                        JoinType jointype,
     690             :                        SpecialJoinInfo *sjinfo,
     691             :                        bool use_extended_stats)
     692             : {
     693     2097686 :     Selectivity s1 = 0.5;       /* default for any unhandled clause type */
     694     2097686 :     RestrictInfo *rinfo = NULL;
     695     2097686 :     bool        cacheable = false;
     696             : 
     697     2097686 :     if (clause == NULL)         /* can this still happen? */
     698           0 :         return s1;
     699             : 
     700     2097686 :     if (IsA(clause, RestrictInfo))
     701             :     {
     702     2081886 :         rinfo = (RestrictInfo *) clause;
     703             : 
     704             :         /*
     705             :          * If the clause is marked pseudoconstant, then it will be used as a
     706             :          * gating qual and should not affect selectivity estimates; hence
     707             :          * return 1.0.  The only exception is that a constant FALSE may be
     708             :          * taken as having selectivity 0.0, since it will surely mean no rows
     709             :          * out of the plan.  This case is simple enough that we need not
     710             :          * bother caching the result.
     711             :          */
     712     2081886 :         if (rinfo->pseudoconstant)
     713             :         {
     714       22334 :             if (!IsA(rinfo->clause, Const))
     715       22146 :                 return (Selectivity) 1.0;
     716             :         }
     717             : 
     718             :         /*
     719             :          * If possible, cache the result of the selectivity calculation for
     720             :          * the clause.  We can cache if varRelid is zero or the clause
     721             :          * contains only vars of that relid --- otherwise varRelid will affect
     722             :          * the result, so mustn't cache.  Outer join quals might be examined
     723             :          * with either their join's actual jointype or JOIN_INNER, so we need
     724             :          * two cache variables to remember both cases.  Note: we assume the
     725             :          * result won't change if we are switching the input relations or
     726             :          * considering a unique-ified case, so we only need one cache variable
     727             :          * for all non-JOIN_INNER cases.
     728             :          */
     729     2059740 :         if (varRelid == 0 ||
     730      821416 :             rinfo->num_base_rels == 0 ||
     731     1387528 :             (rinfo->num_base_rels == 1 &&
     732      566112 :              bms_is_member(varRelid, rinfo->clause_relids)))
     733             :         {
     734             :             /* Cacheable --- do we already have the result? */
     735     1802448 :             if (jointype == JOIN_INNER)
     736             :             {
     737     1535086 :                 if (rinfo->norm_selec >= 0)
     738     1095258 :                     return rinfo->norm_selec;
     739             :             }
     740             :             else
     741             :             {
     742      267362 :                 if (rinfo->outer_selec >= 0)
     743      171210 :                     return rinfo->outer_selec;
     744             :             }
     745      535980 :             cacheable = true;
     746             :         }
     747             : 
     748             :         /*
     749             :          * Proceed with examination of contained clause.  If the clause is an
     750             :          * OR-clause, we want to look at the variant with sub-RestrictInfos,
     751             :          * so that per-subclause selectivities can be cached.
     752             :          */
     753      793272 :         if (rinfo->orclause)
     754       10780 :             clause = (Node *) rinfo->orclause;
     755             :         else
     756      782492 :             clause = (Node *) rinfo->clause;
     757             :     }
     758             : 
     759      809072 :     if (IsA(clause, Var))
     760             :     {
     761       38950 :         Var        *var = (Var *) clause;
     762             : 
     763             :         /*
     764             :          * We probably shouldn't ever see an uplevel Var here, but if we do,
     765             :          * return the default selectivity...
     766             :          */
     767       38950 :         if (var->varlevelsup == 0 &&
     768         544 :             (varRelid == 0 || varRelid == (int) var->varno))
     769             :         {
     770             :             /* Use the restriction selectivity function for a bool Var */
     771       38702 :             s1 = boolvarsel(root, (Node *) var, varRelid);
     772             :         }
     773             :     }
     774      770122 :     else if (IsA(clause, Const))
     775             :     {
     776             :         /* bool constant is pretty easy... */
     777        3064 :         Const      *con = (Const *) clause;
     778             : 
     779        6012 :         s1 = con->constisnull ? 0.0 :
     780        2948 :             DatumGetBool(con->constvalue) ? 1.0 : 0.0;
     781             :     }
     782      767058 :     else if (IsA(clause, Param))
     783             :     {
     784             :         /* see if we can replace the Param */
     785          24 :         Node       *subst = estimate_expression_value(root, clause);
     786             : 
     787          24 :         if (IsA(subst, Const))
     788             :         {
     789             :             /* bool constant is pretty easy... */
     790           0 :             Const      *con = (Const *) subst;
     791             : 
     792           0 :             s1 = con->constisnull ? 0.0 :
     793           0 :                 DatumGetBool(con->constvalue) ? 1.0 : 0.0;
     794             :         }
     795             :         else
     796             :         {
     797             :             /* XXX any way to do better than default? */
     798             :         }
     799             :     }
     800      767034 :     else if (is_notclause(clause))
     801             :     {
     802             :         /* inverse of the selectivity of the underlying clause */
     803        6680 :         s1 = 1.0 - clause_selectivity_ext(root,
     804        6680 :                                           (Node *) get_notclausearg((Expr *) clause),
     805             :                                           varRelid,
     806             :                                           jointype,
     807             :                                           sjinfo,
     808             :                                           use_extended_stats);
     809             :     }
     810      760354 :     else if (is_andclause(clause))
     811             :     {
     812             :         /* share code with clauselist_selectivity() */
     813        2942 :         s1 = clauselist_selectivity_ext(root,
     814             :                                         ((BoolExpr *) clause)->args,
     815             :                                         varRelid,
     816             :                                         jointype,
     817             :                                         sjinfo,
     818             :                                         use_extended_stats);
     819             :     }
     820      757412 :     else if (is_orclause(clause))
     821             :     {
     822             :         /*
     823             :          * Almost the same thing as clauselist_selectivity, but with the
     824             :          * clauses connected by OR.
     825             :          */
     826       10820 :         s1 = clauselist_selectivity_or(root,
     827             :                                        ((BoolExpr *) clause)->args,
     828             :                                        varRelid,
     829             :                                        jointype,
     830             :                                        sjinfo,
     831             :                                        use_extended_stats);
     832             :     }
     833      746592 :     else if (is_opclause(clause) || IsA(clause, DistinctExpr))
     834      708396 :     {
     835      708404 :         OpExpr     *opclause = (OpExpr *) clause;
     836      708404 :         Oid         opno = opclause->opno;
     837             : 
     838      708404 :         if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
     839             :         {
     840             :             /* Estimate selectivity for a join clause. */
     841      175946 :             s1 = join_selectivity(root, opno,
     842             :                                   opclause->args,
     843             :                                   opclause->inputcollid,
     844             :                                   jointype,
     845             :                                   sjinfo);
     846             :         }
     847             :         else
     848             :         {
     849             :             /* Estimate selectivity for a restriction clause. */
     850      532458 :             s1 = restriction_selectivity(root, opno,
     851             :                                          opclause->args,
     852             :                                          opclause->inputcollid,
     853             :                                          varRelid);
     854             :         }
     855             : 
     856             :         /*
     857             :          * DistinctExpr has the same representation as OpExpr, but the
     858             :          * contained operator is "=" not "<>", so we must negate the result.
     859             :          * This estimation method doesn't give the right behavior for nulls,
     860             :          * but it's better than doing nothing.
     861             :          */
     862      708396 :         if (IsA(clause, DistinctExpr))
     863         654 :             s1 = 1.0 - s1;
     864             :     }
     865       38188 :     else if (is_funcclause(clause))
     866             :     {
     867        9420 :         FuncExpr   *funcclause = (FuncExpr *) clause;
     868             : 
     869             :         /* Try to get an estimate from the support function, if any */
     870        9420 :         s1 = function_selectivity(root,
     871             :                                   funcclause->funcid,
     872             :                                   funcclause->args,
     873             :                                   funcclause->inputcollid,
     874        9420 :                                   treat_as_join_clause(root, clause, rinfo,
     875             :                                                        varRelid, sjinfo),
     876             :                                   varRelid,
     877             :                                   jointype,
     878             :                                   sjinfo);
     879             :     }
     880       28768 :     else if (IsA(clause, ScalarArrayOpExpr))
     881             :     {
     882             :         /* Use node specific selectivity calculation function */
     883       14664 :         s1 = scalararraysel(root,
     884             :                             (ScalarArrayOpExpr *) clause,
     885       14664 :                             treat_as_join_clause(root, clause, rinfo,
     886             :                                                  varRelid, sjinfo),
     887             :                             varRelid,
     888             :                             jointype,
     889             :                             sjinfo);
     890             :     }
     891       14104 :     else if (IsA(clause, RowCompareExpr))
     892             :     {
     893             :         /* Use node specific selectivity calculation function */
     894         156 :         s1 = rowcomparesel(root,
     895             :                            (RowCompareExpr *) clause,
     896             :                            varRelid,
     897             :                            jointype,
     898             :                            sjinfo);
     899             :     }
     900       13948 :     else if (IsA(clause, NullTest))
     901             :     {
     902             :         /* Use node specific selectivity calculation function */
     903       10880 :         s1 = nulltestsel(root,
     904             :                          ((NullTest *) clause)->nulltesttype,
     905       10880 :                          (Node *) ((NullTest *) clause)->arg,
     906             :                          varRelid,
     907             :                          jointype,
     908             :                          sjinfo);
     909             :     }
     910        3068 :     else if (IsA(clause, BooleanTest))
     911             :     {
     912             :         /* Use node specific selectivity calculation function */
     913         624 :         s1 = booltestsel(root,
     914             :                          ((BooleanTest *) clause)->booltesttype,
     915         624 :                          (Node *) ((BooleanTest *) clause)->arg,
     916             :                          varRelid,
     917             :                          jointype,
     918             :                          sjinfo);
     919             :     }
     920        2444 :     else if (IsA(clause, CurrentOfExpr))
     921             :     {
     922             :         /* CURRENT OF selects at most one row of its table */
     923         394 :         CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
     924         394 :         RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
     925             : 
     926         394 :         if (crel->tuples > 0)
     927         380 :             s1 = 1.0 / crel->tuples;
     928             :     }
     929        2050 :     else if (IsA(clause, RelabelType))
     930             :     {
     931             :         /* Not sure this case is needed, but it can't hurt */
     932           0 :         s1 = clause_selectivity_ext(root,
     933           0 :                                     (Node *) ((RelabelType *) clause)->arg,
     934             :                                     varRelid,
     935             :                                     jointype,
     936             :                                     sjinfo,
     937             :                                     use_extended_stats);
     938             :     }
     939        2050 :     else if (IsA(clause, CoerceToDomain))
     940             :     {
     941             :         /* Not sure this case is needed, but it can't hurt */
     942           0 :         s1 = clause_selectivity_ext(root,
     943           0 :                                     (Node *) ((CoerceToDomain *) clause)->arg,
     944             :                                     varRelid,
     945             :                                     jointype,
     946             :                                     sjinfo,
     947             :                                     use_extended_stats);
     948             :     }
     949             :     else
     950             :     {
     951             :         /*
     952             :          * For anything else, see if we can consider it as a boolean variable.
     953             :          * This only works if it's an immutable expression in Vars of a single
     954             :          * relation; but there's no point in us checking that here because
     955             :          * boolvarsel() will do it internally, and return a suitable default
     956             :          * selectivity if not.
     957             :          */
     958        2050 :         s1 = boolvarsel(root, clause, varRelid);
     959             :     }
     960             : 
     961             :     /* Cache the result if possible */
     962      809064 :     if (cacheable)
     963             :     {
     964      535972 :         if (jointype == JOIN_INNER)
     965      439820 :             rinfo->norm_selec = s1;
     966             :         else
     967       96152 :             rinfo->outer_selec = s1;
     968             :     }
     969             : 
     970             : #ifdef SELECTIVITY_DEBUG
     971             :     elog(DEBUG4, "clause_selectivity: s1 %f", s1);
     972             : #endif                          /* SELECTIVITY_DEBUG */
     973             : 
     974      809064 :     return s1;
     975             : }

Generated by: LCOV version 1.14