LCOV - code coverage report
Current view: top level - src/backend/optimizer/path - clausesel.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 95.5 % 243 232
Test Date: 2026-02-17 17:20:33 Functions: 100.0 % 8 8
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * clausesel.c
       4              :  *    Routines to compute clause selectivities
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/optimizer/path/clausesel.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : #include "postgres.h"
      16              : 
      17              : #include "nodes/nodeFuncs.h"
      18              : #include "optimizer/clauses.h"
      19              : #include "optimizer/optimizer.h"
      20              : #include "optimizer/pathnode.h"
      21              : #include "optimizer/plancat.h"
      22              : #include "statistics/statistics.h"
      23              : #include "utils/fmgroids.h"
      24              : #include "utils/lsyscache.h"
      25              : #include "utils/selfuncs.h"
      26              : 
      27              : /*
      28              :  * Data structure for accumulating info about possible range-query
      29              :  * clause pairs in clauselist_selectivity.
      30              :  */
      31              : typedef struct RangeQueryClause
      32              : {
      33              :     struct RangeQueryClause *next;  /* next in linked list */
      34              :     Node       *var;            /* The common variable of the clauses */
      35              :     bool        have_lobound;   /* found a low-bound clause yet? */
      36              :     bool        have_hibound;   /* found a high-bound clause yet? */
      37              :     Selectivity lobound;        /* Selectivity of a var > something clause */
      38              :     Selectivity hibound;        /* Selectivity of a var < something clause */
      39              : } RangeQueryClause;
      40              : 
      41              : static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
      42              :                            bool varonleft, bool isLTsel, Selectivity s2);
      43              : static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
      44              :                                                List *clauses);
      45              : static Selectivity clauselist_selectivity_or(PlannerInfo *root,
      46              :                                              List *clauses,
      47              :                                              int varRelid,
      48              :                                              JoinType jointype,
      49              :                                              SpecialJoinInfo *sjinfo,
      50              :                                              bool use_extended_stats);
      51              : 
      52              : /****************************************************************************
      53              :  *      ROUTINES TO COMPUTE SELECTIVITIES
      54              :  ****************************************************************************/
      55              : 
      56              : /*
      57              :  * clauselist_selectivity -
      58              :  *    Compute the selectivity of an implicitly-ANDed list of boolean
      59              :  *    expression clauses.  The list can be empty, in which case 1.0
      60              :  *    must be returned.  List elements may be either RestrictInfos
      61              :  *    or bare expression clauses --- the former is preferred since
      62              :  *    it allows caching of results.
      63              :  *
      64              :  * See clause_selectivity() for the meaning of the additional parameters.
      65              :  *
      66              :  * The basic approach is to apply extended statistics first, on as many
      67              :  * clauses as possible, in order to capture cross-column dependencies etc.
      68              :  * The remaining clauses are then estimated by taking the product of their
      69              :  * selectivities, but that's only right if they have independent
      70              :  * probabilities, and in reality they are often NOT independent even if they
      71              :  * only refer to a single column.  So, we want to be smarter where we can.
      72              :  *
      73              :  * We also recognize "range queries", such as "x > 34 AND x < 42".  Clauses
      74              :  * are recognized as possible range query components if they are restriction
      75              :  * opclauses whose operators have scalarltsel or a related function as their
      76              :  * restriction selectivity estimator.  We pair up clauses of this form that
      77              :  * refer to the same variable.  An unpairable clause of this kind is simply
      78              :  * multiplied into the selectivity product in the normal way.  But when we
      79              :  * find a pair, we know that the selectivities represent the relative
      80              :  * positions of the low and high bounds within the column's range, so instead
      81              :  * of figuring the selectivity as hisel * losel, we can figure it as hisel +
      82              :  * losel - 1.  (To visualize this, see that hisel is the fraction of the range
      83              :  * below the high bound, while losel is the fraction above the low bound; so
      84              :  * hisel can be interpreted directly as a 0..1 value but we need to convert
      85              :  * losel to 1-losel before interpreting it as a value.  Then the available
      86              :  * range is 1-losel to hisel.  However, this calculation double-excludes
      87              :  * nulls, so really we need hisel + losel + null_frac - 1.)
      88              :  *
      89              :  * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
      90              :  * and instead use DEFAULT_RANGE_INEQ_SEL.  The same applies if the equation
      91              :  * yields an impossible (negative) result.
      92              :  *
      93              :  * A free side-effect is that we can recognize redundant inequalities such
      94              :  * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
      95              :  *
      96              :  * Of course this is all very dependent on the behavior of the inequality
      97              :  * selectivity functions; perhaps some day we can generalize the approach.
      98              :  */
      99              : Selectivity
     100      1486731 : clauselist_selectivity(PlannerInfo *root,
     101              :                        List *clauses,
     102              :                        int varRelid,
     103              :                        JoinType jointype,
     104              :                        SpecialJoinInfo *sjinfo)
     105              : {
     106      1486731 :     return clauselist_selectivity_ext(root, clauses, varRelid,
     107              :                                       jointype, sjinfo, true);
     108              : }
     109              : 
     110              : /*
     111              :  * clauselist_selectivity_ext -
     112              :  *    Extended version of clauselist_selectivity().  If "use_extended_stats"
     113              :  *    is false, all extended statistics will be ignored, and only per-column
     114              :  *    statistics will be used.
     115              :  */
     116              : Selectivity
     117      1490982 : clauselist_selectivity_ext(PlannerInfo *root,
     118              :                            List *clauses,
     119              :                            int varRelid,
     120              :                            JoinType jointype,
     121              :                            SpecialJoinInfo *sjinfo,
     122              :                            bool use_extended_stats)
     123              : {
     124      1490982 :     Selectivity s1 = 1.0;
     125              :     RelOptInfo *rel;
     126      1490982 :     Bitmapset  *estimatedclauses = NULL;
     127      1490982 :     RangeQueryClause *rqlist = NULL;
     128              :     ListCell   *l;
     129              :     int         listidx;
     130              : 
     131              :     /*
     132              :      * If there's exactly one clause, just go directly to
     133              :      * clause_selectivity_ext(). None of what we might do below is relevant.
     134              :      */
     135      1490982 :     if (list_length(clauses) == 1)
     136       743464 :         return clause_selectivity_ext(root, (Node *) linitial(clauses),
     137              :                                       varRelid, jointype, sjinfo,
     138              :                                       use_extended_stats);
     139              : 
     140              :     /*
     141              :      * Determine if these clauses reference a single relation.  If so, and if
     142              :      * it has extended statistics, try to apply those.
     143              :      */
     144       747518 :     rel = find_single_rel_for_clauses(root, clauses);
     145       747518 :     if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
     146              :     {
     147              :         /*
     148              :          * Estimate as many clauses as possible using extended statistics.
     149              :          *
     150              :          * 'estimatedclauses' is populated with the 0-based list position
     151              :          * index of clauses estimated here, and that should be ignored below.
     152              :          */
     153         1218 :         s1 = statext_clauselist_selectivity(root, clauses, varRelid,
     154              :                                             jointype, sjinfo, rel,
     155              :                                             &estimatedclauses, false);
     156              :     }
     157              : 
     158              :     /*
     159              :      * Apply normal selectivity estimates for remaining clauses. We'll be
     160              :      * careful to skip any clauses which were already estimated above.
     161              :      *
     162              :      * Anything that doesn't look like a potential rangequery clause gets
     163              :      * multiplied into s1 and forgotten. Anything that does gets inserted into
     164              :      * an rqlist entry.
     165              :      */
     166       747518 :     listidx = -1;
     167      1223652 :     foreach(l, clauses)
     168              :     {
     169       476134 :         Node       *clause = (Node *) lfirst(l);
     170              :         RestrictInfo *rinfo;
     171              :         Selectivity s2;
     172              : 
     173       476134 :         listidx++;
     174              : 
     175              :         /*
     176              :          * Skip this clause if it's already been estimated by some other
     177              :          * statistics above.
     178              :          */
     179       476134 :         if (bms_is_member(listidx, estimatedclauses))
     180         2331 :             continue;
     181              : 
     182              :         /* Compute the selectivity of this clause in isolation */
     183       473803 :         s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
     184              :                                     use_extended_stats);
     185              : 
     186              :         /*
     187              :          * Check for being passed a RestrictInfo.
     188              :          *
     189              :          * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
     190              :          * 0.0; just use that rather than looking for range pairs.
     191              :          */
     192       473803 :         if (IsA(clause, RestrictInfo))
     193              :         {
     194       473141 :             rinfo = (RestrictInfo *) clause;
     195       473141 :             if (rinfo->pseudoconstant)
     196              :             {
     197        14415 :                 s1 = s1 * s2;
     198        14415 :                 continue;
     199              :             }
     200       458726 :             clause = (Node *) rinfo->clause;
     201              :         }
     202              :         else
     203          662 :             rinfo = NULL;
     204              : 
     205              :         /*
     206              :          * See if it looks like a restriction clause with a pseudoconstant on
     207              :          * one side.  (Anything more complicated than that might not behave in
     208              :          * the simple way we are expecting.)  Most of the tests here can be
     209              :          * done more efficiently with rinfo than without.
     210              :          */
     211       459388 :         if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
     212              :         {
     213       398417 :             OpExpr     *expr = (OpExpr *) clause;
     214       398417 :             bool        varonleft = true;
     215              :             bool        ok;
     216              : 
     217       398417 :             if (rinfo)
     218              :             {
     219       628891 :                 ok = (rinfo->num_base_rels == 1) &&
     220       227545 :                     (is_pseudo_constant_clause_relids(lsecond(expr->args),
     221         3583 :                                                       rinfo->right_relids) ||
     222         3583 :                      (varonleft = false,
     223         3583 :                       is_pseudo_constant_clause_relids(linitial(expr->args),
     224              :                                                        rinfo->left_relids)));
     225              :             }
     226              :             else
     227              :             {
     228         1272 :                 ok = (NumRelids(root, clause) == 1) &&
     229          618 :                     (is_pseudo_constant_clause(lsecond(expr->args)) ||
     230            0 :                      (varonleft = false,
     231            0 :                       is_pseudo_constant_clause(linitial(expr->args))));
     232              :             }
     233              : 
     234       398417 :             if (ok)
     235              :             {
     236              :                 /*
     237              :                  * If it's not a "<"/"<="/">"/">=" operator, just merge the
     238              :                  * selectivity in generically.  But if it's the right oprrest,
     239              :                  * add the clause to rqlist for later processing.
     240              :                  */
     241       227825 :                 switch (get_oprrest(expr->opno))
     242              :                 {
     243         7394 :                     case F_SCALARLTSEL:
     244              :                     case F_SCALARLESEL:
     245         7394 :                         addRangeClause(&rqlist, clause,
     246              :                                        varonleft, true, s2);
     247         7394 :                         break;
     248        20022 :                     case F_SCALARGTSEL:
     249              :                     case F_SCALARGESEL:
     250        20022 :                         addRangeClause(&rqlist, clause,
     251              :                                        varonleft, false, s2);
     252        20022 :                         break;
     253       200409 :                     default:
     254              :                         /* Just merge the selectivity in generically */
     255       200409 :                         s1 = s1 * s2;
     256       200409 :                         break;
     257              :                 }
     258       227825 :                 continue;       /* drop to loop bottom */
     259              :             }
     260              :         }
     261              : 
     262              :         /* Not the right form, so treat it generically. */
     263       231563 :         s1 = s1 * s2;
     264              :     }
     265              : 
     266              :     /*
     267              :      * Now scan the rangequery pair list.
     268              :      */
     269       769931 :     while (rqlist != NULL)
     270              :     {
     271              :         RangeQueryClause *rqnext;
     272              : 
     273        22413 :         if (rqlist->have_lobound && rqlist->have_hibound)
     274         4589 :         {
     275              :             /* Successfully matched a pair of range clauses */
     276              :             Selectivity s2;
     277              : 
     278              :             /*
     279              :              * Exact equality to the default value probably means the
     280              :              * selectivity function punted.  This is not airtight but should
     281              :              * be good enough.
     282              :              */
     283         4589 :             if (rqlist->hibound == DEFAULT_INEQ_SEL ||
     284         3441 :                 rqlist->lobound == DEFAULT_INEQ_SEL)
     285              :             {
     286         1148 :                 s2 = DEFAULT_RANGE_INEQ_SEL;
     287              :             }
     288              :             else
     289              :             {
     290         3441 :                 s2 = rqlist->hibound + rqlist->lobound - 1.0;
     291              : 
     292              :                 /* Adjust for double-exclusion of NULLs */
     293         3441 :                 s2 += nulltestsel(root, IS_NULL, rqlist->var,
     294              :                                   varRelid, jointype, sjinfo);
     295              : 
     296              :                 /*
     297              :                  * A zero or slightly negative s2 should be converted into a
     298              :                  * small positive value; we probably are dealing with a very
     299              :                  * tight range and got a bogus result due to roundoff errors.
     300              :                  * However, if s2 is very negative, then we probably have
     301              :                  * default selectivity estimates on one or both sides of the
     302              :                  * range that we failed to recognize above for some reason.
     303              :                  */
     304         3441 :                 if (s2 <= 0.0)
     305              :                 {
     306          497 :                     if (s2 < -0.01)
     307              :                     {
     308              :                         /*
     309              :                          * No data available --- use a default estimate that
     310              :                          * is small, but not real small.
     311              :                          */
     312            6 :                         s2 = DEFAULT_RANGE_INEQ_SEL;
     313              :                     }
     314              :                     else
     315              :                     {
     316              :                         /*
     317              :                          * It's just roundoff error; use a small positive
     318              :                          * value
     319              :                          */
     320          491 :                         s2 = 1.0e-10;
     321              :                     }
     322              :                 }
     323              :             }
     324              :             /* Merge in the selectivity of the pair of clauses */
     325         4589 :             s1 *= s2;
     326              :         }
     327              :         else
     328              :         {
     329              :             /* Only found one of a pair, merge it in generically */
     330        17824 :             if (rqlist->have_lobound)
     331        15175 :                 s1 *= rqlist->lobound;
     332              :             else
     333         2649 :                 s1 *= rqlist->hibound;
     334              :         }
     335              :         /* release storage and advance */
     336        22413 :         rqnext = rqlist->next;
     337        22413 :         pfree(rqlist);
     338        22413 :         rqlist = rqnext;
     339              :     }
     340              : 
     341       747518 :     return s1;
     342              : }
     343              : 
     344              : /*
     345              :  * clauselist_selectivity_or -
     346              :  *    Compute the selectivity of an implicitly-ORed list of boolean
     347              :  *    expression clauses.  The list can be empty, in which case 0.0
     348              :  *    must be returned.  List elements may be either RestrictInfos
     349              :  *    or bare expression clauses --- the former is preferred since
     350              :  *    it allows caching of results.
     351              :  *
     352              :  * See clause_selectivity() for the meaning of the additional parameters.
     353              :  *
     354              :  * The basic approach is to apply extended statistics first, on as many
     355              :  * clauses as possible, in order to capture cross-column dependencies etc.
     356              :  * The remaining clauses are then estimated as if they were independent.
     357              :  */
     358              : static Selectivity
     359         7750 : clauselist_selectivity_or(PlannerInfo *root,
     360              :                           List *clauses,
     361              :                           int varRelid,
     362              :                           JoinType jointype,
     363              :                           SpecialJoinInfo *sjinfo,
     364              :                           bool use_extended_stats)
     365              : {
     366         7750 :     Selectivity s1 = 0.0;
     367              :     RelOptInfo *rel;
     368         7750 :     Bitmapset  *estimatedclauses = NULL;
     369              :     ListCell   *lc;
     370              :     int         listidx;
     371              : 
     372              :     /*
     373              :      * Determine if these clauses reference a single relation.  If so, and if
     374              :      * it has extended statistics, try to apply those.
     375              :      */
     376         7750 :     rel = find_single_rel_for_clauses(root, clauses);
     377         7750 :     if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
     378              :     {
     379              :         /*
     380              :          * Estimate as many clauses as possible using extended statistics.
     381              :          *
     382              :          * 'estimatedclauses' is populated with the 0-based list position
     383              :          * index of clauses estimated here, and that should be ignored below.
     384              :          */
     385           78 :         s1 = statext_clauselist_selectivity(root, clauses, varRelid,
     386              :                                             jointype, sjinfo, rel,
     387              :                                             &estimatedclauses, true);
     388              :     }
     389              : 
     390              :     /*
     391              :      * Estimate the remaining clauses as if they were independent.
     392              :      *
     393              :      * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
     394              :      * for the probable overlap of selected tuple sets.
     395              :      *
     396              :      * XXX is this too conservative?
     397              :      */
     398         7750 :     listidx = -1;
     399        25381 :     foreach(lc, clauses)
     400              :     {
     401              :         Selectivity s2;
     402              : 
     403        17631 :         listidx++;
     404              : 
     405              :         /*
     406              :          * Skip this clause if it's already been estimated by some other
     407              :          * statistics above.
     408              :          */
     409        17631 :         if (bms_is_member(listidx, estimatedclauses))
     410          120 :             continue;
     411              : 
     412        17511 :         s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid,
     413              :                                     jointype, sjinfo, use_extended_stats);
     414              : 
     415        17511 :         s1 = s1 + s2 - s1 * s2;
     416              :     }
     417              : 
     418         7750 :     return s1;
     419              : }
     420              : 
     421              : /*
     422              :  * addRangeClause --- add a new range clause for clauselist_selectivity
     423              :  *
     424              :  * Here is where we try to match up pairs of range-query clauses
     425              :  */
     426              : static void
     427        27416 : addRangeClause(RangeQueryClause **rqlist, Node *clause,
     428              :                bool varonleft, bool isLTsel, Selectivity s2)
     429              : {
     430              :     RangeQueryClause *rqelem;
     431              :     Node       *var;
     432              :     bool        is_lobound;
     433              : 
     434        27416 :     if (varonleft)
     435              :     {
     436        27305 :         var = get_leftop((Expr *) clause);
     437        27305 :         is_lobound = !isLTsel;  /* x < something is high bound */
     438              :     }
     439              :     else
     440              :     {
     441          111 :         var = get_rightop((Expr *) clause);
     442          111 :         is_lobound = isLTsel;   /* something < x is low bound */
     443              :     }
     444              : 
     445        28159 :     for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
     446              :     {
     447              :         /*
     448              :          * We use full equal() here because the "var" might be a function of
     449              :          * one or more attributes of the same relation...
     450              :          */
     451         5746 :         if (!equal(var, rqelem->var))
     452          743 :             continue;
     453              :         /* Found the right group to put this clause in */
     454         5003 :         if (is_lobound)
     455              :         {
     456          267 :             if (!rqelem->have_lobound)
     457              :             {
     458           90 :                 rqelem->have_lobound = true;
     459           90 :                 rqelem->lobound = s2;
     460              :             }
     461              :             else
     462              :             {
     463              : 
     464              :                 /*------
     465              :                  * We have found two similar clauses, such as
     466              :                  * x < y AND x <= z.
     467              :                  * Keep only the more restrictive one.
     468              :                  *------
     469              :                  */
     470          177 :                 if (rqelem->lobound > s2)
     471           30 :                     rqelem->lobound = s2;
     472              :             }
     473              :         }
     474              :         else
     475              :         {
     476         4736 :             if (!rqelem->have_hibound)
     477              :             {
     478         4499 :                 rqelem->have_hibound = true;
     479         4499 :                 rqelem->hibound = s2;
     480              :             }
     481              :             else
     482              :             {
     483              : 
     484              :                 /*------
     485              :                  * We have found two similar clauses, such as
     486              :                  * x > y AND x >= z.
     487              :                  * Keep only the more restrictive one.
     488              :                  *------
     489              :                  */
     490          237 :                 if (rqelem->hibound > s2)
     491           78 :                     rqelem->hibound = s2;
     492              :             }
     493              :         }
     494         5003 :         return;
     495              :     }
     496              : 
     497              :     /* No matching var found, so make a new clause-pair data structure */
     498        22413 :     rqelem = palloc_object(RangeQueryClause);
     499        22413 :     rqelem->var = var;
     500        22413 :     if (is_lobound)
     501              :     {
     502        19674 :         rqelem->have_lobound = true;
     503        19674 :         rqelem->have_hibound = false;
     504        19674 :         rqelem->lobound = s2;
     505              :     }
     506              :     else
     507              :     {
     508         2739 :         rqelem->have_lobound = false;
     509         2739 :         rqelem->have_hibound = true;
     510         2739 :         rqelem->hibound = s2;
     511              :     }
     512        22413 :     rqelem->next = *rqlist;
     513        22413 :     *rqlist = rqelem;
     514              : }
     515              : 
     516              : /*
     517              :  * find_single_rel_for_clauses
     518              :  *      Examine each clause in 'clauses' and determine if all clauses
     519              :  *      reference only a single relation.  If so return that relation,
     520              :  *      otherwise return NULL.
     521              :  */
     522              : static RelOptInfo *
     523       756897 : find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
     524              : {
     525       756897 :     int         lastrelid = 0;
     526              :     ListCell   *l;
     527              : 
     528      1006333 :     foreach(l, clauses)
     529              :     {
     530       379564 :         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
     531              :         int         relid;
     532              : 
     533              :         /*
     534              :          * If we have a list of bare clauses rather than RestrictInfos, we
     535              :          * could pull out their relids the hard way with pull_varnos().
     536              :          * However, currently the extended-stats machinery won't do anything
     537              :          * with non-RestrictInfo clauses anyway, so there's no point in
     538              :          * spending extra cycles; just fail if that's what we have.
     539              :          *
     540              :          * An exception to that rule is if we have a bare BoolExpr AND clause.
     541              :          * We treat this as a special case because the restrictinfo machinery
     542              :          * doesn't build RestrictInfos on top of AND clauses.
     543              :          */
     544       379564 :         if (is_andclause(rinfo))
     545         1121 :         {
     546              :             RelOptInfo *rel;
     547              : 
     548         1629 :             rel = find_single_rel_for_clauses(root,
     549              :                                               ((BoolExpr *) rinfo)->args);
     550              : 
     551         1629 :             if (rel == NULL)
     552       130128 :                 return NULL;
     553         1121 :             if (lastrelid == 0)
     554          601 :                 lastrelid = rel->relid;
     555          520 :             else if (rel->relid != lastrelid)
     556            0 :                 return NULL;
     557              : 
     558        20124 :             continue;
     559              :         }
     560              : 
     561       377935 :         if (!IsA(rinfo, RestrictInfo))
     562          565 :             return NULL;
     563              : 
     564       377370 :         if (bms_is_empty(rinfo->clause_relids))
     565        19003 :             continue;           /* we can ignore variable-free clauses */
     566       358367 :         if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
     567       126515 :             return NULL;        /* multiple relations in this clause */
     568       231852 :         if (lastrelid == 0)
     569       115366 :             lastrelid = relid;  /* first clause referencing a relation */
     570       116486 :         else if (relid != lastrelid)
     571         2540 :             return NULL;        /* relation not same as last one */
     572              :     }
     573              : 
     574       626769 :     if (lastrelid != 0)
     575        90960 :         return find_base_rel(root, lastrelid);
     576              : 
     577       535809 :     return NULL;                /* no clauses */
     578              : }
     579              : 
     580              : /*
     581              :  * treat_as_join_clause -
     582              :  *    Decide whether an operator clause is to be handled by the
     583              :  *    restriction or join estimator.  Subroutine for clause_selectivity().
     584              :  */
     585              : static inline bool
     586       537264 : treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
     587              :                      int varRelid, SpecialJoinInfo *sjinfo)
     588              : {
     589       537264 :     if (varRelid != 0)
     590              :     {
     591              :         /*
     592              :          * Caller is forcing restriction mode (eg, because we are examining an
     593              :          * inner indexscan qual).
     594              :          */
     595       206808 :         return false;
     596              :     }
     597       330456 :     else if (sjinfo == NULL)
     598              :     {
     599              :         /*
     600              :          * It must be a restriction clause, since it's being evaluated at a
     601              :          * scan node.
     602              :          */
     603       187483 :         return false;
     604              :     }
     605              :     else
     606              :     {
     607              :         /*
     608              :          * Otherwise, it's a join if there's more than one base relation used.
     609              :          * We can optimize this calculation if an rinfo was passed.
     610              :          *
     611              :          * XXX  Since we know the clause is being evaluated at a join, the
     612              :          * only way it could be single-relation is if it was delayed by outer
     613              :          * joins.  We intentionally count only baserels here, not OJs that
     614              :          * might be present in rinfo->clause_relids, so that we direct such
     615              :          * cases to the restriction qual estimators not join estimators.
     616              :          * Eventually some notice should be taken of the possibility of
     617              :          * injected nulls, but we'll likely want to do that in the restriction
     618              :          * estimators rather than starting to treat such cases as join quals.
     619              :          */
     620       142973 :         if (rinfo)
     621       142742 :             return (rinfo->num_base_rels > 1);
     622              :         else
     623          231 :             return (NumRelids(root, clause) > 1);
     624              :     }
     625              : }
     626              : 
     627              : 
     628              : /*
     629              :  * clause_selectivity -
     630              :  *    Compute the selectivity of a general boolean expression clause.
     631              :  *
     632              :  * The clause can be either a RestrictInfo or a plain expression.  If it's
     633              :  * a RestrictInfo, we try to cache the selectivity for possible re-use,
     634              :  * so passing RestrictInfos is preferred.
     635              :  *
     636              :  * varRelid is either 0 or a rangetable index.
     637              :  *
     638              :  * When varRelid is not 0, only variables belonging to that relation are
     639              :  * considered in computing selectivity; other vars are treated as constants
     640              :  * of unknown values.  This is appropriate for estimating the selectivity of
     641              :  * a join clause that is being used as a restriction clause in a scan of a
     642              :  * nestloop join's inner relation --- varRelid should then be the ID of the
     643              :  * inner relation.
     644              :  *
     645              :  * When varRelid is 0, all variables are treated as variables.  This
     646              :  * is appropriate for ordinary join clauses and restriction clauses.
     647              :  *
     648              :  * jointype is the join type, if the clause is a join clause.  Pass JOIN_INNER
     649              :  * if the clause isn't a join clause.
     650              :  *
     651              :  * sjinfo is NULL for a non-join clause, otherwise it provides additional
     652              :  * context information about the join being performed.  There are some
     653              :  * special cases:
     654              :  *  1. For a special (not INNER) join, sjinfo is always a member of
     655              :  *     root->join_info_list.
     656              :  *  2. For an INNER join, sjinfo is just a transient struct, and only the
     657              :  *     relids and jointype fields in it can be trusted.
     658              :  * It is possible for jointype to be different from sjinfo->jointype.
     659              :  * This indicates we are considering a variant join: either with
     660              :  * the LHS and RHS switched, or with one input unique-ified.
     661              :  *
     662              :  * Note: when passing nonzero varRelid, it's normally appropriate to set
     663              :  * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
     664              :  * join clause; because we aren't treating it as a join clause.
     665              :  */
     666              : Selectivity
     667       450929 : clause_selectivity(PlannerInfo *root,
     668              :                    Node *clause,
     669              :                    int varRelid,
     670              :                    JoinType jointype,
     671              :                    SpecialJoinInfo *sjinfo)
     672              : {
     673       450929 :     return clause_selectivity_ext(root, clause, varRelid,
     674              :                                   jointype, sjinfo, true);
     675              : }
     676              : 
     677              : /*
     678              :  * clause_selectivity_ext -
     679              :  *    Extended version of clause_selectivity().  If "use_extended_stats" is
     680              :  *    false, all extended statistics will be ignored, and only per-column
     681              :  *    statistics will be used.
     682              :  */
     683              : Selectivity
     684      1691204 : clause_selectivity_ext(PlannerInfo *root,
     685              :                        Node *clause,
     686              :                        int varRelid,
     687              :                        JoinType jointype,
     688              :                        SpecialJoinInfo *sjinfo,
     689              :                        bool use_extended_stats)
     690              : {
     691      1691204 :     Selectivity s1 = 0.5;       /* default for any unhandled clause type */
     692      1691204 :     RestrictInfo *rinfo = NULL;
     693      1691204 :     bool        cacheable = false;
     694              : 
     695      1691204 :     if (clause == NULL)         /* can this still happen? */
     696            0 :         return s1;
     697              : 
     698      1691204 :     if (IsA(clause, RestrictInfo))
     699              :     {
     700      1679772 :         rinfo = (RestrictInfo *) clause;
     701              : 
     702              :         /*
     703              :          * If the clause is marked pseudoconstant, then it will be used as a
     704              :          * gating qual and should not affect selectivity estimates; hence
     705              :          * return 1.0.  The only exception is that a constant FALSE may be
     706              :          * taken as having selectivity 0.0, since it will surely mean no rows
     707              :          * out of the plan.  This case is simple enough that we need not
     708              :          * bother caching the result.
     709              :          */
     710      1679772 :         if (rinfo->pseudoconstant)
     711              :         {
     712        14610 :             if (!IsA(rinfo->clause, Const))
     713        14492 :                 return (Selectivity) 1.0;
     714              :         }
     715              : 
     716              :         /*
     717              :          * If possible, cache the result of the selectivity calculation for
     718              :          * the clause.  We can cache if varRelid is zero or the clause
     719              :          * contains only vars of that relid --- otherwise varRelid will affect
     720              :          * the result, so mustn't cache.  Outer join quals might be examined
     721              :          * with either their join's actual jointype or JOIN_INNER, so we need
     722              :          * two cache variables to remember both cases.  Note: we assume the
     723              :          * result won't change if we are switching the input relations or
     724              :          * considering a unique-ified case, so we only need one cache variable
     725              :          * for all non-JOIN_INNER cases.
     726              :          */
     727      1665280 :         if (varRelid == 0 ||
     728       602235 :             rinfo->num_base_rels == 0 ||
     729      1001441 :             (rinfo->num_base_rels == 1 &&
     730       399206 :              bms_is_member(varRelid, rinfo->clause_relids)))
     731              :         {
     732              :             /* Cacheable --- do we already have the result? */
     733      1460589 :             if (jointype == JOIN_INNER)
     734              :             {
     735      1270944 :                 if (rinfo->norm_selec >= 0)
     736       974447 :                     return rinfo->norm_selec;
     737              :             }
     738              :             else
     739              :             {
     740       189645 :                 if (rinfo->outer_selec >= 0)
     741       118829 :                     return rinfo->outer_selec;
     742              :             }
     743       367313 :             cacheable = true;
     744              :         }
     745              : 
     746              :         /*
     747              :          * Proceed with examination of contained clause.  If the clause is an
     748              :          * OR-clause, we want to look at the variant with sub-RestrictInfos,
     749              :          * so that per-subclause selectivities can be cached.
     750              :          */
     751       572004 :         if (rinfo->orclause)
     752         7730 :             clause = (Node *) rinfo->orclause;
     753              :         else
     754       564274 :             clause = (Node *) rinfo->clause;
     755              :     }
     756              : 
     757       583436 :     if (IsA(clause, Var))
     758              :     {
     759        20689 :         Var        *var = (Var *) clause;
     760              : 
     761              :         /*
     762              :          * We probably shouldn't ever see an uplevel Var here, but if we do,
     763              :          * return the default selectivity...
     764              :          */
     765        20689 :         if (var->varlevelsup == 0 &&
     766          404 :             (varRelid == 0 || varRelid == (int) var->varno))
     767              :         {
     768              :             /* Use the restriction selectivity function for a bool Var */
     769        20499 :             s1 = boolvarsel(root, (Node *) var, varRelid);
     770              :         }
     771              :     }
     772       562747 :     else if (IsA(clause, Const))
     773              :     {
     774              :         /* bool constant is pretty easy... */
     775         2025 :         Const      *con = (Const *) clause;
     776              : 
     777         3992 :         s1 = con->constisnull ? 0.0 :
     778         1967 :             DatumGetBool(con->constvalue) ? 1.0 : 0.0;
     779              :     }
     780       560722 :     else if (IsA(clause, Param))
     781              :     {
     782              :         /* see if we can replace the Param */
     783           15 :         Node       *subst = estimate_expression_value(root, clause);
     784              : 
     785           15 :         if (IsA(subst, Const))
     786              :         {
     787              :             /* bool constant is pretty easy... */
     788            0 :             Const      *con = (Const *) subst;
     789              : 
     790            0 :             s1 = con->constisnull ? 0.0 :
     791            0 :                 DatumGetBool(con->constvalue) ? 1.0 : 0.0;
     792              :         }
     793              :         else
     794              :         {
     795              :             /* XXX any way to do better than default? */
     796              :         }
     797              :     }
     798       560707 :     else if (is_notclause(clause))
     799              :     {
     800              :         /* inverse of the selectivity of the underlying clause */
     801         5377 :         s1 = 1.0 - clause_selectivity_ext(root,
     802         5377 :                                           (Node *) get_notclausearg((Expr *) clause),
     803              :                                           varRelid,
     804              :                                           jointype,
     805              :                                           sjinfo,
     806              :                                           use_extended_stats);
     807              :     }
     808       555330 :     else if (is_andclause(clause))
     809              :     {
     810              :         /* share code with clauselist_selectivity() */
     811         2247 :         s1 = clauselist_selectivity_ext(root,
     812              :                                         ((BoolExpr *) clause)->args,
     813              :                                         varRelid,
     814              :                                         jointype,
     815              :                                         sjinfo,
     816              :                                         use_extended_stats);
     817              :     }
     818       553083 :     else if (is_orclause(clause))
     819              :     {
     820              :         /*
     821              :          * Almost the same thing as clauselist_selectivity, but with the
     822              :          * clauses connected by OR.
     823              :          */
     824         7750 :         s1 = clauselist_selectivity_or(root,
     825              :                                        ((BoolExpr *) clause)->args,
     826              :                                        varRelid,
     827              :                                        jointype,
     828              :                                        sjinfo,
     829              :                                        use_extended_stats);
     830              :     }
     831       545333 :     else if (is_opclause(clause) || IsA(clause, DistinctExpr))
     832       519198 :     {
     833       519213 :         OpExpr     *opclause = (OpExpr *) clause;
     834       519213 :         Oid         opno = opclause->opno;
     835              : 
     836       519213 :         if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
     837              :         {
     838              :             /* Estimate selectivity for a join clause. */
     839       137097 :             s1 = join_selectivity(root, opno,
     840              :                                   opclause->args,
     841              :                                   opclause->inputcollid,
     842              :                                   jointype,
     843              :                                   sjinfo);
     844              :         }
     845              :         else
     846              :         {
     847              :             /* Estimate selectivity for a restriction clause. */
     848       382116 :             s1 = restriction_selectivity(root, opno,
     849              :                                          opclause->args,
     850              :                                          opclause->inputcollid,
     851              :                                          varRelid);
     852              :         }
     853              : 
     854              :         /*
     855              :          * DistinctExpr has the same representation as OpExpr, but the
     856              :          * contained operator is "=" not "<>", so we must negate the result.
     857              :          * This estimation method doesn't give the right behavior for nulls,
     858              :          * but it's better than doing nothing.
     859              :          */
     860       519198 :         if (IsA(clause, DistinctExpr))
     861          523 :             s1 = 1.0 - s1;
     862              :     }
     863        26120 :     else if (is_funcclause(clause))
     864              :     {
     865         6638 :         FuncExpr   *funcclause = (FuncExpr *) clause;
     866              : 
     867              :         /* Try to get an estimate from the support function, if any */
     868         6638 :         s1 = function_selectivity(root,
     869              :                                   funcclause->funcid,
     870              :                                   funcclause->args,
     871              :                                   funcclause->inputcollid,
     872         6638 :                                   treat_as_join_clause(root, clause, rinfo,
     873              :                                                        varRelid, sjinfo),
     874              :                                   varRelid,
     875              :                                   jointype,
     876              :                                   sjinfo);
     877              : 
     878              :         /* If no support, fall back on boolvarsel */
     879         6638 :         if (s1 < 0)
     880         6623 :             s1 = boolvarsel(root, clause, varRelid);
     881              :     }
     882        19482 :     else if (IsA(clause, ScalarArrayOpExpr))
     883              :     {
     884              :         /* Use node specific selectivity calculation function */
     885        11413 :         s1 = scalararraysel(root,
     886              :                             (ScalarArrayOpExpr *) clause,
     887        11413 :                             treat_as_join_clause(root, clause, rinfo,
     888              :                                                  varRelid, sjinfo),
     889              :                             varRelid,
     890              :                             jointype,
     891              :                             sjinfo);
     892              :     }
     893         8069 :     else if (IsA(clause, RowCompareExpr))
     894              :     {
     895              :         /* Use node specific selectivity calculation function */
     896          126 :         s1 = rowcomparesel(root,
     897              :                            (RowCompareExpr *) clause,
     898              :                            varRelid,
     899              :                            jointype,
     900              :                            sjinfo);
     901              :     }
     902         7943 :     else if (IsA(clause, NullTest))
     903              :     {
     904              :         /* Use node specific selectivity calculation function */
     905         5693 :         s1 = nulltestsel(root,
     906              :                          ((NullTest *) clause)->nulltesttype,
     907         5693 :                          (Node *) ((NullTest *) clause)->arg,
     908              :                          varRelid,
     909              :                          jointype,
     910              :                          sjinfo);
     911              :     }
     912         2250 :     else if (IsA(clause, BooleanTest))
     913              :     {
     914              :         /* Use node specific selectivity calculation function */
     915          507 :         s1 = booltestsel(root,
     916              :                          ((BooleanTest *) clause)->booltesttype,
     917          507 :                          (Node *) ((BooleanTest *) clause)->arg,
     918              :                          varRelid,
     919              :                          jointype,
     920              :                          sjinfo);
     921              :     }
     922         1743 :     else if (IsA(clause, CurrentOfExpr))
     923              :     {
     924              :         /* CURRENT OF selects at most one row of its table */
     925          203 :         CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
     926          203 :         RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
     927              : 
     928          203 :         if (crel->tuples > 0)
     929          196 :             s1 = 1.0 / crel->tuples;
     930              :     }
     931         1540 :     else if (IsA(clause, RelabelType))
     932              :     {
     933              :         /* Not sure this case is needed, but it can't hurt */
     934            0 :         s1 = clause_selectivity_ext(root,
     935            0 :                                     (Node *) ((RelabelType *) clause)->arg,
     936              :                                     varRelid,
     937              :                                     jointype,
     938              :                                     sjinfo,
     939              :                                     use_extended_stats);
     940              :     }
     941         1540 :     else if (IsA(clause, CoerceToDomain))
     942              :     {
     943              :         /* Not sure this case is needed, but it can't hurt */
     944            0 :         s1 = clause_selectivity_ext(root,
     945            0 :                                     (Node *) ((CoerceToDomain *) clause)->arg,
     946              :                                     varRelid,
     947              :                                     jointype,
     948              :                                     sjinfo,
     949              :                                     use_extended_stats);
     950              :     }
     951              :     else
     952              :     {
     953              :         /*
     954              :          * For anything else, see if we can consider it as a boolean variable.
     955              :          * This only works if it's an immutable expression in Vars of a single
     956              :          * relation; but there's no point in us checking that here because
     957              :          * boolvarsel() will do it internally, and return a suitable default
     958              :          * selectivity if not.
     959              :          */
     960         1540 :         s1 = boolvarsel(root, clause, varRelid);
     961              :     }
     962              : 
     963              :     /* Cache the result if possible */
     964       583421 :     if (cacheable)
     965              :     {
     966       367298 :         if (jointype == JOIN_INNER)
     967       296482 :             rinfo->norm_selec = s1;
     968              :         else
     969        70816 :             rinfo->outer_selec = s1;
     970              :     }
     971              : 
     972              : #ifdef SELECTIVITY_DEBUG
     973              :     elog(DEBUG4, "clause_selectivity: s1 %f", s1);
     974              : #endif                          /* SELECTIVITY_DEBUG */
     975              : 
     976       583421 :     return s1;
     977              : }
        

Generated by: LCOV version 2.0-1