LCOV - code coverage report
Current view: top level - src/backend/utils/adt - rangetypes_selfuncs.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 270 336 80.4 %
Date: 2024-11-21 08:14:44 Functions: 13 13 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * rangetypes_selfuncs.c
       4             :  *    Functions for selectivity estimation of range operators
       5             :  *
       6             :  * Estimates are based on histograms of lower and upper bounds, and the
       7             :  * fraction of empty ranges.
       8             :  *
       9             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      10             :  * Portions Copyright (c) 1994, Regents of the University of California
      11             :  *
      12             :  *
      13             :  * IDENTIFICATION
      14             :  *    src/backend/utils/adt/rangetypes_selfuncs.c
      15             :  *
      16             :  *-------------------------------------------------------------------------
      17             :  */
      18             : #include "postgres.h"
      19             : 
      20             : #include <math.h>
      21             : 
      22             : #include "access/htup_details.h"
      23             : #include "catalog/pg_operator.h"
      24             : #include "catalog/pg_statistic.h"
      25             : #include "utils/float.h"
      26             : #include "utils/fmgrprotos.h"
      27             : #include "utils/lsyscache.h"
      28             : #include "utils/rangetypes.h"
      29             : #include "utils/selfuncs.h"
      30             : #include "utils/typcache.h"
      31             : 
      32             : static double calc_rangesel(TypeCacheEntry *typcache, VariableStatData *vardata,
      33             :                             const RangeType *constval, Oid operator);
      34             : static double default_range_selectivity(Oid operator);
      35             : static double calc_hist_selectivity(TypeCacheEntry *typcache,
      36             :                                     VariableStatData *vardata, const RangeType *constval,
      37             :                                     Oid operator);
      38             : static double calc_hist_selectivity_scalar(TypeCacheEntry *typcache,
      39             :                                            const RangeBound *constbound,
      40             :                                            const RangeBound *hist, int hist_nvalues,
      41             :                                            bool equal);
      42             : static int  rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value,
      43             :                            const RangeBound *hist, int hist_length, bool equal);
      44             : static float8 get_position(TypeCacheEntry *typcache, const RangeBound *value,
      45             :                            const RangeBound *hist1, const RangeBound *hist2);
      46             : static float8 get_len_position(double value, double hist1, double hist2);
      47             : static float8 get_distance(TypeCacheEntry *typcache, const RangeBound *bound1,
      48             :                            const RangeBound *bound2);
      49             : static int  length_hist_bsearch(Datum *length_hist_values,
      50             :                                 int length_hist_nvalues, double value, bool equal);
      51             : static double calc_length_hist_frac(Datum *length_hist_values,
      52             :                                     int length_hist_nvalues, double length1, double length2, bool equal);
      53             : static double calc_hist_selectivity_contained(TypeCacheEntry *typcache,
      54             :                                               const RangeBound *lower, RangeBound *upper,
      55             :                                               const RangeBound *hist_lower, int hist_nvalues,
      56             :                                               Datum *length_hist_values, int length_hist_nvalues);
      57             : static double calc_hist_selectivity_contains(TypeCacheEntry *typcache,
      58             :                                              const RangeBound *lower, const RangeBound *upper,
      59             :                                              const RangeBound *hist_lower, int hist_nvalues,
      60             :                                              Datum *length_hist_values, int length_hist_nvalues);
      61             : 
      62             : /*
      63             :  * Returns a default selectivity estimate for given operator, when we don't
      64             :  * have statistics or cannot use them for some reason.
      65             :  */
      66             : static double
      67        1160 : default_range_selectivity(Oid operator)
      68             : {
      69        1160 :     switch (operator)
      70             :     {
      71         650 :         case OID_RANGE_OVERLAP_OP:
      72         650 :             return 0.01;
      73             : 
      74         108 :         case OID_RANGE_CONTAINS_OP:
      75             :         case OID_RANGE_CONTAINED_OP:
      76         108 :             return 0.005;
      77             : 
      78         102 :         case OID_RANGE_CONTAINS_ELEM_OP:
      79             :         case OID_RANGE_ELEM_CONTAINED_OP:
      80             : 
      81             :             /*
      82             :              * "range @> elem" is more or less identical to a scalar
      83             :              * inequality "A >= b AND A <= c".
      84             :              */
      85         102 :             return DEFAULT_RANGE_INEQ_SEL;
      86             : 
      87         300 :         case OID_RANGE_LESS_OP:
      88             :         case OID_RANGE_LESS_EQUAL_OP:
      89             :         case OID_RANGE_GREATER_OP:
      90             :         case OID_RANGE_GREATER_EQUAL_OP:
      91             :         case OID_RANGE_LEFT_OP:
      92             :         case OID_RANGE_RIGHT_OP:
      93             :         case OID_RANGE_OVERLAPS_LEFT_OP:
      94             :         case OID_RANGE_OVERLAPS_RIGHT_OP:
      95             :             /* these are similar to regular scalar inequalities */
      96         300 :             return DEFAULT_INEQ_SEL;
      97             : 
      98           0 :         default:
      99             :             /* all range operators should be handled above, but just in case */
     100           0 :             return 0.01;
     101             :     }
     102             : }
     103             : 
     104             : /*
     105             :  * rangesel -- restriction selectivity for range operators
     106             :  */
     107             : Datum
     108        1490 : rangesel(PG_FUNCTION_ARGS)
     109             : {
     110        1490 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     111        1490 :     Oid         operator = PG_GETARG_OID(1);
     112        1490 :     List       *args = (List *) PG_GETARG_POINTER(2);
     113        1490 :     int         varRelid = PG_GETARG_INT32(3);
     114             :     VariableStatData vardata;
     115             :     Node       *other;
     116             :     bool        varonleft;
     117             :     Selectivity selec;
     118        1490 :     TypeCacheEntry *typcache = NULL;
     119        1490 :     RangeType  *constrange = NULL;
     120             : 
     121             :     /*
     122             :      * If expression is not (variable op something) or (something op
     123             :      * variable), then punt and return a default estimate.
     124             :      */
     125        1490 :     if (!get_restriction_variable(root, args, varRelid,
     126             :                                   &vardata, &other, &varonleft))
     127           0 :         PG_RETURN_FLOAT8(default_range_selectivity(operator));
     128             : 
     129             :     /*
     130             :      * Can't do anything useful if the something is not a constant, either.
     131             :      */
     132        1490 :     if (!IsA(other, Const))
     133             :     {
     134          48 :         ReleaseVariableStats(vardata);
     135          48 :         PG_RETURN_FLOAT8(default_range_selectivity(operator));
     136             :     }
     137             : 
     138             :     /*
     139             :      * All the range operators are strict, so we can cope with a NULL constant
     140             :      * right away.
     141             :      */
     142        1442 :     if (((Const *) other)->constisnull)
     143             :     {
     144           0 :         ReleaseVariableStats(vardata);
     145           0 :         PG_RETURN_FLOAT8(0.0);
     146             :     }
     147             : 
     148             :     /*
     149             :      * If var is on the right, commute the operator, so that we can assume the
     150             :      * var is on the left in what follows.
     151             :      */
     152        1442 :     if (!varonleft)
     153             :     {
     154             :         /* we have other Op var, commute to make var Op other */
     155         280 :         operator = get_commutator(operator);
     156         280 :         if (!operator)
     157             :         {
     158             :             /* Use default selectivity (should we raise an error instead?) */
     159           0 :             ReleaseVariableStats(vardata);
     160           0 :             PG_RETURN_FLOAT8(default_range_selectivity(operator));
     161             :         }
     162             :     }
     163             : 
     164             :     /*
     165             :      * OK, there's a Var and a Const we're dealing with here.  We need the
     166             :      * Const to be of same range type as the column, else we can't do anything
     167             :      * useful. (Such cases will likely fail at runtime, but here we'd rather
     168             :      * just return a default estimate.)
     169             :      *
     170             :      * If the operator is "range @> element", the constant should be of the
     171             :      * element type of the range column. Convert it to a range that includes
     172             :      * only that single point, so that we don't need special handling for that
     173             :      * in what follows.
     174             :      */
     175        1442 :     if (operator == OID_RANGE_CONTAINS_ELEM_OP)
     176             :     {
     177         120 :         typcache = range_get_typcache(fcinfo, vardata.vartype);
     178             : 
     179         120 :         if (((Const *) other)->consttype == typcache->rngelemtype->type_id)
     180             :         {
     181             :             RangeBound  lower,
     182             :                         upper;
     183             : 
     184         120 :             lower.inclusive = true;
     185         120 :             lower.val = ((Const *) other)->constvalue;
     186         120 :             lower.infinite = false;
     187         120 :             lower.lower = true;
     188         120 :             upper.inclusive = true;
     189         120 :             upper.val = ((Const *) other)->constvalue;
     190         120 :             upper.infinite = false;
     191         120 :             upper.lower = false;
     192         120 :             constrange = range_serialize(typcache, &lower, &upper, false, NULL);
     193             :         }
     194             :     }
     195        1322 :     else if (operator == OID_RANGE_ELEM_CONTAINED_OP)
     196             :     {
     197             :         /*
     198             :          * Here, the Var is the elem, not the range.  In typical cases
     199             :          * elem_contained_by_range_support will have simplified this case, so
     200             :          * that we won't get here.  If we do get here we'll fall back on a
     201             :          * default estimate.
     202             :          */
     203             :     }
     204        1322 :     else if (((Const *) other)->consttype == vardata.vartype)
     205             :     {
     206             :         /* Both sides are the same range type */
     207        1322 :         typcache = range_get_typcache(fcinfo, vardata.vartype);
     208             : 
     209        1322 :         constrange = DatumGetRangeTypeP(((Const *) other)->constvalue);
     210             :     }
     211             : 
     212             :     /*
     213             :      * If we got a valid constant on one side of the operator, proceed to
     214             :      * estimate using statistics. Otherwise punt and return a default constant
     215             :      * estimate.  Note that calc_rangesel need not handle
     216             :      * OID_RANGE_ELEM_CONTAINED_OP.
     217             :      */
     218        1442 :     if (constrange)
     219        1442 :         selec = calc_rangesel(typcache, &vardata, constrange, operator);
     220             :     else
     221           0 :         selec = default_range_selectivity(operator);
     222             : 
     223        1442 :     ReleaseVariableStats(vardata);
     224             : 
     225        1442 :     CLAMP_PROBABILITY(selec);
     226             : 
     227        1442 :     PG_RETURN_FLOAT8((float8) selec);
     228             : }
     229             : 
     230             : static double
     231        1442 : calc_rangesel(TypeCacheEntry *typcache, VariableStatData *vardata,
     232             :               const RangeType *constval, Oid operator)
     233             : {
     234             :     double      hist_selec;
     235             :     double      selec;
     236             :     float4      empty_frac,
     237             :                 null_frac;
     238             : 
     239             :     /*
     240             :      * First look up the fraction of NULLs and empty ranges from pg_statistic.
     241             :      */
     242        1442 :     if (HeapTupleIsValid(vardata->statsTuple))
     243             :     {
     244             :         Form_pg_statistic stats;
     245             :         AttStatsSlot sslot;
     246             : 
     247         162 :         stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
     248         162 :         null_frac = stats->stanullfrac;
     249             : 
     250             :         /* Try to get fraction of empty ranges */
     251         162 :         if (get_attstatsslot(&sslot, vardata->statsTuple,
     252             :                              STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
     253             :                              InvalidOid,
     254             :                              ATTSTATSSLOT_NUMBERS))
     255             :         {
     256         162 :             if (sslot.nnumbers != 1)
     257           0 :                 elog(ERROR, "invalid empty fraction statistic");  /* shouldn't happen */
     258         162 :             empty_frac = sslot.numbers[0];
     259         162 :             free_attstatsslot(&sslot);
     260             :         }
     261             :         else
     262             :         {
     263             :             /* No empty fraction statistic. Assume no empty ranges. */
     264           0 :             empty_frac = 0.0;
     265             :         }
     266             :     }
     267             :     else
     268             :     {
     269             :         /*
     270             :          * No stats are available. Follow through the calculations below
     271             :          * anyway, assuming no NULLs and no empty ranges. This still allows us
     272             :          * to give a better-than-nothing estimate based on whether the
     273             :          * constant is an empty range or not.
     274             :          */
     275        1280 :         null_frac = 0.0;
     276        1280 :         empty_frac = 0.0;
     277             :     }
     278             : 
     279        1442 :     if (RangeIsEmpty(constval))
     280             :     {
     281             :         /*
     282             :          * An empty range matches all ranges, all empty ranges, or nothing,
     283             :          * depending on the operator
     284             :          */
     285         186 :         switch (operator)
     286             :         {
     287             :                 /* these return false if either argument is empty */
     288          12 :             case OID_RANGE_OVERLAP_OP:
     289             :             case OID_RANGE_OVERLAPS_LEFT_OP:
     290             :             case OID_RANGE_OVERLAPS_RIGHT_OP:
     291             :             case OID_RANGE_LEFT_OP:
     292             :             case OID_RANGE_RIGHT_OP:
     293             :                 /* nothing is less than an empty range */
     294             :             case OID_RANGE_LESS_OP:
     295          12 :                 selec = 0.0;
     296          12 :                 break;
     297             : 
     298             :                 /* only empty ranges can be contained by an empty range */
     299          54 :             case OID_RANGE_CONTAINED_OP:
     300             :                 /* only empty ranges are <= an empty range */
     301             :             case OID_RANGE_LESS_EQUAL_OP:
     302          54 :                 selec = empty_frac;
     303          54 :                 break;
     304             : 
     305             :                 /* everything contains an empty range */
     306          90 :             case OID_RANGE_CONTAINS_OP:
     307             :                 /* everything is >= an empty range */
     308             :             case OID_RANGE_GREATER_EQUAL_OP:
     309          90 :                 selec = 1.0;
     310          90 :                 break;
     311             : 
     312             :                 /* all non-empty ranges are > an empty range */
     313          30 :             case OID_RANGE_GREATER_OP:
     314          30 :                 selec = 1.0 - empty_frac;
     315          30 :                 break;
     316             : 
     317             :                 /* an element cannot be empty */
     318           0 :             case OID_RANGE_CONTAINS_ELEM_OP:
     319             :             default:
     320           0 :                 elog(ERROR, "unexpected operator %u", operator);
     321             :                 selec = 0.0;    /* keep compiler quiet */
     322             :                 break;
     323             :         }
     324             :     }
     325             :     else
     326             :     {
     327             :         /*
     328             :          * Calculate selectivity using bound histograms. If that fails for
     329             :          * some reason, e.g no histogram in pg_statistic, use the default
     330             :          * constant estimate for the fraction of non-empty values. This is
     331             :          * still somewhat better than just returning the default estimate,
     332             :          * because this still takes into account the fraction of empty and
     333             :          * NULL tuples, if we had statistics for them.
     334             :          */
     335        1256 :         hist_selec = calc_hist_selectivity(typcache, vardata, constval,
     336             :                                            operator);
     337        1256 :         if (hist_selec < 0.0)
     338        1112 :             hist_selec = default_range_selectivity(operator);
     339             : 
     340             :         /*
     341             :          * Now merge the results for the empty ranges and histogram
     342             :          * calculations, realizing that the histogram covers only the
     343             :          * non-null, non-empty values.
     344             :          */
     345        1256 :         if (operator == OID_RANGE_CONTAINED_OP)
     346             :         {
     347             :             /* empty is contained by anything non-empty */
     348          72 :             selec = (1.0 - empty_frac) * hist_selec + empty_frac;
     349             :         }
     350             :         else
     351             :         {
     352             :             /* with any other operator, empty Op non-empty matches nothing */
     353        1184 :             selec = (1.0 - empty_frac) * hist_selec;
     354             :         }
     355             :     }
     356             : 
     357             :     /* all range operators are strict */
     358        1442 :     selec *= (1.0 - null_frac);
     359             : 
     360             :     /* result should be in range, but make sure... */
     361        1442 :     CLAMP_PROBABILITY(selec);
     362             : 
     363        1442 :     return selec;
     364             : }
     365             : 
     366             : /*
     367             :  * Calculate range operator selectivity using histograms of range bounds.
     368             :  *
     369             :  * This estimate is for the portion of values that are not empty and not
     370             :  * NULL.
     371             :  */
     372             : static double
     373        1256 : calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
     374             :                       const RangeType *constval, Oid operator)
     375             : {
     376             :     AttStatsSlot hslot;
     377             :     AttStatsSlot lslot;
     378             :     int         nhist;
     379             :     RangeBound *hist_lower;
     380             :     RangeBound *hist_upper;
     381             :     int         i;
     382             :     RangeBound  const_lower;
     383             :     RangeBound  const_upper;
     384             :     bool        empty;
     385             :     double      hist_selec;
     386             : 
     387             :     /* Can't use the histogram with insecure range support functions */
     388        1256 :     if (!statistic_proc_security_check(vardata,
     389             :                                        typcache->rng_cmp_proc_finfo.fn_oid))
     390           0 :         return -1;
     391        1256 :     if (OidIsValid(typcache->rng_subdiff_finfo.fn_oid) &&
     392        1244 :         !statistic_proc_security_check(vardata,
     393             :                                        typcache->rng_subdiff_finfo.fn_oid))
     394           0 :         return -1;
     395             : 
     396             :     /* Try to get histogram of ranges */
     397        1400 :     if (!(HeapTupleIsValid(vardata->statsTuple) &&
     398         144 :           get_attstatsslot(&hslot, vardata->statsTuple,
     399             :                            STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid,
     400             :                            ATTSTATSSLOT_VALUES)))
     401        1112 :         return -1.0;
     402             : 
     403             :     /* check that it's a histogram, not just a dummy entry */
     404         144 :     if (hslot.nvalues < 2)
     405             :     {
     406           0 :         free_attstatsslot(&hslot);
     407           0 :         return -1.0;
     408             :     }
     409             : 
     410             :     /*
     411             :      * Convert histogram of ranges into histograms of its lower and upper
     412             :      * bounds.
     413             :      */
     414         144 :     nhist = hslot.nvalues;
     415         144 :     hist_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
     416         144 :     hist_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
     417       14688 :     for (i = 0; i < nhist; i++)
     418             :     {
     419       14544 :         range_deserialize(typcache, DatumGetRangeTypeP(hslot.values[i]),
     420       14544 :                           &hist_lower[i], &hist_upper[i], &empty);
     421             :         /* The histogram should not contain any empty ranges */
     422       14544 :         if (empty)
     423           0 :             elog(ERROR, "bounds histogram contains an empty range");
     424             :     }
     425             : 
     426             :     /* @> and @< also need a histogram of range lengths */
     427         144 :     if (operator == OID_RANGE_CONTAINS_OP ||
     428             :         operator == OID_RANGE_CONTAINED_OP)
     429             :     {
     430          72 :         if (!(HeapTupleIsValid(vardata->statsTuple) &&
     431          36 :               get_attstatsslot(&lslot, vardata->statsTuple,
     432             :                                STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
     433             :                                InvalidOid,
     434             :                                ATTSTATSSLOT_VALUES)))
     435             :         {
     436           0 :             free_attstatsslot(&hslot);
     437           0 :             return -1.0;
     438             :         }
     439             : 
     440             :         /* check that it's a histogram, not just a dummy entry */
     441          36 :         if (lslot.nvalues < 2)
     442             :         {
     443           0 :             free_attstatsslot(&lslot);
     444           0 :             free_attstatsslot(&hslot);
     445           0 :             return -1.0;
     446             :         }
     447             :     }
     448             :     else
     449         108 :         memset(&lslot, 0, sizeof(lslot));
     450             : 
     451             :     /* Extract the bounds of the constant value. */
     452         144 :     range_deserialize(typcache, constval, &const_lower, &const_upper, &empty);
     453             :     Assert(!empty);
     454             : 
     455             :     /*
     456             :      * Calculate selectivity comparing the lower or upper bound of the
     457             :      * constant with the histogram of lower or upper bounds.
     458             :      */
     459         144 :     switch (operator)
     460             :     {
     461           0 :         case OID_RANGE_LESS_OP:
     462             : 
     463             :             /*
     464             :              * The regular b-tree comparison operators (<, <=, >, >=) compare
     465             :              * the lower bounds first, and the upper bounds for values with
     466             :              * equal lower bounds. Estimate that by comparing the lower bounds
     467             :              * only. This gives a fairly accurate estimate assuming there
     468             :              * aren't many rows with a lower bound equal to the constant's
     469             :              * lower bound.
     470             :              */
     471             :             hist_selec =
     472           0 :                 calc_hist_selectivity_scalar(typcache, &const_lower,
     473             :                                              hist_lower, nhist, false);
     474           0 :             break;
     475             : 
     476           0 :         case OID_RANGE_LESS_EQUAL_OP:
     477             :             hist_selec =
     478           0 :                 calc_hist_selectivity_scalar(typcache, &const_lower,
     479             :                                              hist_lower, nhist, true);
     480           0 :             break;
     481             : 
     482           0 :         case OID_RANGE_GREATER_OP:
     483           0 :             hist_selec =
     484           0 :                 1 - calc_hist_selectivity_scalar(typcache, &const_lower,
     485             :                                                  hist_lower, nhist, false);
     486           0 :             break;
     487             : 
     488           0 :         case OID_RANGE_GREATER_EQUAL_OP:
     489           0 :             hist_selec =
     490           0 :                 1 - calc_hist_selectivity_scalar(typcache, &const_lower,
     491             :                                                  hist_lower, nhist, true);
     492           0 :             break;
     493             : 
     494          18 :         case OID_RANGE_LEFT_OP:
     495             :             /* var << const when upper(var) < lower(const) */
     496             :             hist_selec =
     497          18 :                 calc_hist_selectivity_scalar(typcache, &const_lower,
     498             :                                              hist_upper, nhist, false);
     499          18 :             break;
     500             : 
     501          18 :         case OID_RANGE_RIGHT_OP:
     502             :             /* var >> const when lower(var) > upper(const) */
     503          18 :             hist_selec =
     504          18 :                 1 - calc_hist_selectivity_scalar(typcache, &const_upper,
     505             :                                                  hist_lower, nhist, true);
     506          18 :             break;
     507             : 
     508          18 :         case OID_RANGE_OVERLAPS_RIGHT_OP:
     509             :             /* compare lower bounds */
     510          18 :             hist_selec =
     511          18 :                 1 - calc_hist_selectivity_scalar(typcache, &const_lower,
     512             :                                                  hist_lower, nhist, false);
     513          18 :             break;
     514             : 
     515          18 :         case OID_RANGE_OVERLAPS_LEFT_OP:
     516             :             /* compare upper bounds */
     517             :             hist_selec =
     518          18 :                 calc_hist_selectivity_scalar(typcache, &const_upper,
     519             :                                              hist_upper, nhist, true);
     520          18 :             break;
     521             : 
     522          36 :         case OID_RANGE_OVERLAP_OP:
     523             :         case OID_RANGE_CONTAINS_ELEM_OP:
     524             : 
     525             :             /*
     526             :              * A && B <=> NOT (A << B OR A >> B).
     527             :              *
     528             :              * Since A << B and A >> B are mutually exclusive events we can
     529             :              * sum their probabilities to find probability of (A << B OR A >>
     530             :              * B).
     531             :              *
     532             :              * "range @> elem" is equivalent to "range && [elem,elem]". The
     533             :              * caller already constructed the singular range from the element
     534             :              * constant, so just treat it the same as &&.
     535             :              */
     536             :             hist_selec =
     537          36 :                 calc_hist_selectivity_scalar(typcache, &const_lower, hist_upper,
     538             :                                              nhist, false);
     539          36 :             hist_selec +=
     540          36 :                 (1.0 - calc_hist_selectivity_scalar(typcache, &const_upper, hist_lower,
     541             :                                                     nhist, true));
     542          36 :             hist_selec = 1.0 - hist_selec;
     543          36 :             break;
     544             : 
     545          18 :         case OID_RANGE_CONTAINS_OP:
     546             :             hist_selec =
     547          18 :                 calc_hist_selectivity_contains(typcache, &const_lower,
     548             :                                                &const_upper, hist_lower, nhist,
     549             :                                                lslot.values, lslot.nvalues);
     550          18 :             break;
     551             : 
     552          18 :         case OID_RANGE_CONTAINED_OP:
     553          18 :             if (const_lower.infinite)
     554             :             {
     555             :                 /*
     556             :                  * Lower bound no longer matters. Just estimate the fraction
     557             :                  * with an upper bound <= const upper bound
     558             :                  */
     559             :                 hist_selec =
     560           0 :                     calc_hist_selectivity_scalar(typcache, &const_upper,
     561             :                                                  hist_upper, nhist, true);
     562             :             }
     563          18 :             else if (const_upper.infinite)
     564             :             {
     565           0 :                 hist_selec =
     566           0 :                     1.0 - calc_hist_selectivity_scalar(typcache, &const_lower,
     567             :                                                        hist_lower, nhist, false);
     568             :             }
     569             :             else
     570             :             {
     571             :                 hist_selec =
     572          18 :                     calc_hist_selectivity_contained(typcache, &const_lower,
     573             :                                                     &const_upper, hist_lower, nhist,
     574             :                                                     lslot.values, lslot.nvalues);
     575             :             }
     576          18 :             break;
     577             : 
     578           0 :         default:
     579           0 :             elog(ERROR, "unknown range operator %u", operator);
     580             :             hist_selec = -1.0;  /* keep compiler quiet */
     581             :             break;
     582             :     }
     583             : 
     584         144 :     free_attstatsslot(&lslot);
     585         144 :     free_attstatsslot(&hslot);
     586             : 
     587         144 :     return hist_selec;
     588             : }
     589             : 
     590             : 
     591             : /*
     592             :  * Look up the fraction of values less than (or equal, if 'equal' argument
     593             :  * is true) a given const in a histogram of range bounds.
     594             :  */
     595             : static double
     596         144 : calc_hist_selectivity_scalar(TypeCacheEntry *typcache, const RangeBound *constbound,
     597             :                              const RangeBound *hist, int hist_nvalues, bool equal)
     598             : {
     599             :     Selectivity selec;
     600             :     int         index;
     601             : 
     602             :     /*
     603             :      * Find the histogram bin the given constant falls into. Estimate
     604             :      * selectivity as the number of preceding whole bins.
     605             :      */
     606         144 :     index = rbound_bsearch(typcache, constbound, hist, hist_nvalues, equal);
     607         144 :     selec = (Selectivity) (Max(index, 0)) / (Selectivity) (hist_nvalues - 1);
     608             : 
     609             :     /* Adjust using linear interpolation within the bin */
     610         144 :     if (index >= 0 && index < hist_nvalues - 1)
     611         216 :         selec += get_position(typcache, constbound, &hist[index],
     612         108 :                               &hist[index + 1]) / (Selectivity) (hist_nvalues - 1);
     613             : 
     614         144 :     return selec;
     615             : }
     616             : 
     617             : /*
     618             :  * Binary search on an array of range bounds. Returns greatest index of range
     619             :  * bound in array which is less(less or equal) than given range bound. If all
     620             :  * range bounds in array are greater or equal(greater) than given range bound,
     621             :  * return -1. When "equal" flag is set conditions in brackets are used.
     622             :  *
     623             :  * This function is used in scalar operator selectivity estimation. Another
     624             :  * goal of this function is to find a histogram bin where to stop
     625             :  * interpolation of portion of bounds which are less than or equal to given bound.
     626             :  */
     627             : static int
     628         180 : rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist,
     629             :                int hist_length, bool equal)
     630             : {
     631         180 :     int         lower = -1,
     632         180 :                 upper = hist_length - 1,
     633             :                 cmp,
     634             :                 middle;
     635             : 
     636        1368 :     while (lower < upper)
     637             :     {
     638        1188 :         middle = (lower + upper + 1) / 2;
     639        1188 :         cmp = range_cmp_bounds(typcache, &hist[middle], value);
     640             : 
     641        1188 :         if (cmp < 0 || (equal && cmp == 0))
     642         342 :             lower = middle;
     643             :         else
     644         846 :             upper = middle - 1;
     645             :     }
     646         180 :     return lower;
     647             : }
     648             : 
     649             : 
     650             : /*
     651             :  * Binary search on length histogram. Returns greatest index of range length in
     652             :  * histogram which is less than (less than or equal) the given length value. If
     653             :  * all lengths in the histogram are greater than (greater than or equal) the
     654             :  * given length, returns -1.
     655             :  */
     656             : static int
     657         108 : length_hist_bsearch(Datum *length_hist_values, int length_hist_nvalues,
     658             :                     double value, bool equal)
     659             : {
     660         108 :     int         lower = -1,
     661         108 :                 upper = length_hist_nvalues - 1,
     662             :                 middle;
     663             : 
     664         828 :     while (lower < upper)
     665             :     {
     666             :         double      middleval;
     667             : 
     668         720 :         middle = (lower + upper + 1) / 2;
     669             : 
     670         720 :         middleval = DatumGetFloat8(length_hist_values[middle]);
     671         720 :         if (middleval < value || (equal && middleval <= value))
     672         270 :             lower = middle;
     673             :         else
     674         450 :             upper = middle - 1;
     675             :     }
     676         108 :     return lower;
     677             : }
     678             : 
     679             : /*
     680             :  * Get relative position of value in histogram bin in [0,1] range.
     681             :  */
     682             : static float8
     683         162 : get_position(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist1,
     684             :              const RangeBound *hist2)
     685             : {
     686         162 :     bool        has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
     687             :     float8      position;
     688             : 
     689         162 :     if (!hist1->infinite && !hist2->infinite)
     690             :     {
     691             :         float8      bin_width;
     692             : 
     693             :         /*
     694             :          * Both bounds are finite. Assuming the subtype's comparison function
     695             :          * works sanely, the value must be finite, too, because it lies
     696             :          * somewhere between the bounds.  If it doesn't, arbitrarily return
     697             :          * 0.5.
     698             :          */
     699         162 :         if (value->infinite)
     700           0 :             return 0.5;
     701             : 
     702             :         /* Can't interpolate without subdiff function */
     703         162 :         if (!has_subdiff)
     704           0 :             return 0.5;
     705             : 
     706             :         /* Calculate relative position using subdiff function. */
     707         162 :         bin_width = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
     708             :                                                      typcache->rng_collation,
     709             :                                                      hist2->val,
     710             :                                                      hist1->val));
     711         162 :         if (isnan(bin_width) || bin_width <= 0.0)
     712           0 :             return 0.5;         /* punt for NaN or zero-width bin */
     713             : 
     714         162 :         position = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
     715             :                                                     typcache->rng_collation,
     716             :                                                     value->val,
     717             :                                                     hist1->val))
     718             :             / bin_width;
     719             : 
     720         162 :         if (isnan(position))
     721           0 :             return 0.5;         /* punt for NaN from subdiff, Inf/Inf, etc */
     722             : 
     723             :         /* Relative position must be in [0,1] range */
     724         162 :         position = Max(position, 0.0);
     725         162 :         position = Min(position, 1.0);
     726         162 :         return position;
     727             :     }
     728           0 :     else if (hist1->infinite && !hist2->infinite)
     729             :     {
     730             :         /*
     731             :          * Lower bin boundary is -infinite, upper is finite. If the value is
     732             :          * -infinite, return 0.0 to indicate it's equal to the lower bound.
     733             :          * Otherwise return 1.0 to indicate it's infinitely far from the lower
     734             :          * bound.
     735             :          */
     736           0 :         return ((value->infinite && value->lower) ? 0.0 : 1.0);
     737             :     }
     738           0 :     else if (!hist1->infinite && hist2->infinite)
     739             :     {
     740             :         /* same as above, but in reverse */
     741           0 :         return ((value->infinite && !value->lower) ? 1.0 : 0.0);
     742             :     }
     743             :     else
     744             :     {
     745             :         /*
     746             :          * If both bin boundaries are infinite, they should be equal to each
     747             :          * other, and the value should also be infinite and equal to both
     748             :          * bounds. (But don't Assert that, to avoid crashing if a user creates
     749             :          * a datatype with a broken comparison function).
     750             :          *
     751             :          * Assume the value to lie in the middle of the infinite bounds.
     752             :          */
     753           0 :         return 0.5;
     754             :     }
     755             : }
     756             : 
     757             : 
     758             : /*
     759             :  * Get relative position of value in a length histogram bin in [0,1] range.
     760             :  */
     761             : static double
     762         162 : get_len_position(double value, double hist1, double hist2)
     763             : {
     764         162 :     if (!isinf(hist1) && !isinf(hist2))
     765             :     {
     766             :         /*
     767             :          * Both bounds are finite. The value should be finite too, because it
     768             :          * lies somewhere between the bounds. If it doesn't, just return
     769             :          * something.
     770             :          */
     771         126 :         if (isinf(value))
     772           0 :             return 0.5;
     773             : 
     774         126 :         return 1.0 - (hist2 - value) / (hist2 - hist1);
     775             :     }
     776          36 :     else if (isinf(hist1) && !isinf(hist2))
     777             :     {
     778             :         /*
     779             :          * Lower bin boundary is -infinite, upper is finite. Return 1.0 to
     780             :          * indicate the value is infinitely far from the lower bound.
     781             :          */
     782           0 :         return 1.0;
     783             :     }
     784          36 :     else if (isinf(hist1) && isinf(hist2))
     785             :     {
     786             :         /* same as above, but in reverse */
     787           0 :         return 0.0;
     788             :     }
     789             :     else
     790             :     {
     791             :         /*
     792             :          * If both bin boundaries are infinite, they should be equal to each
     793             :          * other, and the value should also be infinite and equal to both
     794             :          * bounds. (But don't Assert that, to avoid crashing unnecessarily if
     795             :          * the caller messes up)
     796             :          *
     797             :          * Assume the value to lie in the middle of the infinite bounds.
     798             :          */
     799          36 :         return 0.5;
     800             :     }
     801             : }
     802             : 
     803             : /*
     804             :  * Measure distance between two range bounds.
     805             :  */
     806             : static float8
     807         126 : get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBound *bound2)
     808             : {
     809         126 :     bool        has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
     810             : 
     811         126 :     if (!bound1->infinite && !bound2->infinite)
     812             :     {
     813             :         /*
     814             :          * Neither bound is infinite, use subdiff function or return default
     815             :          * value of 1.0 if no subdiff is available.
     816             :          */
     817          90 :         if (has_subdiff)
     818             :         {
     819             :             float8      res;
     820             : 
     821          90 :             res = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
     822             :                                                    typcache->rng_collation,
     823             :                                                    bound2->val,
     824             :                                                    bound1->val));
     825             :             /* Reject possible NaN result, also negative result */
     826          90 :             if (isnan(res) || res < 0.0)
     827           0 :                 return 1.0;
     828             :             else
     829          90 :                 return res;
     830             :         }
     831             :         else
     832           0 :             return 1.0;
     833             :     }
     834          36 :     else if (bound1->infinite && bound2->infinite)
     835             :     {
     836             :         /* Both bounds are infinite */
     837           0 :         if (bound1->lower == bound2->lower)
     838           0 :             return 0.0;
     839             :         else
     840           0 :             return get_float8_infinity();
     841             :     }
     842             :     else
     843             :     {
     844             :         /* One bound is infinite, the other is not */
     845          36 :         return get_float8_infinity();
     846             :     }
     847             : }
     848             : 
     849             : /*
     850             :  * Calculate the average of function P(x), in the interval [length1, length2],
     851             :  * where P(x) is the fraction of tuples with length < x (or length <= x if
     852             :  * 'equal' is true).
     853             :  */
     854             : static double
     855         108 : calc_length_hist_frac(Datum *length_hist_values, int length_hist_nvalues,
     856             :                       double length1, double length2, bool equal)
     857             : {
     858             :     double      frac;
     859             :     double      A,
     860             :                 B,
     861             :                 PA,
     862             :                 PB;
     863             :     double      pos;
     864             :     int         i;
     865             :     double      area;
     866             : 
     867             :     Assert(length2 >= length1);
     868             : 
     869         108 :     if (length2 < 0.0)
     870           0 :         return 0.0;             /* shouldn't happen, but doesn't hurt to check */
     871             : 
     872             :     /* All lengths in the table are <= infinite. */
     873         108 :     if (isinf(length2) && equal)
     874           0 :         return 1.0;
     875             : 
     876             :     /*----------
     877             :      * The average of a function between A and B can be calculated by the
     878             :      * formula:
     879             :      *
     880             :      *          B
     881             :      *    1     /
     882             :      * -------  | P(x)dx
     883             :      *  B - A   /
     884             :      *          A
     885             :      *
     886             :      * The geometrical interpretation of the integral is the area under the
     887             :      * graph of P(x). P(x) is defined by the length histogram. We calculate
     888             :      * the area in a piecewise fashion, iterating through the length histogram
     889             :      * bins. Each bin is a trapezoid:
     890             :      *
     891             :      *       P(x2)
     892             :      *        /|
     893             :      *       / |
     894             :      * P(x1)/  |
     895             :      *     |   |
     896             :      *     |   |
     897             :      *  ---+---+--
     898             :      *     x1  x2
     899             :      *
     900             :      * where x1 and x2 are the boundaries of the current histogram, and P(x1)
     901             :      * and P(x1) are the cumulative fraction of tuples at the boundaries.
     902             :      *
     903             :      * The area of each trapezoid is 1/2 * (P(x2) + P(x1)) * (x2 - x1)
     904             :      *
     905             :      * The first bin contains the lower bound passed by the caller, so we
     906             :      * use linear interpolation between the previous and next histogram bin
     907             :      * boundary to calculate P(x1). Likewise for the last bin: we use linear
     908             :      * interpolation to calculate P(x2). For the bins in between, x1 and x2
     909             :      * lie on histogram bin boundaries, so P(x1) and P(x2) are simply:
     910             :      * P(x1) =    (bin index) / (number of bins)
     911             :      * P(x2) = (bin index + 1 / (number of bins)
     912             :      */
     913             : 
     914             :     /* First bin, the one that contains lower bound */
     915         108 :     i = length_hist_bsearch(length_hist_values, length_hist_nvalues, length1, equal);
     916         108 :     if (i >= length_hist_nvalues - 1)
     917           0 :         return 1.0;
     918             : 
     919         108 :     if (i < 0)
     920             :     {
     921          36 :         i = 0;
     922          36 :         pos = 0.0;
     923             :     }
     924             :     else
     925             :     {
     926             :         /* interpolate length1's position in the bin */
     927          72 :         pos = get_len_position(length1,
     928          72 :                                DatumGetFloat8(length_hist_values[i]),
     929          72 :                                DatumGetFloat8(length_hist_values[i + 1]));
     930             :     }
     931         108 :     PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
     932         108 :     B = length1;
     933             : 
     934             :     /*
     935             :      * In the degenerate case that length1 == length2, simply return
     936             :      * P(length1). This is not merely an optimization: if length1 == length2,
     937             :      * we'd divide by zero later on.
     938             :      */
     939         108 :     if (length2 == length1)
     940          18 :         return PB;
     941             : 
     942             :     /*
     943             :      * Loop through all the bins, until we hit the last bin, the one that
     944             :      * contains the upper bound. (if lower and upper bounds are in the same
     945             :      * bin, this falls out immediately)
     946             :      */
     947          90 :     area = 0.0;
     948        3186 :     for (; i < length_hist_nvalues - 1; i++)
     949             :     {
     950        3186 :         double      bin_upper = DatumGetFloat8(length_hist_values[i + 1]);
     951             : 
     952             :         /* check if we've reached the last bin */
     953        3186 :         if (!(bin_upper < length2 || (equal && bin_upper <= length2)))
     954             :             break;
     955             : 
     956             :         /* the upper bound of previous bin is the lower bound of this bin */
     957        3096 :         A = B;
     958        3096 :         PA = PB;
     959             : 
     960        3096 :         B = bin_upper;
     961        3096 :         PB = (double) i / (double) (length_hist_nvalues - 1);
     962             : 
     963             :         /*
     964             :          * Add the area of this trapezoid to the total. The point of the
     965             :          * if-check is to avoid NaN, in the corner case that PA == PB == 0,
     966             :          * and B - A == Inf. The area of a zero-height trapezoid (PA == PB ==
     967             :          * 0) is zero, regardless of the width (B - A).
     968             :          */
     969        3096 :         if (PA > 0 || PB > 0)
     970        3060 :             area += 0.5 * (PB + PA) * (B - A);
     971             :     }
     972             : 
     973             :     /* Last bin */
     974          90 :     A = B;
     975          90 :     PA = PB;
     976             : 
     977          90 :     B = length2;                /* last bin ends at the query upper bound */
     978          90 :     if (i >= length_hist_nvalues - 1)
     979           0 :         pos = 0.0;
     980             :     else
     981             :     {
     982          90 :         if (DatumGetFloat8(length_hist_values[i]) == DatumGetFloat8(length_hist_values[i + 1]))
     983           0 :             pos = 0.0;
     984             :         else
     985          90 :             pos = get_len_position(length2, DatumGetFloat8(length_hist_values[i]), DatumGetFloat8(length_hist_values[i + 1]));
     986             :     }
     987          90 :     PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
     988             : 
     989          90 :     if (PA > 0 || PB > 0)
     990          90 :         area += 0.5 * (PB + PA) * (B - A);
     991             : 
     992             :     /*
     993             :      * Ok, we have calculated the area, ie. the integral. Divide by width to
     994             :      * get the requested average.
     995             :      *
     996             :      * Avoid NaN arising from infinite / infinite. This happens at least if
     997             :      * length2 is infinite. It's not clear what the correct value would be in
     998             :      * that case, so 0.5 seems as good as any value.
     999             :      */
    1000          90 :     if (isinf(area) && isinf(length2))
    1001          18 :         frac = 0.5;
    1002             :     else
    1003          72 :         frac = area / (length2 - length1);
    1004             : 
    1005          90 :     return frac;
    1006             : }
    1007             : 
    1008             : /*
    1009             :  * Calculate selectivity of "var <@ const" operator, ie. estimate the fraction
    1010             :  * of ranges that fall within the constant lower and upper bounds. This uses
    1011             :  * the histograms of range lower bounds and range lengths, on the assumption
    1012             :  * that the range lengths are independent of the lower bounds.
    1013             :  *
    1014             :  * The caller has already checked that constant lower and upper bounds are
    1015             :  * finite.
    1016             :  */
    1017             : static double
    1018          18 : calc_hist_selectivity_contained(TypeCacheEntry *typcache,
    1019             :                                 const RangeBound *lower, RangeBound *upper,
    1020             :                                 const RangeBound *hist_lower, int hist_nvalues,
    1021             :                                 Datum *length_hist_values, int length_hist_nvalues)
    1022             : {
    1023             :     int         i,
    1024             :                 upper_index;
    1025             :     float8      prev_dist;
    1026             :     double      bin_width;
    1027             :     double      upper_bin_width;
    1028             :     double      sum_frac;
    1029             : 
    1030             :     /*
    1031             :      * Begin by finding the bin containing the upper bound, in the lower bound
    1032             :      * histogram. Any range with a lower bound > constant upper bound can't
    1033             :      * match, ie. there are no matches in bins greater than upper_index.
    1034             :      */
    1035          18 :     upper->inclusive = !upper->inclusive;
    1036          18 :     upper->lower = true;
    1037          18 :     upper_index = rbound_bsearch(typcache, upper, hist_lower, hist_nvalues,
    1038             :                                  false);
    1039             : 
    1040             :     /*
    1041             :      * If the upper bound value is below the histogram's lower limit, there
    1042             :      * are no matches.
    1043             :      */
    1044          18 :     if (upper_index < 0)
    1045           0 :         return 0.0;
    1046             : 
    1047             :     /*
    1048             :      * If the upper bound value is at or beyond the histogram's upper limit,
    1049             :      * start our loop at the last actual bin, as though the upper bound were
    1050             :      * within that bin; get_position will clamp its result to 1.0 anyway.
    1051             :      * (This corresponds to assuming that the data population above the
    1052             :      * histogram's upper limit is empty, exactly like what we just assumed for
    1053             :      * the lower limit.)
    1054             :      */
    1055          18 :     upper_index = Min(upper_index, hist_nvalues - 2);
    1056             : 
    1057             :     /*
    1058             :      * Calculate upper_bin_width, ie. the fraction of the (upper_index,
    1059             :      * upper_index + 1) bin which is greater than upper bound of query range
    1060             :      * using linear interpolation of subdiff function.
    1061             :      */
    1062          18 :     upper_bin_width = get_position(typcache, upper,
    1063          18 :                                    &hist_lower[upper_index],
    1064          18 :                                    &hist_lower[upper_index + 1]);
    1065             : 
    1066             :     /*
    1067             :      * In the loop, dist and prev_dist are the distance of the "current" bin's
    1068             :      * lower and upper bounds from the constant upper bound.
    1069             :      *
    1070             :      * bin_width represents the width of the current bin. Normally it is 1.0,
    1071             :      * meaning a full width bin, but can be less in the corner cases: start
    1072             :      * and end of the loop. We start with bin_width = upper_bin_width, because
    1073             :      * we begin at the bin containing the upper bound.
    1074             :      */
    1075          18 :     prev_dist = 0.0;
    1076          18 :     bin_width = upper_bin_width;
    1077             : 
    1078          18 :     sum_frac = 0.0;
    1079          54 :     for (i = upper_index; i >= 0; i--)
    1080             :     {
    1081             :         double      dist;
    1082             :         double      length_hist_frac;
    1083          54 :         bool        final_bin = false;
    1084             : 
    1085             :         /*
    1086             :          * dist -- distance from upper bound of query range to lower bound of
    1087             :          * the current bin in the lower bound histogram. Or to the lower bound
    1088             :          * of the constant range, if this is the final bin, containing the
    1089             :          * constant lower bound.
    1090             :          */
    1091          54 :         if (range_cmp_bounds(typcache, &hist_lower[i], lower) < 0)
    1092             :         {
    1093          18 :             dist = get_distance(typcache, lower, upper);
    1094             : 
    1095             :             /*
    1096             :              * Subtract from bin_width the portion of this bin that we want to
    1097             :              * ignore.
    1098             :              */
    1099          36 :             bin_width -= get_position(typcache, lower, &hist_lower[i],
    1100          18 :                                       &hist_lower[i + 1]);
    1101          18 :             if (bin_width < 0.0)
    1102           0 :                 bin_width = 0.0;
    1103          18 :             final_bin = true;
    1104             :         }
    1105             :         else
    1106          36 :             dist = get_distance(typcache, &hist_lower[i], upper);
    1107             : 
    1108             :         /*
    1109             :          * Estimate the fraction of tuples in this bin that are narrow enough
    1110             :          * to not exceed the distance to the upper bound of the query range.
    1111             :          */
    1112          54 :         length_hist_frac = calc_length_hist_frac(length_hist_values,
    1113             :                                                  length_hist_nvalues,
    1114             :                                                  prev_dist, dist, true);
    1115             : 
    1116             :         /*
    1117             :          * Add the fraction of tuples in this bin, with a suitable length, to
    1118             :          * the total.
    1119             :          */
    1120          54 :         sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
    1121             : 
    1122          54 :         if (final_bin)
    1123          18 :             break;
    1124             : 
    1125          36 :         bin_width = 1.0;
    1126          36 :         prev_dist = dist;
    1127             :     }
    1128             : 
    1129          18 :     return sum_frac;
    1130             : }
    1131             : 
    1132             : /*
    1133             :  * Calculate selectivity of "var @> const" operator, ie. estimate the fraction
    1134             :  * of ranges that contain the constant lower and upper bounds. This uses
    1135             :  * the histograms of range lower bounds and range lengths, on the assumption
    1136             :  * that the range lengths are independent of the lower bounds.
    1137             :  */
    1138             : static double
    1139          18 : calc_hist_selectivity_contains(TypeCacheEntry *typcache,
    1140             :                                const RangeBound *lower, const RangeBound *upper,
    1141             :                                const RangeBound *hist_lower, int hist_nvalues,
    1142             :                                Datum *length_hist_values, int length_hist_nvalues)
    1143             : {
    1144             :     int         i,
    1145             :                 lower_index;
    1146             :     double      bin_width,
    1147             :                 lower_bin_width;
    1148             :     double      sum_frac;
    1149             :     float8      prev_dist;
    1150             : 
    1151             :     /* Find the bin containing the lower bound of query range. */
    1152          18 :     lower_index = rbound_bsearch(typcache, lower, hist_lower, hist_nvalues,
    1153             :                                  true);
    1154             : 
    1155             :     /*
    1156             :      * If the lower bound value is below the histogram's lower limit, there
    1157             :      * are no matches.
    1158             :      */
    1159          18 :     if (lower_index < 0)
    1160           0 :         return 0.0;
    1161             : 
    1162             :     /*
    1163             :      * If the lower bound value is at or beyond the histogram's upper limit,
    1164             :      * start our loop at the last actual bin, as though the upper bound were
    1165             :      * within that bin; get_position will clamp its result to 1.0 anyway.
    1166             :      * (This corresponds to assuming that the data population above the
    1167             :      * histogram's upper limit is empty, exactly like what we just assumed for
    1168             :      * the lower limit.)
    1169             :      */
    1170          18 :     lower_index = Min(lower_index, hist_nvalues - 2);
    1171             : 
    1172             :     /*
    1173             :      * Calculate lower_bin_width, ie. the fraction of the of (lower_index,
    1174             :      * lower_index + 1) bin which is greater than lower bound of query range
    1175             :      * using linear interpolation of subdiff function.
    1176             :      */
    1177          18 :     lower_bin_width = get_position(typcache, lower, &hist_lower[lower_index],
    1178          18 :                                    &hist_lower[lower_index + 1]);
    1179             : 
    1180             :     /*
    1181             :      * Loop through all the lower bound bins, smaller than the query lower
    1182             :      * bound. In the loop, dist and prev_dist are the distance of the
    1183             :      * "current" bin's lower and upper bounds from the constant upper bound.
    1184             :      * We begin from query lower bound, and walk backwards, so the first bin's
    1185             :      * upper bound is the query lower bound, and its distance to the query
    1186             :      * upper bound is the length of the query range.
    1187             :      *
    1188             :      * bin_width represents the width of the current bin. Normally it is 1.0,
    1189             :      * meaning a full width bin, except for the first bin, which is only
    1190             :      * counted up to the constant lower bound.
    1191             :      */
    1192          18 :     prev_dist = get_distance(typcache, lower, upper);
    1193          18 :     sum_frac = 0.0;
    1194          18 :     bin_width = lower_bin_width;
    1195          72 :     for (i = lower_index; i >= 0; i--)
    1196             :     {
    1197             :         float8      dist;
    1198             :         double      length_hist_frac;
    1199             : 
    1200             :         /*
    1201             :          * dist -- distance from upper bound of query range to current value
    1202             :          * of lower bound histogram or lower bound of query range (if we've
    1203             :          * reach it).
    1204             :          */
    1205          54 :         dist = get_distance(typcache, &hist_lower[i], upper);
    1206             : 
    1207             :         /*
    1208             :          * Get average fraction of length histogram which covers intervals
    1209             :          * longer than (or equal to) distance to upper bound of query range.
    1210             :          */
    1211          54 :         length_hist_frac =
    1212          54 :             1.0 - calc_length_hist_frac(length_hist_values,
    1213             :                                         length_hist_nvalues,
    1214             :                                         prev_dist, dist, false);
    1215             : 
    1216          54 :         sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
    1217             : 
    1218          54 :         bin_width = 1.0;
    1219          54 :         prev_dist = dist;
    1220             :     }
    1221             : 
    1222          18 :     return sum_frac;
    1223             : }

Generated by: LCOV version 1.14