LCOV - code coverage report
Current view: top level - src/backend/utils/adt - multirangetypes_selfuncs.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 302 360 83.9 %
Date: 2024-12-02 19:14:57 Functions: 13 13 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * multirangetypes_selfuncs.c
       4             :  *    Functions for selectivity estimation of multirange operators
       5             :  *
       6             :  * Estimates are based on histograms of lower and upper bounds, and the
       7             :  * fraction of empty multiranges.
       8             :  *
       9             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      10             :  * Portions Copyright (c) 1994, Regents of the University of California
      11             :  *
      12             :  *
      13             :  * IDENTIFICATION
      14             :  *    src/backend/utils/adt/multirangetypes_selfuncs.c
      15             :  *
      16             :  *-------------------------------------------------------------------------
      17             :  */
      18             : #include "postgres.h"
      19             : 
      20             : #include <math.h>
      21             : 
      22             : #include "access/htup_details.h"
      23             : #include "catalog/pg_operator.h"
      24             : #include "catalog/pg_statistic.h"
      25             : #include "utils/float.h"
      26             : #include "utils/fmgrprotos.h"
      27             : #include "utils/lsyscache.h"
      28             : #include "utils/multirangetypes.h"
      29             : #include "utils/rangetypes.h"
      30             : #include "utils/selfuncs.h"
      31             : #include "utils/typcache.h"
      32             : 
      33             : static double calc_multirangesel(TypeCacheEntry *typcache,
      34             :                                  VariableStatData *vardata,
      35             :                                  const MultirangeType *constval, Oid operator);
      36             : static double default_multirange_selectivity(Oid operator);
      37             : static double calc_hist_selectivity(TypeCacheEntry *typcache,
      38             :                                     VariableStatData *vardata,
      39             :                                     const MultirangeType *constval,
      40             :                                     Oid operator);
      41             : static double calc_hist_selectivity_scalar(TypeCacheEntry *typcache,
      42             :                                            const RangeBound *constbound,
      43             :                                            const RangeBound *hist,
      44             :                                            int hist_nvalues, bool equal);
      45             : static int  rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value,
      46             :                            const RangeBound *hist, int hist_length, bool equal);
      47             : static float8 get_position(TypeCacheEntry *typcache, const RangeBound *value,
      48             :                            const RangeBound *hist1, const RangeBound *hist2);
      49             : static float8 get_len_position(double value, double hist1, double hist2);
      50             : static float8 get_distance(TypeCacheEntry *typcache, const RangeBound *bound1,
      51             :                            const RangeBound *bound2);
      52             : static int  length_hist_bsearch(Datum *length_hist_values,
      53             :                                 int length_hist_nvalues, double value,
      54             :                                 bool equal);
      55             : static double calc_length_hist_frac(Datum *length_hist_values,
      56             :                                     int length_hist_nvalues, double length1,
      57             :                                     double length2, bool equal);
      58             : static double calc_hist_selectivity_contained(TypeCacheEntry *typcache,
      59             :                                               const RangeBound *lower,
      60             :                                               RangeBound *upper,
      61             :                                               const RangeBound *hist_lower,
      62             :                                               int hist_nvalues,
      63             :                                               Datum *length_hist_values,
      64             :                                               int length_hist_nvalues);
      65             : static double calc_hist_selectivity_contains(TypeCacheEntry *typcache,
      66             :                                              const RangeBound *lower,
      67             :                                              const RangeBound *upper,
      68             :                                              const RangeBound *hist_lower,
      69             :                                              int hist_nvalues,
      70             :                                              Datum *length_hist_values,
      71             :                                              int length_hist_nvalues);
      72             : 
      73             : /*
      74             :  * Returns a default selectivity estimate for given operator, when we don't
      75             :  * have statistics or cannot use them for some reason.
      76             :  */
      77             : static double
      78        1378 : default_multirange_selectivity(Oid operator)
      79             : {
      80        1378 :     switch (operator)
      81             :     {
      82         558 :         case OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP:
      83             :         case OID_MULTIRANGE_OVERLAPS_RANGE_OP:
      84             :         case OID_RANGE_OVERLAPS_MULTIRANGE_OP:
      85         558 :             return 0.01;
      86             : 
      87         658 :         case OID_RANGE_CONTAINS_MULTIRANGE_OP:
      88             :         case OID_RANGE_MULTIRANGE_CONTAINED_OP:
      89             :         case OID_MULTIRANGE_CONTAINS_RANGE_OP:
      90             :         case OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP:
      91             :         case OID_MULTIRANGE_RANGE_CONTAINED_OP:
      92             :         case OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP:
      93         658 :             return 0.005;
      94             : 
      95          42 :         case OID_MULTIRANGE_CONTAINS_ELEM_OP:
      96             :         case OID_MULTIRANGE_ELEM_CONTAINED_OP:
      97             : 
      98             :             /*
      99             :              * "multirange @> elem" is more or less identical to a scalar
     100             :              * inequality "A >= b AND A <= c".
     101             :              */
     102          42 :             return DEFAULT_MULTIRANGE_INEQ_SEL;
     103             : 
     104         120 :         case OID_MULTIRANGE_LESS_OP:
     105             :         case OID_MULTIRANGE_LESS_EQUAL_OP:
     106             :         case OID_MULTIRANGE_GREATER_OP:
     107             :         case OID_MULTIRANGE_GREATER_EQUAL_OP:
     108             :         case OID_MULTIRANGE_LEFT_RANGE_OP:
     109             :         case OID_MULTIRANGE_LEFT_MULTIRANGE_OP:
     110             :         case OID_RANGE_LEFT_MULTIRANGE_OP:
     111             :         case OID_MULTIRANGE_RIGHT_RANGE_OP:
     112             :         case OID_MULTIRANGE_RIGHT_MULTIRANGE_OP:
     113             :         case OID_RANGE_RIGHT_MULTIRANGE_OP:
     114             :         case OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP:
     115             :         case OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
     116             :         case OID_MULTIRANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
     117             :         case OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP:
     118             :         case OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
     119             :         case OID_MULTIRANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
     120             :             /* these are similar to regular scalar inequalities */
     121         120 :             return DEFAULT_INEQ_SEL;
     122             : 
     123           0 :         default:
     124             : 
     125             :             /*
     126             :              * all multirange operators should be handled above, but just in
     127             :              * case
     128             :              */
     129           0 :             return 0.01;
     130             :     }
     131             : }
     132             : 
     133             : /*
     134             :  * multirangesel -- restriction selectivity for multirange operators
     135             :  */
     136             : Datum
     137        1852 : multirangesel(PG_FUNCTION_ARGS)
     138             : {
     139        1852 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     140        1852 :     Oid         operator = PG_GETARG_OID(1);
     141        1852 :     List       *args = (List *) PG_GETARG_POINTER(2);
     142        1852 :     int         varRelid = PG_GETARG_INT32(3);
     143             :     VariableStatData vardata;
     144             :     Node       *other;
     145             :     bool        varonleft;
     146             :     Selectivity selec;
     147        1852 :     TypeCacheEntry *typcache = NULL;
     148        1852 :     MultirangeType *constmultirange = NULL;
     149        1852 :     RangeType  *constrange = NULL;
     150             : 
     151             :     /*
     152             :      * If expression is not (variable op something) or (something op
     153             :      * variable), then punt and return a default estimate.
     154             :      */
     155        1852 :     if (!get_restriction_variable(root, args, varRelid,
     156             :                                   &vardata, &other, &varonleft))
     157           0 :         PG_RETURN_FLOAT8(default_multirange_selectivity(operator));
     158             : 
     159             :     /*
     160             :      * Can't do anything useful if the something is not a constant, either.
     161             :      */
     162        1852 :     if (!IsA(other, Const))
     163             :     {
     164         108 :         ReleaseVariableStats(vardata);
     165         108 :         PG_RETURN_FLOAT8(default_multirange_selectivity(operator));
     166             :     }
     167             : 
     168             :     /*
     169             :      * All the multirange operators are strict, so we can cope with a NULL
     170             :      * constant right away.
     171             :      */
     172        1744 :     if (((Const *) other)->constisnull)
     173             :     {
     174           0 :         ReleaseVariableStats(vardata);
     175           0 :         PG_RETURN_FLOAT8(0.0);
     176             :     }
     177             : 
     178             :     /*
     179             :      * If var is on the right, commute the operator, so that we can assume the
     180             :      * var is on the left in what follows.
     181             :      */
     182        1744 :     if (!varonleft)
     183             :     {
     184             :         /* we have other Op var, commute to make var Op other */
     185         808 :         operator = get_commutator(operator);
     186         808 :         if (!operator)
     187             :         {
     188             :             /* Use default selectivity (should we raise an error instead?) */
     189           0 :             ReleaseVariableStats(vardata);
     190           0 :             PG_RETURN_FLOAT8(default_multirange_selectivity(operator));
     191             :         }
     192             :     }
     193             : 
     194             :     /*
     195             :      * OK, there's a Var and a Const we're dealing with here.  We need the
     196             :      * Const to be of same multirange type as the column, else we can't do
     197             :      * anything useful. (Such cases will likely fail at runtime, but here we'd
     198             :      * rather just return a default estimate.)
     199             :      *
     200             :      * If the operator is "multirange @> element", the constant should be of
     201             :      * the element type of the multirange column. Convert it to a multirange
     202             :      * that includes only that single point, so that we don't need special
     203             :      * handling for that in what follows.
     204             :      */
     205        1744 :     if (operator == OID_MULTIRANGE_CONTAINS_ELEM_OP)
     206             :     {
     207          72 :         typcache = multirange_get_typcache(fcinfo, vardata.vartype);
     208             : 
     209          72 :         if (((Const *) other)->consttype == typcache->rngtype->rngelemtype->type_id)
     210             :         {
     211             :             RangeBound  lower,
     212             :                         upper;
     213             : 
     214          72 :             lower.inclusive = true;
     215          72 :             lower.val = ((Const *) other)->constvalue;
     216          72 :             lower.infinite = false;
     217          72 :             lower.lower = true;
     218          72 :             upper.inclusive = true;
     219          72 :             upper.val = ((Const *) other)->constvalue;
     220          72 :             upper.infinite = false;
     221          72 :             upper.lower = false;
     222          72 :             constrange = range_serialize(typcache->rngtype, &lower, &upper,
     223             :                                          false, NULL);
     224          72 :             constmultirange = make_multirange(typcache->type_id, typcache->rngtype,
     225             :                                               1, &constrange);
     226             :         }
     227             :     }
     228        1672 :     else if (operator == OID_RANGE_MULTIRANGE_CONTAINED_OP ||
     229        1314 :              operator == OID_MULTIRANGE_CONTAINS_RANGE_OP ||
     230        1278 :              operator == OID_MULTIRANGE_OVERLAPS_RANGE_OP ||
     231        1254 :              operator == OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP ||
     232        1230 :              operator == OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP ||
     233        1206 :              operator == OID_MULTIRANGE_LEFT_RANGE_OP ||
     234             :              operator == OID_MULTIRANGE_RIGHT_RANGE_OP)
     235             :     {
     236             :         /*
     237             :          * Promote a range in "multirange OP range" just like we do an element
     238             :          * in "multirange OP element".
     239             :          */
     240         490 :         typcache = multirange_get_typcache(fcinfo, vardata.vartype);
     241         490 :         if (((Const *) other)->consttype == typcache->rngtype->type_id)
     242             :         {
     243         490 :             constrange = DatumGetRangeTypeP(((Const *) other)->constvalue);
     244         490 :             constmultirange = make_multirange(typcache->type_id, typcache->rngtype,
     245             :                                               1, &constrange);
     246             :         }
     247             :     }
     248        1182 :     else if (operator == OID_RANGE_OVERLAPS_MULTIRANGE_OP ||
     249        1146 :              operator == OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP ||
     250        1128 :              operator == OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP ||
     251        1110 :              operator == OID_RANGE_LEFT_MULTIRANGE_OP ||
     252        1092 :              operator == OID_RANGE_RIGHT_MULTIRANGE_OP ||
     253        1056 :              operator == OID_RANGE_CONTAINS_MULTIRANGE_OP ||
     254        1056 :              operator == OID_MULTIRANGE_ELEM_CONTAINED_OP ||
     255             :              operator == OID_MULTIRANGE_RANGE_CONTAINED_OP)
     256             :     {
     257             :         /*
     258             :          * Here, the Var is the elem/range, not the multirange.  For now we
     259             :          * just punt and return the default estimate.  In future we could
     260             :          * disassemble the multirange constant to do something more
     261             :          * intelligent.
     262             :          */
     263             :     }
     264        1038 :     else if (((Const *) other)->consttype == vardata.vartype)
     265             :     {
     266             :         /* Both sides are the same multirange type */
     267        1038 :         typcache = multirange_get_typcache(fcinfo, vardata.vartype);
     268             : 
     269        1038 :         constmultirange = DatumGetMultirangeTypeP(((Const *) other)->constvalue);
     270             :     }
     271             : 
     272             :     /*
     273             :      * If we got a valid constant on one side of the operator, proceed to
     274             :      * estimate using statistics. Otherwise punt and return a default constant
     275             :      * estimate.  Note that calc_multirangesel need not handle
     276             :      * OID_MULTIRANGE_*_CONTAINED_OP.
     277             :      */
     278        1744 :     if (constmultirange)
     279        1600 :         selec = calc_multirangesel(typcache, &vardata, constmultirange, operator);
     280             :     else
     281         144 :         selec = default_multirange_selectivity(operator);
     282             : 
     283        1744 :     ReleaseVariableStats(vardata);
     284             : 
     285        1744 :     CLAMP_PROBABILITY(selec);
     286             : 
     287        1744 :     PG_RETURN_FLOAT8((float8) selec);
     288             : }
     289             : 
     290             : static double
     291        1600 : calc_multirangesel(TypeCacheEntry *typcache, VariableStatData *vardata,
     292             :                    const MultirangeType *constval, Oid operator)
     293             : {
     294             :     double      hist_selec;
     295             :     double      selec;
     296             :     float4      empty_frac,
     297             :                 null_frac;
     298             : 
     299             :     /*
     300             :      * First look up the fraction of NULLs and empty multiranges from
     301             :      * pg_statistic.
     302             :      */
     303        1600 :     if (HeapTupleIsValid(vardata->statsTuple))
     304             :     {
     305             :         Form_pg_statistic stats;
     306             :         AttStatsSlot sslot;
     307             : 
     308         450 :         stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
     309         450 :         null_frac = stats->stanullfrac;
     310             : 
     311             :         /* Try to get fraction of empty multiranges */
     312         450 :         if (get_attstatsslot(&sslot, vardata->statsTuple,
     313             :                              STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
     314             :                              InvalidOid,
     315             :                              ATTSTATSSLOT_NUMBERS))
     316             :         {
     317         450 :             if (sslot.nnumbers != 1)
     318           0 :                 elog(ERROR, "invalid empty fraction statistic");  /* shouldn't happen */
     319         450 :             empty_frac = sslot.numbers[0];
     320         450 :             free_attstatsslot(&sslot);
     321             :         }
     322             :         else
     323             :         {
     324             :             /* No empty fraction statistic. Assume no empty ranges. */
     325           0 :             empty_frac = 0.0;
     326             :         }
     327             :     }
     328             :     else
     329             :     {
     330             :         /*
     331             :          * No stats are available. Follow through the calculations below
     332             :          * anyway, assuming no NULLs and no empty multiranges. This still
     333             :          * allows us to give a better-than-nothing estimate based on whether
     334             :          * the constant is an empty multirange or not.
     335             :          */
     336        1150 :         null_frac = 0.0;
     337        1150 :         empty_frac = 0.0;
     338             :     }
     339             : 
     340        1600 :     if (MultirangeIsEmpty(constval))
     341             :     {
     342             :         /*
     343             :          * An empty multirange matches all multiranges, all empty multiranges,
     344             :          * or nothing, depending on the operator
     345             :          */
     346         222 :         switch (operator)
     347             :         {
     348             :                 /* these return false if either argument is empty */
     349         126 :             case OID_MULTIRANGE_OVERLAPS_RANGE_OP:
     350             :             case OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP:
     351             :             case OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP:
     352             :             case OID_MULTIRANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
     353             :             case OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP:
     354             :             case OID_MULTIRANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
     355             :             case OID_MULTIRANGE_LEFT_RANGE_OP:
     356             :             case OID_MULTIRANGE_LEFT_MULTIRANGE_OP:
     357             :             case OID_MULTIRANGE_RIGHT_RANGE_OP:
     358             :             case OID_MULTIRANGE_RIGHT_MULTIRANGE_OP:
     359             :                 /* nothing is less than an empty multirange */
     360             :             case OID_MULTIRANGE_LESS_OP:
     361         126 :                 selec = 0.0;
     362         126 :                 break;
     363             : 
     364             :                 /*
     365             :                  * only empty multiranges can be contained by an empty
     366             :                  * multirange
     367             :                  */
     368          30 :             case OID_RANGE_MULTIRANGE_CONTAINED_OP:
     369             :             case OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP:
     370             :                 /* only empty ranges are <= an empty multirange */
     371             :             case OID_MULTIRANGE_LESS_EQUAL_OP:
     372          30 :                 selec = empty_frac;
     373          30 :                 break;
     374             : 
     375             :                 /* everything contains an empty multirange */
     376          60 :             case OID_MULTIRANGE_CONTAINS_RANGE_OP:
     377             :             case OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP:
     378             :                 /* everything is >= an empty multirange */
     379             :             case OID_MULTIRANGE_GREATER_EQUAL_OP:
     380          60 :                 selec = 1.0;
     381          60 :                 break;
     382             : 
     383             :                 /* all non-empty multiranges are > an empty multirange */
     384           6 :             case OID_MULTIRANGE_GREATER_OP:
     385           6 :                 selec = 1.0 - empty_frac;
     386           6 :                 break;
     387             : 
     388             :                 /* an element cannot be empty */
     389           0 :             case OID_MULTIRANGE_CONTAINS_ELEM_OP:
     390             : 
     391             :                 /* filtered out by multirangesel() */
     392             :             case OID_RANGE_OVERLAPS_MULTIRANGE_OP:
     393             :             case OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
     394             :             case OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
     395             :             case OID_RANGE_LEFT_MULTIRANGE_OP:
     396             :             case OID_RANGE_RIGHT_MULTIRANGE_OP:
     397             :             case OID_RANGE_CONTAINS_MULTIRANGE_OP:
     398             :             case OID_MULTIRANGE_ELEM_CONTAINED_OP:
     399             :             case OID_MULTIRANGE_RANGE_CONTAINED_OP:
     400             : 
     401             :             default:
     402           0 :                 elog(ERROR, "unexpected operator %u", operator);
     403             :                 selec = 0.0;    /* keep compiler quiet */
     404             :                 break;
     405             :         }
     406             :     }
     407             :     else
     408             :     {
     409             :         /*
     410             :          * Calculate selectivity using bound histograms. If that fails for
     411             :          * some reason, e.g no histogram in pg_statistic, use the default
     412             :          * constant estimate for the fraction of non-empty values. This is
     413             :          * still somewhat better than just returning the default estimate,
     414             :          * because this still takes into account the fraction of empty and
     415             :          * NULL tuples, if we had statistics for them.
     416             :          */
     417        1378 :         hist_selec = calc_hist_selectivity(typcache, vardata, constval,
     418             :                                            operator);
     419        1378 :         if (hist_selec < 0.0)
     420        1126 :             hist_selec = default_multirange_selectivity(operator);
     421             : 
     422             :         /*
     423             :          * Now merge the results for the empty multiranges and histogram
     424             :          * calculations, realizing that the histogram covers only the
     425             :          * non-null, non-empty values.
     426             :          */
     427        1378 :         if (operator == OID_RANGE_MULTIRANGE_CONTAINED_OP ||
     428             :             operator == OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP)
     429             :         {
     430             :             /* empty is contained by anything non-empty */
     431          24 :             selec = (1.0 - empty_frac) * hist_selec + empty_frac;
     432             :         }
     433             :         else
     434             :         {
     435             :             /* with any other operator, empty Op non-empty matches nothing */
     436        1354 :             selec = (1.0 - empty_frac) * hist_selec;
     437             :         }
     438             :     }
     439             : 
     440             :     /* all multirange operators are strict */
     441        1600 :     selec *= (1.0 - null_frac);
     442             : 
     443             :     /* result should be in range, but make sure... */
     444        1600 :     CLAMP_PROBABILITY(selec);
     445             : 
     446        1600 :     return selec;
     447             : }
     448             : 
     449             : /*
     450             :  * Calculate multirange operator selectivity using histograms of multirange bounds.
     451             :  *
     452             :  * This estimate is for the portion of values that are not empty and not
     453             :  * NULL.
     454             :  */
     455             : static double
     456        1378 : calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
     457             :                       const MultirangeType *constval, Oid operator)
     458             : {
     459        1378 :     TypeCacheEntry *rng_typcache = typcache->rngtype;
     460             :     AttStatsSlot hslot;
     461             :     AttStatsSlot lslot;
     462             :     int         nhist;
     463             :     RangeBound *hist_lower;
     464             :     RangeBound *hist_upper;
     465             :     int         i;
     466             :     RangeBound  const_lower;
     467             :     RangeBound  const_upper;
     468             :     RangeBound  tmp;
     469             :     double      hist_selec;
     470             : 
     471             :     /* Can't use the histogram with insecure multirange support functions */
     472        1378 :     if (!statistic_proc_security_check(vardata,
     473             :                                        rng_typcache->rng_cmp_proc_finfo.fn_oid))
     474           0 :         return -1;
     475        1378 :     if (OidIsValid(rng_typcache->rng_subdiff_finfo.fn_oid) &&
     476        1378 :         !statistic_proc_security_check(vardata,
     477             :                                        rng_typcache->rng_subdiff_finfo.fn_oid))
     478         544 :         return -1;
     479             : 
     480             :     /* Try to get histogram of ranges */
     481        1086 :     if (!(HeapTupleIsValid(vardata->statsTuple) &&
     482         252 :           get_attstatsslot(&hslot, vardata->statsTuple,
     483             :                            STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid,
     484             :                            ATTSTATSSLOT_VALUES)))
     485         582 :         return -1.0;
     486             : 
     487             :     /* check that it's a histogram, not just a dummy entry */
     488         252 :     if (hslot.nvalues < 2)
     489             :     {
     490           0 :         free_attstatsslot(&hslot);
     491           0 :         return -1.0;
     492             :     }
     493             : 
     494             :     /*
     495             :      * Convert histogram of ranges into histograms of its lower and upper
     496             :      * bounds.
     497             :      */
     498         252 :     nhist = hslot.nvalues;
     499         252 :     hist_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
     500         252 :     hist_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
     501       18936 :     for (i = 0; i < nhist; i++)
     502             :     {
     503             :         bool        empty;
     504             : 
     505       18684 :         range_deserialize(rng_typcache, DatumGetRangeTypeP(hslot.values[i]),
     506       18684 :                           &hist_lower[i], &hist_upper[i], &empty);
     507             :         /* The histogram should not contain any empty ranges */
     508       18684 :         if (empty)
     509           0 :             elog(ERROR, "bounds histogram contains an empty range");
     510             :     }
     511             : 
     512             :     /* @> and @< also need a histogram of range lengths */
     513         252 :     if (operator == OID_MULTIRANGE_CONTAINS_RANGE_OP ||
     514         204 :         operator == OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP ||
     515         204 :         operator == OID_MULTIRANGE_RANGE_CONTAINED_OP ||
     516             :         operator == OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP)
     517             :     {
     518         120 :         if (!(HeapTupleIsValid(vardata->statsTuple) &&
     519          60 :               get_attstatsslot(&lslot, vardata->statsTuple,
     520             :                                STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
     521             :                                InvalidOid,
     522             :                                ATTSTATSSLOT_VALUES)))
     523             :         {
     524           0 :             free_attstatsslot(&hslot);
     525           0 :             return -1.0;
     526             :         }
     527             : 
     528             :         /* check that it's a histogram, not just a dummy entry */
     529          60 :         if (lslot.nvalues < 2)
     530             :         {
     531           0 :             free_attstatsslot(&lslot);
     532           0 :             free_attstatsslot(&hslot);
     533           0 :             return -1.0;
     534             :         }
     535             :     }
     536             :     else
     537         192 :         memset(&lslot, 0, sizeof(lslot));
     538             : 
     539             :     /* Extract the bounds of the constant value. */
     540             :     Assert(constval->rangeCount > 0);
     541         252 :     multirange_get_bounds(rng_typcache, constval, 0,
     542             :                           &const_lower, &tmp);
     543         252 :     multirange_get_bounds(rng_typcache, constval, constval->rangeCount - 1,
     544             :                           &tmp, &const_upper);
     545             : 
     546             :     /*
     547             :      * Calculate selectivity comparing the lower or upper bound of the
     548             :      * constant with the histogram of lower or upper bounds.
     549             :      */
     550         252 :     switch (operator)
     551             :     {
     552           0 :         case OID_MULTIRANGE_LESS_OP:
     553             : 
     554             :             /*
     555             :              * The regular b-tree comparison operators (<, <=, >, >=) compare
     556             :              * the lower bounds first, and the upper bounds for values with
     557             :              * equal lower bounds. Estimate that by comparing the lower bounds
     558             :              * only. This gives a fairly accurate estimate assuming there
     559             :              * aren't many rows with a lower bound equal to the constant's
     560             :              * lower bound.
     561             :              */
     562             :             hist_selec =
     563           0 :                 calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     564             :                                              hist_lower, nhist, false);
     565           0 :             break;
     566             : 
     567           0 :         case OID_MULTIRANGE_LESS_EQUAL_OP:
     568             :             hist_selec =
     569           0 :                 calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     570             :                                              hist_lower, nhist, true);
     571           0 :             break;
     572             : 
     573           0 :         case OID_MULTIRANGE_GREATER_OP:
     574           0 :             hist_selec =
     575           0 :                 1 - calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     576             :                                                  hist_lower, nhist, false);
     577           0 :             break;
     578             : 
     579           0 :         case OID_MULTIRANGE_GREATER_EQUAL_OP:
     580           0 :             hist_selec =
     581           0 :                 1 - calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     582             :                                                  hist_lower, nhist, true);
     583           0 :             break;
     584             : 
     585          24 :         case OID_MULTIRANGE_LEFT_RANGE_OP:
     586             :         case OID_MULTIRANGE_LEFT_MULTIRANGE_OP:
     587             :             /* var << const when upper(var) < lower(const) */
     588             :             hist_selec =
     589          24 :                 calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     590             :                                              hist_upper, nhist, false);
     591          24 :             break;
     592             : 
     593          24 :         case OID_MULTIRANGE_RIGHT_RANGE_OP:
     594             :         case OID_MULTIRANGE_RIGHT_MULTIRANGE_OP:
     595             :             /* var >> const when lower(var) > upper(const) */
     596          24 :             hist_selec =
     597          24 :                 1 - calc_hist_selectivity_scalar(rng_typcache, &const_upper,
     598             :                                                  hist_lower, nhist, true);
     599          24 :             break;
     600             : 
     601          24 :         case OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP:
     602             :         case OID_MULTIRANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
     603             :             /* compare lower bounds */
     604          24 :             hist_selec =
     605          24 :                 1 - calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     606             :                                                  hist_lower, nhist, false);
     607          24 :             break;
     608             : 
     609          24 :         case OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP:
     610             :         case OID_MULTIRANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
     611             :             /* compare upper bounds */
     612             :             hist_selec =
     613          24 :                 calc_hist_selectivity_scalar(rng_typcache, &const_upper,
     614             :                                              hist_upper, nhist, true);
     615          24 :             break;
     616             : 
     617          84 :         case OID_MULTIRANGE_OVERLAPS_RANGE_OP:
     618             :         case OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP:
     619             :         case OID_MULTIRANGE_CONTAINS_ELEM_OP:
     620             : 
     621             :             /*
     622             :              * A && B <=> NOT (A << B OR A >> B).
     623             :              *
     624             :              * Since A << B and A >> B are mutually exclusive events we can
     625             :              * sum their probabilities to find probability of (A << B OR A >>
     626             :              * B).
     627             :              *
     628             :              * "multirange @> elem" is equivalent to "multirange &&
     629             :              * {[elem,elem]}". The caller already constructed the singular
     630             :              * range from the element constant, so just treat it the same as
     631             :              * &&.
     632             :              */
     633             :             hist_selec =
     634          84 :                 calc_hist_selectivity_scalar(rng_typcache,
     635             :                                              &const_lower, hist_upper,
     636             :                                              nhist, false);
     637          84 :             hist_selec +=
     638          84 :                 (1.0 - calc_hist_selectivity_scalar(rng_typcache,
     639             :                                                     &const_upper, hist_lower,
     640             :                                                     nhist, true));
     641          84 :             hist_selec = 1.0 - hist_selec;
     642          84 :             break;
     643             : 
     644          48 :         case OID_MULTIRANGE_CONTAINS_RANGE_OP:
     645             :         case OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP:
     646             :             hist_selec =
     647          48 :                 calc_hist_selectivity_contains(rng_typcache, &const_lower,
     648             :                                                &const_upper, hist_lower, nhist,
     649             :                                                lslot.values, lslot.nvalues);
     650          48 :             break;
     651             : 
     652          24 :         case OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP:
     653             :         case OID_RANGE_MULTIRANGE_CONTAINED_OP:
     654          24 :             if (const_lower.infinite)
     655             :             {
     656             :                 /*
     657             :                  * Lower bound no longer matters. Just estimate the fraction
     658             :                  * with an upper bound <= const upper bound
     659             :                  */
     660             :                 hist_selec =
     661           0 :                     calc_hist_selectivity_scalar(rng_typcache, &const_upper,
     662             :                                                  hist_upper, nhist, true);
     663             :             }
     664          24 :             else if (const_upper.infinite)
     665             :             {
     666           0 :                 hist_selec =
     667           0 :                     1.0 - calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     668             :                                                        hist_lower, nhist, false);
     669             :             }
     670             :             else
     671             :             {
     672             :                 hist_selec =
     673          24 :                     calc_hist_selectivity_contained(rng_typcache, &const_lower,
     674             :                                                     &const_upper, hist_lower, nhist,
     675             :                                                     lslot.values, lslot.nvalues);
     676             :             }
     677          24 :             break;
     678             : 
     679             :             /* filtered out by multirangesel() */
     680           0 :         case OID_RANGE_OVERLAPS_MULTIRANGE_OP:
     681             :         case OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
     682             :         case OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
     683             :         case OID_RANGE_LEFT_MULTIRANGE_OP:
     684             :         case OID_RANGE_RIGHT_MULTIRANGE_OP:
     685             :         case OID_RANGE_CONTAINS_MULTIRANGE_OP:
     686             :         case OID_MULTIRANGE_ELEM_CONTAINED_OP:
     687             :         case OID_MULTIRANGE_RANGE_CONTAINED_OP:
     688             : 
     689             :         default:
     690           0 :             elog(ERROR, "unknown multirange operator %u", operator);
     691             :             hist_selec = -1.0;  /* keep compiler quiet */
     692             :             break;
     693             :     }
     694             : 
     695         252 :     free_attstatsslot(&lslot);
     696         252 :     free_attstatsslot(&hslot);
     697             : 
     698         252 :     return hist_selec;
     699             : }
     700             : 
     701             : 
     702             : /*
     703             :  * Look up the fraction of values less than (or equal, if 'equal' argument
     704             :  * is true) a given const in a histogram of range bounds.
     705             :  */
     706             : static double
     707         264 : calc_hist_selectivity_scalar(TypeCacheEntry *typcache, const RangeBound *constbound,
     708             :                              const RangeBound *hist, int hist_nvalues, bool equal)
     709             : {
     710             :     Selectivity selec;
     711             :     int         index;
     712             : 
     713             :     /*
     714             :      * Find the histogram bin the given constant falls into. Estimate
     715             :      * selectivity as the number of preceding whole bins.
     716             :      */
     717         264 :     index = rbound_bsearch(typcache, constbound, hist, hist_nvalues, equal);
     718         264 :     selec = (Selectivity) (Max(index, 0)) / (Selectivity) (hist_nvalues - 1);
     719             : 
     720             :     /* Adjust using linear interpolation within the bin */
     721         264 :     if (index >= 0 && index < hist_nvalues - 1)
     722         360 :         selec += get_position(typcache, constbound, &hist[index],
     723         180 :                               &hist[index + 1]) / (Selectivity) (hist_nvalues - 1);
     724             : 
     725         264 :     return selec;
     726             : }
     727             : 
     728             : /*
     729             :  * Binary search on an array of range bounds. Returns greatest index of range
     730             :  * bound in array which is less(less or equal) than given range bound. If all
     731             :  * range bounds in array are greater or equal(greater) than given range bound,
     732             :  * return -1. When "equal" flag is set conditions in brackets are used.
     733             :  *
     734             :  * This function is used in scalar operator selectivity estimation. Another
     735             :  * goal of this function is to find a histogram bin where to stop
     736             :  * interpolation of portion of bounds which are less than or equal to given bound.
     737             :  */
     738             : static int
     739         336 : rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist,
     740             :                int hist_length, bool equal)
     741             : {
     742         336 :     int         lower = -1,
     743         336 :                 upper = hist_length - 1,
     744             :                 cmp,
     745             :                 middle;
     746             : 
     747        2136 :     while (lower < upper)
     748             :     {
     749        1800 :         middle = (lower + upper + 1) / 2;
     750        1800 :         cmp = range_cmp_bounds(typcache, &hist[middle], value);
     751             : 
     752        1800 :         if (cmp < 0 || (equal && cmp == 0))
     753         744 :             lower = middle;
     754             :         else
     755        1056 :             upper = middle - 1;
     756             :     }
     757         336 :     return lower;
     758             : }
     759             : 
     760             : 
     761             : /*
     762             :  * Binary search on length histogram. Returns greatest index of range length in
     763             :  * histogram which is less than (less than or equal) the given length value. If
     764             :  * all lengths in the histogram are greater than (greater than or equal) the
     765             :  * given length, returns -1.
     766             :  */
     767             : static int
     768         360 : length_hist_bsearch(Datum *length_hist_values, int length_hist_nvalues,
     769             :                     double value, bool equal)
     770             : {
     771         360 :     int         lower = -1,
     772         360 :                 upper = length_hist_nvalues - 1,
     773             :                 middle;
     774             : 
     775        1872 :     while (lower < upper)
     776             :     {
     777             :         double      middleval;
     778             : 
     779        1512 :         middle = (lower + upper + 1) / 2;
     780             : 
     781        1512 :         middleval = DatumGetFloat8(length_hist_values[middle]);
     782        1512 :         if (middleval < value || (equal && middleval <= value))
     783         744 :             lower = middle;
     784             :         else
     785         768 :             upper = middle - 1;
     786             :     }
     787         360 :     return lower;
     788             : }
     789             : 
     790             : /*
     791             :  * Get relative position of value in histogram bin in [0,1] range.
     792             :  */
     793             : static float8
     794         276 : get_position(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist1,
     795             :              const RangeBound *hist2)
     796             : {
     797         276 :     bool        has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
     798             :     float8      position;
     799             : 
     800         276 :     if (!hist1->infinite && !hist2->infinite)
     801             :     {
     802             :         float8      bin_width;
     803             : 
     804             :         /*
     805             :          * Both bounds are finite. Assuming the subtype's comparison function
     806             :          * works sanely, the value must be finite, too, because it lies
     807             :          * somewhere between the bounds.  If it doesn't, arbitrarily return
     808             :          * 0.5.
     809             :          */
     810         204 :         if (value->infinite)
     811           0 :             return 0.5;
     812             : 
     813             :         /* Can't interpolate without subdiff function */
     814         204 :         if (!has_subdiff)
     815           0 :             return 0.5;
     816             : 
     817             :         /* Calculate relative position using subdiff function. */
     818         204 :         bin_width = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
     819             :                                                      typcache->rng_collation,
     820             :                                                      hist2->val,
     821             :                                                      hist1->val));
     822         204 :         if (isnan(bin_width) || bin_width <= 0.0)
     823           0 :             return 0.5;         /* punt for NaN or zero-width bin */
     824             : 
     825         204 :         position = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
     826             :                                                     typcache->rng_collation,
     827             :                                                     value->val,
     828             :                                                     hist1->val))
     829             :             / bin_width;
     830             : 
     831         204 :         if (isnan(position))
     832           0 :             return 0.5;         /* punt for NaN from subdiff, Inf/Inf, etc */
     833             : 
     834             :         /* Relative position must be in [0,1] range */
     835         204 :         position = Max(position, 0.0);
     836         204 :         position = Min(position, 1.0);
     837         204 :         return position;
     838             :     }
     839          72 :     else if (hist1->infinite && !hist2->infinite)
     840             :     {
     841             :         /*
     842             :          * Lower bin boundary is -infinite, upper is finite. If the value is
     843             :          * -infinite, return 0.0 to indicate it's equal to the lower bound.
     844             :          * Otherwise return 1.0 to indicate it's infinitely far from the lower
     845             :          * bound.
     846             :          */
     847          60 :         return ((value->infinite && value->lower) ? 0.0 : 1.0);
     848             :     }
     849          12 :     else if (!hist1->infinite && hist2->infinite)
     850             :     {
     851             :         /* same as above, but in reverse */
     852          12 :         return ((value->infinite && !value->lower) ? 1.0 : 0.0);
     853             :     }
     854             :     else
     855             :     {
     856             :         /*
     857             :          * If both bin boundaries are infinite, they should be equal to each
     858             :          * other, and the value should also be infinite and equal to both
     859             :          * bounds. (But don't Assert that, to avoid crashing if a user creates
     860             :          * a datatype with a broken comparison function).
     861             :          *
     862             :          * Assume the value to lie in the middle of the infinite bounds.
     863             :          */
     864           0 :         return 0.5;
     865             :     }
     866             : }
     867             : 
     868             : 
     869             : /*
     870             :  * Get relative position of value in a length histogram bin in [0,1] range.
     871             :  */
     872             : static double
     873         360 : get_len_position(double value, double hist1, double hist2)
     874             : {
     875         360 :     if (!isinf(hist1) && !isinf(hist2))
     876             :     {
     877             :         /*
     878             :          * Both bounds are finite. The value should be finite too, because it
     879             :          * lies somewhere between the bounds. If it doesn't, just return
     880             :          * something.
     881             :          */
     882          60 :         if (isinf(value))
     883           0 :             return 0.5;
     884             : 
     885          60 :         return 1.0 - (hist2 - value) / (hist2 - hist1);
     886             :     }
     887         300 :     else if (isinf(hist1) && !isinf(hist2))
     888             :     {
     889             :         /*
     890             :          * Lower bin boundary is -infinite, upper is finite. Return 1.0 to
     891             :          * indicate the value is infinitely far from the lower bound.
     892             :          */
     893           0 :         return 1.0;
     894             :     }
     895         300 :     else if (isinf(hist1) && isinf(hist2))
     896             :     {
     897             :         /* same as above, but in reverse */
     898           0 :         return 0.0;
     899             :     }
     900             :     else
     901             :     {
     902             :         /*
     903             :          * If both bin boundaries are infinite, they should be equal to each
     904             :          * other, and the value should also be infinite and equal to both
     905             :          * bounds. (But don't Assert that, to avoid crashing unnecessarily if
     906             :          * the caller messes up)
     907             :          *
     908             :          * Assume the value to lie in the middle of the infinite bounds.
     909             :          */
     910         300 :         return 0.5;
     911             :     }
     912             : }
     913             : 
     914             : /*
     915             :  * Measure distance between two range bounds.
     916             :  */
     917             : static float8
     918         408 : get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBound *bound2)
     919             : {
     920         408 :     bool        has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
     921             : 
     922         408 :     if (!bound1->infinite && !bound2->infinite)
     923             :     {
     924             :         /*
     925             :          * Neither bound is infinite, use subdiff function or return default
     926             :          * value of 1.0 if no subdiff is available.
     927             :          */
     928         240 :         if (has_subdiff)
     929             :         {
     930             :             float8      res;
     931             : 
     932         240 :             res = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
     933             :                                                    typcache->rng_collation,
     934             :                                                    bound2->val,
     935             :                                                    bound1->val));
     936             :             /* Reject possible NaN result, also negative result */
     937         240 :             if (isnan(res) || res < 0.0)
     938           0 :                 return 1.0;
     939             :             else
     940         240 :                 return res;
     941             :         }
     942             :         else
     943           0 :             return 1.0;
     944             :     }
     945         168 :     else if (bound1->infinite && bound2->infinite)
     946             :     {
     947             :         /* Both bounds are infinite */
     948           0 :         if (bound1->lower == bound2->lower)
     949           0 :             return 0.0;
     950             :         else
     951           0 :             return get_float8_infinity();
     952             :     }
     953             :     else
     954             :     {
     955             :         /* One bound is infinite, the other is not */
     956         168 :         return get_float8_infinity();
     957             :     }
     958             : }
     959             : 
     960             : /*
     961             :  * Calculate the average of function P(x), in the interval [length1, length2],
     962             :  * where P(x) is the fraction of tuples with length < x (or length <= x if
     963             :  * 'equal' is true).
     964             :  */
     965             : static double
     966         360 : calc_length_hist_frac(Datum *length_hist_values, int length_hist_nvalues,
     967             :                       double length1, double length2, bool equal)
     968             : {
     969             :     double      frac;
     970             :     double      A,
     971             :                 B,
     972             :                 PA,
     973             :                 PB;
     974             :     double      pos;
     975             :     int         i;
     976             :     double      area;
     977             : 
     978             :     Assert(length2 >= length1);
     979             : 
     980         360 :     if (length2 < 0.0)
     981           0 :         return 0.0;             /* shouldn't happen, but doesn't hurt to check */
     982             : 
     983             :     /* All lengths in the table are <= infinite. */
     984         360 :     if (isinf(length2) && equal)
     985           0 :         return 1.0;
     986             : 
     987             :     /*----------
     988             :      * The average of a function between A and B can be calculated by the
     989             :      * formula:
     990             :      *
     991             :      *          B
     992             :      *    1     /
     993             :      * -------  | P(x)dx
     994             :      *  B - A   /
     995             :      *          A
     996             :      *
     997             :      * The geometrical interpretation of the integral is the area under the
     998             :      * graph of P(x). P(x) is defined by the length histogram. We calculate
     999             :      * the area in a piecewise fashion, iterating through the length histogram
    1000             :      * bins. Each bin is a trapezoid:
    1001             :      *
    1002             :      *       P(x2)
    1003             :      *        /|
    1004             :      *       / |
    1005             :      * P(x1)/  |
    1006             :      *     |   |
    1007             :      *     |   |
    1008             :      *  ---+---+--
    1009             :      *     x1  x2
    1010             :      *
    1011             :      * where x1 and x2 are the boundaries of the current histogram, and P(x1)
    1012             :      * and P(x1) are the cumulative fraction of tuples at the boundaries.
    1013             :      *
    1014             :      * The area of each trapezoid is 1/2 * (P(x2) + P(x1)) * (x2 - x1)
    1015             :      *
    1016             :      * The first bin contains the lower bound passed by the caller, so we
    1017             :      * use linear interpolation between the previous and next histogram bin
    1018             :      * boundary to calculate P(x1). Likewise for the last bin: we use linear
    1019             :      * interpolation to calculate P(x2). For the bins in between, x1 and x2
    1020             :      * lie on histogram bin boundaries, so P(x1) and P(x2) are simply:
    1021             :      * P(x1) =    (bin index) / (number of bins)
    1022             :      * P(x2) = (bin index + 1 / (number of bins)
    1023             :      */
    1024             : 
    1025             :     /* First bin, the one that contains lower bound */
    1026         360 :     i = length_hist_bsearch(length_hist_values, length_hist_nvalues, length1, equal);
    1027         360 :     if (i >= length_hist_nvalues - 1)
    1028          48 :         return 1.0;
    1029             : 
    1030         312 :     if (i < 0)
    1031             :     {
    1032          84 :         i = 0;
    1033          84 :         pos = 0.0;
    1034             :     }
    1035             :     else
    1036             :     {
    1037             :         /* interpolate length1's position in the bin */
    1038         228 :         pos = get_len_position(length1,
    1039         228 :                                DatumGetFloat8(length_hist_values[i]),
    1040         228 :                                DatumGetFloat8(length_hist_values[i + 1]));
    1041             :     }
    1042         312 :     PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
    1043         312 :     B = length1;
    1044             : 
    1045             :     /*
    1046             :      * In the degenerate case that length1 == length2, simply return
    1047             :      * P(length1). This is not merely an optimization: if length1 == length2,
    1048             :      * we'd divide by zero later on.
    1049             :      */
    1050         312 :     if (length2 == length1)
    1051         144 :         return PB;
    1052             : 
    1053             :     /*
    1054             :      * Loop through all the bins, until we hit the last bin, the one that
    1055             :      * contains the upper bound. (if lower and upper bounds are in the same
    1056             :      * bin, this falls out immediately)
    1057             :      */
    1058         168 :     area = 0.0;
    1059        3168 :     for (; i < length_hist_nvalues - 1; i++)
    1060             :     {
    1061        3168 :         double      bin_upper = DatumGetFloat8(length_hist_values[i + 1]);
    1062             : 
    1063             :         /* check if we've reached the last bin */
    1064        3168 :         if (!(bin_upper < length2 || (equal && bin_upper <= length2)))
    1065             :             break;
    1066             : 
    1067             :         /* the upper bound of previous bin is the lower bound of this bin */
    1068        3000 :         A = B;
    1069        3000 :         PA = PB;
    1070             : 
    1071        3000 :         B = bin_upper;
    1072        3000 :         PB = (double) i / (double) (length_hist_nvalues - 1);
    1073             : 
    1074             :         /*
    1075             :          * Add the area of this trapezoid to the total. The point of the
    1076             :          * if-check is to avoid NaN, in the corner case that PA == PB == 0,
    1077             :          * and B - A == Inf. The area of a zero-height trapezoid (PA == PB ==
    1078             :          * 0) is zero, regardless of the width (B - A).
    1079             :          */
    1080        3000 :         if (PA > 0 || PB > 0)
    1081        2952 :             area += 0.5 * (PB + PA) * (B - A);
    1082             :     }
    1083             : 
    1084             :     /* Last bin */
    1085         168 :     A = B;
    1086         168 :     PA = PB;
    1087             : 
    1088         168 :     B = length2;                /* last bin ends at the query upper bound */
    1089         168 :     if (i >= length_hist_nvalues - 1)
    1090           0 :         pos = 0.0;
    1091             :     else
    1092             :     {
    1093         168 :         if (DatumGetFloat8(length_hist_values[i]) == DatumGetFloat8(length_hist_values[i + 1]))
    1094          36 :             pos = 0.0;
    1095             :         else
    1096         132 :             pos = get_len_position(length2,
    1097         132 :                                    DatumGetFloat8(length_hist_values[i]),
    1098         132 :                                    DatumGetFloat8(length_hist_values[i + 1]));
    1099             :     }
    1100         168 :     PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
    1101             : 
    1102         168 :     if (PA > 0 || PB > 0)
    1103         132 :         area += 0.5 * (PB + PA) * (B - A);
    1104             : 
    1105             :     /*
    1106             :      * Ok, we have calculated the area, ie. the integral. Divide by width to
    1107             :      * get the requested average.
    1108             :      *
    1109             :      * Avoid NaN arising from infinite / infinite. This happens at least if
    1110             :      * length2 is infinite. It's not clear what the correct value would be in
    1111             :      * that case, so 0.5 seems as good as any value.
    1112             :      */
    1113         168 :     if (isinf(area) && isinf(length2))
    1114          48 :         frac = 0.5;
    1115             :     else
    1116         120 :         frac = area / (length2 - length1);
    1117             : 
    1118         168 :     return frac;
    1119             : }
    1120             : 
    1121             : /*
    1122             :  * Calculate selectivity of "var <@ const" operator, ie. estimate the fraction
    1123             :  * of multiranges that fall within the constant lower and upper bounds. This uses
    1124             :  * the histograms of range lower bounds and range lengths, on the assumption
    1125             :  * that the range lengths are independent of the lower bounds.
    1126             :  *
    1127             :  * The caller has already checked that constant lower and upper bounds are
    1128             :  * finite.
    1129             :  */
    1130             : static double
    1131          24 : calc_hist_selectivity_contained(TypeCacheEntry *typcache,
    1132             :                                 const RangeBound *lower, RangeBound *upper,
    1133             :                                 const RangeBound *hist_lower, int hist_nvalues,
    1134             :                                 Datum *length_hist_values, int length_hist_nvalues)
    1135             : {
    1136             :     int         i,
    1137             :                 upper_index;
    1138             :     float8      prev_dist;
    1139             :     double      bin_width;
    1140             :     double      upper_bin_width;
    1141             :     double      sum_frac;
    1142             : 
    1143             :     /*
    1144             :      * Begin by finding the bin containing the upper bound, in the lower bound
    1145             :      * histogram. Any range with a lower bound > constant upper bound can't
    1146             :      * match, ie. there are no matches in bins greater than upper_index.
    1147             :      */
    1148          24 :     upper->inclusive = !upper->inclusive;
    1149          24 :     upper->lower = true;
    1150          24 :     upper_index = rbound_bsearch(typcache, upper, hist_lower, hist_nvalues,
    1151             :                                  false);
    1152             : 
    1153             :     /*
    1154             :      * If the upper bound value is below the histogram's lower limit, there
    1155             :      * are no matches.
    1156             :      */
    1157          24 :     if (upper_index < 0)
    1158           0 :         return 0.0;
    1159             : 
    1160             :     /*
    1161             :      * If the upper bound value is at or beyond the histogram's upper limit,
    1162             :      * start our loop at the last actual bin, as though the upper bound were
    1163             :      * within that bin; get_position will clamp its result to 1.0 anyway.
    1164             :      * (This corresponds to assuming that the data population above the
    1165             :      * histogram's upper limit is empty, exactly like what we just assumed for
    1166             :      * the lower limit.)
    1167             :      */
    1168          24 :     upper_index = Min(upper_index, hist_nvalues - 2);
    1169             : 
    1170             :     /*
    1171             :      * Calculate upper_bin_width, ie. the fraction of the (upper_index,
    1172             :      * upper_index + 1) bin which is greater than upper bound of query range
    1173             :      * using linear interpolation of subdiff function.
    1174             :      */
    1175          24 :     upper_bin_width = get_position(typcache, upper,
    1176          24 :                                    &hist_lower[upper_index],
    1177          24 :                                    &hist_lower[upper_index + 1]);
    1178             : 
    1179             :     /*
    1180             :      * In the loop, dist and prev_dist are the distance of the "current" bin's
    1181             :      * lower and upper bounds from the constant upper bound.
    1182             :      *
    1183             :      * bin_width represents the width of the current bin. Normally it is 1.0,
    1184             :      * meaning a full width bin, but can be less in the corner cases: start
    1185             :      * and end of the loop. We start with bin_width = upper_bin_width, because
    1186             :      * we begin at the bin containing the upper bound.
    1187             :      */
    1188          24 :     prev_dist = 0.0;
    1189          24 :     bin_width = upper_bin_width;
    1190             : 
    1191          24 :     sum_frac = 0.0;
    1192         120 :     for (i = upper_index; i >= 0; i--)
    1193             :     {
    1194             :         double      dist;
    1195             :         double      length_hist_frac;
    1196         120 :         bool        final_bin = false;
    1197             : 
    1198             :         /*
    1199             :          * dist -- distance from upper bound of query range to lower bound of
    1200             :          * the current bin in the lower bound histogram. Or to the lower bound
    1201             :          * of the constant range, if this is the final bin, containing the
    1202             :          * constant lower bound.
    1203             :          */
    1204         120 :         if (range_cmp_bounds(typcache, &hist_lower[i], lower) < 0)
    1205             :         {
    1206          24 :             dist = get_distance(typcache, lower, upper);
    1207             : 
    1208             :             /*
    1209             :              * Subtract from bin_width the portion of this bin that we want to
    1210             :              * ignore.
    1211             :              */
    1212          48 :             bin_width -= get_position(typcache, lower, &hist_lower[i],
    1213          24 :                                       &hist_lower[i + 1]);
    1214          24 :             if (bin_width < 0.0)
    1215           0 :                 bin_width = 0.0;
    1216          24 :             final_bin = true;
    1217             :         }
    1218             :         else
    1219          96 :             dist = get_distance(typcache, &hist_lower[i], upper);
    1220             : 
    1221             :         /*
    1222             :          * Estimate the fraction of tuples in this bin that are narrow enough
    1223             :          * to not exceed the distance to the upper bound of the query range.
    1224             :          */
    1225         120 :         length_hist_frac = calc_length_hist_frac(length_hist_values,
    1226             :                                                  length_hist_nvalues,
    1227             :                                                  prev_dist, dist, true);
    1228             : 
    1229             :         /*
    1230             :          * Add the fraction of tuples in this bin, with a suitable length, to
    1231             :          * the total.
    1232             :          */
    1233         120 :         sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
    1234             : 
    1235         120 :         if (final_bin)
    1236          24 :             break;
    1237             : 
    1238          96 :         bin_width = 1.0;
    1239          96 :         prev_dist = dist;
    1240             :     }
    1241             : 
    1242          24 :     return sum_frac;
    1243             : }
    1244             : 
    1245             : /*
    1246             :  * Calculate selectivity of "var @> const" operator, ie. estimate the fraction
    1247             :  * of multiranges that contain the constant lower and upper bounds. This uses
    1248             :  * the histograms of range lower bounds and range lengths, on the assumption
    1249             :  * that the range lengths are independent of the lower bounds.
    1250             :  */
    1251             : static double
    1252          48 : calc_hist_selectivity_contains(TypeCacheEntry *typcache,
    1253             :                                const RangeBound *lower, const RangeBound *upper,
    1254             :                                const RangeBound *hist_lower, int hist_nvalues,
    1255             :                                Datum *length_hist_values, int length_hist_nvalues)
    1256             : {
    1257             :     int         i,
    1258             :                 lower_index;
    1259             :     double      bin_width,
    1260             :                 lower_bin_width;
    1261             :     double      sum_frac;
    1262             :     float8      prev_dist;
    1263             : 
    1264             :     /* Find the bin containing the lower bound of query range. */
    1265          48 :     lower_index = rbound_bsearch(typcache, lower, hist_lower, hist_nvalues,
    1266             :                                  true);
    1267             : 
    1268             :     /*
    1269             :      * If the lower bound value is below the histogram's lower limit, there
    1270             :      * are no matches.
    1271             :      */
    1272          48 :     if (lower_index < 0)
    1273           0 :         return 0.0;
    1274             : 
    1275             :     /*
    1276             :      * If the lower bound value is at or beyond the histogram's upper limit,
    1277             :      * start our loop at the last actual bin, as though the upper bound were
    1278             :      * within that bin; get_position will clamp its result to 1.0 anyway.
    1279             :      * (This corresponds to assuming that the data population above the
    1280             :      * histogram's upper limit is empty, exactly like what we just assumed for
    1281             :      * the lower limit.)
    1282             :      */
    1283          48 :     lower_index = Min(lower_index, hist_nvalues - 2);
    1284             : 
    1285             :     /*
    1286             :      * Calculate lower_bin_width, ie. the fraction of the of (lower_index,
    1287             :      * lower_index + 1) bin which is greater than lower bound of query range
    1288             :      * using linear interpolation of subdiff function.
    1289             :      */
    1290          48 :     lower_bin_width = get_position(typcache, lower, &hist_lower[lower_index],
    1291          48 :                                    &hist_lower[lower_index + 1]);
    1292             : 
    1293             :     /*
    1294             :      * Loop through all the lower bound bins, smaller than the query lower
    1295             :      * bound. In the loop, dist and prev_dist are the distance of the
    1296             :      * "current" bin's lower and upper bounds from the constant upper bound.
    1297             :      * We begin from query lower bound, and walk backwards, so the first bin's
    1298             :      * upper bound is the query lower bound, and its distance to the query
    1299             :      * upper bound is the length of the query range.
    1300             :      *
    1301             :      * bin_width represents the width of the current bin. Normally it is 1.0,
    1302             :      * meaning a full width bin, except for the first bin, which is only
    1303             :      * counted up to the constant lower bound.
    1304             :      */
    1305          48 :     prev_dist = get_distance(typcache, lower, upper);
    1306          48 :     sum_frac = 0.0;
    1307          48 :     bin_width = lower_bin_width;
    1308         288 :     for (i = lower_index; i >= 0; i--)
    1309             :     {
    1310             :         float8      dist;
    1311             :         double      length_hist_frac;
    1312             : 
    1313             :         /*
    1314             :          * dist -- distance from upper bound of query range to current value
    1315             :          * of lower bound histogram or lower bound of query range (if we've
    1316             :          * reach it).
    1317             :          */
    1318         240 :         dist = get_distance(typcache, &hist_lower[i], upper);
    1319             : 
    1320             :         /*
    1321             :          * Get average fraction of length histogram which covers intervals
    1322             :          * longer than (or equal to) distance to upper bound of query range.
    1323             :          */
    1324         240 :         length_hist_frac =
    1325         240 :             1.0 - calc_length_hist_frac(length_hist_values,
    1326             :                                         length_hist_nvalues,
    1327             :                                         prev_dist, dist, false);
    1328             : 
    1329         240 :         sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
    1330             : 
    1331         240 :         bin_width = 1.0;
    1332         240 :         prev_dist = dist;
    1333             :     }
    1334             : 
    1335          48 :     return sum_frac;
    1336             : }

Generated by: LCOV version 1.14