LCOV - code coverage report
Current view: top level - src/backend/utils/adt - multirangetypes_selfuncs.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 84.2 % 368 310
Test Date: 2026-03-01 18:15:11 Functions: 100.0 % 13 13
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * multirangetypes_selfuncs.c
       4              :  *    Functions for selectivity estimation of multirange operators
       5              :  *
       6              :  * Estimates are based on histograms of lower and upper bounds, and the
       7              :  * fraction of empty multiranges.
       8              :  *
       9              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      10              :  * Portions Copyright (c) 1994, Regents of the University of California
      11              :  *
      12              :  *
      13              :  * IDENTIFICATION
      14              :  *    src/backend/utils/adt/multirangetypes_selfuncs.c
      15              :  *
      16              :  *-------------------------------------------------------------------------
      17              :  */
      18              : #include "postgres.h"
      19              : 
      20              : #include <math.h>
      21              : 
      22              : #include "access/htup_details.h"
      23              : #include "catalog/pg_operator.h"
      24              : #include "catalog/pg_statistic.h"
      25              : #include "utils/float.h"
      26              : #include "utils/fmgrprotos.h"
      27              : #include "utils/lsyscache.h"
      28              : #include "utils/multirangetypes.h"
      29              : #include "utils/rangetypes.h"
      30              : #include "utils/selfuncs.h"
      31              : #include "utils/typcache.h"
      32              : 
      33              : static double calc_multirangesel(TypeCacheEntry *typcache,
      34              :                                  VariableStatData *vardata,
      35              :                                  const MultirangeType *constval, Oid operator);
      36              : static double default_multirange_selectivity(Oid operator);
      37              : static double calc_hist_selectivity(TypeCacheEntry *typcache,
      38              :                                     VariableStatData *vardata,
      39              :                                     const MultirangeType *constval,
      40              :                                     Oid operator);
      41              : static double calc_hist_selectivity_scalar(TypeCacheEntry *typcache,
      42              :                                            const RangeBound *constbound,
      43              :                                            const RangeBound *hist,
      44              :                                            int hist_nvalues, bool equal);
      45              : static int  rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value,
      46              :                            const RangeBound *hist, int hist_length, bool equal);
      47              : static float8 get_position(TypeCacheEntry *typcache, const RangeBound *value,
      48              :                            const RangeBound *hist1, const RangeBound *hist2);
      49              : static float8 get_len_position(double value, double hist1, double hist2);
      50              : static float8 get_distance(TypeCacheEntry *typcache, const RangeBound *bound1,
      51              :                            const RangeBound *bound2);
      52              : static int  length_hist_bsearch(const Datum *length_hist_values,
      53              :                                 int length_hist_nvalues, double value,
      54              :                                 bool equal);
      55              : static double calc_length_hist_frac(const Datum *length_hist_values,
      56              :                                     int length_hist_nvalues, double length1,
      57              :                                     double length2, bool equal);
      58              : static double calc_hist_selectivity_contained(TypeCacheEntry *typcache,
      59              :                                               const RangeBound *lower,
      60              :                                               RangeBound *upper,
      61              :                                               const RangeBound *hist_lower,
      62              :                                               int hist_nvalues,
      63              :                                               const Datum *length_hist_values,
      64              :                                               int length_hist_nvalues);
      65              : static double calc_hist_selectivity_contains(TypeCacheEntry *typcache,
      66              :                                              const RangeBound *lower,
      67              :                                              const RangeBound *upper,
      68              :                                              const RangeBound *hist_lower,
      69              :                                              int hist_nvalues,
      70              :                                              const Datum *length_hist_values,
      71              :                                              int length_hist_nvalues);
      72              : 
      73              : /*
      74              :  * Returns a default selectivity estimate for given operator, when we don't
      75              :  * have statistics or cannot use them for some reason.
      76              :  */
      77              : static double
      78          491 : default_multirange_selectivity(Oid operator)
      79              : {
      80          491 :     switch (operator)
      81              :     {
      82          210 :         case OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP:
      83              :         case OID_MULTIRANGE_OVERLAPS_RANGE_OP:
      84              :         case OID_RANGE_OVERLAPS_MULTIRANGE_OP:
      85          210 :             return 0.01;
      86              : 
      87          188 :         case OID_RANGE_CONTAINS_MULTIRANGE_OP:
      88              :         case OID_RANGE_MULTIRANGE_CONTAINED_OP:
      89              :         case OID_MULTIRANGE_CONTAINS_RANGE_OP:
      90              :         case OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP:
      91              :         case OID_MULTIRANGE_RANGE_CONTAINED_OP:
      92              :         case OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP:
      93          188 :             return 0.005;
      94              : 
      95           33 :         case OID_MULTIRANGE_CONTAINS_ELEM_OP:
      96              :         case OID_MULTIRANGE_ELEM_CONTAINED_OP:
      97              : 
      98              :             /*
      99              :              * "multirange @> elem" is more or less identical to a scalar
     100              :              * inequality "A >= b AND A <= c".
     101              :              */
     102           33 :             return DEFAULT_MULTIRANGE_INEQ_SEL;
     103              : 
     104           60 :         case OID_MULTIRANGE_LESS_OP:
     105              :         case OID_MULTIRANGE_LESS_EQUAL_OP:
     106              :         case OID_MULTIRANGE_GREATER_OP:
     107              :         case OID_MULTIRANGE_GREATER_EQUAL_OP:
     108              :         case OID_MULTIRANGE_LEFT_RANGE_OP:
     109              :         case OID_MULTIRANGE_LEFT_MULTIRANGE_OP:
     110              :         case OID_RANGE_LEFT_MULTIRANGE_OP:
     111              :         case OID_MULTIRANGE_RIGHT_RANGE_OP:
     112              :         case OID_MULTIRANGE_RIGHT_MULTIRANGE_OP:
     113              :         case OID_RANGE_RIGHT_MULTIRANGE_OP:
     114              :         case OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP:
     115              :         case OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
     116              :         case OID_MULTIRANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
     117              :         case OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP:
     118              :         case OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
     119              :         case OID_MULTIRANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
     120              :             /* these are similar to regular scalar inequalities */
     121           60 :             return DEFAULT_INEQ_SEL;
     122              : 
     123            0 :         default:
     124              : 
     125              :             /*
     126              :              * all multirange operators should be handled above, but just in
     127              :              * case
     128              :              */
     129            0 :             return 0.01;
     130              :     }
     131              : }
     132              : 
     133              : /*
     134              :  * multirangesel -- restriction selectivity for multirange operators
     135              :  */
     136              : Datum
     137          728 : multirangesel(PG_FUNCTION_ARGS)
     138              : {
     139          728 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     140          728 :     Oid         operator = PG_GETARG_OID(1);
     141          728 :     List       *args = (List *) PG_GETARG_POINTER(2);
     142          728 :     int         varRelid = PG_GETARG_INT32(3);
     143              :     VariableStatData vardata;
     144              :     Node       *other;
     145              :     bool        varonleft;
     146              :     Selectivity selec;
     147          728 :     TypeCacheEntry *typcache = NULL;
     148          728 :     MultirangeType *constmultirange = NULL;
     149          728 :     RangeType  *constrange = NULL;
     150              : 
     151              :     /*
     152              :      * If expression is not (variable op something) or (something op
     153              :      * variable), then punt and return a default estimate.
     154              :      */
     155          728 :     if (!get_restriction_variable(root, args, varRelid,
     156              :                                   &vardata, &other, &varonleft))
     157            0 :         PG_RETURN_FLOAT8(default_multirange_selectivity(operator));
     158              : 
     159              :     /*
     160              :      * Can't do anything useful if the something is not a constant, either.
     161              :      */
     162          728 :     if (!IsA(other, Const))
     163              :     {
     164           51 :         ReleaseVariableStats(vardata);
     165           51 :         PG_RETURN_FLOAT8(default_multirange_selectivity(operator));
     166              :     }
     167              : 
     168              :     /*
     169              :      * All the multirange operators are strict, so we can cope with a NULL
     170              :      * constant right away.
     171              :      */
     172          677 :     if (((Const *) other)->constisnull)
     173              :     {
     174            0 :         ReleaseVariableStats(vardata);
     175            0 :         PG_RETURN_FLOAT8(0.0);
     176              :     }
     177              : 
     178              :     /*
     179              :      * If var is on the right, commute the operator, so that we can assume the
     180              :      * var is on the left in what follows.
     181              :      */
     182          677 :     if (!varonleft)
     183              :     {
     184              :         /* we have other Op var, commute to make var Op other */
     185          266 :         operator = get_commutator(operator);
     186          266 :         if (!operator)
     187              :         {
     188              :             /* Use default selectivity (should we raise an error instead?) */
     189            0 :             ReleaseVariableStats(vardata);
     190            0 :             PG_RETURN_FLOAT8(default_multirange_selectivity(operator));
     191              :         }
     192              :     }
     193              : 
     194              :     /*
     195              :      * OK, there's a Var and a Const we're dealing with here.  We need the
     196              :      * Const to be of same multirange type as the column, else we can't do
     197              :      * anything useful. (Such cases will likely fail at runtime, but here we'd
     198              :      * rather just return a default estimate.)
     199              :      *
     200              :      * If the operator is "multirange @> element", the constant should be of
     201              :      * the element type of the multirange column. Convert it to a multirange
     202              :      * that includes only that single point, so that we don't need special
     203              :      * handling for that in what follows.
     204              :      */
     205          677 :     if (operator == OID_MULTIRANGE_CONTAINS_ELEM_OP)
     206              :     {
     207           48 :         typcache = multirange_get_typcache(fcinfo, vardata.vartype);
     208              : 
     209           48 :         if (((Const *) other)->consttype == typcache->rngtype->rngelemtype->type_id)
     210              :         {
     211              :             RangeBound  lower,
     212              :                         upper;
     213              : 
     214           48 :             lower.inclusive = true;
     215           48 :             lower.val = ((Const *) other)->constvalue;
     216           48 :             lower.infinite = false;
     217           48 :             lower.lower = true;
     218           48 :             upper.inclusive = true;
     219           48 :             upper.val = ((Const *) other)->constvalue;
     220           48 :             upper.infinite = false;
     221           48 :             upper.lower = false;
     222           48 :             constrange = range_serialize(typcache->rngtype, &lower, &upper,
     223              :                                          false, NULL);
     224           48 :             constmultirange = make_multirange(typcache->type_id, typcache->rngtype,
     225              :                                               1, &constrange);
     226              :         }
     227              :     }
     228          629 :     else if (operator == OID_RANGE_MULTIRANGE_CONTAINED_OP ||
     229          516 :              operator == OID_MULTIRANGE_CONTAINS_RANGE_OP ||
     230          498 :              operator == OID_MULTIRANGE_OVERLAPS_RANGE_OP ||
     231          486 :              operator == OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP ||
     232          474 :              operator == OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP ||
     233          462 :              operator == OID_MULTIRANGE_LEFT_RANGE_OP ||
     234              :              operator == OID_MULTIRANGE_RIGHT_RANGE_OP)
     235              :     {
     236              :         /*
     237              :          * Promote a range in "multirange OP range" just like we do an element
     238              :          * in "multirange OP element".
     239              :          */
     240          179 :         typcache = multirange_get_typcache(fcinfo, vardata.vartype);
     241          179 :         if (((Const *) other)->consttype == typcache->rngtype->type_id)
     242              :         {
     243          179 :             constrange = DatumGetRangeTypeP(((Const *) other)->constvalue);
     244          179 :             constmultirange = make_multirange(typcache->type_id, typcache->rngtype,
     245              :                                               1, &constrange);
     246              :         }
     247              :     }
     248          450 :     else if (operator == OID_RANGE_OVERLAPS_MULTIRANGE_OP ||
     249          432 :              operator == OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP ||
     250          423 :              operator == OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP ||
     251          414 :              operator == OID_RANGE_LEFT_MULTIRANGE_OP ||
     252          405 :              operator == OID_RANGE_RIGHT_MULTIRANGE_OP ||
     253          387 :              operator == OID_RANGE_CONTAINS_MULTIRANGE_OP ||
     254          387 :              operator == OID_MULTIRANGE_ELEM_CONTAINED_OP ||
     255              :              operator == OID_MULTIRANGE_RANGE_CONTAINED_OP)
     256              :     {
     257              :         /*
     258              :          * Here, the Var is the elem/range, not the multirange.  For now we
     259              :          * just punt and return the default estimate.  In future we could
     260              :          * disassemble the multirange constant to do something more
     261              :          * intelligent.
     262              :          */
     263              :     }
     264          378 :     else if (((Const *) other)->consttype == vardata.vartype)
     265              :     {
     266              :         /* Both sides are the same multirange type */
     267          378 :         typcache = multirange_get_typcache(fcinfo, vardata.vartype);
     268              : 
     269          378 :         constmultirange = DatumGetMultirangeTypeP(((Const *) other)->constvalue);
     270              :     }
     271              : 
     272              :     /*
     273              :      * If we got a valid constant on one side of the operator, proceed to
     274              :      * estimate using statistics. Otherwise punt and return a default constant
     275              :      * estimate.  Note that calc_multirangesel need not handle
     276              :      * OID_MULTIRANGE_*_CONTAINED_OP.
     277              :      */
     278          677 :     if (constmultirange)
     279          605 :         selec = calc_multirangesel(typcache, &vardata, constmultirange, operator);
     280              :     else
     281           72 :         selec = default_multirange_selectivity(operator);
     282              : 
     283          677 :     ReleaseVariableStats(vardata);
     284              : 
     285          677 :     CLAMP_PROBABILITY(selec);
     286              : 
     287          677 :     PG_RETURN_FLOAT8((float8) selec);
     288              : }
     289              : 
     290              : static double
     291          605 : calc_multirangesel(TypeCacheEntry *typcache, VariableStatData *vardata,
     292              :                    const MultirangeType *constval, Oid operator)
     293              : {
     294              :     double      hist_selec;
     295              :     double      selec;
     296              :     float4      empty_frac,
     297              :                 null_frac;
     298              : 
     299              :     /*
     300              :      * First look up the fraction of NULLs and empty multiranges from
     301              :      * pg_statistic.
     302              :      */
     303          605 :     if (HeapTupleIsValid(vardata->statsTuple))
     304              :     {
     305              :         Form_pg_statistic stats;
     306              :         AttStatsSlot sslot;
     307              : 
     308          225 :         stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
     309          225 :         null_frac = stats->stanullfrac;
     310              : 
     311              :         /* Try to get fraction of empty multiranges */
     312          225 :         if (get_attstatsslot(&sslot, vardata->statsTuple,
     313              :                              STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
     314              :                              InvalidOid,
     315              :                              ATTSTATSSLOT_NUMBERS))
     316              :         {
     317          225 :             if (sslot.nnumbers != 1)
     318            0 :                 elog(ERROR, "invalid empty fraction statistic");  /* shouldn't happen */
     319          225 :             empty_frac = sslot.numbers[0];
     320          225 :             free_attstatsslot(&sslot);
     321              :         }
     322              :         else
     323              :         {
     324              :             /* No empty fraction statistic. Assume no empty ranges. */
     325            0 :             empty_frac = 0.0;
     326              :         }
     327              :     }
     328              :     else
     329              :     {
     330              :         /*
     331              :          * No stats are available. Follow through the calculations below
     332              :          * anyway, assuming no NULLs and no empty multiranges. This still
     333              :          * allows us to give a better-than-nothing estimate based on whether
     334              :          * the constant is an empty multirange or not.
     335              :          */
     336          380 :         null_frac = 0.0;
     337          380 :         empty_frac = 0.0;
     338              :     }
     339              : 
     340          605 :     if (MultirangeIsEmpty(constval))
     341              :     {
     342              :         /*
     343              :          * An empty multirange matches all multiranges, all empty multiranges,
     344              :          * or nothing, depending on the operator
     345              :          */
     346          111 :         switch (operator)
     347              :         {
     348              :                 /* these return false if either argument is empty */
     349           63 :             case OID_MULTIRANGE_OVERLAPS_RANGE_OP:
     350              :             case OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP:
     351              :             case OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP:
     352              :             case OID_MULTIRANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
     353              :             case OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP:
     354              :             case OID_MULTIRANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
     355              :             case OID_MULTIRANGE_LEFT_RANGE_OP:
     356              :             case OID_MULTIRANGE_LEFT_MULTIRANGE_OP:
     357              :             case OID_MULTIRANGE_RIGHT_RANGE_OP:
     358              :             case OID_MULTIRANGE_RIGHT_MULTIRANGE_OP:
     359              :                 /* nothing is less than an empty multirange */
     360              :             case OID_MULTIRANGE_LESS_OP:
     361           63 :                 selec = 0.0;
     362           63 :                 break;
     363              : 
     364              :                 /*
     365              :                  * only empty multiranges can be contained by an empty
     366              :                  * multirange
     367              :                  */
     368           15 :             case OID_RANGE_MULTIRANGE_CONTAINED_OP:
     369              :             case OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP:
     370              :                 /* only empty ranges are <= an empty multirange */
     371              :             case OID_MULTIRANGE_LESS_EQUAL_OP:
     372           15 :                 selec = empty_frac;
     373           15 :                 break;
     374              : 
     375              :                 /* everything contains an empty multirange */
     376           30 :             case OID_MULTIRANGE_CONTAINS_RANGE_OP:
     377              :             case OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP:
     378              :                 /* everything is >= an empty multirange */
     379              :             case OID_MULTIRANGE_GREATER_EQUAL_OP:
     380           30 :                 selec = 1.0;
     381           30 :                 break;
     382              : 
     383              :                 /* all non-empty multiranges are > an empty multirange */
     384            3 :             case OID_MULTIRANGE_GREATER_OP:
     385            3 :                 selec = 1.0 - empty_frac;
     386            3 :                 break;
     387              : 
     388              :                 /* an element cannot be empty */
     389            0 :             case OID_MULTIRANGE_CONTAINS_ELEM_OP:
     390              : 
     391              :                 /* filtered out by multirangesel() */
     392              :             case OID_RANGE_OVERLAPS_MULTIRANGE_OP:
     393              :             case OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
     394              :             case OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
     395              :             case OID_RANGE_LEFT_MULTIRANGE_OP:
     396              :             case OID_RANGE_RIGHT_MULTIRANGE_OP:
     397              :             case OID_RANGE_CONTAINS_MULTIRANGE_OP:
     398              :             case OID_MULTIRANGE_ELEM_CONTAINED_OP:
     399              :             case OID_MULTIRANGE_RANGE_CONTAINED_OP:
     400              : 
     401              :             default:
     402            0 :                 elog(ERROR, "unexpected operator %u", operator);
     403              :                 selec = 0.0;    /* keep compiler quiet */
     404              :                 break;
     405              :         }
     406              :     }
     407              :     else
     408              :     {
     409              :         /*
     410              :          * Calculate selectivity using bound histograms. If that fails for
     411              :          * some reason, e.g no histogram in pg_statistic, use the default
     412              :          * constant estimate for the fraction of non-empty values. This is
     413              :          * still somewhat better than just returning the default estimate,
     414              :          * because this still takes into account the fraction of empty and
     415              :          * NULL tuples, if we had statistics for them.
     416              :          */
     417          494 :         hist_selec = calc_hist_selectivity(typcache, vardata, constval,
     418              :                                            operator);
     419          494 :         if (hist_selec < 0.0)
     420          368 :             hist_selec = default_multirange_selectivity(operator);
     421              : 
     422              :         /*
     423              :          * Now merge the results for the empty multiranges and histogram
     424              :          * calculations, realizing that the histogram covers only the
     425              :          * non-null, non-empty values.
     426              :          */
     427          494 :         if (operator == OID_RANGE_MULTIRANGE_CONTAINED_OP ||
     428              :             operator == OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP)
     429              :         {
     430              :             /* empty is contained by anything non-empty */
     431           12 :             selec = (1.0 - empty_frac) * hist_selec + empty_frac;
     432              :         }
     433              :         else
     434              :         {
     435              :             /* with any other operator, empty Op non-empty matches nothing */
     436          482 :             selec = (1.0 - empty_frac) * hist_selec;
     437              :         }
     438              :     }
     439              : 
     440              :     /* all multirange operators are strict */
     441          605 :     selec *= (1.0 - null_frac);
     442              : 
     443              :     /* result should be in range, but make sure... */
     444          605 :     CLAMP_PROBABILITY(selec);
     445              : 
     446          605 :     return selec;
     447              : }
     448              : 
     449              : /*
     450              :  * Calculate multirange operator selectivity using histograms of multirange bounds.
     451              :  *
     452              :  * This estimate is for the portion of values that are not empty and not
     453              :  * NULL.
     454              :  */
     455              : static double
     456          494 : calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
     457              :                       const MultirangeType *constval, Oid operator)
     458              : {
     459          494 :     TypeCacheEntry *rng_typcache = typcache->rngtype;
     460              :     AttStatsSlot hslot;
     461              :     AttStatsSlot lslot;
     462              :     int         nhist;
     463              :     RangeBound *hist_lower;
     464              :     RangeBound *hist_upper;
     465              :     int         i;
     466              :     RangeBound  const_lower;
     467              :     RangeBound  const_upper;
     468              :     RangeBound  tmp;
     469              :     double      hist_selec;
     470              : 
     471              :     /* Can't use the histogram with insecure multirange support functions */
     472          494 :     if (!statistic_proc_security_check(vardata,
     473              :                                        rng_typcache->rng_cmp_proc_finfo.fn_oid))
     474            0 :         return -1;
     475          494 :     if (OidIsValid(rng_typcache->rng_subdiff_finfo.fn_oid) &&
     476          494 :         !statistic_proc_security_check(vardata,
     477              :                                        rng_typcache->rng_subdiff_finfo.fn_oid))
     478          137 :         return -1;
     479              : 
     480              :     /* Try to get histogram of ranges */
     481          483 :     if (!(HeapTupleIsValid(vardata->statsTuple) &&
     482          126 :           get_attstatsslot(&hslot, vardata->statsTuple,
     483              :                            STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid,
     484              :                            ATTSTATSSLOT_VALUES)))
     485          231 :         return -1.0;
     486              : 
     487              :     /* check that it's a histogram, not just a dummy entry */
     488          126 :     if (hslot.nvalues < 2)
     489              :     {
     490            0 :         free_attstatsslot(&hslot);
     491            0 :         return -1.0;
     492              :     }
     493              : 
     494              :     /*
     495              :      * Convert histogram of ranges into histograms of its lower and upper
     496              :      * bounds.
     497              :      */
     498          126 :     nhist = hslot.nvalues;
     499          126 :     hist_lower = palloc_array(RangeBound, nhist);
     500          126 :     hist_upper = palloc_array(RangeBound, nhist);
     501         9468 :     for (i = 0; i < nhist; i++)
     502              :     {
     503              :         bool        empty;
     504              : 
     505         9342 :         range_deserialize(rng_typcache, DatumGetRangeTypeP(hslot.values[i]),
     506         9342 :                           &hist_lower[i], &hist_upper[i], &empty);
     507              :         /* The histogram should not contain any empty ranges */
     508         9342 :         if (empty)
     509            0 :             elog(ERROR, "bounds histogram contains an empty range");
     510              :     }
     511              : 
     512              :     /* @> and @< also need a histogram of range lengths */
     513          126 :     if (operator == OID_MULTIRANGE_CONTAINS_RANGE_OP ||
     514          102 :         operator == OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP ||
     515          102 :         operator == OID_MULTIRANGE_RANGE_CONTAINED_OP ||
     516              :         operator == OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP)
     517              :     {
     518           60 :         if (!(HeapTupleIsValid(vardata->statsTuple) &&
     519           30 :               get_attstatsslot(&lslot, vardata->statsTuple,
     520              :                                STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
     521              :                                InvalidOid,
     522              :                                ATTSTATSSLOT_VALUES)))
     523              :         {
     524            0 :             free_attstatsslot(&hslot);
     525            0 :             return -1.0;
     526              :         }
     527              : 
     528              :         /* check that it's a histogram, not just a dummy entry */
     529           30 :         if (lslot.nvalues < 2)
     530              :         {
     531            0 :             free_attstatsslot(&lslot);
     532            0 :             free_attstatsslot(&hslot);
     533            0 :             return -1.0;
     534              :         }
     535              :     }
     536              :     else
     537           96 :         memset(&lslot, 0, sizeof(lslot));
     538              : 
     539              :     /* Extract the bounds of the constant value. */
     540              :     Assert(constval->rangeCount > 0);
     541          126 :     multirange_get_bounds(rng_typcache, constval, 0,
     542              :                           &const_lower, &tmp);
     543          126 :     multirange_get_bounds(rng_typcache, constval, constval->rangeCount - 1,
     544              :                           &tmp, &const_upper);
     545              : 
     546              :     /*
     547              :      * Calculate selectivity comparing the lower or upper bound of the
     548              :      * constant with the histogram of lower or upper bounds.
     549              :      */
     550          126 :     switch (operator)
     551              :     {
     552            0 :         case OID_MULTIRANGE_LESS_OP:
     553              : 
     554              :             /*
     555              :              * The regular b-tree comparison operators (<, <=, >, >=) compare
     556              :              * the lower bounds first, and the upper bounds for values with
     557              :              * equal lower bounds. Estimate that by comparing the lower bounds
     558              :              * only. This gives a fairly accurate estimate assuming there
     559              :              * aren't many rows with a lower bound equal to the constant's
     560              :              * lower bound.
     561              :              */
     562              :             hist_selec =
     563            0 :                 calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     564              :                                              hist_lower, nhist, false);
     565            0 :             break;
     566              : 
     567            0 :         case OID_MULTIRANGE_LESS_EQUAL_OP:
     568              :             hist_selec =
     569            0 :                 calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     570              :                                              hist_lower, nhist, true);
     571            0 :             break;
     572              : 
     573            0 :         case OID_MULTIRANGE_GREATER_OP:
     574            0 :             hist_selec =
     575            0 :                 1 - calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     576              :                                                  hist_lower, nhist, false);
     577            0 :             break;
     578              : 
     579            0 :         case OID_MULTIRANGE_GREATER_EQUAL_OP:
     580            0 :             hist_selec =
     581            0 :                 1 - calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     582              :                                                  hist_lower, nhist, true);
     583            0 :             break;
     584              : 
     585           12 :         case OID_MULTIRANGE_LEFT_RANGE_OP:
     586              :         case OID_MULTIRANGE_LEFT_MULTIRANGE_OP:
     587              :             /* var << const when upper(var) < lower(const) */
     588              :             hist_selec =
     589           12 :                 calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     590              :                                              hist_upper, nhist, false);
     591           12 :             break;
     592              : 
     593           12 :         case OID_MULTIRANGE_RIGHT_RANGE_OP:
     594              :         case OID_MULTIRANGE_RIGHT_MULTIRANGE_OP:
     595              :             /* var >> const when lower(var) > upper(const) */
     596           12 :             hist_selec =
     597           12 :                 1 - calc_hist_selectivity_scalar(rng_typcache, &const_upper,
     598              :                                                  hist_lower, nhist, true);
     599           12 :             break;
     600              : 
     601           12 :         case OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP:
     602              :         case OID_MULTIRANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
     603              :             /* compare lower bounds */
     604           12 :             hist_selec =
     605           12 :                 1 - calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     606              :                                                  hist_lower, nhist, false);
     607           12 :             break;
     608              : 
     609           12 :         case OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP:
     610              :         case OID_MULTIRANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
     611              :             /* compare upper bounds */
     612              :             hist_selec =
     613           12 :                 calc_hist_selectivity_scalar(rng_typcache, &const_upper,
     614              :                                              hist_upper, nhist, true);
     615           12 :             break;
     616              : 
     617           42 :         case OID_MULTIRANGE_OVERLAPS_RANGE_OP:
     618              :         case OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP:
     619              :         case OID_MULTIRANGE_CONTAINS_ELEM_OP:
     620              : 
     621              :             /*
     622              :              * A && B <=> NOT (A << B OR A >> B).
     623              :              *
     624              :              * Since A << B and A >> B are mutually exclusive events we can
     625              :              * sum their probabilities to find probability of (A << B OR A >>
     626              :              * B).
     627              :              *
     628              :              * "multirange @> elem" is equivalent to "multirange &&
     629              :              * {[elem,elem]}". The caller already constructed the singular
     630              :              * range from the element constant, so just treat it the same as
     631              :              * &&.
     632              :              */
     633              :             hist_selec =
     634           42 :                 calc_hist_selectivity_scalar(rng_typcache,
     635              :                                              &const_lower, hist_upper,
     636              :                                              nhist, false);
     637           42 :             hist_selec +=
     638           42 :                 (1.0 - calc_hist_selectivity_scalar(rng_typcache,
     639              :                                                     &const_upper, hist_lower,
     640              :                                                     nhist, true));
     641           42 :             hist_selec = 1.0 - hist_selec;
     642           42 :             break;
     643              : 
     644           24 :         case OID_MULTIRANGE_CONTAINS_RANGE_OP:
     645              :         case OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP:
     646              :             hist_selec =
     647           24 :                 calc_hist_selectivity_contains(rng_typcache, &const_lower,
     648              :                                                &const_upper, hist_lower, nhist,
     649           24 :                                                lslot.values, lslot.nvalues);
     650           24 :             break;
     651              : 
     652           12 :         case OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP:
     653              :         case OID_RANGE_MULTIRANGE_CONTAINED_OP:
     654           12 :             if (const_lower.infinite)
     655              :             {
     656              :                 /*
     657              :                  * Lower bound no longer matters. Just estimate the fraction
     658              :                  * with an upper bound <= const upper bound
     659              :                  */
     660              :                 hist_selec =
     661            0 :                     calc_hist_selectivity_scalar(rng_typcache, &const_upper,
     662              :                                                  hist_upper, nhist, true);
     663              :             }
     664           12 :             else if (const_upper.infinite)
     665              :             {
     666            0 :                 hist_selec =
     667            0 :                     1.0 - calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     668              :                                                        hist_lower, nhist, false);
     669              :             }
     670              :             else
     671              :             {
     672              :                 hist_selec =
     673           12 :                     calc_hist_selectivity_contained(rng_typcache, &const_lower,
     674              :                                                     &const_upper, hist_lower, nhist,
     675           12 :                                                     lslot.values, lslot.nvalues);
     676              :             }
     677           12 :             break;
     678              : 
     679              :             /* filtered out by multirangesel() */
     680            0 :         case OID_RANGE_OVERLAPS_MULTIRANGE_OP:
     681              :         case OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
     682              :         case OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
     683              :         case OID_RANGE_LEFT_MULTIRANGE_OP:
     684              :         case OID_RANGE_RIGHT_MULTIRANGE_OP:
     685              :         case OID_RANGE_CONTAINS_MULTIRANGE_OP:
     686              :         case OID_MULTIRANGE_ELEM_CONTAINED_OP:
     687              :         case OID_MULTIRANGE_RANGE_CONTAINED_OP:
     688              : 
     689              :         default:
     690            0 :             elog(ERROR, "unknown multirange operator %u", operator);
     691              :             hist_selec = -1.0;  /* keep compiler quiet */
     692              :             break;
     693              :     }
     694              : 
     695          126 :     free_attstatsslot(&lslot);
     696          126 :     free_attstatsslot(&hslot);
     697              : 
     698          126 :     return hist_selec;
     699              : }
     700              : 
     701              : 
     702              : /*
     703              :  * Look up the fraction of values less than (or equal, if 'equal' argument
     704              :  * is true) a given const in a histogram of range bounds.
     705              :  */
     706              : static double
     707          132 : calc_hist_selectivity_scalar(TypeCacheEntry *typcache, const RangeBound *constbound,
     708              :                              const RangeBound *hist, int hist_nvalues, bool equal)
     709              : {
     710              :     Selectivity selec;
     711              :     int         index;
     712              : 
     713              :     /*
     714              :      * Find the histogram bin the given constant falls into. Estimate
     715              :      * selectivity as the number of preceding whole bins.
     716              :      */
     717          132 :     index = rbound_bsearch(typcache, constbound, hist, hist_nvalues, equal);
     718          132 :     selec = (Selectivity) (Max(index, 0)) / (Selectivity) (hist_nvalues - 1);
     719              : 
     720              :     /* Adjust using linear interpolation within the bin */
     721          132 :     if (index >= 0 && index < hist_nvalues - 1)
     722          180 :         selec += get_position(typcache, constbound, &hist[index],
     723           90 :                               &hist[index + 1]) / (Selectivity) (hist_nvalues - 1);
     724              : 
     725          132 :     return selec;
     726              : }
     727              : 
     728              : /*
     729              :  * Binary search on an array of range bounds. Returns greatest index of range
     730              :  * bound in array which is less(less or equal) than given range bound. If all
     731              :  * range bounds in array are greater or equal(greater) than given range bound,
     732              :  * return -1. When "equal" flag is set conditions in brackets are used.
     733              :  *
     734              :  * This function is used in scalar operator selectivity estimation. Another
     735              :  * goal of this function is to find a histogram bin where to stop
     736              :  * interpolation of portion of bounds which are less than or equal to given bound.
     737              :  */
     738              : static int
     739          168 : rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist,
     740              :                int hist_length, bool equal)
     741              : {
     742          168 :     int         lower = -1,
     743          168 :                 upper = hist_length - 1,
     744              :                 cmp,
     745              :                 middle;
     746              : 
     747         1068 :     while (lower < upper)
     748              :     {
     749          900 :         middle = (lower + upper + 1) / 2;
     750          900 :         cmp = range_cmp_bounds(typcache, &hist[middle], value);
     751              : 
     752          900 :         if (cmp < 0 || (equal && cmp == 0))
     753          372 :             lower = middle;
     754              :         else
     755          528 :             upper = middle - 1;
     756              :     }
     757          168 :     return lower;
     758              : }
     759              : 
     760              : 
     761              : /*
     762              :  * Binary search on length histogram. Returns greatest index of range length in
     763              :  * histogram which is less than (less than or equal) the given length value. If
     764              :  * all lengths in the histogram are greater than (greater than or equal) the
     765              :  * given length, returns -1.
     766              :  */
     767              : static int
     768          180 : length_hist_bsearch(const Datum *length_hist_values, int length_hist_nvalues,
     769              :                     double value, bool equal)
     770              : {
     771          180 :     int         lower = -1,
     772          180 :                 upper = length_hist_nvalues - 1,
     773              :                 middle;
     774              : 
     775          936 :     while (lower < upper)
     776              :     {
     777              :         double      middleval;
     778              : 
     779          756 :         middle = (lower + upper + 1) / 2;
     780              : 
     781          756 :         middleval = DatumGetFloat8(length_hist_values[middle]);
     782          756 :         if (middleval < value || (equal && middleval <= value))
     783          372 :             lower = middle;
     784              :         else
     785          384 :             upper = middle - 1;
     786              :     }
     787          180 :     return lower;
     788              : }
     789              : 
     790              : /*
     791              :  * Get relative position of value in histogram bin in [0,1] range.
     792              :  */
     793              : static float8
     794          138 : get_position(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist1,
     795              :              const RangeBound *hist2)
     796              : {
     797          138 :     bool        has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
     798              :     float8      position;
     799              : 
     800          138 :     if (!hist1->infinite && !hist2->infinite)
     801              :     {
     802              :         float8      bin_width;
     803              : 
     804              :         /*
     805              :          * Both bounds are finite. Assuming the subtype's comparison function
     806              :          * works sanely, the value must be finite, too, because it lies
     807              :          * somewhere between the bounds.  If it doesn't, arbitrarily return
     808              :          * 0.5.
     809              :          */
     810          102 :         if (value->infinite)
     811            0 :             return 0.5;
     812              : 
     813              :         /* Can't interpolate without subdiff function */
     814          102 :         if (!has_subdiff)
     815            0 :             return 0.5;
     816              : 
     817              :         /* Calculate relative position using subdiff function. */
     818          102 :         bin_width = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
     819              :                                                      typcache->rng_collation,
     820          102 :                                                      hist2->val,
     821          102 :                                                      hist1->val));
     822          102 :         if (isnan(bin_width) || bin_width <= 0.0)
     823            0 :             return 0.5;         /* punt for NaN or zero-width bin */
     824              : 
     825          102 :         position = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
     826              :                                                     typcache->rng_collation,
     827          102 :                                                     value->val,
     828          102 :                                                     hist1->val))
     829              :             / bin_width;
     830              : 
     831          102 :         if (isnan(position))
     832            0 :             return 0.5;         /* punt for NaN from subdiff, Inf/Inf, etc */
     833              : 
     834              :         /* Relative position must be in [0,1] range */
     835          102 :         position = Max(position, 0.0);
     836          102 :         position = Min(position, 1.0);
     837          102 :         return position;
     838              :     }
     839           36 :     else if (hist1->infinite && !hist2->infinite)
     840              :     {
     841              :         /*
     842              :          * Lower bin boundary is -infinite, upper is finite. If the value is
     843              :          * -infinite, return 0.0 to indicate it's equal to the lower bound.
     844              :          * Otherwise return 1.0 to indicate it's infinitely far from the lower
     845              :          * bound.
     846              :          */
     847           30 :         return ((value->infinite && value->lower) ? 0.0 : 1.0);
     848              :     }
     849            6 :     else if (!hist1->infinite && hist2->infinite)
     850              :     {
     851              :         /* same as above, but in reverse */
     852            6 :         return ((value->infinite && !value->lower) ? 1.0 : 0.0);
     853              :     }
     854              :     else
     855              :     {
     856              :         /*
     857              :          * If both bin boundaries are infinite, they should be equal to each
     858              :          * other, and the value should also be infinite and equal to both
     859              :          * bounds. (But don't Assert that, to avoid crashing if a user creates
     860              :          * a datatype with a broken comparison function).
     861              :          *
     862              :          * Assume the value to lie in the middle of the infinite bounds.
     863              :          */
     864            0 :         return 0.5;
     865              :     }
     866              : }
     867              : 
     868              : 
     869              : /*
     870              :  * Get relative position of value in a length histogram bin in [0,1] range.
     871              :  */
     872              : static double
     873          180 : get_len_position(double value, double hist1, double hist2)
     874              : {
     875          180 :     if (!isinf(hist1) && !isinf(hist2))
     876              :     {
     877              :         /*
     878              :          * Both bounds are finite. The value should be finite too, because it
     879              :          * lies somewhere between the bounds. If it doesn't, just return
     880              :          * something.
     881              :          */
     882           30 :         if (isinf(value))
     883            0 :             return 0.5;
     884              : 
     885           30 :         return 1.0 - (hist2 - value) / (hist2 - hist1);
     886              :     }
     887          150 :     else if (isinf(hist1) && !isinf(hist2))
     888              :     {
     889              :         /*
     890              :          * Lower bin boundary is -infinite, upper is finite. Return 1.0 to
     891              :          * indicate the value is infinitely far from the lower bound.
     892              :          */
     893            0 :         return 1.0;
     894              :     }
     895          150 :     else if (isinf(hist1) && isinf(hist2))
     896              :     {
     897              :         /* same as above, but in reverse */
     898            0 :         return 0.0;
     899              :     }
     900              :     else
     901              :     {
     902              :         /*
     903              :          * If both bin boundaries are infinite, they should be equal to each
     904              :          * other, and the value should also be infinite and equal to both
     905              :          * bounds. (But don't Assert that, to avoid crashing unnecessarily if
     906              :          * the caller messes up)
     907              :          *
     908              :          * Assume the value to lie in the middle of the infinite bounds.
     909              :          */
     910          150 :         return 0.5;
     911              :     }
     912              : }
     913              : 
     914              : /*
     915              :  * Measure distance between two range bounds.
     916              :  */
     917              : static float8
     918          204 : get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBound *bound2)
     919              : {
     920          204 :     bool        has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
     921              : 
     922          204 :     if (!bound1->infinite && !bound2->infinite)
     923              :     {
     924              :         /*
     925              :          * Neither bound is infinite, use subdiff function or return default
     926              :          * value of 1.0 if no subdiff is available.
     927              :          */
     928          120 :         if (has_subdiff)
     929              :         {
     930              :             float8      res;
     931              : 
     932          120 :             res = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
     933              :                                                    typcache->rng_collation,
     934          120 :                                                    bound2->val,
     935          120 :                                                    bound1->val));
     936              :             /* Reject possible NaN result, also negative result */
     937          120 :             if (isnan(res) || res < 0.0)
     938            0 :                 return 1.0;
     939              :             else
     940          120 :                 return res;
     941              :         }
     942              :         else
     943            0 :             return 1.0;
     944              :     }
     945           84 :     else if (bound1->infinite && bound2->infinite)
     946              :     {
     947              :         /* Both bounds are infinite */
     948            0 :         if (bound1->lower == bound2->lower)
     949            0 :             return 0.0;
     950              :         else
     951            0 :             return get_float8_infinity();
     952              :     }
     953              :     else
     954              :     {
     955              :         /* One bound is infinite, the other is not */
     956           84 :         return get_float8_infinity();
     957              :     }
     958              : }
     959              : 
     960              : /*
     961              :  * Calculate the average of function P(x), in the interval [length1, length2],
     962              :  * where P(x) is the fraction of tuples with length < x (or length <= x if
     963              :  * 'equal' is true).
     964              :  */
     965              : static double
     966          180 : calc_length_hist_frac(const Datum *length_hist_values, int length_hist_nvalues,
     967              :                       double length1, double length2, bool equal)
     968              : {
     969              :     double      frac;
     970              :     double      A,
     971              :                 B,
     972              :                 PA,
     973              :                 PB;
     974              :     double      pos;
     975              :     int         i;
     976              :     double      area;
     977              : 
     978              :     Assert(length2 >= length1);
     979              : 
     980          180 :     if (length2 < 0.0)
     981            0 :         return 0.0;             /* shouldn't happen, but doesn't hurt to check */
     982              : 
     983              :     /* All lengths in the table are <= infinite. */
     984          180 :     if (isinf(length2) && equal)
     985            0 :         return 1.0;
     986              : 
     987              :     /*----------
     988              :      * The average of a function between A and B can be calculated by the
     989              :      * formula:
     990              :      *
     991              :      *          B
     992              :      *    1     /
     993              :      * -------  | P(x)dx
     994              :      *  B - A   /
     995              :      *          A
     996              :      *
     997              :      * The geometrical interpretation of the integral is the area under the
     998              :      * graph of P(x). P(x) is defined by the length histogram. We calculate
     999              :      * the area in a piecewise fashion, iterating through the length histogram
    1000              :      * bins. Each bin is a trapezoid:
    1001              :      *
    1002              :      *       P(x2)
    1003              :      *        /|
    1004              :      *       / |
    1005              :      * P(x1)/  |
    1006              :      *     |   |
    1007              :      *     |   |
    1008              :      *  ---+---+--
    1009              :      *     x1  x2
    1010              :      *
    1011              :      * where x1 and x2 are the boundaries of the current histogram, and P(x1)
    1012              :      * and P(x1) are the cumulative fraction of tuples at the boundaries.
    1013              :      *
    1014              :      * The area of each trapezoid is 1/2 * (P(x2) + P(x1)) * (x2 - x1)
    1015              :      *
    1016              :      * The first bin contains the lower bound passed by the caller, so we
    1017              :      * use linear interpolation between the previous and next histogram bin
    1018              :      * boundary to calculate P(x1). Likewise for the last bin: we use linear
    1019              :      * interpolation to calculate P(x2). For the bins in between, x1 and x2
    1020              :      * lie on histogram bin boundaries, so P(x1) and P(x2) are simply:
    1021              :      * P(x1) =    (bin index) / (number of bins)
    1022              :      * P(x2) = (bin index + 1 / (number of bins)
    1023              :      */
    1024              : 
    1025              :     /* First bin, the one that contains lower bound */
    1026          180 :     i = length_hist_bsearch(length_hist_values, length_hist_nvalues, length1, equal);
    1027          180 :     if (i >= length_hist_nvalues - 1)
    1028           24 :         return 1.0;
    1029              : 
    1030          156 :     if (i < 0)
    1031              :     {
    1032           42 :         i = 0;
    1033           42 :         pos = 0.0;
    1034              :     }
    1035              :     else
    1036              :     {
    1037              :         /* interpolate length1's position in the bin */
    1038          114 :         pos = get_len_position(length1,
    1039          114 :                                DatumGetFloat8(length_hist_values[i]),
    1040          114 :                                DatumGetFloat8(length_hist_values[i + 1]));
    1041              :     }
    1042          156 :     PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
    1043          156 :     B = length1;
    1044              : 
    1045              :     /*
    1046              :      * In the degenerate case that length1 == length2, simply return
    1047              :      * P(length1). This is not merely an optimization: if length1 == length2,
    1048              :      * we'd divide by zero later on.
    1049              :      */
    1050          156 :     if (length2 == length1)
    1051           72 :         return PB;
    1052              : 
    1053              :     /*
    1054              :      * Loop through all the bins, until we hit the last bin, the one that
    1055              :      * contains the upper bound. (if lower and upper bounds are in the same
    1056              :      * bin, this falls out immediately)
    1057              :      */
    1058           84 :     area = 0.0;
    1059         1584 :     for (; i < length_hist_nvalues - 1; i++)
    1060              :     {
    1061         1584 :         double      bin_upper = DatumGetFloat8(length_hist_values[i + 1]);
    1062              : 
    1063              :         /* check if we've reached the last bin */
    1064         1584 :         if (!(bin_upper < length2 || (equal && bin_upper <= length2)))
    1065              :             break;
    1066              : 
    1067              :         /* the upper bound of previous bin is the lower bound of this bin */
    1068         1500 :         A = B;
    1069         1500 :         PA = PB;
    1070              : 
    1071         1500 :         B = bin_upper;
    1072         1500 :         PB = (double) i / (double) (length_hist_nvalues - 1);
    1073              : 
    1074              :         /*
    1075              :          * Add the area of this trapezoid to the total. The point of the
    1076              :          * if-check is to avoid NaN, in the corner case that PA == PB == 0,
    1077              :          * and B - A == Inf. The area of a zero-height trapezoid (PA == PB ==
    1078              :          * 0) is zero, regardless of the width (B - A).
    1079              :          */
    1080         1500 :         if (PA > 0 || PB > 0)
    1081         1476 :             area += 0.5 * (PB + PA) * (B - A);
    1082              :     }
    1083              : 
    1084              :     /* Last bin */
    1085           84 :     A = B;
    1086           84 :     PA = PB;
    1087              : 
    1088           84 :     B = length2;                /* last bin ends at the query upper bound */
    1089           84 :     if (i >= length_hist_nvalues - 1)
    1090            0 :         pos = 0.0;
    1091              :     else
    1092              :     {
    1093           84 :         if (DatumGetFloat8(length_hist_values[i]) == DatumGetFloat8(length_hist_values[i + 1]))
    1094           18 :             pos = 0.0;
    1095              :         else
    1096           66 :             pos = get_len_position(length2,
    1097           66 :                                    DatumGetFloat8(length_hist_values[i]),
    1098           66 :                                    DatumGetFloat8(length_hist_values[i + 1]));
    1099              :     }
    1100           84 :     PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
    1101              : 
    1102           84 :     if (PA > 0 || PB > 0)
    1103           66 :         area += 0.5 * (PB + PA) * (B - A);
    1104              : 
    1105              :     /*
    1106              :      * Ok, we have calculated the area, ie. the integral. Divide by width to
    1107              :      * get the requested average.
    1108              :      *
    1109              :      * Avoid NaN arising from infinite / infinite. This happens at least if
    1110              :      * length2 is infinite. It's not clear what the correct value would be in
    1111              :      * that case, so 0.5 seems as good as any value.
    1112              :      */
    1113           84 :     if (isinf(area) && isinf(length2))
    1114           24 :         frac = 0.5;
    1115              :     else
    1116           60 :         frac = area / (length2 - length1);
    1117              : 
    1118           84 :     return frac;
    1119              : }
    1120              : 
    1121              : /*
    1122              :  * Calculate selectivity of "var <@ const" operator, ie. estimate the fraction
    1123              :  * of multiranges that fall within the constant lower and upper bounds. This uses
    1124              :  * the histograms of range lower bounds and range lengths, on the assumption
    1125              :  * that the range lengths are independent of the lower bounds.
    1126              :  *
    1127              :  * The caller has already checked that constant lower and upper bounds are
    1128              :  * finite.
    1129              :  */
    1130              : static double
    1131           12 : calc_hist_selectivity_contained(TypeCacheEntry *typcache,
    1132              :                                 const RangeBound *lower, RangeBound *upper,
    1133              :                                 const RangeBound *hist_lower, int hist_nvalues,
    1134              :                                 const Datum *length_hist_values, int length_hist_nvalues)
    1135              : {
    1136              :     int         i,
    1137              :                 upper_index;
    1138              :     float8      prev_dist;
    1139              :     double      bin_width;
    1140              :     double      upper_bin_width;
    1141              :     double      sum_frac;
    1142              : 
    1143              :     /*
    1144              :      * Begin by finding the bin containing the upper bound, in the lower bound
    1145              :      * histogram. Any range with a lower bound > constant upper bound can't
    1146              :      * match, ie. there are no matches in bins greater than upper_index.
    1147              :      */
    1148           12 :     upper->inclusive = !upper->inclusive;
    1149           12 :     upper->lower = true;
    1150           12 :     upper_index = rbound_bsearch(typcache, upper, hist_lower, hist_nvalues,
    1151              :                                  false);
    1152              : 
    1153              :     /*
    1154              :      * If the upper bound value is below the histogram's lower limit, there
    1155              :      * are no matches.
    1156              :      */
    1157           12 :     if (upper_index < 0)
    1158            0 :         return 0.0;
    1159              : 
    1160              :     /*
    1161              :      * If the upper bound value is at or beyond the histogram's upper limit,
    1162              :      * start our loop at the last actual bin, as though the upper bound were
    1163              :      * within that bin; get_position will clamp its result to 1.0 anyway.
    1164              :      * (This corresponds to assuming that the data population above the
    1165              :      * histogram's upper limit is empty, exactly like what we just assumed for
    1166              :      * the lower limit.)
    1167              :      */
    1168           12 :     upper_index = Min(upper_index, hist_nvalues - 2);
    1169              : 
    1170              :     /*
    1171              :      * Calculate upper_bin_width, ie. the fraction of the (upper_index,
    1172              :      * upper_index + 1) bin which is greater than upper bound of query range
    1173              :      * using linear interpolation of subdiff function.
    1174              :      */
    1175           12 :     upper_bin_width = get_position(typcache, upper,
    1176           12 :                                    &hist_lower[upper_index],
    1177           12 :                                    &hist_lower[upper_index + 1]);
    1178              : 
    1179              :     /*
    1180              :      * In the loop, dist and prev_dist are the distance of the "current" bin's
    1181              :      * lower and upper bounds from the constant upper bound.
    1182              :      *
    1183              :      * bin_width represents the width of the current bin. Normally it is 1.0,
    1184              :      * meaning a full width bin, but can be less in the corner cases: start
    1185              :      * and end of the loop. We start with bin_width = upper_bin_width, because
    1186              :      * we begin at the bin containing the upper bound.
    1187              :      */
    1188           12 :     prev_dist = 0.0;
    1189           12 :     bin_width = upper_bin_width;
    1190              : 
    1191           12 :     sum_frac = 0.0;
    1192           60 :     for (i = upper_index; i >= 0; i--)
    1193              :     {
    1194              :         double      dist;
    1195              :         double      length_hist_frac;
    1196           60 :         bool        final_bin = false;
    1197              : 
    1198              :         /*
    1199              :          * dist -- distance from upper bound of query range to lower bound of
    1200              :          * the current bin in the lower bound histogram. Or to the lower bound
    1201              :          * of the constant range, if this is the final bin, containing the
    1202              :          * constant lower bound.
    1203              :          */
    1204           60 :         if (range_cmp_bounds(typcache, &hist_lower[i], lower) < 0)
    1205              :         {
    1206           12 :             dist = get_distance(typcache, lower, upper);
    1207              : 
    1208              :             /*
    1209              :              * Subtract from bin_width the portion of this bin that we want to
    1210              :              * ignore.
    1211              :              */
    1212           24 :             bin_width -= get_position(typcache, lower, &hist_lower[i],
    1213           12 :                                       &hist_lower[i + 1]);
    1214           12 :             if (bin_width < 0.0)
    1215            0 :                 bin_width = 0.0;
    1216           12 :             final_bin = true;
    1217              :         }
    1218              :         else
    1219           48 :             dist = get_distance(typcache, &hist_lower[i], upper);
    1220              : 
    1221              :         /*
    1222              :          * Estimate the fraction of tuples in this bin that are narrow enough
    1223              :          * to not exceed the distance to the upper bound of the query range.
    1224              :          */
    1225           60 :         length_hist_frac = calc_length_hist_frac(length_hist_values,
    1226              :                                                  length_hist_nvalues,
    1227              :                                                  prev_dist, dist, true);
    1228              : 
    1229              :         /*
    1230              :          * Add the fraction of tuples in this bin, with a suitable length, to
    1231              :          * the total.
    1232              :          */
    1233           60 :         sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
    1234              : 
    1235           60 :         if (final_bin)
    1236           12 :             break;
    1237              : 
    1238           48 :         bin_width = 1.0;
    1239           48 :         prev_dist = dist;
    1240              :     }
    1241              : 
    1242           12 :     return sum_frac;
    1243              : }
    1244              : 
    1245              : /*
    1246              :  * Calculate selectivity of "var @> const" operator, ie. estimate the fraction
    1247              :  * of multiranges that contain the constant lower and upper bounds. This uses
    1248              :  * the histograms of range lower bounds and range lengths, on the assumption
    1249              :  * that the range lengths are independent of the lower bounds.
    1250              :  */
    1251              : static double
    1252           24 : calc_hist_selectivity_contains(TypeCacheEntry *typcache,
    1253              :                                const RangeBound *lower, const RangeBound *upper,
    1254              :                                const RangeBound *hist_lower, int hist_nvalues,
    1255              :                                const Datum *length_hist_values, int length_hist_nvalues)
    1256              : {
    1257              :     int         i,
    1258              :                 lower_index;
    1259              :     double      bin_width,
    1260              :                 lower_bin_width;
    1261              :     double      sum_frac;
    1262              :     float8      prev_dist;
    1263              : 
    1264              :     /* Find the bin containing the lower bound of query range. */
    1265           24 :     lower_index = rbound_bsearch(typcache, lower, hist_lower, hist_nvalues,
    1266              :                                  true);
    1267              : 
    1268              :     /*
    1269              :      * If the lower bound value is below the histogram's lower limit, there
    1270              :      * are no matches.
    1271              :      */
    1272           24 :     if (lower_index < 0)
    1273            0 :         return 0.0;
    1274              : 
    1275              :     /*
    1276              :      * If the lower bound value is at or beyond the histogram's upper limit,
    1277              :      * start our loop at the last actual bin, as though the upper bound were
    1278              :      * within that bin; get_position will clamp its result to 1.0 anyway.
    1279              :      * (This corresponds to assuming that the data population above the
    1280              :      * histogram's upper limit is empty, exactly like what we just assumed for
    1281              :      * the lower limit.)
    1282              :      */
    1283           24 :     lower_index = Min(lower_index, hist_nvalues - 2);
    1284              : 
    1285              :     /*
    1286              :      * Calculate lower_bin_width, ie. the fraction of the of (lower_index,
    1287              :      * lower_index + 1) bin which is greater than lower bound of query range
    1288              :      * using linear interpolation of subdiff function.
    1289              :      */
    1290           24 :     lower_bin_width = get_position(typcache, lower, &hist_lower[lower_index],
    1291           24 :                                    &hist_lower[lower_index + 1]);
    1292              : 
    1293              :     /*
    1294              :      * Loop through all the lower bound bins, smaller than the query lower
    1295              :      * bound. In the loop, dist and prev_dist are the distance of the
    1296              :      * "current" bin's lower and upper bounds from the constant upper bound.
    1297              :      * We begin from query lower bound, and walk backwards, so the first bin's
    1298              :      * upper bound is the query lower bound, and its distance to the query
    1299              :      * upper bound is the length of the query range.
    1300              :      *
    1301              :      * bin_width represents the width of the current bin. Normally it is 1.0,
    1302              :      * meaning a full width bin, except for the first bin, which is only
    1303              :      * counted up to the constant lower bound.
    1304              :      */
    1305           24 :     prev_dist = get_distance(typcache, lower, upper);
    1306           24 :     sum_frac = 0.0;
    1307           24 :     bin_width = lower_bin_width;
    1308          144 :     for (i = lower_index; i >= 0; i--)
    1309              :     {
    1310              :         float8      dist;
    1311              :         double      length_hist_frac;
    1312              : 
    1313              :         /*
    1314              :          * dist -- distance from upper bound of query range to current value
    1315              :          * of lower bound histogram or lower bound of query range (if we've
    1316              :          * reach it).
    1317              :          */
    1318          120 :         dist = get_distance(typcache, &hist_lower[i], upper);
    1319              : 
    1320              :         /*
    1321              :          * Get average fraction of length histogram which covers intervals
    1322              :          * longer than (or equal to) distance to upper bound of query range.
    1323              :          */
    1324          120 :         length_hist_frac =
    1325          120 :             1.0 - calc_length_hist_frac(length_hist_values,
    1326              :                                         length_hist_nvalues,
    1327              :                                         prev_dist, dist, false);
    1328              : 
    1329          120 :         sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
    1330              : 
    1331          120 :         bin_width = 1.0;
    1332          120 :         prev_dist = dist;
    1333              :     }
    1334              : 
    1335           24 :     return sum_frac;
    1336              : }
        

Generated by: LCOV version 2.0-1