LCOV - PostgreSQL 20devel - src/backend/utils/adt/array

LCOV - code coverage report

Current view:	top level - src/backend/utils/adt - array_selfuncs.c (source / functions)		Coverage	Total	Hit
Test:	PostgreSQL 20devel	Lines:	80.3 %	325	261
Test Date:	2026-07-22 12:15:41	Functions:	84.6 %	13	11
Legend:	Lines: hit not hit Branches: + taken - not taken # not executed	Branches:	61.0 %	236	144

             Branch data     Line data    Source code

       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * array_selfuncs.c
       4                 :             :  *    Functions for selectivity estimation of array operators
       5                 :             :  *
       6                 :             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7                 :             :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :             :  *
       9                 :             :  *
      10                 :             :  * IDENTIFICATION
      11                 :             :  *    src/backend/utils/adt/array_selfuncs.c
      12                 :             :  *
      13                 :             :  *-------------------------------------------------------------------------
      14                 :             :  */
      15                 :             : #include "postgres.h"
      16                 :             : 
      17                 :             : #include <math.h>
      18                 :             : 
      19                 :             : #include "access/htup_details.h"
      20                 :             : #include "catalog/pg_operator.h"
      21                 :             : #include "catalog/pg_statistic.h"
      22                 :             : #include "utils/array.h"
      23                 :             : #include "utils/fmgrprotos.h"
      24                 :             : #include "utils/lsyscache.h"
      25                 :             : #include "utils/selfuncs.h"
      26                 :             : #include "utils/typcache.h"
      27                 :             : 
      28                 :             : 
      29                 :             : /* Default selectivity constant for "@>" and "<@" operators */
      30                 :             : #define DEFAULT_CONTAIN_SEL 0.005
      31                 :             : 
      32                 :             : /* Default selectivity constant for "&&" operator */
      33                 :             : #define DEFAULT_OVERLAP_SEL 0.01
      34                 :             : 
      35                 :             : /* Default selectivity for given operator */
      36                 :             : #define DEFAULT_SEL(operator) \
      37                 :             :     ((operator) == OID_ARRAY_OVERLAP_OP ? \
      38                 :             :         DEFAULT_OVERLAP_SEL : DEFAULT_CONTAIN_SEL)
      39                 :             : 
      40                 :             : static Selectivity calc_arraycontsel(VariableStatData *vardata, Datum constval,
      41                 :             :                                      Oid elemtype, Oid operator);
      42                 :             : static Selectivity mcelem_array_selec(const ArrayType *array,
      43                 :             :                                       TypeCacheEntry *typentry,
      44                 :             :                                       const Datum *mcelem, int nmcelem,
      45                 :             :                                       const float4 *numbers, int nnumbers,
      46                 :             :                                       const float4 *hist, int nhist,
      47                 :             :                                       Oid operator);
      48                 :             : static Selectivity mcelem_array_contain_overlap_selec(const Datum *mcelem, int nmcelem,
      49                 :             :                                                       const float4 *numbers, int nnumbers,
      50                 :             :                                                       const Datum *array_data, int nitems,
      51                 :             :                                                       Oid operator, TypeCacheEntry *typentry);
      52                 :             : static Selectivity mcelem_array_contained_selec(const Datum *mcelem, int nmcelem,
      53                 :             :                                                 const float4 *numbers, int nnumbers,
      54                 :             :                                                 const Datum *array_data, int nitems,
      55                 :             :                                                 const float4 *hist, int nhist,
      56                 :             :                                                 Oid operator, TypeCacheEntry *typentry);
      57                 :             : static float *calc_hist(const float4 *hist, int nhist, int n);
      58                 :             : static float *calc_distr(const float *p, int n, int m, float rest);
      59                 :             : static int  floor_log2(uint32 n);
      60                 :             : static bool find_next_mcelem(const Datum *mcelem, int nmcelem, Datum value,
      61                 :             :                              int *index, TypeCacheEntry *typentry);
      62                 :             : static int  element_compare(const void *key1, const void *key2, void *arg);
      63                 :             : static int  float_compare_desc(const void *key1, const void *key2);
      64                 :             : 
      65                 :             : 
      66                 :             : /*
      67                 :             :  * scalararraysel_containment
      68                 :             :  *      Estimate selectivity of ScalarArrayOpExpr via array containment.
      69                 :             :  *
      70                 :             :  * If we have const =/<> ANY/ALL (array_var) then we can estimate the
      71                 :             :  * selectivity as though this were an array containment operator,
      72                 :             :  * array_var op ARRAY[const].
      73                 :             :  *
      74                 :             :  * scalararraysel() has already verified that the ScalarArrayOpExpr's operator
      75                 :             :  * is the array element type's default equality or inequality operator, and
      76                 :             :  * has aggressively simplified both inputs to constants.
      77                 :             :  *
      78                 :             :  * Returns selectivity (0..1), or -1 if we fail to estimate selectivity.
      79                 :             :  */
      80                 :             : Selectivity
      81                 :       17140 : scalararraysel_containment(PlannerInfo *root,
      82                 :             :                            Node *leftop, Node *rightop,
      83                 :             :                            Oid elemtype, bool isEquality, bool useOr,
      84                 :             :                            int varRelid)
      85                 :             : {
      86                 :             :     Selectivity selec;
      87                 :             :     VariableStatData vardata;
      88                 :             :     Datum       constval;
      89                 :             :     TypeCacheEntry *typentry;
      90                 :             :     FmgrInfo   *cmpfunc;
      91                 :             : 
      92                 :             :     /*
      93                 :             :      * rightop must be a variable, else punt.
      94                 :             :      */
      95                 :       17140 :     examine_variable(root, rightop, varRelid, &vardata);
      96         [ +  + ]:       17140 :     if (!vardata.rel)
      97                 :             :     {
      98         [ -  + ]:       16884 :         ReleaseVariableStats(vardata);
      99                 :       16884 :         return -1.0;
     100                 :             :     }
     101                 :             : 
     102                 :             :     /*
     103                 :             :      * leftop must be a constant, else punt.
     104                 :             :      */
     105         [ +  + ]:         256 :     if (!IsA(leftop, Const))
     106                 :             :     {
     107         [ -  + ]:         161 :         ReleaseVariableStats(vardata);
     108                 :         161 :         return -1.0;
     109                 :             :     }
     110         [ -  + ]:          95 :     if (((Const *) leftop)->constisnull)
     111                 :             :     {
     112                 :             :         /* qual can't succeed if null on left */
     113         [ #  # ]:           0 :         ReleaseVariableStats(vardata);
     114                 :           0 :         return (Selectivity) 0.0;
     115                 :             :     }
     116                 :          95 :     constval = ((Const *) leftop)->constvalue;
     117                 :             : 
     118                 :             :     /* Get element type's default comparison function */
     119                 :          95 :     typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
     120         [ -  + ]:          95 :     if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
     121                 :             :     {
     122         [ #  # ]:           0 :         ReleaseVariableStats(vardata);
     123                 :           0 :         return -1.0;
     124                 :             :     }
     125                 :          95 :     cmpfunc = &typentry->cmp_proc_finfo;
     126                 :             : 
     127                 :             :     /*
     128                 :             :      * If the operator is <>, swap ANY/ALL, then invert the result later.
     129                 :             :      */
     130         [ +  + ]:          95 :     if (!isEquality)
     131                 :          71 :         useOr = !useOr;
     132                 :             : 
     133                 :             :     /* Get array element stats for var, if available */
     134   [ +  +  +  - ]:         105 :     if (HeapTupleIsValid(vardata.statsTuple) &&
     135                 :          10 :         statistic_proc_security_check(&vardata, cmpfunc->fn_oid))
     136                 :          10 :     {
     137                 :             :         Form_pg_statistic stats;
     138                 :             :         AttStatsSlot sslot;
     139                 :             :         AttStatsSlot hslot;
     140                 :             : 
     141                 :          10 :         stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
     142                 :             : 
     143                 :             :         /* MCELEM will be an array of same type as element */
     144         [ +  - ]:          10 :         if (get_attstatsslot(&sslot, vardata.statsTuple,
     145                 :             :                              STATISTIC_KIND_MCELEM, InvalidOid,
     146                 :             :                              ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
     147                 :             :         {
     148                 :             :             /* For ALL case, also get histogram of distinct-element counts */
     149         [ -  + ]:          10 :             if (useOr ||
     150         [ #  # ]:           0 :                 !get_attstatsslot(&hslot, vardata.statsTuple,
     151                 :             :                                   STATISTIC_KIND_DECHIST, InvalidOid,
     152                 :             :                                   ATTSTATSSLOT_NUMBERS))
     153                 :          10 :                 memset(&hslot, 0, sizeof(hslot));
     154                 :             : 
     155                 :             :             /*
     156                 :             :              * For = ANY, estimate as var @> ARRAY[const].
     157                 :             :              *
     158                 :             :              * For = ALL, estimate as var <@ ARRAY[const].
     159                 :             :              */
     160         [ +  - ]:          10 :             if (useOr)
     161                 :          10 :                 selec = mcelem_array_contain_overlap_selec(sslot.values,
     162                 :             :                                                            sslot.nvalues,
     163                 :          10 :                                                            sslot.numbers,
     164                 :             :                                                            sslot.nnumbers,
     165                 :             :                                                            &constval, 1,
     166                 :             :                                                            OID_ARRAY_CONTAINS_OP,
     167                 :             :                                                            typentry);
     168                 :             :             else
     169                 :           0 :                 selec = mcelem_array_contained_selec(sslot.values,
     170                 :             :                                                      sslot.nvalues,
     171                 :           0 :                                                      sslot.numbers,
     172                 :             :                                                      sslot.nnumbers,
     173                 :             :                                                      &constval, 1,
     174                 :           0 :                                                      hslot.numbers,
     175                 :             :                                                      hslot.nnumbers,
     176                 :             :                                                      OID_ARRAY_CONTAINED_OP,
     177                 :             :                                                      typentry);
     178                 :             : 
     179                 :          10 :             free_attstatsslot(&hslot);
     180                 :          10 :             free_attstatsslot(&sslot);
     181                 :             :         }
     182                 :             :         else
     183                 :             :         {
     184                 :             :             /* No most-common-elements info, so do without */
     185         [ #  # ]:           0 :             if (useOr)
     186                 :           0 :                 selec = mcelem_array_contain_overlap_selec(NULL, 0,
     187                 :             :                                                            NULL, 0,
     188                 :             :                                                            &constval, 1,
     189                 :             :                                                            OID_ARRAY_CONTAINS_OP,
     190                 :             :                                                            typentry);
     191                 :             :             else
     192                 :           0 :                 selec = mcelem_array_contained_selec(NULL, 0,
     193                 :             :                                                      NULL, 0,
     194                 :             :                                                      &constval, 1,
     195                 :             :                                                      NULL, 0,
     196                 :             :                                                      OID_ARRAY_CONTAINED_OP,
     197                 :             :                                                      typentry);
     198                 :             :         }
     199                 :             : 
     200                 :             :         /*
     201                 :             :          * MCE stats count only non-null rows, so adjust for null rows.
     202                 :             :          */
     203                 :          10 :         selec *= (1.0 - stats->stanullfrac);
     204                 :             :     }
     205                 :             :     else
     206                 :             :     {
     207                 :             :         /* No stats at all, so do without */
     208         [ +  - ]:          85 :         if (useOr)
     209                 :          85 :             selec = mcelem_array_contain_overlap_selec(NULL, 0,
     210                 :             :                                                        NULL, 0,
     211                 :             :                                                        &constval, 1,
     212                 :             :                                                        OID_ARRAY_CONTAINS_OP,
     213                 :             :                                                        typentry);
     214                 :             :         else
     215                 :           0 :             selec = mcelem_array_contained_selec(NULL, 0,
     216                 :             :                                                  NULL, 0,
     217                 :             :                                                  &constval, 1,
     218                 :             :                                                  NULL, 0,
     219                 :             :                                                  OID_ARRAY_CONTAINED_OP,
     220                 :             :                                                  typentry);
     221                 :             :         /* we assume no nulls here, so no stanullfrac correction */
     222                 :             :     }
     223                 :             : 
     224         [ +  + ]:          95 :     ReleaseVariableStats(vardata);
     225                 :             : 
     226                 :             :     /*
     227                 :             :      * If the operator is <>, invert the results.
     228                 :             :      */
     229         [ +  + ]:          95 :     if (!isEquality)
     230                 :          71 :         selec = 1.0 - selec;
     231                 :             : 
     232   [ -  +  -  + ]:          95 :     CLAMP_PROBABILITY(selec);
     233                 :             : 
     234                 :          95 :     return selec;
     235                 :             : }
     236                 :             : 
     237                 :             : /*
     238                 :             :  * arraycontsel -- restriction selectivity for array @>, &&, <@ operators
     239                 :             :  */
     240                 :             : Datum
     241                 :         760 : arraycontsel(PG_FUNCTION_ARGS)
     242                 :             : {
     243                 :         760 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     244                 :         760 :     Oid         operator = PG_GETARG_OID(1);
     245                 :         760 :     List       *args = (List *) PG_GETARG_POINTER(2);
     246                 :         760 :     int         varRelid = PG_GETARG_INT32(3);
     247                 :             :     VariableStatData vardata;
     248                 :             :     Node       *other;
     249                 :             :     bool        varonleft;
     250                 :             :     Selectivity selec;
     251                 :             :     Oid         element_typeid;
     252                 :             : 
     253                 :             :     /*
     254                 :             :      * If expression is not (variable op something) or (something op
     255                 :             :      * variable), then punt and return a default estimate.
     256                 :             :      */
     257         [ -  + ]:         760 :     if (!get_restriction_variable(root, args, varRelid,
     258                 :             :                                   &vardata, &other, &varonleft))
     259         [ #  # ]:           0 :         PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
     260                 :             : 
     261                 :             :     /*
     262                 :             :      * Can't do anything useful if the something is not a constant, either.
     263                 :             :      */
     264         [ -  + ]:         760 :     if (!IsA(other, Const))
     265                 :             :     {
     266         [ #  # ]:           0 :         ReleaseVariableStats(vardata);
     267         [ #  # ]:           0 :         PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
     268                 :             :     }
     269                 :             : 
     270                 :             :     /*
     271                 :             :      * The "&&", "@>" and "<@" operators are strict, so we can cope with a
     272                 :             :      * NULL constant right away.
     273                 :             :      */
     274         [ -  + ]:         760 :     if (((Const *) other)->constisnull)
     275                 :             :     {
     276         [ #  # ]:           0 :         ReleaseVariableStats(vardata);
     277                 :           0 :         PG_RETURN_FLOAT8(0.0);
     278                 :             :     }
     279                 :             : 
     280                 :             :     /*
     281                 :             :      * If var is on the right, commute the operator, so that we can assume the
     282                 :             :      * var is on the left in what follows.
     283                 :             :      */
     284         [ +  + ]:         760 :     if (!varonleft)
     285                 :             :     {
     286         [ -  + ]:          20 :         if (operator == OID_ARRAY_CONTAINS_OP)
     287                 :           0 :             operator = OID_ARRAY_CONTAINED_OP;
     288         [ +  - ]:          20 :         else if (operator == OID_ARRAY_CONTAINED_OP)
     289                 :          20 :             operator = OID_ARRAY_CONTAINS_OP;
     290                 :             :     }
     291                 :             : 
     292                 :             :     /*
     293                 :             :      * OK, there's a Var and a Const we're dealing with here.  We need the
     294                 :             :      * Const to be an array with same element type as column, else we can't do
     295                 :             :      * anything useful.  (Such cases will likely fail at runtime, but here
     296                 :             :      * we'd rather just return a default estimate.)
     297                 :             :      */
     298                 :         760 :     element_typeid = get_base_element_type(((Const *) other)->consttype);
     299   [ +  -  +  - ]:        1520 :     if (element_typeid != InvalidOid &&
     300                 :         760 :         element_typeid == get_base_element_type(vardata.vartype))
     301                 :             :     {
     302                 :         760 :         selec = calc_arraycontsel(&vardata, ((Const *) other)->constvalue,
     303                 :             :                                   element_typeid, operator);
     304                 :             :     }
     305                 :             :     else
     306                 :             :     {
     307         [ #  # ]:           0 :         selec = DEFAULT_SEL(operator);
     308                 :             :     }
     309                 :             : 
     310         [ +  + ]:         760 :     ReleaseVariableStats(vardata);
     311                 :             : 
     312   [ -  +  -  + ]:         760 :     CLAMP_PROBABILITY(selec);
     313                 :             : 
     314                 :         760 :     PG_RETURN_FLOAT8((float8) selec);
     315                 :             : }
     316                 :             : 
     317                 :             : /*
     318                 :             :  * arraycontjoinsel -- join selectivity for array @>, &&, <@ operators
     319                 :             :  */
     320                 :             : Datum
     321                 :           0 : arraycontjoinsel(PG_FUNCTION_ARGS)
     322                 :             : {
     323                 :             :     /* For the moment this is just a stub */
     324                 :           0 :     Oid         operator = PG_GETARG_OID(1);
     325                 :             : 
     326         [ #  # ]:           0 :     PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
     327                 :             : }
     328                 :             : 
     329                 :             : /*
     330                 :             :  * Calculate selectivity for "arraycolumn @> const", "arraycolumn && const"
     331                 :             :  * or "arraycolumn <@ const" based on the statistics
     332                 :             :  *
     333                 :             :  * This function is mainly responsible for extracting the pg_statistic data
     334                 :             :  * to be used; we then pass the problem on to mcelem_array_selec().
     335                 :             :  */
     336                 :             : static Selectivity
     337                 :         760 : calc_arraycontsel(VariableStatData *vardata, Datum constval,
     338                 :             :                   Oid elemtype, Oid operator)
     339                 :             : {
     340                 :             :     Selectivity selec;
     341                 :             :     TypeCacheEntry *typentry;
     342                 :             :     FmgrInfo   *cmpfunc;
     343                 :             :     ArrayType  *array;
     344                 :             : 
     345                 :             :     /* Get element type's default comparison function */
     346                 :         760 :     typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
     347         [ -  + ]:         760 :     if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
     348         [ #  # ]:           0 :         return DEFAULT_SEL(operator);
     349                 :         760 :     cmpfunc = &typentry->cmp_proc_finfo;
     350                 :             : 
     351                 :             :     /*
     352                 :             :      * The caller made sure the const is an array with same element type, so
     353                 :             :      * get it now
     354                 :             :      */
     355                 :         760 :     array = DatumGetArrayTypeP(constval);
     356                 :             : 
     357   [ +  +  +  - ]:        1126 :     if (HeapTupleIsValid(vardata->statsTuple) &&
     358                 :         366 :         statistic_proc_security_check(vardata, cmpfunc->fn_oid))
     359                 :         366 :     {
     360                 :             :         Form_pg_statistic stats;
     361                 :             :         AttStatsSlot sslot;
     362                 :             :         AttStatsSlot hslot;
     363                 :             : 
     364                 :         366 :         stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
     365                 :             : 
     366                 :             :         /* MCELEM will be an array of same type as column */
     367         [ +  - ]:         366 :         if (get_attstatsslot(&sslot, vardata->statsTuple,
     368                 :             :                              STATISTIC_KIND_MCELEM, InvalidOid,
     369                 :             :                              ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
     370                 :             :         {
     371                 :             :             /*
     372                 :             :              * For "array <@ const" case we also need histogram of distinct
     373                 :             :              * element counts.
     374                 :             :              */
     375         [ +  + ]:         366 :             if (operator != OID_ARRAY_CONTAINED_OP ||
     376         [ -  + ]:          56 :                 !get_attstatsslot(&hslot, vardata->statsTuple,
     377                 :             :                                   STATISTIC_KIND_DECHIST, InvalidOid,
     378                 :             :                                   ATTSTATSSLOT_NUMBERS))
     379                 :         310 :                 memset(&hslot, 0, sizeof(hslot));
     380                 :             : 
     381                 :             :             /* Use the most-common-elements slot for the array Var. */
     382                 :         366 :             selec = mcelem_array_selec(array, typentry,
     383                 :         366 :                                        sslot.values, sslot.nvalues,
     384                 :         366 :                                        sslot.numbers, sslot.nnumbers,
     385                 :         366 :                                        hslot.numbers, hslot.nnumbers,
     386                 :             :                                        operator);
     387                 :             : 
     388                 :         366 :             free_attstatsslot(&hslot);
     389                 :         366 :             free_attstatsslot(&sslot);
     390                 :             :         }
     391                 :             :         else
     392                 :             :         {
     393                 :             :             /* No most-common-elements info, so do without */
     394                 :           0 :             selec = mcelem_array_selec(array, typentry,
     395                 :             :                                        NULL, 0, NULL, 0, NULL, 0,
     396                 :             :                                        operator);
     397                 :             :         }
     398                 :             : 
     399                 :             :         /*
     400                 :             :          * MCE stats count only non-null rows, so adjust for null rows.
     401                 :             :          */
     402                 :         366 :         selec *= (1.0 - stats->stanullfrac);
     403                 :             :     }
     404                 :             :     else
     405                 :             :     {
     406                 :             :         /* No stats at all, so do without */
     407                 :         394 :         selec = mcelem_array_selec(array, typentry,
     408                 :             :                                    NULL, 0, NULL, 0, NULL, 0,
     409                 :             :                                    operator);
     410                 :             :         /* we assume no nulls here, so no stanullfrac correction */
     411                 :             :     }
     412                 :             : 
     413                 :             :     /* If constant was toasted, release the copy we made */
     414         [ -  + ]:         760 :     if (PointerGetDatum(array) != constval)
     415                 :           0 :         pfree(array);
     416                 :             : 
     417                 :         760 :     return selec;
     418                 :             : }
     419                 :             : 
     420                 :             : /*
     421                 :             :  * Array selectivity estimation based on most common elements statistics
     422                 :             :  *
     423                 :             :  * This function just deconstructs and sorts the array constant's contents,
     424                 :             :  * and then passes the problem on to mcelem_array_contain_overlap_selec or
     425                 :             :  * mcelem_array_contained_selec depending on the operator.
     426                 :             :  */
     427                 :             : static Selectivity
     428                 :         760 : mcelem_array_selec(const ArrayType *array, TypeCacheEntry *typentry,
     429                 :             :                    const Datum *mcelem, int nmcelem,
     430                 :             :                    const float4 *numbers, int nnumbers,
     431                 :             :                    const float4 *hist, int nhist,
     432                 :             :                    Oid operator)
     433                 :             : {
     434                 :             :     Selectivity selec;
     435                 :             :     int         num_elems;
     436                 :             :     Datum      *elem_values;
     437                 :             :     bool       *elem_nulls;
     438                 :             :     bool        null_present;
     439                 :             :     int         nonnull_nitems;
     440                 :             :     int         i;
     441                 :             : 
     442                 :             :     /*
     443                 :             :      * Prepare constant array data for sorting.  Sorting lets us find unique
     444                 :             :      * elements and efficiently merge with the MCELEM array.
     445                 :             :      */
     446                 :         760 :     deconstruct_array(array,
     447                 :             :                       typentry->type_id,
     448                 :         760 :                       typentry->typlen,
     449                 :         760 :                       typentry->typbyval,
     450                 :         760 :                       typentry->typalign,
     451                 :             :                       &elem_values, &elem_nulls, &num_elems);
     452                 :             : 
     453                 :             :     /* Collapse out any null elements */
     454                 :         760 :     nonnull_nitems = 0;
     455                 :         760 :     null_present = false;
     456         [ +  + ]:        2954 :     for (i = 0; i < num_elems; i++)
     457                 :             :     {
     458         [ +  + ]:        2194 :         if (elem_nulls[i])
     459                 :          30 :             null_present = true;
     460                 :             :         else
     461                 :        2164 :             elem_values[nonnull_nitems++] = elem_values[i];
     462                 :             :     }
     463                 :             : 
     464                 :             :     /*
     465                 :             :      * Query "column @> '{anything, null}'" matches nothing.  For the other
     466                 :             :      * two operators, presence of a null in the constant can be ignored.
     467                 :             :      */
     468   [ +  +  +  + ]:         760 :     if (null_present && operator == OID_ARRAY_CONTAINS_OP)
     469                 :             :     {
     470                 :          10 :         pfree(elem_values);
     471                 :          10 :         pfree(elem_nulls);
     472                 :          10 :         return (Selectivity) 0.0;
     473                 :             :     }
     474                 :             : 
     475                 :             :     /* Sort extracted elements using their default comparison function. */
     476                 :         750 :     qsort_arg(elem_values, nonnull_nitems, sizeof(Datum),
     477                 :             :               element_compare, typentry);
     478                 :             : 
     479                 :             :     /* Separate cases according to operator */
     480   [ +  +  +  + ]:         750 :     if (operator == OID_ARRAY_CONTAINS_OP || operator == OID_ARRAY_OVERLAP_OP)
     481                 :         693 :         selec = mcelem_array_contain_overlap_selec(mcelem, nmcelem,
     482                 :             :                                                    numbers, nnumbers,
     483                 :             :                                                    elem_values, nonnull_nitems,
     484                 :             :                                                    operator, typentry);
     485         [ +  - ]:          57 :     else if (operator == OID_ARRAY_CONTAINED_OP)
     486                 :          57 :         selec = mcelem_array_contained_selec(mcelem, nmcelem,
     487                 :             :                                              numbers, nnumbers,
     488                 :             :                                              elem_values, nonnull_nitems,
     489                 :             :                                              hist, nhist,
     490                 :             :                                              operator, typentry);
     491                 :             :     else
     492                 :             :     {
     493         [ #  # ]:           0 :         elog(ERROR, "arraycontsel called for unrecognized operator %u",
     494                 :             :              operator);
     495                 :             :         selec = 0.0;            /* keep compiler quiet */
     496                 :             :     }
     497                 :             : 
     498                 :         750 :     pfree(elem_values);
     499                 :         750 :     pfree(elem_nulls);
     500                 :         750 :     return selec;
     501                 :             : }
     502                 :             : 
     503                 :             : /*
     504                 :             :  * Estimate selectivity of "column @> const" and "column && const" based on
     505                 :             :  * most common element statistics.  This estimation assumes element
     506                 :             :  * occurrences are independent.
     507                 :             :  *
     508                 :             :  * mcelem (of length nmcelem) and numbers (of length nnumbers) are from
     509                 :             :  * the array column's MCELEM statistics slot, or are NULL/0 if stats are
     510                 :             :  * not available.  array_data (of length nitems) is the constant's elements.
     511                 :             :  *
     512                 :             :  * Both the mcelem and array_data arrays are assumed presorted according
     513                 :             :  * to the element type's cmpfunc.  Null elements are not present.
     514                 :             :  *
     515                 :             :  * TODO: this estimate probably could be improved by using the distinct
     516                 :             :  * elements count histogram.  For example, excepting the special case of
     517                 :             :  * "column @> '{}'", we can multiply the calculated selectivity by the
     518                 :             :  * fraction of nonempty arrays in the column.
     519                 :             :  */
     520                 :             : static Selectivity
     521                 :         788 : mcelem_array_contain_overlap_selec(const Datum *mcelem, int nmcelem,
     522                 :             :                                    const float4 *numbers, int nnumbers,
     523                 :             :                                    const Datum *array_data, int nitems,
     524                 :             :                                    Oid operator, TypeCacheEntry *typentry)
     525                 :             : {
     526                 :             :     Selectivity selec,
     527                 :             :                 elem_selec;
     528                 :             :     int         mcelem_index,
     529                 :             :                 i;
     530                 :             :     bool        use_bsearch;
     531                 :             :     float4      minfreq;
     532                 :             : 
     533                 :             :     /*
     534                 :             :      * There should be three more Numbers than Values, because the last three
     535                 :             :      * cells should hold minimal and maximal frequency among the non-null
     536                 :             :      * elements, and then the frequency of null elements.  Ignore the Numbers
     537                 :             :      * if not right.
     538                 :             :      */
     539         [ +  + ]:         788 :     if (nnumbers != nmcelem + 3)
     540                 :             :     {
     541                 :         478 :         numbers = NULL;
     542                 :         478 :         nnumbers = 0;
     543                 :             :     }
     544                 :             : 
     545         [ +  + ]:         788 :     if (numbers)
     546                 :             :     {
     547                 :             :         /* Grab the minimal MCE frequency */
     548                 :         310 :         minfreq = numbers[nmcelem];
     549                 :             :     }
     550                 :             :     else
     551                 :             :     {
     552                 :             :         /*
     553                 :             :          * Without statistics, use DEFAULT_CONTAIN_SEL (the factor of 2 will
     554                 :             :          * be removed again below).
     555                 :             :          */
     556                 :         478 :         minfreq = 2 * (float4) DEFAULT_CONTAIN_SEL;
     557                 :             :     }
     558                 :             : 
     559                 :             :     /* Decide whether it is faster to use binary search or not. */
     560         [ +  + ]:         788 :     if (nitems * floor_log2((uint32) nmcelem) < nmcelem + nitems)
     561                 :         623 :         use_bsearch = true;
     562                 :             :     else
     563                 :         165 :         use_bsearch = false;
     564                 :             : 
     565         [ +  + ]:         788 :     if (operator == OID_ARRAY_CONTAINS_OP)
     566                 :             :     {
     567                 :             :         /*
     568                 :             :          * Initial selectivity for "column @> const" query is 1.0, and it will
     569                 :             :          * be decreased with each element of constant array.
     570                 :             :          */
     571                 :         666 :         selec = 1.0;
     572                 :             :     }
     573                 :             :     else
     574                 :             :     {
     575                 :             :         /*
     576                 :             :          * Initial selectivity for "column && const" query is 0.0, and it will
     577                 :             :          * be increased with each element of constant array.
     578                 :             :          */
     579                 :         122 :         selec = 0.0;
     580                 :             :     }
     581                 :             : 
     582                 :             :     /* Scan mcelem and array in parallel. */
     583                 :         788 :     mcelem_index = 0;
     584         [ +  + ]:        2926 :     for (i = 0; i < nitems; i++)
     585                 :             :     {
     586                 :        2138 :         bool        match = false;
     587                 :             : 
     588                 :             :         /* Ignore any duplicates in the array data. */
     589   [ +  +  -  + ]:        3718 :         if (i > 0 &&
     590                 :        1580 :             element_compare(&array_data[i - 1], &array_data[i], typentry) == 0)
     591                 :           0 :             continue;
     592                 :             : 
     593                 :             :         /* Find the smallest MCELEM >= this array item. */
     594         [ +  - ]:        2138 :         if (use_bsearch)
     595                 :             :         {
     596                 :        2138 :             match = find_next_mcelem(mcelem, nmcelem, array_data[i],
     597                 :             :                                      &mcelem_index, typentry);
     598                 :             :         }
     599                 :             :         else
     600                 :             :         {
     601         [ #  # ]:           0 :             while (mcelem_index < nmcelem)
     602                 :             :             {
     603                 :           0 :                 int         cmp = element_compare(&mcelem[mcelem_index],
     604                 :           0 :                                                   &array_data[i],
     605                 :             :                                                   typentry);
     606                 :             : 
     607         [ #  # ]:           0 :                 if (cmp < 0)
     608                 :           0 :                     mcelem_index++;
     609                 :             :                 else
     610                 :             :                 {
     611         [ #  # ]:           0 :                     if (cmp == 0)
     612                 :           0 :                         match = true;   /* mcelem is found */
     613                 :           0 :                     break;
     614                 :             :                 }
     615                 :             :             }
     616                 :             :         }
     617                 :             : 
     618   [ +  +  +  - ]:        2138 :         if (match && numbers)
     619                 :             :         {
     620                 :             :             /* MCELEM matches the array item; use its frequency. */
     621                 :         300 :             elem_selec = numbers[mcelem_index];
     622                 :         300 :             mcelem_index++;
     623                 :             :         }
     624                 :             :         else
     625                 :             :         {
     626                 :             :             /*
     627                 :             :              * The element is not in MCELEM.  Estimate its frequency as half
     628                 :             :              * that of the least-frequent MCE.  (We know it cannot be more
     629                 :             :              * than minfreq, and it could be a great deal less.  Half seems
     630                 :             :              * like a good compromise.)  For probably-historical reasons,
     631                 :             :              * clamp to not more than DEFAULT_CONTAIN_SEL.
     632                 :             :              */
     633         [ -  + ]:        1838 :             elem_selec = Min(DEFAULT_CONTAIN_SEL, minfreq / 2);
     634                 :             :         }
     635                 :             : 
     636                 :             :         /*
     637                 :             :          * Update overall selectivity using the current element's selectivity
     638                 :             :          * and an assumption of element occurrence independence.
     639                 :             :          */
     640         [ +  + ]:        2138 :         if (operator == OID_ARRAY_CONTAINS_OP)
     641                 :         524 :             selec *= elem_selec;
     642                 :             :         else
     643                 :        1614 :             selec = selec + elem_selec - selec * elem_selec;
     644                 :             : 
     645                 :             :         /* Clamp intermediate results to stay sane despite roundoff error */
     646   [ -  +  -  + ]:        2138 :         CLAMP_PROBABILITY(selec);
     647                 :             :     }
     648                 :             : 
     649                 :         788 :     return selec;
     650                 :             : }
     651                 :             : 
     652                 :             : /*
     653                 :             :  * Estimate selectivity of "column <@ const" based on most common element
     654                 :             :  * statistics.
     655                 :             :  *
     656                 :             :  * mcelem (of length nmcelem) and numbers (of length nnumbers) are from
     657                 :             :  * the array column's MCELEM statistics slot, or are NULL/0 if stats are
     658                 :             :  * not available.  array_data (of length nitems) is the constant's elements.
     659                 :             :  * hist (of length nhist) is from the array column's DECHIST statistics slot,
     660                 :             :  * or is NULL/0 if those stats are not available.
     661                 :             :  *
     662                 :             :  * Both the mcelem and array_data arrays are assumed presorted according
     663                 :             :  * to the element type's cmpfunc.  Null elements are not present.
     664                 :             :  *
     665                 :             :  * Independent element occurrence would imply a particular distribution of
     666                 :             :  * distinct element counts among matching rows.  Real data usually falsifies
     667                 :             :  * that assumption.  For example, in a set of 11-element integer arrays having
     668                 :             :  * elements in the range [0..10], element occurrences are typically not
     669                 :             :  * independent.  If they were, a sufficiently-large set would include all
     670                 :             :  * distinct element counts 0 through 11.  We correct for this using the
     671                 :             :  * histogram of distinct element counts.
     672                 :             :  *
     673                 :             :  * In the "column @> const" and "column && const" cases, we usually have a
     674                 :             :  * "const" with low number of elements (otherwise we have selectivity close
     675                 :             :  * to 0 or 1 respectively).  That's why the effect of dependence related
     676                 :             :  * to distinct element count distribution is negligible there.  In the
     677                 :             :  * "column <@ const" case, number of elements is usually high (otherwise we
     678                 :             :  * have selectivity close to 0).  That's why we should do a correction with
     679                 :             :  * the array distinct element count distribution here.
     680                 :             :  *
     681                 :             :  * Using the histogram of distinct element counts produces a different
     682                 :             :  * distribution law than independent occurrences of elements.  This
     683                 :             :  * distribution law can be described as follows:
     684                 :             :  *
     685                 :             :  * P(o1, o2, ..., on) = f1^o1 * (1 - f1)^(1 - o1) * f2^o2 *
     686                 :             :  *    (1 - f2)^(1 - o2) * ... * fn^on * (1 - fn)^(1 - on) * hist[m] / ind[m]
     687                 :             :  *
     688                 :             :  * where:
     689                 :             :  * o1, o2, ..., on - occurrences of elements 1, 2, ..., n
     690                 :             :  *      (1 - occurrence, 0 - no occurrence) in row
     691                 :             :  * f1, f2, ..., fn - frequencies of elements 1, 2, ..., n
     692                 :             :  *      (scalar values in [0..1]) according to collected statistics
     693                 :             :  * m = o1 + o2 + ... + on = total number of distinct elements in row
     694                 :             :  * hist[m] - histogram data for occurrence of m elements.
     695                 :             :  * ind[m] - probability of m occurrences from n events assuming their
     696                 :             :  *    probabilities to be equal to frequencies of array elements.
     697                 :             :  *
     698                 :             :  * ind[m] = sum(f1^o1 * (1 - f1)^(1 - o1) * f2^o2 * (1 - f2)^(1 - o2) *
     699                 :             :  * ... * fn^on * (1 - fn)^(1 - on), o1, o2, ..., on) | o1 + o2 + .. on = m
     700                 :             :  */
     701                 :             : static Selectivity
     702                 :          57 : mcelem_array_contained_selec(const Datum *mcelem, int nmcelem,
     703                 :             :                              const float4 *numbers, int nnumbers,
     704                 :             :                              const Datum *array_data, int nitems,
     705                 :             :                              const float4 *hist, int nhist,
     706                 :             :                              Oid operator, TypeCacheEntry *typentry)
     707                 :             : {
     708                 :             :     int         mcelem_index,
     709                 :             :                 i,
     710                 :          57 :                 unique_nitems = 0;
     711                 :             :     float       selec,
     712                 :             :                 minfreq,
     713                 :             :                 nullelem_freq;
     714                 :             :     float      *dist,
     715                 :             :                *mcelem_dist,
     716                 :             :                *hist_part;
     717                 :             :     float       avg_count,
     718                 :             :                 mult,
     719                 :             :                 rest;
     720                 :             :     float      *elem_selec;
     721                 :             : 
     722                 :             :     /*
     723                 :             :      * There should be three more Numbers than Values in the MCELEM slot,
     724                 :             :      * because the last three cells should hold minimal and maximal frequency
     725                 :             :      * among the non-null elements, and then the frequency of null elements.
     726                 :             :      * Punt if not right, because we can't do much without the element freqs.
     727                 :             :      */
     728   [ +  +  -  + ]:          57 :     if (numbers == NULL || nnumbers != nmcelem + 3)
     729                 :           1 :         return DEFAULT_CONTAIN_SEL;
     730                 :             : 
     731                 :             :     /* Can't do much without a count histogram, either */
     732   [ +  -  -  + ]:          56 :     if (hist == NULL || nhist < 3)
     733                 :           0 :         return DEFAULT_CONTAIN_SEL;
     734                 :             : 
     735                 :             :     /*
     736                 :             :      * Grab some of the summary statistics that compute_array_stats() stores:
     737                 :             :      * lowest MCE frequency, frequency of null elements, and average distinct
     738                 :             :      * element count.
     739                 :             :      */
     740                 :          56 :     minfreq = numbers[nmcelem];
     741                 :          56 :     nullelem_freq = numbers[nmcelem + 2];
     742                 :          56 :     avg_count = hist[nhist - 1];
     743                 :             : 
     744                 :             :     /*
     745                 :             :      * "rest" will be the sum of the frequencies of all elements not
     746                 :             :      * represented in MCELEM.  The average distinct element count is the sum
     747                 :             :      * of the frequencies of *all* elements.  Begin with that; we will proceed
     748                 :             :      * to subtract the MCELEM frequencies.
     749                 :             :      */
     750                 :          56 :     rest = avg_count;
     751                 :             : 
     752                 :             :     /*
     753                 :             :      * mult is a multiplier representing estimate of probability that each
     754                 :             :      * mcelem that is not present in constant doesn't occur.
     755                 :             :      */
     756                 :          56 :     mult = 1.0f;
     757                 :             : 
     758                 :             :     /*
     759                 :             :      * elem_selec is array of estimated frequencies for elements in the
     760                 :             :      * constant.
     761                 :             :      */
     762                 :          56 :     elem_selec = palloc_array(float, nitems);
     763                 :             : 
     764                 :             :     /* Scan mcelem and array in parallel. */
     765                 :          56 :     mcelem_index = 0;
     766         [ +  + ]:         174 :     for (i = 0; i < nitems; i++)
     767                 :             :     {
     768                 :         118 :         bool        match = false;
     769                 :             : 
     770                 :             :         /* Ignore any duplicates in the array data. */
     771   [ +  +  -  + ]:         210 :         if (i > 0 &&
     772                 :          92 :             element_compare(&array_data[i - 1], &array_data[i], typentry) == 0)
     773                 :           0 :             continue;
     774                 :             : 
     775                 :             :         /*
     776                 :             :          * Iterate over MCELEM until we find an entry greater than or equal to
     777                 :             :          * this element of the constant.  Update "rest" and "mult" for mcelem
     778                 :             :          * entries skipped over.
     779                 :             :          */
     780         [ +  - ]:        3144 :         while (mcelem_index < nmcelem)
     781                 :             :         {
     782                 :        3144 :             int         cmp = element_compare(&mcelem[mcelem_index],
     783                 :        3144 :                                               &array_data[i],
     784                 :             :                                               typentry);
     785                 :             : 
     786         [ +  + ]:        3144 :             if (cmp < 0)
     787                 :             :             {
     788                 :        3026 :                 mult *= (1.0f - numbers[mcelem_index]);
     789                 :        3026 :                 rest -= numbers[mcelem_index];
     790                 :        3026 :                 mcelem_index++;
     791                 :             :             }
     792                 :             :             else
     793                 :             :             {
     794         [ +  - ]:         118 :                 if (cmp == 0)
     795                 :         118 :                     match = true;   /* mcelem is found */
     796                 :         118 :                 break;
     797                 :             :             }
     798                 :             :         }
     799                 :             : 
     800         [ +  - ]:         118 :         if (match)
     801                 :             :         {
     802                 :             :             /* MCELEM matches the array item. */
     803                 :         118 :             elem_selec[unique_nitems] = numbers[mcelem_index];
     804                 :             :             /* "rest" is decremented for all mcelems, matched or not */
     805                 :         118 :             rest -= numbers[mcelem_index];
     806                 :         118 :             mcelem_index++;
     807                 :             :         }
     808                 :             :         else
     809                 :             :         {
     810                 :             :             /*
     811                 :             :              * The element is not in MCELEM.  Estimate its frequency as half
     812                 :             :              * that of the least-frequent MCE.  (We know it cannot be more
     813                 :             :              * than minfreq, and it could be a great deal less.  Half seems
     814                 :             :              * like a good compromise.)  For probably-historical reasons,
     815                 :             :              * clamp to not more than DEFAULT_CONTAIN_SEL.
     816                 :             :              */
     817         [ #  # ]:           0 :             elem_selec[unique_nitems] = Min(DEFAULT_CONTAIN_SEL,
     818                 :             :                                             minfreq / 2);
     819                 :             :         }
     820                 :             : 
     821                 :         118 :         unique_nitems++;
     822                 :             :     }
     823                 :             : 
     824                 :             :     /*
     825                 :             :      * If we handled all constant elements without exhausting the MCELEM
     826                 :             :      * array, finish walking it to complete calculation of "rest" and "mult".
     827                 :             :      */
     828         [ +  + ]:        5482 :     while (mcelem_index < nmcelem)
     829                 :             :     {
     830                 :        5426 :         mult *= (1.0f - numbers[mcelem_index]);
     831                 :        5426 :         rest -= numbers[mcelem_index];
     832                 :        5426 :         mcelem_index++;
     833                 :             :     }
     834                 :             : 
     835                 :             :     /*
     836                 :             :      * The presence of many distinct rare elements materially decreases
     837                 :             :      * selectivity.  Use the Poisson distribution to estimate the probability
     838                 :             :      * of a column value having zero occurrences of such elements.  See above
     839                 :             :      * for the definition of "rest".
     840                 :             :      */
     841                 :          56 :     mult *= exp(-rest);
     842                 :             : 
     843                 :             :     /*----------
     844                 :             :      * Using the distinct element count histogram requires
     845                 :             :      *      O(unique_nitems * (nmcelem + unique_nitems))
     846                 :             :      * operations.  Beyond a certain computational cost threshold, it's
     847                 :             :      * reasonable to sacrifice accuracy for decreased planning time.  We limit
     848                 :             :      * the number of operations to EFFORT * nmcelem; since nmcelem is limited
     849                 :             :      * by the column's statistics target, the work done is user-controllable.
     850                 :             :      *
     851                 :             :      * If the number of operations would be too large, we can reduce it
     852                 :             :      * without losing all accuracy by reducing unique_nitems and considering
     853                 :             :      * only the most-common elements of the constant array.  To make the
     854                 :             :      * results exactly match what we would have gotten with only those
     855                 :             :      * elements to start with, we'd have to remove any discarded elements'
     856                 :             :      * frequencies from "mult", but since this is only an approximation
     857                 :             :      * anyway, we don't bother with that.  Therefore it's sufficient to qsort
     858                 :             :      * elem_selec[] and take the largest elements.  (They will no longer match
     859                 :             :      * up with the elements of array_data[], but we don't care.)
     860                 :             :      *----------
     861                 :             :      */
     862                 :             : #define EFFORT 100
     863                 :             : 
     864         [ +  - ]:          56 :     if ((nmcelem + unique_nitems) > 0 &&
     865         [ -  + ]:          56 :         unique_nitems > EFFORT * nmcelem / (nmcelem + unique_nitems))
     866                 :             :     {
     867                 :             :         /*
     868                 :             :          * Use the quadratic formula to solve for largest allowable N.  We
     869                 :             :          * have A = 1, B = nmcelem, C = - EFFORT * nmcelem.
     870                 :             :          */
     871                 :           0 :         double      b = (double) nmcelem;
     872                 :             :         int         n;
     873                 :             : 
     874                 :           0 :         n = (int) ((sqrt(b * b + 4 * EFFORT * b) - b) / 2);
     875                 :             : 
     876                 :             :         /* Sort, then take just the first n elements */
     877                 :           0 :         qsort(elem_selec, unique_nitems, sizeof(float),
     878                 :             :               float_compare_desc);
     879                 :           0 :         unique_nitems = n;
     880                 :             :     }
     881                 :             : 
     882                 :             :     /*
     883                 :             :      * Calculate probabilities of each distinct element count for both mcelems
     884                 :             :      * and constant elements.  At this point, assume independent element
     885                 :             :      * occurrence.
     886                 :             :      */
     887                 :          56 :     dist = calc_distr(elem_selec, unique_nitems, unique_nitems, 0.0f);
     888                 :          56 :     mcelem_dist = calc_distr(numbers, nmcelem, unique_nitems, rest);
     889                 :             : 
     890                 :             :     /* ignore hist[nhist-1], which is the average not a histogram member */
     891                 :          56 :     hist_part = calc_hist(hist, nhist - 1, unique_nitems);
     892                 :             : 
     893                 :          56 :     selec = 0.0f;
     894         [ +  + ]:         230 :     for (i = 0; i <= unique_nitems; i++)
     895                 :             :     {
     896                 :             :         /*
     897                 :             :          * mult * dist[i] / mcelem_dist[i] gives us probability of qual
     898                 :             :          * matching from assumption of independent element occurrence with the
     899                 :             :          * condition that distinct element count = i.
     900                 :             :          */
     901         [ +  - ]:         174 :         if (mcelem_dist[i] > 0)
     902                 :         174 :             selec += hist_part[i] * mult * dist[i] / mcelem_dist[i];
     903                 :             :     }
     904                 :             : 
     905                 :          56 :     pfree(dist);
     906                 :          56 :     pfree(mcelem_dist);
     907                 :          56 :     pfree(hist_part);
     908                 :          56 :     pfree(elem_selec);
     909                 :             : 
     910                 :             :     /* Take into account occurrence of NULL element. */
     911                 :          56 :     selec *= (1.0f - nullelem_freq);
     912                 :             : 
     913   [ -  +  -  + ]:          56 :     CLAMP_PROBABILITY(selec);
     914                 :             : 
     915                 :          56 :     return selec;
     916                 :             : }
     917                 :             : 
     918                 :             : /*
     919                 :             :  * Calculate the first n distinct element count probabilities from a
     920                 :             :  * histogram of distinct element counts.
     921                 :             :  *
     922                 :             :  * Returns a palloc'd array of n+1 entries, with array[k] being the
     923                 :             :  * probability of element count k, k in [0..n].
     924                 :             :  *
     925                 :             :  * We assume that a histogram box with bounds a and b gives 1 / ((b - a + 1) *
     926                 :             :  * (nhist - 1)) probability to each value in (a,b) and an additional half of
     927                 :             :  * that to a and b themselves.
     928                 :             :  */
     929                 :             : static float *
     930                 :          56 : calc_hist(const float4 *hist, int nhist, int n)
     931                 :             : {
     932                 :             :     float      *hist_part;
     933                 :             :     int         k,
     934                 :          56 :                 i = 0;
     935                 :          56 :     float       prev_interval = 0,
     936                 :             :                 next_interval;
     937                 :             :     float       frac;
     938                 :             : 
     939                 :          56 :     hist_part = palloc_array(float, n + 1);
     940                 :             : 
     941                 :             :     /*
     942                 :             :      * frac is a probability contribution for each interval between histogram
     943                 :             :      * values.  We have nhist - 1 intervals, so contribution of each one will
     944                 :             :      * be 1 / (nhist - 1).
     945                 :             :      */
     946                 :          56 :     frac = 1.0f / ((float) (nhist - 1));
     947                 :             : 
     948         [ +  + ]:         230 :     for (k = 0; k <= n; k++)
     949                 :             :     {
     950                 :         174 :         int         count = 0;
     951                 :             : 
     952                 :             :         /*
     953                 :             :          * Count the histogram boundaries equal to k.  (Although the histogram
     954                 :             :          * should theoretically contain only exact integers, entries are
     955                 :             :          * floats so there could be roundoff error in large values.  Treat any
     956                 :             :          * fractional value as equal to the next larger k.)
     957                 :             :          */
     958   [ +  -  +  + ]:        1500 :         while (i < nhist && hist[i] <= k)
     959                 :             :         {
     960                 :        1326 :             count++;
     961                 :        1326 :             i++;
     962                 :             :         }
     963                 :             : 
     964         [ +  + ]:         174 :         if (count > 0)
     965                 :             :         {
     966                 :             :             /* k is an exact bound for at least one histogram box. */
     967                 :             :             float       val;
     968                 :             : 
     969                 :             :             /* Find length between current histogram value and the next one */
     970         [ +  - ]:         168 :             if (i < nhist)
     971                 :         168 :                 next_interval = hist[i] - hist[i - 1];
     972                 :             :             else
     973                 :           0 :                 next_interval = 0;
     974                 :             : 
     975                 :             :             /*
     976                 :             :              * count - 1 histogram boxes contain k exclusively.  They
     977                 :             :              * contribute a total of (count - 1) * frac probability.  Also
     978                 :             :              * factor in the partial histogram boxes on either side.
     979                 :             :              */
     980                 :         168 :             val = (float) (count - 1);
     981         [ +  - ]:         168 :             if (next_interval > 0)
     982                 :         168 :                 val += 0.5f / next_interval;
     983         [ +  + ]:         168 :             if (prev_interval > 0)
     984                 :         112 :                 val += 0.5f / prev_interval;
     985                 :         168 :             hist_part[k] = frac * val;
     986                 :             : 
     987                 :         168 :             prev_interval = next_interval;
     988                 :             :         }
     989                 :             :         else
     990                 :             :         {
     991                 :             :             /* k does not appear as an exact histogram bound. */
     992         [ +  - ]:           6 :             if (prev_interval > 0)
     993                 :           6 :                 hist_part[k] = frac / prev_interval;
     994                 :             :             else
     995                 :           0 :                 hist_part[k] = 0.0f;
     996                 :             :         }
     997                 :             :     }
     998                 :             : 
     999                 :          56 :     return hist_part;
    1000                 :             : }
    1001                 :             : 
    1002                 :             : /*
    1003                 :             :  * Consider n independent events with probabilities p[].  This function
    1004                 :             :  * calculates probabilities of exact k of events occurrence for k in [0..m].
    1005                 :             :  * Returns a palloc'd array of size m+1.
    1006                 :             :  *
    1007                 :             :  * "rest" is the sum of the probabilities of all low-probability events not
    1008                 :             :  * included in p.
    1009                 :             :  *
    1010                 :             :  * Imagine matrix M of size (n + 1) x (m + 1).  Element M[i,j] denotes the
    1011                 :             :  * probability that exactly j of first i events occur.  Obviously M[0,0] = 1.
    1012                 :             :  * For any constant j, each increment of i increases the probability iff the
    1013                 :             :  * event occurs.  So, by the law of total probability:
    1014                 :             :  *  M[i,j] = M[i - 1, j] * (1 - p[i]) + M[i - 1, j - 1] * p[i]
    1015                 :             :  *      for i > 0, j > 0.
    1016                 :             :  *  M[i,0] = M[i - 1, 0] * (1 - p[i]) for i > 0.
    1017                 :             :  */
    1018                 :             : static float *
    1019                 :         112 : calc_distr(const float *p, int n, int m, float rest)
    1020                 :             : {
    1021                 :             :     float      *row,
    1022                 :             :                *prev_row,
    1023                 :             :                *tmp;
    1024                 :             :     int         i,
    1025                 :             :                 j;
    1026                 :             : 
    1027                 :             :     /*
    1028                 :             :      * Since we return only the last row of the matrix and need only the
    1029                 :             :      * current and previous row for calculations, allocate two rows.
    1030                 :             :      */
    1031                 :         112 :     row = palloc_array(float, m + 1);
    1032                 :         112 :     prev_row = palloc_array(float, m + 1);
    1033                 :             : 
    1034                 :             :     /* M[0,0] = 1 */
    1035                 :         112 :     row[0] = 1.0f;
    1036         [ +  + ]:        8800 :     for (i = 1; i <= n; i++)
    1037                 :             :     {
    1038                 :        8688 :         float       t = p[i - 1];
    1039                 :             : 
    1040                 :             :         /* Swap rows */
    1041                 :        8688 :         tmp = row;
    1042                 :        8688 :         row = prev_row;
    1043                 :        8688 :         prev_row = tmp;
    1044                 :             : 
    1045                 :             :         /* Calculate next row */
    1046   [ +  +  +  + ]:       38254 :         for (j = 0; j <= i && j <= m; j++)
    1047                 :             :         {
    1048                 :       29566 :             float       val = 0.0f;
    1049                 :             : 
    1050         [ +  + ]:       29566 :             if (j < i)
    1051                 :       29330 :                 val += prev_row[j] * (1.0f - t);
    1052         [ +  + ]:       29566 :             if (j > 0)
    1053                 :       20878 :                 val += prev_row[j - 1] * t;
    1054                 :       29566 :             row[j] = val;
    1055                 :             :         }
    1056                 :             :     }
    1057                 :             : 
    1058                 :             :     /*
    1059                 :             :      * The presence of many distinct rare (not in "p") elements materially
    1060                 :             :      * decreases selectivity.  Model their collective occurrence with the
    1061                 :             :      * Poisson distribution.
    1062                 :             :      */
    1063         [ -  + ]:         112 :     if (rest > DEFAULT_CONTAIN_SEL)
    1064                 :             :     {
    1065                 :             :         float       t;
    1066                 :             : 
    1067                 :             :         /* Swap rows */
    1068                 :           0 :         tmp = row;
    1069                 :           0 :         row = prev_row;
    1070                 :           0 :         prev_row = tmp;
    1071                 :             : 
    1072         [ #  # ]:           0 :         for (i = 0; i <= m; i++)
    1073                 :           0 :             row[i] = 0.0f;
    1074                 :             : 
    1075                 :             :         /* Value of Poisson distribution for 0 occurrences */
    1076                 :           0 :         t = exp(-rest);
    1077                 :             : 
    1078                 :             :         /*
    1079                 :             :          * Calculate convolution of previously computed distribution and the
    1080                 :             :          * Poisson distribution.
    1081                 :             :          */
    1082         [ #  # ]:           0 :         for (i = 0; i <= m; i++)
    1083                 :             :         {
    1084         [ #  # ]:           0 :             for (j = 0; j <= m - i; j++)
    1085                 :           0 :                 row[j + i] += prev_row[j] * t;
    1086                 :             : 
    1087                 :             :             /* Get Poisson distribution value for (i + 1) occurrences */
    1088                 :           0 :             t *= rest / (float) (i + 1);
    1089                 :             :         }
    1090                 :             :     }
    1091                 :             : 
    1092                 :         112 :     pfree(prev_row);
    1093                 :         112 :     return row;
    1094                 :             : }
    1095                 :             : 
    1096                 :             : /* Fast function for floor value of 2 based logarithm calculation. */
    1097                 :             : static int
    1098                 :         788 : floor_log2(uint32 n)
    1099                 :             : {
    1100                 :         788 :     int         logval = 0;
    1101                 :             : 
    1102         [ +  + ]:         788 :     if (n == 0)
    1103                 :         478 :         return -1;
    1104         [ -  + ]:         310 :     if (n >= (1 << 16))
    1105                 :             :     {
    1106                 :           0 :         n >>= 16;
    1107                 :           0 :         logval += 16;
    1108                 :             :     }
    1109         [ +  + ]:         310 :     if (n >= (1 << 8))
    1110                 :             :     {
    1111                 :          80 :         n >>= 8;
    1112                 :          80 :         logval += 8;
    1113                 :             :     }
    1114         [ +  + ]:         310 :     if (n >= (1 << 4))
    1115                 :             :     {
    1116                 :         230 :         n >>= 4;
    1117                 :         230 :         logval += 4;
    1118                 :             :     }
    1119         [ +  + ]:         310 :     if (n >= (1 << 2))
    1120                 :             :     {
    1121                 :         220 :         n >>= 2;
    1122                 :         220 :         logval += 2;
    1123                 :             :     }
    1124         [ +  + ]:         310 :     if (n >= (1 << 1))
    1125                 :             :     {
    1126                 :         155 :         logval += 1;
    1127                 :             :     }
    1128                 :         310 :     return logval;
    1129                 :             : }
    1130                 :             : 
    1131                 :             : /*
    1132                 :             :  * find_next_mcelem binary-searches a most common elements array, starting
    1133                 :             :  * from *index, for the first member >= value.  It saves the position of the
    1134                 :             :  * match into *index and returns true if it's an exact match.  (Note: we
    1135                 :             :  * assume the mcelem elements are distinct so there can't be more than one
    1136                 :             :  * exact match.)
    1137                 :             :  */
    1138                 :             : static bool
    1139                 :        2138 : find_next_mcelem(const Datum *mcelem, int nmcelem, Datum value, int *index,
    1140                 :             :                  TypeCacheEntry *typentry)
    1141                 :             : {
    1142                 :        2138 :     int         l = *index,
    1143                 :        2138 :                 r = nmcelem - 1,
    1144                 :             :                 i,
    1145                 :             :                 res;
    1146                 :             : 
    1147         [ +  + ]:        3952 :     while (l <= r)
    1148                 :             :     {
    1149                 :        2114 :         i = (l + r) / 2;
    1150                 :        2114 :         res = element_compare(&mcelem[i], &value, typentry);
    1151         [ +  + ]:        2114 :         if (res == 0)
    1152                 :             :         {
    1153                 :         300 :             *index = i;
    1154                 :         300 :             return true;
    1155                 :             :         }
    1156         [ +  + ]:        1814 :         else if (res < 0)
    1157                 :         663 :             l = i + 1;
    1158                 :             :         else
    1159                 :        1151 :             r = i - 1;
    1160                 :             :     }
    1161                 :        1838 :     *index = l;
    1162                 :        1838 :     return false;
    1163                 :             : }
    1164                 :             : 
    1165                 :             : /*
    1166                 :             :  * Comparison function for elements.
    1167                 :             :  *
    1168                 :             :  * We use the element type's default btree opclass, and its default collation
    1169                 :             :  * if the type is collation-sensitive.
    1170                 :             :  *
    1171                 :             :  * XXX consider using SortSupport infrastructure
    1172                 :             :  */
    1173                 :             : static int
    1174                 :        8691 : element_compare(const void *key1, const void *key2, void *arg)
    1175                 :             : {
    1176                 :        8691 :     Datum       d1 = *((const Datum *) key1);
    1177                 :        8691 :     Datum       d2 = *((const Datum *) key2);
    1178                 :        8691 :     TypeCacheEntry *typentry = (TypeCacheEntry *) arg;
    1179                 :        8691 :     FmgrInfo   *cmpfunc = &typentry->cmp_proc_finfo;
    1180                 :             :     Datum       c;
    1181                 :             : 
    1182                 :        8691 :     c = FunctionCall2Coll(cmpfunc, typentry->typcollation, d1, d2);
    1183                 :        8691 :     return DatumGetInt32(c);
    1184                 :             : }
    1185                 :             : 
    1186                 :             : /*
    1187                 :             :  * Comparison function for sorting floats into descending order.
    1188                 :             :  */
    1189                 :             : static int
    1190                 :           0 : float_compare_desc(const void *key1, const void *key2)
    1191                 :             : {
    1192                 :           0 :     float       d1 = *((const float *) key1);
    1193                 :           0 :     float       d2 = *((const float *) key2);
    1194                 :             : 
    1195         [ #  # ]:           0 :     if (d1 > d2)
    1196                 :           0 :         return -1;
    1197         [ #  # ]:           0 :     else if (d1 < d2)
    1198                 :           0 :         return 1;
    1199                 :             :     else
    1200                 :           0 :         return 0;
    1201                 :             : }

Generated by: LCOV version 2.0-1