LCOV - code coverage report
Current view: top level - src/backend/statistics - mcv.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 531 583 91.1 %
Date: 2023-12-11 15:11:28 Functions: 17 21 81.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * mcv.c
       4             :  *    POSTGRES multivariate MCV lists
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/statistics/mcv.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include <math.h>
      18             : 
      19             : #include "access/htup_details.h"
      20             : #include "catalog/pg_collation.h"
      21             : #include "catalog/pg_statistic_ext.h"
      22             : #include "catalog/pg_statistic_ext_data.h"
      23             : #include "fmgr.h"
      24             : #include "funcapi.h"
      25             : #include "nodes/nodeFuncs.h"
      26             : #include "optimizer/clauses.h"
      27             : #include "statistics/extended_stats_internal.h"
      28             : #include "statistics/statistics.h"
      29             : #include "utils/array.h"
      30             : #include "utils/builtins.h"
      31             : #include "utils/bytea.h"
      32             : #include "utils/fmgroids.h"
      33             : #include "utils/fmgrprotos.h"
      34             : #include "utils/lsyscache.h"
      35             : #include "utils/selfuncs.h"
      36             : #include "utils/syscache.h"
      37             : #include "utils/typcache.h"
      38             : 
      39             : /*
      40             :  * Computes size of a serialized MCV item, depending on the number of
      41             :  * dimensions (columns) the statistic is defined on. The datum values are
      42             :  * stored in a separate array (deduplicated, to minimize the size), and
      43             :  * so the serialized items only store uint16 indexes into that array.
      44             :  *
      45             :  * Each serialized item stores (in this order):
      46             :  *
      47             :  * - indexes to values    (ndim * sizeof(uint16))
      48             :  * - null flags           (ndim * sizeof(bool))
      49             :  * - frequency            (sizeof(double))
      50             :  * - base_frequency       (sizeof(double))
      51             :  *
      52             :  * There is no alignment padding within an MCV item.
      53             :  * So in total each MCV item requires this many bytes:
      54             :  *
      55             :  *   ndim * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double)
      56             :  */
      57             : #define ITEM_SIZE(ndims)    \
      58             :     ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))
      59             : 
      60             : /*
      61             :  * Used to compute size of serialized MCV list representation.
      62             :  */
      63             : #define MinSizeOfMCVList        \
      64             :     (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
      65             : 
      66             : /*
      67             :  * Size of the serialized MCV list, excluding the space needed for
      68             :  * deduplicated per-dimension values. The macro is meant to be used
      69             :  * when it's not yet safe to access the serialized info about amount
      70             :  * of data for each column.
      71             :  */
      72             : #define SizeOfMCVList(ndims,nitems) \
      73             :     ((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
      74             :      ((ndims) * sizeof(DimensionInfo)) + \
      75             :      ((nitems) * ITEM_SIZE(ndims)))
      76             : 
      77             : static MultiSortSupport build_mss(StatsBuildData *data);
      78             : 
      79             : static SortItem *build_distinct_groups(int numrows, SortItem *items,
      80             :                                        MultiSortSupport mss, int *ndistinct);
      81             : 
      82             : static SortItem **build_column_frequencies(SortItem *groups, int ngroups,
      83             :                                            MultiSortSupport mss, int *ncounts);
      84             : 
      85             : static int  count_distinct_groups(int numrows, SortItem *items,
      86             :                                   MultiSortSupport mss);
      87             : 
      88             : /*
      89             :  * Compute new value for bitmap item, considering whether it's used for
      90             :  * clauses connected by AND/OR.
      91             :  */
      92             : #define RESULT_MERGE(value, is_or, match) \
      93             :     ((is_or) ? ((value) || (match)) : ((value) && (match)))
      94             : 
      95             : /*
      96             :  * When processing a list of clauses, the bitmap item may get set to a value
      97             :  * such that additional clauses can't change it. For example, when processing
      98             :  * a list of clauses connected to AND, as soon as the item gets set to 'false'
      99             :  * then it'll remain like that. Similarly clauses connected by OR and 'true'.
     100             :  *
     101             :  * Returns true when the value in the bitmap can't change no matter how the
     102             :  * remaining clauses are evaluated.
     103             :  */
     104             : #define RESULT_IS_FINAL(value, is_or)   ((is_or) ? (value) : (!(value)))
     105             : 
     106             : /*
     107             :  * get_mincount_for_mcv_list
     108             :  *      Determine the minimum number of times a value needs to appear in
     109             :  *      the sample for it to be included in the MCV list.
     110             :  *
     111             :  * We want to keep only values that appear sufficiently often in the
     112             :  * sample that it is reasonable to extrapolate their sample frequencies to
     113             :  * the entire table.  We do this by placing an upper bound on the relative
     114             :  * standard error of the sample frequency, so that any estimates the
     115             :  * planner generates from the MCV statistics can be expected to be
     116             :  * reasonably accurate.
     117             :  *
     118             :  * Since we are sampling without replacement, the sample frequency of a
     119             :  * particular value is described by a hypergeometric distribution.  A
     120             :  * common rule of thumb when estimating errors in this situation is to
     121             :  * require at least 10 instances of the value in the sample, in which case
     122             :  * the distribution can be approximated by a normal distribution, and
     123             :  * standard error analysis techniques can be applied.  Given a sample size
     124             :  * of n, a population size of N, and a sample frequency of p=cnt/n, the
     125             :  * standard error of the proportion p is given by
     126             :  *      SE = sqrt(p*(1-p)/n) * sqrt((N-n)/(N-1))
     127             :  * where the second term is the finite population correction.  To get
     128             :  * reasonably accurate planner estimates, we impose an upper bound on the
     129             :  * relative standard error of 20% -- i.e., SE/p < 0.2.  This 20% relative
     130             :  * error bound is fairly arbitrary, but has been found empirically to work
     131             :  * well.  Rearranging this formula gives a lower bound on the number of
     132             :  * instances of the value seen:
     133             :  *      cnt > n*(N-n) / (N-n+0.04*n*(N-1))
     134             :  * This bound is at most 25, and approaches 0 as n approaches 0 or N. The
     135             :  * case where n approaches 0 cannot happen in practice, since the sample
     136             :  * size is at least 300.  The case where n approaches N corresponds to
     137             :  * sampling the whole table, in which case it is reasonable to keep
     138             :  * the whole MCV list (have no lower bound), so it makes sense to apply
     139             :  * this formula for all inputs, even though the above derivation is
     140             :  * technically only valid when the right hand side is at least around 10.
     141             :  *
     142             :  * An alternative way to look at this formula is as follows -- assume that
     143             :  * the number of instances of the value seen scales up to the entire
     144             :  * table, so that the population count is K=N*cnt/n. Then the distribution
     145             :  * in the sample is a hypergeometric distribution parameterised by N, n
     146             :  * and K, and the bound above is mathematically equivalent to demanding
     147             :  * that the standard deviation of that distribution is less than 20% of
     148             :  * its mean.  Thus the relative errors in any planner estimates produced
     149             :  * from the MCV statistics are likely to be not too large.
     150             :  */
     151             : static double
     152         180 : get_mincount_for_mcv_list(int samplerows, double totalrows)
     153             : {
     154         180 :     double      n = samplerows;
     155         180 :     double      N = totalrows;
     156             :     double      numer,
     157             :                 denom;
     158             : 
     159         180 :     numer = n * (N - n);
     160         180 :     denom = N - n + 0.04 * n * (N - 1);
     161             : 
     162             :     /* Guard against division by zero (possible if n = N = 1) */
     163         180 :     if (denom == 0.0)
     164          12 :         return 0.0;
     165             : 
     166         168 :     return numer / denom;
     167             : }
     168             : 
     169             : /*
     170             :  * Builds MCV list from the set of sampled rows.
     171             :  *
     172             :  * The algorithm is quite simple:
     173             :  *
     174             :  *     (1) sort the data (default collation, '<' for the data type)
     175             :  *
     176             :  *     (2) count distinct groups, decide how many to keep
     177             :  *
     178             :  *     (3) build the MCV list using the threshold determined in (2)
     179             :  *
     180             :  *     (4) remove rows represented by the MCV from the sample
     181             :  *
     182             :  */
     183             : MCVList *
     184         180 : statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
     185             : {
     186             :     int         i,
     187             :                 numattrs,
     188             :                 numrows,
     189             :                 ngroups,
     190             :                 nitems;
     191             :     double      mincount;
     192             :     SortItem   *items;
     193             :     SortItem   *groups;
     194         180 :     MCVList    *mcvlist = NULL;
     195             :     MultiSortSupport mss;
     196             : 
     197             :     /* comparator for all the columns */
     198         180 :     mss = build_mss(data);
     199             : 
     200             :     /* sort the rows */
     201         180 :     items = build_sorted_items(data, &nitems, mss,
     202             :                                data->nattnums, data->attnums);
     203             : 
     204         180 :     if (!items)
     205           0 :         return NULL;
     206             : 
     207             :     /* for convenience */
     208         180 :     numattrs = data->nattnums;
     209         180 :     numrows = data->numrows;
     210             : 
     211             :     /* transform the sorted rows into groups (sorted by frequency) */
     212         180 :     groups = build_distinct_groups(nitems, items, mss, &ngroups);
     213             : 
     214             :     /*
     215             :      * The maximum number of MCV items to store, based on the statistics
     216             :      * target we computed for the statistics object (from the target set for
     217             :      * the object itself, attributes and the system default). In any case, we
     218             :      * can't keep more groups than we have available.
     219             :      */
     220         180 :     nitems = stattarget;
     221         180 :     if (nitems > ngroups)
     222         108 :         nitems = ngroups;
     223             : 
     224             :     /*
     225             :      * Decide how many items to keep in the MCV list. We can't use the same
     226             :      * algorithm as per-column MCV lists, because that only considers the
     227             :      * actual group frequency - but we're primarily interested in how the
     228             :      * actual frequency differs from the base frequency (product of simple
     229             :      * per-column frequencies, as if the columns were independent).
     230             :      *
     231             :      * Using the same algorithm might exclude items that are close to the
     232             :      * "average" frequency of the sample. But that does not say whether the
     233             :      * observed frequency is close to the base frequency or not. We also need
     234             :      * to consider unexpectedly uncommon items (again, compared to the base
     235             :      * frequency), and the single-column algorithm does not have to.
     236             :      *
     237             :      * We simply decide how many items to keep by computing the minimum count
     238             :      * using get_mincount_for_mcv_list() and then keep all items that seem to
     239             :      * be more common than that.
     240             :      */
     241         180 :     mincount = get_mincount_for_mcv_list(numrows, totalrows);
     242             : 
     243             :     /*
     244             :      * Walk the groups until we find the first group with a count below the
     245             :      * mincount threshold (the index of that group is the number of groups we
     246             :      * want to keep).
     247             :      */
     248        8772 :     for (i = 0; i < nitems; i++)
     249             :     {
     250        8592 :         if (groups[i].count < mincount)
     251             :         {
     252           0 :             nitems = i;
     253           0 :             break;
     254             :         }
     255             :     }
     256             : 
     257             :     /*
     258             :      * At this point, we know the number of items for the MCV list. There
     259             :      * might be none (for uniform distribution with many groups), and in that
     260             :      * case, there will be no MCV list. Otherwise, construct the MCV list.
     261             :      */
     262         180 :     if (nitems > 0)
     263             :     {
     264             :         int         j;
     265             :         SortItem    key;
     266             :         MultiSortSupport tmp;
     267             : 
     268             :         /* frequencies for values in each attribute */
     269             :         SortItem  **freqs;
     270             :         int        *nfreqs;
     271             : 
     272             :         /* used to search values */
     273         180 :         tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup)
     274             :                                         + sizeof(SortSupportData));
     275             : 
     276             :         /* compute frequencies for values in each column */
     277         180 :         nfreqs = (int *) palloc0(sizeof(int) * numattrs);
     278         180 :         freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
     279             : 
     280             :         /*
     281             :          * Allocate the MCV list structure, set the global parameters.
     282             :          */
     283         180 :         mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
     284         180 :                                       sizeof(MCVItem) * nitems);
     285             : 
     286         180 :         mcvlist->magic = STATS_MCV_MAGIC;
     287         180 :         mcvlist->type = STATS_MCV_TYPE_BASIC;
     288         180 :         mcvlist->ndimensions = numattrs;
     289         180 :         mcvlist->nitems = nitems;
     290             : 
     291             :         /* store info about data type OIDs */
     292         690 :         for (i = 0; i < numattrs; i++)
     293         510 :             mcvlist->types[i] = data->stats[i]->attrtypid;
     294             : 
     295             :         /* Copy the first chunk of groups into the result. */
     296        8772 :         for (i = 0; i < nitems; i++)
     297             :         {
     298             :             /* just point to the proper place in the list */
     299        8592 :             MCVItem    *item = &mcvlist->items[i];
     300             : 
     301        8592 :             item->values = (Datum *) palloc(sizeof(Datum) * numattrs);
     302        8592 :             item->isnull = (bool *) palloc(sizeof(bool) * numattrs);
     303             : 
     304             :             /* copy values for the group */
     305        8592 :             memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
     306        8592 :             memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
     307             : 
     308             :             /* groups should be sorted by frequency in descending order */
     309             :             Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
     310             : 
     311             :             /* group frequency */
     312        8592 :             item->frequency = (double) groups[i].count / numrows;
     313             : 
     314             :             /* base frequency, if the attributes were independent */
     315        8592 :             item->base_frequency = 1.0;
     316       32850 :             for (j = 0; j < numattrs; j++)
     317             :             {
     318             :                 SortItem   *freq;
     319             : 
     320             :                 /* single dimension */
     321       24258 :                 tmp->ndims = 1;
     322       24258 :                 tmp->ssup[0] = mss->ssup[j];
     323             : 
     324             :                 /* fill search key */
     325       24258 :                 key.values = &groups[i].values[j];
     326       24258 :                 key.isnull = &groups[i].isnull[j];
     327             : 
     328       24258 :                 freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
     329             :                                                 sizeof(SortItem),
     330             :                                                 multi_sort_compare, tmp);
     331             : 
     332       24258 :                 item->base_frequency *= ((double) freq->count) / numrows;
     333             :             }
     334             :         }
     335             : 
     336         180 :         pfree(nfreqs);
     337         180 :         pfree(freqs);
     338             :     }
     339             : 
     340         180 :     pfree(items);
     341         180 :     pfree(groups);
     342             : 
     343         180 :     return mcvlist;
     344             : }
     345             : 
     346             : /*
     347             :  * build_mss
     348             :  *      Build a MultiSortSupport for the given StatsBuildData.
     349             :  */
     350             : static MultiSortSupport
     351         180 : build_mss(StatsBuildData *data)
     352             : {
     353             :     int         i;
     354         180 :     int         numattrs = data->nattnums;
     355             : 
     356             :     /* Sort by multiple columns (using array of SortSupport) */
     357         180 :     MultiSortSupport mss = multi_sort_init(numattrs);
     358             : 
     359             :     /* prepare the sort functions for all the attributes */
     360         690 :     for (i = 0; i < numattrs; i++)
     361             :     {
     362         510 :         VacAttrStats *colstat = data->stats[i];
     363             :         TypeCacheEntry *type;
     364             : 
     365         510 :         type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
     366         510 :         if (type->lt_opr == InvalidOid) /* shouldn't happen */
     367           0 :             elog(ERROR, "cache lookup failed for ordering operator for type %u",
     368             :                  colstat->attrtypid);
     369             : 
     370         510 :         multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
     371             :     }
     372             : 
     373         180 :     return mss;
     374             : }
     375             : 
     376             : /*
     377             :  * count_distinct_groups
     378             :  *      Count distinct combinations of SortItems in the array.
     379             :  *
     380             :  * The array is assumed to be sorted according to the MultiSortSupport.
     381             :  */
     382             : static int
     383         180 : count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
     384             : {
     385             :     int         i;
     386             :     int         ndistinct;
     387             : 
     388         180 :     ndistinct = 1;
     389      459018 :     for (i = 1; i < numrows; i++)
     390             :     {
     391             :         /* make sure the array really is sorted */
     392             :         Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
     393             : 
     394      458838 :         if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
     395       77712 :             ndistinct += 1;
     396             :     }
     397             : 
     398         180 :     return ndistinct;
     399             : }
     400             : 
     401             : /*
     402             :  * compare_sort_item_count
     403             :  *      Comparator for sorting items by count (frequencies) in descending
     404             :  *      order.
     405             :  */
     406             : static int
     407       78756 : compare_sort_item_count(const void *a, const void *b, void *arg)
     408             : {
     409       78756 :     SortItem   *ia = (SortItem *) a;
     410       78756 :     SortItem   *ib = (SortItem *) b;
     411             : 
     412       78756 :     if (ia->count == ib->count)
     413       77808 :         return 0;
     414         948 :     else if (ia->count > ib->count)
     415         630 :         return -1;
     416             : 
     417         318 :     return 1;
     418             : }
     419             : 
     420             : /*
     421             :  * build_distinct_groups
     422             :  *      Build an array of SortItems for distinct groups and counts matching
     423             :  *      items.
     424             :  *
     425             :  * The 'items' array is assumed to be sorted.
     426             :  */
     427             : static SortItem *
     428         180 : build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
     429             :                       int *ndistinct)
     430             : {
     431             :     int         i,
     432             :                 j;
     433         180 :     int         ngroups = count_distinct_groups(numrows, items, mss);
     434             : 
     435         180 :     SortItem   *groups = (SortItem *) palloc(ngroups * sizeof(SortItem));
     436             : 
     437         180 :     j = 0;
     438         180 :     groups[0] = items[0];
     439         180 :     groups[0].count = 1;
     440             : 
     441      459018 :     for (i = 1; i < numrows; i++)
     442             :     {
     443             :         /* Assume sorted in ascending order. */
     444             :         Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
     445             : 
     446             :         /* New distinct group detected. */
     447      458838 :         if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
     448             :         {
     449       77712 :             groups[++j] = items[i];
     450       77712 :             groups[j].count = 0;
     451             :         }
     452             : 
     453      458838 :         groups[j].count++;
     454             :     }
     455             : 
     456             :     /* ensure we filled the expected number of distinct groups */
     457             :     Assert(j + 1 == ngroups);
     458             : 
     459             :     /* Sort the distinct groups by frequency (in descending order). */
     460         180 :     qsort_interruptible(groups, ngroups, sizeof(SortItem),
     461             :                         compare_sort_item_count, NULL);
     462             : 
     463         180 :     *ndistinct = ngroups;
     464         180 :     return groups;
     465             : }
     466             : 
     467             : /* compare sort items (single dimension) */
     468             : static int
     469      834090 : sort_item_compare(const void *a, const void *b, void *arg)
     470             : {
     471      834090 :     SortSupport ssup = (SortSupport) arg;
     472      834090 :     SortItem   *ia = (SortItem *) a;
     473      834090 :     SortItem   *ib = (SortItem *) b;
     474             : 
     475     1668180 :     return ApplySortComparator(ia->values[0], ia->isnull[0],
     476      834090 :                                ib->values[0], ib->isnull[0],
     477             :                                ssup);
     478             : }
     479             : 
     480             : /*
     481             :  * build_column_frequencies
     482             :  *      Compute frequencies of values in each column.
     483             :  *
     484             :  * This returns an array of SortItems for each attribute the MCV is built
     485             :  * on, with a frequency (number of occurrences) for each value. This is
     486             :  * then used to compute "base" frequency of MCV items.
     487             :  *
     488             :  * All the memory is allocated in a single chunk, so that a single pfree
     489             :  * is enough to release it. We do not allocate space for values/isnull
     490             :  * arrays in the SortItems, because we can simply point into the input
     491             :  * groups directly.
     492             :  */
     493             : static SortItem **
     494         180 : build_column_frequencies(SortItem *groups, int ngroups,
     495             :                          MultiSortSupport mss, int *ncounts)
     496             : {
     497             :     int         i,
     498             :                 dim;
     499             :     SortItem  **result;
     500             :     char       *ptr;
     501             : 
     502             :     Assert(groups);
     503             :     Assert(ncounts);
     504             : 
     505             :     /* allocate arrays for all columns as a single chunk */
     506         180 :     ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +
     507         180 :                  mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));
     508             : 
     509             :     /* initial array of pointers */
     510         180 :     result = (SortItem **) ptr;
     511         180 :     ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);
     512             : 
     513         690 :     for (dim = 0; dim < mss->ndims; dim++)
     514             :     {
     515         510 :         SortSupport ssup = &mss->ssup[dim];
     516             : 
     517             :         /* array of values for a single column */
     518         510 :         result[dim] = (SortItem *) ptr;
     519         510 :         ptr += MAXALIGN(sizeof(SortItem) * ngroups);
     520             : 
     521             :         /* extract data for the dimension */
     522      226968 :         for (i = 0; i < ngroups; i++)
     523             :         {
     524             :             /* point into the input groups */
     525      226458 :             result[dim][i].values = &groups[i].values[dim];
     526      226458 :             result[dim][i].isnull = &groups[i].isnull[dim];
     527      226458 :             result[dim][i].count = groups[i].count;
     528             :         }
     529             : 
     530             :         /* sort the values, deduplicate */
     531         510 :         qsort_interruptible(result[dim], ngroups, sizeof(SortItem),
     532             :                             sort_item_compare, ssup);
     533             : 
     534             :         /*
     535             :          * Identify distinct values, compute frequency (there might be
     536             :          * multiple MCV items containing this value, so we need to sum counts
     537             :          * from all of them.
     538             :          */
     539         510 :         ncounts[dim] = 1;
     540      226458 :         for (i = 1; i < ngroups; i++)
     541             :         {
     542      225948 :             if (sort_item_compare(&result[dim][i - 1], &result[dim][i], ssup) == 0)
     543             :             {
     544      136182 :                 result[dim][ncounts[dim] - 1].count += result[dim][i].count;
     545      136182 :                 continue;
     546             :             }
     547             : 
     548       89766 :             result[dim][ncounts[dim]] = result[dim][i];
     549             : 
     550       89766 :             ncounts[dim]++;
     551             :         }
     552             :     }
     553             : 
     554         180 :     return result;
     555             : }
     556             : 
     557             : /*
     558             :  * statext_mcv_load
     559             :  *      Load the MCV list for the indicated pg_statistic_ext_data tuple.
     560             :  */
     561             : MCVList *
     562         480 : statext_mcv_load(Oid mvoid, bool inh)
     563             : {
     564             :     MCVList    *result;
     565             :     bool        isnull;
     566             :     Datum       mcvlist;
     567         480 :     HeapTuple   htup = SearchSysCache2(STATEXTDATASTXOID,
     568             :                                        ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
     569             : 
     570         480 :     if (!HeapTupleIsValid(htup))
     571           0 :         elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
     572             : 
     573         480 :     mcvlist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
     574             :                               Anum_pg_statistic_ext_data_stxdmcv, &isnull);
     575             : 
     576         480 :     if (isnull)
     577           0 :         elog(ERROR,
     578             :              "requested statistics kind \"%c\" is not yet built for statistics object %u",
     579             :              STATS_EXT_DEPENDENCIES, mvoid);
     580             : 
     581         480 :     result = statext_mcv_deserialize(DatumGetByteaP(mcvlist));
     582             : 
     583         480 :     ReleaseSysCache(htup);
     584             : 
     585         480 :     return result;
     586             : }
     587             : 
     588             : 
     589             : /*
     590             :  * statext_mcv_serialize
     591             :  *      Serialize MCV list into a pg_mcv_list value.
     592             :  *
     593             :  * The MCV items may include values of various data types, and it's reasonable
     594             :  * to expect redundancy (values for a given attribute, repeated for multiple
     595             :  * MCV list items). So we deduplicate the values into arrays, and then replace
     596             :  * the values by indexes into those arrays.
     597             :  *
     598             :  * The overall structure of the serialized representation looks like this:
     599             :  *
     600             :  * +---------------+----------------+---------------------+-------+
     601             :  * | header fields | dimension info | deduplicated values | items |
     602             :  * +---------------+----------------+---------------------+-------+
     603             :  *
     604             :  * Where dimension info stores information about the type of the K-th
     605             :  * attribute (e.g. typlen, typbyval and length of deduplicated values).
     606             :  * Deduplicated values store deduplicated values for each attribute.  And
     607             :  * items store the actual MCV list items, with values replaced by indexes into
     608             :  * the arrays.
     609             :  *
     610             :  * When serializing the items, we use uint16 indexes. The number of MCV items
     611             :  * is limited by the statistics target (which is capped to 10k at the moment).
     612             :  * We might increase this to 65k and still fit into uint16, so there's a bit of
     613             :  * slack. Furthermore, this limit is on the number of distinct values per column,
     614             :  * and we usually have few of those (and various combinations of them for the
     615             :  * those MCV list). So uint16 seems fine for now.
     616             :  *
     617             :  * We don't really expect the serialization to save as much space as for
     618             :  * histograms, as we are not doing any bucket splits (which is the source
     619             :  * of high redundancy in histograms).
     620             :  *
     621             :  * TODO: Consider packing boolean flags (NULL) for each item into a single char
     622             :  * (or a longer type) instead of using an array of bool items.
     623             :  */
     624             : bytea *
     625         180 : statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
     626             : {
     627             :     int         i;
     628             :     int         dim;
     629         180 :     int         ndims = mcvlist->ndimensions;
     630             : 
     631             :     SortSupport ssup;
     632             :     DimensionInfo *info;
     633             : 
     634             :     Size        total_length;
     635             : 
     636             :     /* serialized items (indexes into arrays, etc.) */
     637             :     bytea      *raw;
     638             :     char       *ptr;
     639             :     char       *endptr PG_USED_FOR_ASSERTS_ONLY;
     640             : 
     641             :     /* values per dimension (and number of non-NULL values) */
     642         180 :     Datum     **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
     643         180 :     int        *counts = (int *) palloc0(sizeof(int) * ndims);
     644             : 
     645             :     /*
     646             :      * We'll include some rudimentary information about the attribute types
     647             :      * (length, by-val flag), so that we don't have to look them up while
     648             :      * deserializing the MCV list (we already have the type OID in the
     649             :      * header).  This is safe because when changing the type of the attribute
     650             :      * the statistics gets dropped automatically.  We need to store the info
     651             :      * about the arrays of deduplicated values anyway.
     652             :      */
     653         180 :     info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
     654             : 
     655             :     /* sort support data for all attributes included in the MCV list */
     656         180 :     ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
     657             : 
     658             :     /* collect and deduplicate values for each dimension (attribute) */
     659         690 :     for (dim = 0; dim < ndims; dim++)
     660             :     {
     661             :         int         ndistinct;
     662             :         TypeCacheEntry *typentry;
     663             : 
     664             :         /*
     665             :          * Lookup the LT operator (can't get it from stats extra_data, as we
     666             :          * don't know how to interpret that - scalar vs. array etc.).
     667             :          */
     668         510 :         typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
     669             : 
     670             :         /* copy important info about the data type (length, by-value) */
     671         510 :         info[dim].typlen = stats[dim]->attrtype->typlen;
     672         510 :         info[dim].typbyval = stats[dim]->attrtype->typbyval;
     673             : 
     674             :         /* allocate space for values in the attribute and collect them */
     675         510 :         values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
     676             : 
     677       24768 :         for (i = 0; i < mcvlist->nitems; i++)
     678             :         {
     679             :             /* skip NULL values - we don't need to deduplicate those */
     680       24258 :             if (mcvlist->items[i].isnull[dim])
     681          66 :                 continue;
     682             : 
     683             :             /* append the value at the end */
     684       24192 :             values[dim][counts[dim]] = mcvlist->items[i].values[dim];
     685       24192 :             counts[dim] += 1;
     686             :         }
     687             : 
     688             :         /* if there are just NULL values in this dimension, we're done */
     689         510 :         if (counts[dim] == 0)
     690           6 :             continue;
     691             : 
     692             :         /* sort and deduplicate the data */
     693         504 :         ssup[dim].ssup_cxt = CurrentMemoryContext;
     694         504 :         ssup[dim].ssup_collation = stats[dim]->attrcollid;
     695         504 :         ssup[dim].ssup_nulls_first = false;
     696             : 
     697         504 :         PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
     698             : 
     699         504 :         qsort_interruptible(values[dim], counts[dim], sizeof(Datum),
     700         504 :                             compare_scalars_simple, &ssup[dim]);
     701             : 
     702             :         /*
     703             :          * Walk through the array and eliminate duplicate values, but keep the
     704             :          * ordering (so that we can do a binary search later). We know there's
     705             :          * at least one item as (counts[dim] != 0), so we can skip the first
     706             :          * element.
     707             :          */
     708         504 :         ndistinct = 1;          /* number of distinct values */
     709       24192 :         for (i = 1; i < counts[dim]; i++)
     710             :         {
     711             :             /* expect sorted array */
     712             :             Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
     713             : 
     714             :             /* if the value is the same as the previous one, we can skip it */
     715       23688 :             if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
     716        9954 :                 continue;
     717             : 
     718       13734 :             values[dim][ndistinct] = values[dim][i];
     719       13734 :             ndistinct += 1;
     720             :         }
     721             : 
     722             :         /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
     723             :         Assert(ndistinct <= PG_UINT16_MAX);
     724             : 
     725             :         /*
     726             :          * Store additional info about the attribute - number of deduplicated
     727             :          * values, and also size of the serialized data. For fixed-length data
     728             :          * types this is trivial to compute, for varwidth types we need to
     729             :          * actually walk the array and sum the sizes.
     730             :          */
     731         504 :         info[dim].nvalues = ndistinct;
     732             : 
     733         504 :         if (info[dim].typbyval) /* by-value data types */
     734             :         {
     735         372 :             info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
     736             : 
     737             :             /*
     738             :              * We copy the data into the MCV item during deserialization, so
     739             :              * we don't need to allocate any extra space.
     740             :              */
     741         372 :             info[dim].nbytes_aligned = 0;
     742             :         }
     743         132 :         else if (info[dim].typlen > 0)   /* fixed-length by-ref */
     744             :         {
     745             :             /*
     746             :              * We don't care about alignment in the serialized data, so we
     747             :              * pack the data as much as possible. But we also track how much
     748             :              * data will be needed after deserialization, and in that case we
     749             :              * need to account for alignment of each item.
     750             :              *
     751             :              * Note: As the items are fixed-length, we could easily compute
     752             :              * this during deserialization, but we do it here anyway.
     753             :              */
     754          24 :             info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
     755          24 :             info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
     756             :         }
     757         108 :         else if (info[dim].typlen == -1)    /* varlena */
     758             :         {
     759         108 :             info[dim].nbytes = 0;
     760         108 :             info[dim].nbytes_aligned = 0;
     761        2874 :             for (i = 0; i < info[dim].nvalues; i++)
     762             :             {
     763             :                 Size        len;
     764             : 
     765             :                 /*
     766             :                  * For varlena values, we detoast the values and store the
     767             :                  * length and data separately. We don't bother with alignment
     768             :                  * here, which means that during deserialization we need to
     769             :                  * copy the fields and only access the copies.
     770             :                  */
     771        2766 :                 values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
     772             : 
     773             :                 /* serialized length (uint32 length + data) */
     774        2766 :                 len = VARSIZE_ANY_EXHDR(values[dim][i]);
     775        2766 :                 info[dim].nbytes += sizeof(uint32); /* length */
     776        2766 :                 info[dim].nbytes += len;    /* value (no header) */
     777             : 
     778             :                 /*
     779             :                  * During deserialization we'll build regular varlena values
     780             :                  * with full headers, and we need to align them properly.
     781             :                  */
     782        2766 :                 info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
     783             :             }
     784             :         }
     785           0 :         else if (info[dim].typlen == -2)    /* cstring */
     786             :         {
     787           0 :             info[dim].nbytes = 0;
     788           0 :             info[dim].nbytes_aligned = 0;
     789           0 :             for (i = 0; i < info[dim].nvalues; i++)
     790             :             {
     791             :                 Size        len;
     792             : 
     793             :                 /*
     794             :                  * cstring is handled similar to varlena - first we store the
     795             :                  * length as uint32 and then the data. We don't care about
     796             :                  * alignment, which means that during deserialization we need
     797             :                  * to copy the fields and only access the copies.
     798             :                  */
     799             : 
     800             :                 /* c-strings include terminator, so +1 byte */
     801           0 :                 len = strlen(DatumGetCString(values[dim][i])) + 1;
     802           0 :                 info[dim].nbytes += sizeof(uint32); /* length */
     803           0 :                 info[dim].nbytes += len;    /* value */
     804             : 
     805             :                 /* space needed for properly aligned deserialized copies */
     806           0 :                 info[dim].nbytes_aligned += MAXALIGN(len);
     807             :             }
     808             :         }
     809             : 
     810             :         /* we know (count>0) so there must be some data */
     811             :         Assert(info[dim].nbytes > 0);
     812             :     }
     813             : 
     814             :     /*
     815             :      * Now we can finally compute how much space we'll actually need for the
     816             :      * whole serialized MCV list (varlena header, MCV header, dimension info
     817             :      * for each attribute, deduplicated values and items).
     818             :      */
     819         180 :     total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
     820             :         + sizeof(AttrNumber)    /* ndimensions */
     821         180 :         + (ndims * sizeof(Oid));    /* attribute types */
     822             : 
     823             :     /* dimension info */
     824         180 :     total_length += ndims * sizeof(DimensionInfo);
     825             : 
     826             :     /* add space for the arrays of deduplicated values */
     827         690 :     for (i = 0; i < ndims; i++)
     828         510 :         total_length += info[i].nbytes;
     829             : 
     830             :     /*
     831             :      * And finally account for the items (those are fixed-length, thanks to
     832             :      * replacing values with uint16 indexes into the deduplicated arrays).
     833             :      */
     834         180 :     total_length += mcvlist->nitems * ITEM_SIZE(dim);
     835             : 
     836             :     /*
     837             :      * Allocate space for the whole serialized MCV list (we'll skip bytes, so
     838             :      * we set them to zero to make the result more compressible).
     839             :      */
     840         180 :     raw = (bytea *) palloc0(VARHDRSZ + total_length);
     841         180 :     SET_VARSIZE(raw, VARHDRSZ + total_length);
     842             : 
     843         180 :     ptr = VARDATA(raw);
     844         180 :     endptr = ptr + total_length;
     845             : 
     846             :     /* copy the MCV list header fields, one by one */
     847         180 :     memcpy(ptr, &mcvlist->magic, sizeof(uint32));
     848         180 :     ptr += sizeof(uint32);
     849             : 
     850         180 :     memcpy(ptr, &mcvlist->type, sizeof(uint32));
     851         180 :     ptr += sizeof(uint32);
     852             : 
     853         180 :     memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
     854         180 :     ptr += sizeof(uint32);
     855             : 
     856         180 :     memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
     857         180 :     ptr += sizeof(AttrNumber);
     858             : 
     859         180 :     memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
     860         180 :     ptr += (sizeof(Oid) * ndims);
     861             : 
     862             :     /* store information about the attributes (data amounts, ...) */
     863         180 :     memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
     864         180 :     ptr += sizeof(DimensionInfo) * ndims;
     865             : 
     866             :     /* Copy the deduplicated values for all attributes to the output. */
     867         690 :     for (dim = 0; dim < ndims; dim++)
     868             :     {
     869             :         /* remember the starting point for Asserts later */
     870         510 :         char       *start PG_USED_FOR_ASSERTS_ONLY = ptr;
     871             : 
     872       14748 :         for (i = 0; i < info[dim].nvalues; i++)
     873             :         {
     874       14238 :             Datum       value = values[dim][i];
     875             : 
     876       14238 :             if (info[dim].typbyval) /* passed by value */
     877             :             {
     878             :                 Datum       tmp;
     879             : 
     880             :                 /*
     881             :                  * For byval types, we need to copy just the significant bytes
     882             :                  * - we can't use memcpy directly, as that assumes
     883             :                  * little-endian behavior.  store_att_byval does almost what
     884             :                  * we need, but it requires a properly aligned buffer - the
     885             :                  * output buffer does not guarantee that. So we simply use a
     886             :                  * local Datum variable (which guarantees proper alignment),
     887             :                  * and then copy the value from it.
     888             :                  */
     889       10362 :                 store_att_byval(&tmp, value, info[dim].typlen);
     890             : 
     891       10362 :                 memcpy(ptr, &tmp, info[dim].typlen);
     892       10362 :                 ptr += info[dim].typlen;
     893             :             }
     894        3876 :             else if (info[dim].typlen > 0)   /* passed by reference */
     895             :             {
     896             :                 /* no special alignment needed, treated as char array */
     897        1110 :                 memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
     898        1110 :                 ptr += info[dim].typlen;
     899             :             }
     900        2766 :             else if (info[dim].typlen == -1)    /* varlena */
     901             :             {
     902        2766 :                 uint32      len = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
     903             : 
     904             :                 /* copy the length */
     905        2766 :                 memcpy(ptr, &len, sizeof(uint32));
     906        2766 :                 ptr += sizeof(uint32);
     907             : 
     908             :                 /* data from the varlena value (without the header) */
     909        2766 :                 memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
     910        2766 :                 ptr += len;
     911             :             }
     912           0 :             else if (info[dim].typlen == -2)    /* cstring */
     913             :             {
     914           0 :                 uint32      len = (uint32) strlen(DatumGetCString(value)) + 1;
     915             : 
     916             :                 /* copy the length */
     917           0 :                 memcpy(ptr, &len, sizeof(uint32));
     918           0 :                 ptr += sizeof(uint32);
     919             : 
     920             :                 /* value */
     921           0 :                 memcpy(ptr, DatumGetCString(value), len);
     922           0 :                 ptr += len;
     923             :             }
     924             : 
     925             :             /* no underflows or overflows */
     926             :             Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
     927             :         }
     928             : 
     929             :         /* we should get exactly nbytes of data for this dimension */
     930             :         Assert((ptr - start) == info[dim].nbytes);
     931             :     }
     932             : 
     933             :     /* Serialize the items, with uint16 indexes instead of the values. */
     934        8772 :     for (i = 0; i < mcvlist->nitems; i++)
     935             :     {
     936        8592 :         MCVItem    *mcvitem = &mcvlist->items[i];
     937             : 
     938             :         /* don't write beyond the allocated space */
     939             :         Assert(ptr <= (endptr - ITEM_SIZE(dim)));
     940             : 
     941             :         /* copy NULL and frequency flags into the serialized MCV */
     942        8592 :         memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
     943        8592 :         ptr += sizeof(bool) * ndims;
     944             : 
     945        8592 :         memcpy(ptr, &mcvitem->frequency, sizeof(double));
     946        8592 :         ptr += sizeof(double);
     947             : 
     948        8592 :         memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
     949        8592 :         ptr += sizeof(double);
     950             : 
     951             :         /* store the indexes last */
     952       32850 :         for (dim = 0; dim < ndims; dim++)
     953             :         {
     954       24258 :             uint16      index = 0;
     955             :             Datum      *value;
     956             : 
     957             :             /* do the lookup only for non-NULL values */
     958       24258 :             if (!mcvitem->isnull[dim])
     959             :             {
     960       24192 :                 value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
     961       24192 :                                               info[dim].nvalues, sizeof(Datum),
     962       24192 :                                               compare_scalars_simple, &ssup[dim]);
     963             : 
     964             :                 Assert(value != NULL);  /* serialization or deduplication
     965             :                                          * error */
     966             : 
     967             :                 /* compute index within the deduplicated array */
     968       24192 :                 index = (uint16) (value - values[dim]);
     969             : 
     970             :                 /* check the index is within expected bounds */
     971             :                 Assert(index < info[dim].nvalues);
     972             :             }
     973             : 
     974             :             /* copy the index into the serialized MCV */
     975       24258 :             memcpy(ptr, &index, sizeof(uint16));
     976       24258 :             ptr += sizeof(uint16);
     977             :         }
     978             : 
     979             :         /* make sure we don't overflow the allocated value */
     980             :         Assert(ptr <= endptr);
     981             :     }
     982             : 
     983             :     /* at this point we expect to match the total_length exactly */
     984             :     Assert(ptr == endptr);
     985             : 
     986         180 :     pfree(values);
     987         180 :     pfree(counts);
     988             : 
     989         180 :     return raw;
     990             : }
     991             : 
     992             : /*
     993             :  * statext_mcv_deserialize
     994             :  *      Reads serialized MCV list into MCVList structure.
     995             :  *
     996             :  * All the memory needed by the MCV list is allocated as a single chunk, so
     997             :  * it's possible to simply pfree() it at once.
     998             :  */
     999             : MCVList *
    1000         492 : statext_mcv_deserialize(bytea *data)
    1001             : {
    1002             :     int         dim,
    1003             :                 i;
    1004             :     Size        expected_size;
    1005             :     MCVList    *mcvlist;
    1006             :     char       *raw;
    1007             :     char       *ptr;
    1008             :     char       *endptr PG_USED_FOR_ASSERTS_ONLY;
    1009             : 
    1010             :     int         ndims,
    1011             :                 nitems;
    1012         492 :     DimensionInfo *info = NULL;
    1013             : 
    1014             :     /* local allocation buffer (used only for deserialization) */
    1015         492 :     Datum     **map = NULL;
    1016             : 
    1017             :     /* MCV list */
    1018             :     Size        mcvlen;
    1019             : 
    1020             :     /* buffer used for the result */
    1021             :     Size        datalen;
    1022             :     char       *dataptr;
    1023             :     char       *valuesptr;
    1024             :     char       *isnullptr;
    1025             : 
    1026         492 :     if (data == NULL)
    1027           0 :         return NULL;
    1028             : 
    1029             :     /*
    1030             :      * We can't possibly deserialize a MCV list if there's not even a complete
    1031             :      * header. We need an explicit formula here, because we serialize the
    1032             :      * header fields one by one, so we need to ignore struct alignment.
    1033             :      */
    1034         492 :     if (VARSIZE_ANY(data) < MinSizeOfMCVList)
    1035           0 :         elog(ERROR, "invalid MCV size %zu (expected at least %zu)",
    1036             :              VARSIZE_ANY(data), MinSizeOfMCVList);
    1037             : 
    1038             :     /* read the MCV list header */
    1039         492 :     mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
    1040             : 
    1041             :     /* pointer to the data part (skip the varlena header) */
    1042         492 :     raw = (char *) data;
    1043         492 :     ptr = VARDATA_ANY(raw);
    1044         492 :     endptr = (char *) raw + VARSIZE_ANY(data);
    1045             : 
    1046             :     /* get the header and perform further sanity checks */
    1047         492 :     memcpy(&mcvlist->magic, ptr, sizeof(uint32));
    1048         492 :     ptr += sizeof(uint32);
    1049             : 
    1050         492 :     memcpy(&mcvlist->type, ptr, sizeof(uint32));
    1051         492 :     ptr += sizeof(uint32);
    1052             : 
    1053         492 :     memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
    1054         492 :     ptr += sizeof(uint32);
    1055             : 
    1056         492 :     memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
    1057         492 :     ptr += sizeof(AttrNumber);
    1058             : 
    1059         492 :     if (mcvlist->magic != STATS_MCV_MAGIC)
    1060           0 :         elog(ERROR, "invalid MCV magic %u (expected %u)",
    1061             :              mcvlist->magic, STATS_MCV_MAGIC);
    1062             : 
    1063         492 :     if (mcvlist->type != STATS_MCV_TYPE_BASIC)
    1064           0 :         elog(ERROR, "invalid MCV type %u (expected %u)",
    1065             :              mcvlist->type, STATS_MCV_TYPE_BASIC);
    1066             : 
    1067         492 :     if (mcvlist->ndimensions == 0)
    1068           0 :         elog(ERROR, "invalid zero-length dimension array in MCVList");
    1069         492 :     else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
    1070         492 :              (mcvlist->ndimensions < 0))
    1071           0 :         elog(ERROR, "invalid length (%d) dimension array in MCVList",
    1072             :              mcvlist->ndimensions);
    1073             : 
    1074         492 :     if (mcvlist->nitems == 0)
    1075           0 :         elog(ERROR, "invalid zero-length item array in MCVList");
    1076         492 :     else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
    1077           0 :         elog(ERROR, "invalid length (%u) item array in MCVList",
    1078             :              mcvlist->nitems);
    1079             : 
    1080         492 :     nitems = mcvlist->nitems;
    1081         492 :     ndims = mcvlist->ndimensions;
    1082             : 
    1083             :     /*
    1084             :      * Check amount of data including DimensionInfo for all dimensions and
    1085             :      * also the serialized items (including uint16 indexes). Also, walk
    1086             :      * through the dimension information and add it to the sum.
    1087             :      */
    1088         492 :     expected_size = SizeOfMCVList(ndims, nitems);
    1089             : 
    1090             :     /*
    1091             :      * Check that we have at least the dimension and info records, along with
    1092             :      * the items. We don't know the size of the serialized values yet. We need
    1093             :      * to do this check first, before accessing the dimension info.
    1094             :      */
    1095         492 :     if (VARSIZE_ANY(data) < expected_size)
    1096           0 :         elog(ERROR, "invalid MCV size %zu (expected %zu)",
    1097             :              VARSIZE_ANY(data), expected_size);
    1098             : 
    1099             :     /* Now copy the array of type Oids. */
    1100         492 :     memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
    1101         492 :     ptr += (sizeof(Oid) * ndims);
    1102             : 
    1103             :     /* Now it's safe to access the dimension info. */
    1104         492 :     info = palloc(ndims * sizeof(DimensionInfo));
    1105             : 
    1106         492 :     memcpy(info, ptr, ndims * sizeof(DimensionInfo));
    1107         492 :     ptr += (ndims * sizeof(DimensionInfo));
    1108             : 
    1109             :     /* account for the value arrays */
    1110        2118 :     for (dim = 0; dim < ndims; dim++)
    1111             :     {
    1112             :         /*
    1113             :          * XXX I wonder if we can/should rely on asserts here. Maybe those
    1114             :          * checks should be done every time?
    1115             :          */
    1116             :         Assert(info[dim].nvalues >= 0);
    1117             :         Assert(info[dim].nbytes >= 0);
    1118             : 
    1119        1626 :         expected_size += info[dim].nbytes;
    1120             :     }
    1121             : 
    1122             :     /*
    1123             :      * Now we know the total expected MCV size, including all the pieces
    1124             :      * (header, dimension info. items and deduplicated data). So do the final
    1125             :      * check on size.
    1126             :      */
    1127         492 :     if (VARSIZE_ANY(data) != expected_size)
    1128           0 :         elog(ERROR, "invalid MCV size %zu (expected %zu)",
    1129             :              VARSIZE_ANY(data), expected_size);
    1130             : 
    1131             :     /*
    1132             :      * We need an array of Datum values for each dimension, so that we can
    1133             :      * easily translate the uint16 indexes later. We also need a top-level
    1134             :      * array of pointers to those per-dimension arrays.
    1135             :      *
    1136             :      * While allocating the arrays for dimensions, compute how much space we
    1137             :      * need for a copy of the by-ref data, as we can't simply point to the
    1138             :      * original values (it might go away).
    1139             :      */
    1140         492 :     datalen = 0;                /* space for by-ref data */
    1141         492 :     map = (Datum **) palloc(ndims * sizeof(Datum *));
    1142             : 
    1143        2118 :     for (dim = 0; dim < ndims; dim++)
    1144             :     {
    1145        1626 :         map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);
    1146             : 
    1147             :         /* space needed for a copy of data for by-ref types */
    1148        1626 :         datalen += info[dim].nbytes_aligned;
    1149             :     }
    1150             : 
    1151             :     /*
    1152             :      * Now resize the MCV list so that the allocation includes all the data.
    1153             :      *
    1154             :      * Allocate space for a copy of the data, as we can't simply reference the
    1155             :      * serialized data - it's not aligned properly, and it may disappear while
    1156             :      * we're still using the MCV list, e.g. due to catcache release.
    1157             :      *
    1158             :      * We do care about alignment here, because we will allocate all the
    1159             :      * pieces at once, but then use pointers to different parts.
    1160             :      */
    1161         492 :     mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
    1162             : 
    1163             :     /* arrays of values and isnull flags for all MCV items */
    1164         492 :     mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
    1165         492 :     mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
    1166             : 
    1167             :     /* we don't quite need to align this, but it makes some asserts easier */
    1168         492 :     mcvlen += MAXALIGN(datalen);
    1169             : 
    1170             :     /* now resize the deserialized MCV list, and compute pointers to parts */
    1171         492 :     mcvlist = repalloc(mcvlist, mcvlen);
    1172             : 
    1173             :     /* pointer to the beginning of values/isnull arrays */
    1174         492 :     valuesptr = (char *) mcvlist
    1175         492 :         + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
    1176             : 
    1177         492 :     isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
    1178             : 
    1179         492 :     dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
    1180             : 
    1181             :     /*
    1182             :      * Build mapping (index => value) for translating the serialized data into
    1183             :      * the in-memory representation.
    1184             :      */
    1185        2118 :     for (dim = 0; dim < ndims; dim++)
    1186             :     {
    1187             :         /* remember start position in the input array */
    1188        1626 :         char       *start PG_USED_FOR_ASSERTS_ONLY = ptr;
    1189             : 
    1190        1626 :         if (info[dim].typbyval)
    1191             :         {
    1192             :             /* for by-val types we simply copy data into the mapping */
    1193       43926 :             for (i = 0; i < info[dim].nvalues; i++)
    1194             :             {
    1195       42858 :                 Datum       v = 0;
    1196             : 
    1197       42858 :                 memcpy(&v, ptr, info[dim].typlen);
    1198       42858 :                 ptr += info[dim].typlen;
    1199             : 
    1200       42858 :                 map[dim][i] = fetch_att(&v, true, info[dim].typlen);
    1201             : 
    1202             :                 /* no under/overflow of input array */
    1203             :                 Assert(ptr <= (start + info[dim].nbytes));
    1204             :             }
    1205             :         }
    1206             :         else
    1207             :         {
    1208             :             /* for by-ref types we need to also make a copy of the data */
    1209             : 
    1210             :             /* passed by reference, but fixed length (name, tid, ...) */
    1211         558 :             if (info[dim].typlen > 0)
    1212             :             {
    1213        2202 :                 for (i = 0; i < info[dim].nvalues; i++)
    1214             :                 {
    1215        2160 :                     memcpy(dataptr, ptr, info[dim].typlen);
    1216        2160 :                     ptr += info[dim].typlen;
    1217             : 
    1218             :                     /* just point into the array */
    1219        2160 :                     map[dim][i] = PointerGetDatum(dataptr);
    1220        2160 :                     dataptr += MAXALIGN(info[dim].typlen);
    1221             :                 }
    1222             :             }
    1223         516 :             else if (info[dim].typlen == -1)
    1224             :             {
    1225             :                 /* varlena */
    1226       14988 :                 for (i = 0; i < info[dim].nvalues; i++)
    1227             :                 {
    1228             :                     uint32      len;
    1229             : 
    1230             :                     /* read the uint32 length */
    1231       14472 :                     memcpy(&len, ptr, sizeof(uint32));
    1232       14472 :                     ptr += sizeof(uint32);
    1233             : 
    1234             :                     /* the length is data-only */
    1235       14472 :                     SET_VARSIZE(dataptr, len + VARHDRSZ);
    1236       14472 :                     memcpy(VARDATA(dataptr), ptr, len);
    1237       14472 :                     ptr += len;
    1238             : 
    1239             :                     /* just point into the array */
    1240       14472 :                     map[dim][i] = PointerGetDatum(dataptr);
    1241             : 
    1242             :                     /* skip to place of the next deserialized value */
    1243       14472 :                     dataptr += MAXALIGN(len + VARHDRSZ);
    1244             :                 }
    1245             :             }
    1246           0 :             else if (info[dim].typlen == -2)
    1247             :             {
    1248             :                 /* cstring */
    1249           0 :                 for (i = 0; i < info[dim].nvalues; i++)
    1250             :                 {
    1251             :                     uint32      len;
    1252             : 
    1253           0 :                     memcpy(&len, ptr, sizeof(uint32));
    1254           0 :                     ptr += sizeof(uint32);
    1255             : 
    1256           0 :                     memcpy(dataptr, ptr, len);
    1257           0 :                     ptr += len;
    1258             : 
    1259             :                     /* just point into the array */
    1260           0 :                     map[dim][i] = PointerGetDatum(dataptr);
    1261           0 :                     dataptr += MAXALIGN(len);
    1262             :                 }
    1263             :             }
    1264             : 
    1265             :             /* no under/overflow of input array */
    1266             :             Assert(ptr <= (start + info[dim].nbytes));
    1267             : 
    1268             :             /* no overflow of the output mcv value */
    1269             :             Assert(dataptr <= ((char *) mcvlist + mcvlen));
    1270             :         }
    1271             : 
    1272             :         /* check we consumed input data for this dimension exactly */
    1273             :         Assert(ptr == (start + info[dim].nbytes));
    1274             :     }
    1275             : 
    1276             :     /* we should have also filled the MCV list exactly */
    1277             :     Assert(dataptr == ((char *) mcvlist + mcvlen));
    1278             : 
    1279             :     /* deserialize the MCV items and translate the indexes to Datums */
    1280       30438 :     for (i = 0; i < nitems; i++)
    1281             :     {
    1282       29946 :         MCVItem    *item = &mcvlist->items[i];
    1283             : 
    1284       29946 :         item->values = (Datum *) valuesptr;
    1285       29946 :         valuesptr += MAXALIGN(sizeof(Datum) * ndims);
    1286             : 
    1287       29946 :         item->isnull = (bool *) isnullptr;
    1288       29946 :         isnullptr += MAXALIGN(sizeof(bool) * ndims);
    1289             : 
    1290       29946 :         memcpy(item->isnull, ptr, sizeof(bool) * ndims);
    1291       29946 :         ptr += sizeof(bool) * ndims;
    1292             : 
    1293       29946 :         memcpy(&item->frequency, ptr, sizeof(double));
    1294       29946 :         ptr += sizeof(double);
    1295             : 
    1296       29946 :         memcpy(&item->base_frequency, ptr, sizeof(double));
    1297       29946 :         ptr += sizeof(double);
    1298             : 
    1299             :         /* finally translate the indexes (for non-NULL only) */
    1300      133788 :         for (dim = 0; dim < ndims; dim++)
    1301             :         {
    1302             :             uint16      index;
    1303             : 
    1304      103842 :             memcpy(&index, ptr, sizeof(uint16));
    1305      103842 :             ptr += sizeof(uint16);
    1306             : 
    1307      103842 :             if (item->isnull[dim])
    1308         306 :                 continue;
    1309             : 
    1310      103536 :             item->values[dim] = map[dim][index];
    1311             :         }
    1312             : 
    1313             :         /* check we're not overflowing the input */
    1314             :         Assert(ptr <= endptr);
    1315             :     }
    1316             : 
    1317             :     /* check that we processed all the data */
    1318             :     Assert(ptr == endptr);
    1319             : 
    1320             :     /* release the buffers used for mapping */
    1321        2118 :     for (dim = 0; dim < ndims; dim++)
    1322        1626 :         pfree(map[dim]);
    1323             : 
    1324         492 :     pfree(map);
    1325             : 
    1326         492 :     return mcvlist;
    1327             : }
    1328             : 
    1329             : /*
    1330             :  * SRF with details about buckets of a histogram:
    1331             :  *
    1332             :  * - item ID (0...nitems)
    1333             :  * - values (string array)
    1334             :  * - nulls only (boolean array)
    1335             :  * - frequency (double precision)
    1336             :  * - base_frequency (double precision)
    1337             :  *
    1338             :  * The input is the OID of the statistics, and there are no rows returned if
    1339             :  * the statistics contains no histogram.
    1340             :  */
    1341             : Datum
    1342          30 : pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)
    1343             : {
    1344             :     FuncCallContext *funcctx;
    1345             : 
    1346             :     /* stuff done only on the first call of the function */
    1347          30 :     if (SRF_IS_FIRSTCALL())
    1348             :     {
    1349             :         MemoryContext oldcontext;
    1350             :         MCVList    *mcvlist;
    1351             :         TupleDesc   tupdesc;
    1352             : 
    1353             :         /* create a function context for cross-call persistence */
    1354          12 :         funcctx = SRF_FIRSTCALL_INIT();
    1355             : 
    1356             :         /* switch to memory context appropriate for multiple function calls */
    1357          12 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
    1358             : 
    1359          12 :         mcvlist = statext_mcv_deserialize(PG_GETARG_BYTEA_P(0));
    1360             : 
    1361          12 :         funcctx->user_fctx = mcvlist;
    1362             : 
    1363             :         /* total number of tuples to be returned */
    1364          12 :         funcctx->max_calls = 0;
    1365          12 :         if (funcctx->user_fctx != NULL)
    1366          12 :             funcctx->max_calls = mcvlist->nitems;
    1367             : 
    1368             :         /* Build a tuple descriptor for our result type */
    1369          12 :         if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
    1370           0 :             ereport(ERROR,
    1371             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1372             :                      errmsg("function returning record called in context "
    1373             :                             "that cannot accept type record")));
    1374          12 :         tupdesc = BlessTupleDesc(tupdesc);
    1375             : 
    1376             :         /*
    1377             :          * generate attribute metadata needed later to produce tuples from raw
    1378             :          * C strings
    1379             :          */
    1380          12 :         funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
    1381             : 
    1382          12 :         MemoryContextSwitchTo(oldcontext);
    1383             :     }
    1384             : 
    1385             :     /* stuff done on every call of the function */
    1386          30 :     funcctx = SRF_PERCALL_SETUP();
    1387             : 
    1388          30 :     if (funcctx->call_cntr < funcctx->max_calls)   /* do when there is more
    1389             :                                                      * left to send */
    1390             :     {
    1391             :         Datum       values[5];
    1392             :         bool        nulls[5];
    1393             :         HeapTuple   tuple;
    1394             :         Datum       result;
    1395          18 :         ArrayBuildState *astate_values = NULL;
    1396          18 :         ArrayBuildState *astate_nulls = NULL;
    1397             : 
    1398             :         int         i;
    1399             :         MCVList    *mcvlist;
    1400             :         MCVItem    *item;
    1401             : 
    1402          18 :         mcvlist = (MCVList *) funcctx->user_fctx;
    1403             : 
    1404             :         Assert(funcctx->call_cntr < mcvlist->nitems);
    1405             : 
    1406          18 :         item = &mcvlist->items[funcctx->call_cntr];
    1407             : 
    1408          72 :         for (i = 0; i < mcvlist->ndimensions; i++)
    1409             :         {
    1410             : 
    1411          54 :             astate_nulls = accumArrayResult(astate_nulls,
    1412          54 :                                             BoolGetDatum(item->isnull[i]),
    1413             :                                             false,
    1414             :                                             BOOLOID,
    1415             :                                             CurrentMemoryContext);
    1416             : 
    1417          54 :             if (!item->isnull[i])
    1418             :             {
    1419             :                 bool        isvarlena;
    1420             :                 Oid         outfunc;
    1421             :                 FmgrInfo    fmgrinfo;
    1422             :                 Datum       val;
    1423             :                 text       *txt;
    1424             : 
    1425             :                 /* lookup output func for the type */
    1426          30 :                 getTypeOutputInfo(mcvlist->types[i], &outfunc, &isvarlena);
    1427          30 :                 fmgr_info(outfunc, &fmgrinfo);
    1428             : 
    1429          30 :                 val = FunctionCall1(&fmgrinfo, item->values[i]);
    1430          30 :                 txt = cstring_to_text(DatumGetPointer(val));
    1431             : 
    1432          30 :                 astate_values = accumArrayResult(astate_values,
    1433             :                                                  PointerGetDatum(txt),
    1434             :                                                  false,
    1435             :                                                  TEXTOID,
    1436             :                                                  CurrentMemoryContext);
    1437             :             }
    1438             :             else
    1439          24 :                 astate_values = accumArrayResult(astate_values,
    1440             :                                                  (Datum) 0,
    1441             :                                                  true,
    1442             :                                                  TEXTOID,
    1443             :                                                  CurrentMemoryContext);
    1444             :         }
    1445             : 
    1446          18 :         values[0] = Int32GetDatum(funcctx->call_cntr);
    1447          18 :         values[1] = makeArrayResult(astate_values, CurrentMemoryContext);
    1448          18 :         values[2] = makeArrayResult(astate_nulls, CurrentMemoryContext);
    1449          18 :         values[3] = Float8GetDatum(item->frequency);
    1450          18 :         values[4] = Float8GetDatum(item->base_frequency);
    1451             : 
    1452             :         /* no NULLs in the tuple */
    1453          18 :         memset(nulls, 0, sizeof(nulls));
    1454             : 
    1455             :         /* build a tuple */
    1456          18 :         tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls);
    1457             : 
    1458             :         /* make the tuple into a datum */
    1459          18 :         result = HeapTupleGetDatum(tuple);
    1460             : 
    1461          18 :         SRF_RETURN_NEXT(funcctx, result);
    1462             :     }
    1463             :     else                        /* do when there is no more left */
    1464             :     {
    1465          12 :         SRF_RETURN_DONE(funcctx);
    1466             :     }
    1467             : }
    1468             : 
    1469             : /*
    1470             :  * pg_mcv_list_in       - input routine for type pg_mcv_list.
    1471             :  *
    1472             :  * pg_mcv_list is real enough to be a table column, but it has no operations
    1473             :  * of its own, and disallows input too
    1474             :  */
    1475             : Datum
    1476           0 : pg_mcv_list_in(PG_FUNCTION_ARGS)
    1477             : {
    1478             :     /*
    1479             :      * pg_mcv_list stores the data in binary form and parsing text input is
    1480             :      * not needed, so disallow this.
    1481             :      */
    1482           0 :     ereport(ERROR,
    1483             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1484             :              errmsg("cannot accept a value of type %s", "pg_mcv_list")));
    1485             : 
    1486             :     PG_RETURN_VOID();           /* keep compiler quiet */
    1487             : }
    1488             : 
    1489             : 
    1490             : /*
    1491             :  * pg_mcv_list_out      - output routine for type pg_mcv_list.
    1492             :  *
    1493             :  * MCV lists are serialized into a bytea value, so we simply call byteaout()
    1494             :  * to serialize the value into text. But it'd be nice to serialize that into
    1495             :  * a meaningful representation (e.g. for inspection by people).
    1496             :  *
    1497             :  * XXX This should probably return something meaningful, similar to what
    1498             :  * pg_dependencies_out does. Not sure how to deal with the deduplicated
    1499             :  * values, though - do we want to expand that or not?
    1500             :  */
    1501             : Datum
    1502           0 : pg_mcv_list_out(PG_FUNCTION_ARGS)
    1503             : {
    1504           0 :     return byteaout(fcinfo);
    1505             : }
    1506             : 
    1507             : /*
    1508             :  * pg_mcv_list_recv     - binary input routine for type pg_mcv_list.
    1509             :  */
    1510             : Datum
    1511           0 : pg_mcv_list_recv(PG_FUNCTION_ARGS)
    1512             : {
    1513           0 :     ereport(ERROR,
    1514             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1515             :              errmsg("cannot accept a value of type %s", "pg_mcv_list")));
    1516             : 
    1517             :     PG_RETURN_VOID();           /* keep compiler quiet */
    1518             : }
    1519             : 
    1520             : /*
    1521             :  * pg_mcv_list_send     - binary output routine for type pg_mcv_list.
    1522             :  *
    1523             :  * MCV lists are serialized in a bytea value (although the type is named
    1524             :  * differently), so let's just send that.
    1525             :  */
    1526             : Datum
    1527           0 : pg_mcv_list_send(PG_FUNCTION_ARGS)
    1528             : {
    1529           0 :     return byteasend(fcinfo);
    1530             : }
    1531             : 
    1532             : /*
    1533             :  * match the attribute/expression to a dimension of the statistic
    1534             :  *
    1535             :  * Returns the zero-based index of the matching statistics dimension.
    1536             :  * Optionally determines the collation.
    1537             :  */
    1538             : static int
    1539        1152 : mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
    1540             : {
    1541             :     int         idx;
    1542             : 
    1543        1152 :     if (IsA(expr, Var))
    1544             :     {
    1545             :         /* simple Var, so just lookup using varattno */
    1546         906 :         Var        *var = (Var *) expr;
    1547             : 
    1548         906 :         if (collid)
    1549         840 :             *collid = var->varcollid;
    1550             : 
    1551         906 :         idx = bms_member_index(keys, var->varattno);
    1552             : 
    1553         906 :         if (idx < 0)
    1554           0 :             elog(ERROR, "variable not found in statistics object");
    1555             :     }
    1556             :     else
    1557             :     {
    1558             :         /* expression - lookup in stats expressions */
    1559             :         ListCell   *lc;
    1560             : 
    1561         246 :         if (collid)
    1562         240 :             *collid = exprCollation(expr);
    1563             : 
    1564             :         /* expressions are stored after the simple columns */
    1565         246 :         idx = bms_num_members(keys);
    1566         456 :         foreach(lc, exprs)
    1567             :         {
    1568         456 :             Node       *stat_expr = (Node *) lfirst(lc);
    1569             : 
    1570         456 :             if (equal(expr, stat_expr))
    1571         246 :                 break;
    1572             : 
    1573         210 :             idx++;
    1574             :         }
    1575             : 
    1576         246 :         if (lc == NULL)
    1577           0 :             elog(ERROR, "expression not found in statistics object");
    1578             :     }
    1579             : 
    1580        1152 :     return idx;
    1581             : }
    1582             : 
    1583             : /*
    1584             :  * mcv_get_match_bitmap
    1585             :  *  Evaluate clauses using the MCV list, and update the match bitmap.
    1586             :  *
    1587             :  * A match bitmap keeps match/mismatch status for each MCV item, and we
    1588             :  * update it based on additional clauses. We also use it to skip items
    1589             :  * that can't possibly match (e.g. item marked as "mismatch" can't change
    1590             :  * to "match" when evaluating AND clause list).
    1591             :  *
    1592             :  * The function also returns a flag indicating whether there was an
    1593             :  * equality condition for all attributes, the minimum frequency in the MCV
    1594             :  * list, and a total MCV frequency (sum of frequencies for all items).
    1595             :  *
    1596             :  * XXX Currently the match bitmap uses a bool for each MCV item, which is
    1597             :  * somewhat wasteful as we could do with just a single bit, thus reducing
    1598             :  * the size to ~1/8. It would also allow us to combine bitmaps simply using
    1599             :  * & and |, which should be faster than min/max. The bitmaps are fairly
    1600             :  * small, though (thanks to the cap on the MCV list size).
    1601             :  */
    1602             : static bool *
    1603         714 : mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
    1604             :                      Bitmapset *keys, List *exprs,
    1605             :                      MCVList *mcvlist, bool is_or)
    1606             : {
    1607             :     ListCell   *l;
    1608             :     bool       *matches;
    1609             : 
    1610             :     /* The bitmap may be partially built. */
    1611             :     Assert(clauses != NIL);
    1612             :     Assert(mcvlist != NULL);
    1613             :     Assert(mcvlist->nitems > 0);
    1614             :     Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS);
    1615             : 
    1616         714 :     matches = palloc(sizeof(bool) * mcvlist->nitems);
    1617         714 :     memset(matches, !is_or, sizeof(bool) * mcvlist->nitems);
    1618             : 
    1619             :     /*
    1620             :      * Loop through the list of clauses, and for each of them evaluate all the
    1621             :      * MCV items not yet eliminated by the preceding clauses.
    1622             :      */
    1623        2034 :     foreach(l, clauses)
    1624             :     {
    1625        1320 :         Node       *clause = (Node *) lfirst(l);
    1626             : 
    1627             :         /* if it's a RestrictInfo, then extract the clause */
    1628        1320 :         if (IsA(clause, RestrictInfo))
    1629        1218 :             clause = (Node *) ((RestrictInfo *) clause)->clause;
    1630             : 
    1631             :         /*
    1632             :          * Handle the various types of clauses - OpClause, NullTest and
    1633             :          * AND/OR/NOT
    1634             :          */
    1635        1320 :         if (is_opclause(clause))
    1636             :         {
    1637         876 :             OpExpr     *expr = (OpExpr *) clause;
    1638             :             FmgrInfo    opproc;
    1639             : 
    1640             :             /* valid only after examine_opclause_args returns true */
    1641             :             Node       *clause_expr;
    1642             :             Const      *cst;
    1643             :             bool        expronleft;
    1644             :             int         idx;
    1645             :             Oid         collid;
    1646             : 
    1647         876 :             fmgr_info(get_opcode(expr->opno), &opproc);
    1648             : 
    1649             :             /* extract the var/expr and const from the expression */
    1650         876 :             if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
    1651           0 :                 elog(ERROR, "incompatible clause");
    1652             : 
    1653             :             /* match the attribute/expression to a dimension of the statistic */
    1654         876 :             idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
    1655             : 
    1656             :             /*
    1657             :              * Walk through the MCV items and evaluate the current clause. We
    1658             :              * can skip items that were already ruled out, and terminate if
    1659             :              * there are no remaining MCV items that might possibly match.
    1660             :              */
    1661       58632 :             for (int i = 0; i < mcvlist->nitems; i++)
    1662             :             {
    1663       57756 :                 bool        match = true;
    1664       57756 :                 MCVItem    *item = &mcvlist->items[i];
    1665             : 
    1666             :                 Assert(idx >= 0);
    1667             : 
    1668             :                 /*
    1669             :                  * When the MCV item or the Const value is NULL we can treat
    1670             :                  * this as a mismatch. We must not call the operator because
    1671             :                  * of strictness.
    1672             :                  */
    1673       57756 :                 if (item->isnull[idx] || cst->constisnull)
    1674             :                 {
    1675          48 :                     matches[i] = RESULT_MERGE(matches[i], is_or, false);
    1676          48 :                     continue;
    1677             :                 }
    1678             : 
    1679             :                 /*
    1680             :                  * Skip MCV items that can't change result in the bitmap. Once
    1681             :                  * the value gets false for AND-lists, or true for OR-lists,
    1682             :                  * we don't need to look at more clauses.
    1683             :                  */
    1684       57708 :                 if (RESULT_IS_FINAL(matches[i], is_or))
    1685       26964 :                     continue;
    1686             : 
    1687             :                 /*
    1688             :                  * First check whether the constant is below the lower
    1689             :                  * boundary (in that case we can skip the bucket, because
    1690             :                  * there's no overlap).
    1691             :                  *
    1692             :                  * We don't store collations used to build the statistics, but
    1693             :                  * we can use the collation for the attribute itself, as
    1694             :                  * stored in varcollid. We do reset the statistics after a
    1695             :                  * type change (including collation change), so this is OK.
    1696             :                  * For expressions, we use the collation extracted from the
    1697             :                  * expression itself.
    1698             :                  */
    1699       30744 :                 if (expronleft)
    1700       28128 :                     match = DatumGetBool(FunctionCall2Coll(&opproc,
    1701             :                                                            collid,
    1702       28128 :                                                            item->values[idx],
    1703       28128 :                                                            cst->constvalue));
    1704             :                 else
    1705        2616 :                     match = DatumGetBool(FunctionCall2Coll(&opproc,
    1706             :                                                            collid,
    1707        2616 :                                                            cst->constvalue,
    1708        2616 :                                                            item->values[idx]));
    1709             : 
    1710             :                 /* update the match bitmap with the result */
    1711       30744 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1712             :             }
    1713             :         }
    1714         444 :         else if (IsA(clause, ScalarArrayOpExpr))
    1715             :         {
    1716         204 :             ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
    1717             :             FmgrInfo    opproc;
    1718             : 
    1719             :             /* valid only after examine_opclause_args returns true */
    1720             :             Node       *clause_expr;
    1721             :             Const      *cst;
    1722             :             bool        expronleft;
    1723             :             Oid         collid;
    1724             :             int         idx;
    1725             : 
    1726             :             /* array evaluation */
    1727             :             ArrayType  *arrayval;
    1728             :             int16       elmlen;
    1729             :             bool        elmbyval;
    1730             :             char        elmalign;
    1731             :             int         num_elems;
    1732             :             Datum      *elem_values;
    1733             :             bool       *elem_nulls;
    1734             : 
    1735         204 :             fmgr_info(get_opcode(expr->opno), &opproc);
    1736             : 
    1737             :             /* extract the var/expr and const from the expression */
    1738         204 :             if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
    1739           0 :                 elog(ERROR, "incompatible clause");
    1740             : 
    1741             :             /* We expect Var on left */
    1742         204 :             if (!expronleft)
    1743           0 :                 elog(ERROR, "incompatible clause");
    1744             : 
    1745             :             /*
    1746             :              * Deconstruct the array constant, unless it's NULL (we'll cover
    1747             :              * that case below)
    1748             :              */
    1749         204 :             if (!cst->constisnull)
    1750             :             {
    1751         204 :                 arrayval = DatumGetArrayTypeP(cst->constvalue);
    1752         204 :                 get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
    1753             :                                      &elmlen, &elmbyval, &elmalign);
    1754         204 :                 deconstruct_array(arrayval,
    1755             :                                   ARR_ELEMTYPE(arrayval),
    1756             :                                   elmlen, elmbyval, elmalign,
    1757             :                                   &elem_values, &elem_nulls, &num_elems);
    1758             :             }
    1759             : 
    1760             :             /* match the attribute/expression to a dimension of the statistic */
    1761         204 :             idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
    1762             : 
    1763             :             /*
    1764             :              * Walk through the MCV items and evaluate the current clause. We
    1765             :              * can skip items that were already ruled out, and terminate if
    1766             :              * there are no remaining MCV items that might possibly match.
    1767             :              */
    1768       15696 :             for (int i = 0; i < mcvlist->nitems; i++)
    1769             :             {
    1770             :                 int         j;
    1771       15492 :                 bool        match = !expr->useOr;
    1772       15492 :                 MCVItem    *item = &mcvlist->items[i];
    1773             : 
    1774             :                 /*
    1775             :                  * When the MCV item or the Const value is NULL we can treat
    1776             :                  * this as a mismatch. We must not call the operator because
    1777             :                  * of strictness.
    1778             :                  */
    1779       15492 :                 if (item->isnull[idx] || cst->constisnull)
    1780             :                 {
    1781          18 :                     matches[i] = RESULT_MERGE(matches[i], is_or, false);
    1782          18 :                     continue;
    1783             :                 }
    1784             : 
    1785             :                 /*
    1786             :                  * Skip MCV items that can't change result in the bitmap. Once
    1787             :                  * the value gets false for AND-lists, or true for OR-lists,
    1788             :                  * we don't need to look at more clauses.
    1789             :                  */
    1790       15474 :                 if (RESULT_IS_FINAL(matches[i], is_or))
    1791        8016 :                     continue;
    1792             : 
    1793       28338 :                 for (j = 0; j < num_elems; j++)
    1794             :                 {
    1795       23634 :                     Datum       elem_value = elem_values[j];
    1796       23634 :                     bool        elem_isnull = elem_nulls[j];
    1797             :                     bool        elem_match;
    1798             : 
    1799             :                     /* NULL values always evaluate as not matching. */
    1800       23634 :                     if (elem_isnull)
    1801             :                     {
    1802        2112 :                         match = RESULT_MERGE(match, expr->useOr, false);
    1803        2112 :                         continue;
    1804             :                     }
    1805             : 
    1806             :                     /*
    1807             :                      * Stop evaluating the array elements once we reach a
    1808             :                      * matching value that can't change - ALL() is the same as
    1809             :                      * AND-list, ANY() is the same as OR-list.
    1810             :                      */
    1811       21522 :                     if (RESULT_IS_FINAL(match, expr->useOr))
    1812        2754 :                         break;
    1813             : 
    1814       18768 :                     elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
    1815             :                                                                 collid,
    1816       18768 :                                                                 item->values[idx],
    1817             :                                                                 elem_value));
    1818             : 
    1819       18768 :                     match = RESULT_MERGE(match, expr->useOr, elem_match);
    1820             :                 }
    1821             : 
    1822             :                 /* update the match bitmap with the result */
    1823        7458 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1824             :             }
    1825             :         }
    1826         240 :         else if (IsA(clause, NullTest))
    1827             :         {
    1828          66 :             NullTest   *expr = (NullTest *) clause;
    1829          66 :             Node       *clause_expr = (Node *) (expr->arg);
    1830             : 
    1831             :             /* match the attribute/expression to a dimension of the statistic */
    1832          66 :             int         idx = mcv_match_expression(clause_expr, keys, exprs, NULL);
    1833             : 
    1834             :             /*
    1835             :              * Walk through the MCV items and evaluate the current clause. We
    1836             :              * can skip items that were already ruled out, and terminate if
    1837             :              * there are no remaining MCV items that might possibly match.
    1838             :              */
    1839        6078 :             for (int i = 0; i < mcvlist->nitems; i++)
    1840             :             {
    1841        6012 :                 bool        match = false;  /* assume mismatch */
    1842        6012 :                 MCVItem    *item = &mcvlist->items[i];
    1843             : 
    1844             :                 /* if the clause mismatches the MCV item, update the bitmap */
    1845        6012 :                 switch (expr->nulltesttype)
    1846             :                 {
    1847        4212 :                     case IS_NULL:
    1848        4212 :                         match = (item->isnull[idx]) ? true : match;
    1849        4212 :                         break;
    1850             : 
    1851        1800 :                     case IS_NOT_NULL:
    1852        1800 :                         match = (!item->isnull[idx]) ? true : match;
    1853        1800 :                         break;
    1854             :                 }
    1855             : 
    1856             :                 /* now, update the match bitmap, depending on OR/AND type */
    1857        6012 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1858             :             }
    1859             :         }
    1860         174 :         else if (is_orclause(clause) || is_andclause(clause))
    1861          60 :         {
    1862             :             /* AND/OR clause, with all subclauses being compatible */
    1863             : 
    1864             :             int         i;
    1865          60 :             BoolExpr   *bool_clause = ((BoolExpr *) clause);
    1866          60 :             List       *bool_clauses = bool_clause->args;
    1867             : 
    1868             :             /* match/mismatch bitmap for each MCV item */
    1869          60 :             bool       *bool_matches = NULL;
    1870             : 
    1871             :             Assert(bool_clauses != NIL);
    1872             :             Assert(list_length(bool_clauses) >= 2);
    1873             : 
    1874             :             /* build the match bitmap for the OR-clauses */
    1875          60 :             bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs,
    1876          60 :                                                 mcvlist, is_orclause(clause));
    1877             : 
    1878             :             /*
    1879             :              * Merge the bitmap produced by mcv_get_match_bitmap into the
    1880             :              * current one. We need to consider if we're evaluating AND or OR
    1881             :              * condition when merging the results.
    1882             :              */
    1883        3756 :             for (i = 0; i < mcvlist->nitems; i++)
    1884        3696 :                 matches[i] = RESULT_MERGE(matches[i], is_or, bool_matches[i]);
    1885             : 
    1886          60 :             pfree(bool_matches);
    1887             :         }
    1888         114 :         else if (is_notclause(clause))
    1889             :         {
    1890             :             /* NOT clause, with all subclauses compatible */
    1891             : 
    1892             :             int         i;
    1893          30 :             BoolExpr   *not_clause = ((BoolExpr *) clause);
    1894          30 :             List       *not_args = not_clause->args;
    1895             : 
    1896             :             /* match/mismatch bitmap for each MCV item */
    1897          30 :             bool       *not_matches = NULL;
    1898             : 
    1899             :             Assert(not_args != NIL);
    1900             :             Assert(list_length(not_args) == 1);
    1901             : 
    1902             :             /* build the match bitmap for the NOT-clause */
    1903          30 :             not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs,
    1904             :                                                mcvlist, false);
    1905             : 
    1906             :             /*
    1907             :              * Merge the bitmap produced by mcv_get_match_bitmap into the
    1908             :              * current one. We're handling a NOT clause, so invert the result
    1909             :              * before merging it into the global bitmap.
    1910             :              */
    1911         150 :             for (i = 0; i < mcvlist->nitems; i++)
    1912         120 :                 matches[i] = RESULT_MERGE(matches[i], is_or, !not_matches[i]);
    1913             : 
    1914          30 :             pfree(not_matches);
    1915             :         }
    1916          84 :         else if (IsA(clause, Var))
    1917             :         {
    1918             :             /* Var (has to be a boolean Var, possibly from below NOT) */
    1919             : 
    1920          78 :             Var        *var = (Var *) (clause);
    1921             : 
    1922             :             /* match the attribute to a dimension of the statistic */
    1923          78 :             int         idx = bms_member_index(keys, var->varattno);
    1924             : 
    1925             :             Assert(var->vartype == BOOLOID);
    1926             : 
    1927             :             /*
    1928             :              * Walk through the MCV items and evaluate the current clause. We
    1929             :              * can skip items that were already ruled out, and terminate if
    1930             :              * there are no remaining MCV items that might possibly match.
    1931             :              */
    1932         378 :             for (int i = 0; i < mcvlist->nitems; i++)
    1933             :             {
    1934         300 :                 MCVItem    *item = &mcvlist->items[i];
    1935         300 :                 bool        match = false;
    1936             : 
    1937             :                 /* if the item is NULL, it's a mismatch */
    1938         300 :                 if (!item->isnull[idx] && DatumGetBool(item->values[idx]))
    1939         150 :                     match = true;
    1940             : 
    1941             :                 /* update the result bitmap */
    1942         300 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1943             :             }
    1944             :         }
    1945             :         else
    1946             :         {
    1947             :             /* Otherwise, it must be a bare boolean-returning expression */
    1948             :             int         idx;
    1949             : 
    1950             :             /* match the expression to a dimension of the statistic */
    1951           6 :             idx = mcv_match_expression(clause, keys, exprs, NULL);
    1952             : 
    1953             :             /*
    1954             :              * Walk through the MCV items and evaluate the current clause. We
    1955             :              * can skip items that were already ruled out, and terminate if
    1956             :              * there are no remaining MCV items that might possibly match.
    1957             :              */
    1958         222 :             for (int i = 0; i < mcvlist->nitems; i++)
    1959             :             {
    1960             :                 bool        match;
    1961         216 :                 MCVItem    *item = &mcvlist->items[i];
    1962             : 
    1963             :                 /* "match" just means it's bool TRUE */
    1964         216 :                 match = !item->isnull[idx] && DatumGetBool(item->values[idx]);
    1965             : 
    1966             :                 /* now, update the match bitmap, depending on OR/AND type */
    1967         216 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1968             :             }
    1969             :         }
    1970             :     }
    1971             : 
    1972         714 :     return matches;
    1973             : }
    1974             : 
    1975             : 
    1976             : /*
    1977             :  * mcv_combine_selectivities
    1978             :  *      Combine per-column and multi-column MCV selectivity estimates.
    1979             :  *
    1980             :  * simple_sel is a "simple" selectivity estimate (produced without using any
    1981             :  * extended statistics, essentially assuming independence of columns/clauses).
    1982             :  *
    1983             :  * mcv_sel and mcv_basesel are sums of the frequencies and base frequencies of
    1984             :  * all matching MCV items.  The difference (mcv_sel - mcv_basesel) is then
    1985             :  * essentially interpreted as a correction to be added to simple_sel, as
    1986             :  * described below.
    1987             :  *
    1988             :  * mcv_totalsel is the sum of the frequencies of all MCV items (not just the
    1989             :  * matching ones).  This is used as an upper bound on the portion of the
    1990             :  * selectivity estimates not covered by the MCV statistics.
    1991             :  *
    1992             :  * Note: While simple and base selectivities are defined in a quite similar
    1993             :  * way, the values are computed differently and are not therefore equal. The
    1994             :  * simple selectivity is computed as a product of per-clause estimates, while
    1995             :  * the base selectivity is computed by adding up base frequencies of matching
    1996             :  * items of the multi-column MCV list. So the values may differ for two main
    1997             :  * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
    1998             :  * the MCV items did not match the estimated clauses.
    1999             :  *
    2000             :  * As both (a) and (b) reduce the base selectivity value, it generally holds
    2001             :  * that (simple_sel >= mcv_basesel). If the MCV list covers all the data, the
    2002             :  * values may be equal.
    2003             :  *
    2004             :  * So, other_sel = (simple_sel - mcv_basesel) is an estimate for the part not
    2005             :  * covered by the MCV list, and (mcv_sel - mcv_basesel) may be seen as a
    2006             :  * correction for the part covered by the MCV list. Those two statements are
    2007             :  * actually equivalent.
    2008             :  */
    2009             : Selectivity
    2010         672 : mcv_combine_selectivities(Selectivity simple_sel,
    2011             :                           Selectivity mcv_sel,
    2012             :                           Selectivity mcv_basesel,
    2013             :                           Selectivity mcv_totalsel)
    2014             : {
    2015             :     Selectivity other_sel;
    2016             :     Selectivity sel;
    2017             : 
    2018             :     /* estimated selectivity of values not covered by MCV matches */
    2019         672 :     other_sel = simple_sel - mcv_basesel;
    2020         672 :     CLAMP_PROBABILITY(other_sel);
    2021             : 
    2022             :     /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
    2023         672 :     if (other_sel > 1.0 - mcv_totalsel)
    2024         444 :         other_sel = 1.0 - mcv_totalsel;
    2025             : 
    2026             :     /* overall selectivity is the sum of the MCV and non-MCV parts */
    2027         672 :     sel = mcv_sel + other_sel;
    2028         672 :     CLAMP_PROBABILITY(sel);
    2029             : 
    2030         672 :     return sel;
    2031             : }
    2032             : 
    2033             : 
    2034             : /*
    2035             :  * mcv_clauselist_selectivity
    2036             :  *      Use MCV statistics to estimate the selectivity of an implicitly-ANDed
    2037             :  *      list of clauses.
    2038             :  *
    2039             :  * This determines which MCV items match every clause in the list and returns
    2040             :  * the sum of the frequencies of those items.
    2041             :  *
    2042             :  * In addition, it returns the sum of the base frequencies of each of those
    2043             :  * items (that is the sum of the selectivities that each item would have if
    2044             :  * the columns were independent of one another), and the total selectivity of
    2045             :  * all the MCV items (not just the matching ones).  These are expected to be
    2046             :  * used together with a "simple" selectivity estimate (one based only on
    2047             :  * per-column statistics) to produce an overall selectivity estimate that
    2048             :  * makes use of both per-column and multi-column statistics --- see
    2049             :  * mcv_combine_selectivities().
    2050             :  */
    2051             : Selectivity
    2052         384 : mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
    2053             :                            List *clauses, int varRelid,
    2054             :                            JoinType jointype, SpecialJoinInfo *sjinfo,
    2055             :                            RelOptInfo *rel,
    2056             :                            Selectivity *basesel, Selectivity *totalsel)
    2057             : {
    2058             :     int         i;
    2059             :     MCVList    *mcv;
    2060         384 :     Selectivity s = 0.0;
    2061         384 :     RangeTblEntry *rte = root->simple_rte_array[rel->relid];
    2062             : 
    2063             :     /* match/mismatch bitmap for each MCV item */
    2064         384 :     bool       *matches = NULL;
    2065             : 
    2066             :     /* load the MCV list stored in the statistics object */
    2067         384 :     mcv = statext_mcv_load(stat->statOid, rte->inh);
    2068             : 
    2069             :     /* build a match bitmap for the clauses */
    2070         384 :     matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
    2071             :                                    mcv, false);
    2072             : 
    2073             :     /* sum frequencies for all the matching MCV items */
    2074         384 :     *basesel = 0.0;
    2075         384 :     *totalsel = 0.0;
    2076       25104 :     for (i = 0; i < mcv->nitems; i++)
    2077             :     {
    2078       24720 :         *totalsel += mcv->items[i].frequency;
    2079             : 
    2080       24720 :         if (matches[i] != false)
    2081             :         {
    2082         462 :             *basesel += mcv->items[i].base_frequency;
    2083         462 :             s += mcv->items[i].frequency;
    2084             :         }
    2085             :     }
    2086             : 
    2087         384 :     return s;
    2088             : }
    2089             : 
    2090             : 
    2091             : /*
    2092             :  * mcv_clause_selectivity_or
    2093             :  *      Use MCV statistics to estimate the selectivity of a clause that
    2094             :  *      appears in an ORed list of clauses.
    2095             :  *
    2096             :  * As with mcv_clauselist_selectivity() this determines which MCV items match
    2097             :  * the clause and returns both the sum of the frequencies and the sum of the
    2098             :  * base frequencies of those items, as well as the sum of the frequencies of
    2099             :  * all MCV items (not just the matching ones) so that this information can be
    2100             :  * used by mcv_combine_selectivities() to produce a selectivity estimate that
    2101             :  * makes use of both per-column and multi-column statistics.
    2102             :  *
    2103             :  * Additionally, we return information to help compute the overall selectivity
    2104             :  * of the ORed list of clauses assumed to contain this clause.  This function
    2105             :  * is intended to be called for each clause in the ORed list of clauses,
    2106             :  * allowing the overall selectivity to be computed using the following
    2107             :  * algorithm:
    2108             :  *
    2109             :  * Suppose P[n] = P(C[1] OR C[2] OR ... OR C[n]) is the combined selectivity
    2110             :  * of the first n clauses in the list.  Then the combined selectivity taking
    2111             :  * into account the next clause C[n+1] can be written as
    2112             :  *
    2113             :  *      P[n+1] = P[n] + P(C[n+1]) - P((C[1] OR ... OR C[n]) AND C[n+1])
    2114             :  *
    2115             :  * The final term above represents the overlap between the clauses examined so
    2116             :  * far and the (n+1)'th clause.  To estimate its selectivity, we track the
    2117             :  * match bitmap for the ORed list of clauses examined so far and examine its
    2118             :  * intersection with the match bitmap for the (n+1)'th clause.
    2119             :  *
    2120             :  * We then also return the sums of the MCV item frequencies and base
    2121             :  * frequencies for the match bitmap intersection corresponding to the overlap
    2122             :  * term above, so that they can be combined with a simple selectivity estimate
    2123             :  * for that term.
    2124             :  *
    2125             :  * The parameter "or_matches" is an in/out parameter tracking the match bitmap
    2126             :  * for the clauses examined so far.  The caller is expected to set it to NULL
    2127             :  * the first time it calls this function.
    2128             :  */
    2129             : Selectivity
    2130         240 : mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat,
    2131             :                           MCVList *mcv, Node *clause, bool **or_matches,
    2132             :                           Selectivity *basesel, Selectivity *overlap_mcvsel,
    2133             :                           Selectivity *overlap_basesel, Selectivity *totalsel)
    2134             : {
    2135         240 :     Selectivity s = 0.0;
    2136             :     bool       *new_matches;
    2137             :     int         i;
    2138             : 
    2139             :     /* build the OR-matches bitmap, if not built already */
    2140         240 :     if (*or_matches == NULL)
    2141          96 :         *or_matches = palloc0(sizeof(bool) * mcv->nitems);
    2142             : 
    2143             :     /* build the match bitmap for the new clause */
    2144         240 :     new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
    2145             :                                        stat->exprs, mcv, false);
    2146             : 
    2147             :     /*
    2148             :      * Sum the frequencies for all the MCV items matching this clause and also
    2149             :      * those matching the overlap between this clause and any of the preceding
    2150             :      * clauses as described above.
    2151             :      */
    2152         240 :     *basesel = 0.0;
    2153         240 :     *overlap_mcvsel = 0.0;
    2154         240 :     *overlap_basesel = 0.0;
    2155         240 :     *totalsel = 0.0;
    2156       15516 :     for (i = 0; i < mcv->nitems; i++)
    2157             :     {
    2158       15276 :         *totalsel += mcv->items[i].frequency;
    2159             : 
    2160       15276 :         if (new_matches[i])
    2161             :         {
    2162         336 :             s += mcv->items[i].frequency;
    2163         336 :             *basesel += mcv->items[i].base_frequency;
    2164             : 
    2165         336 :             if ((*or_matches)[i])
    2166             :             {
    2167         144 :                 *overlap_mcvsel += mcv->items[i].frequency;
    2168         144 :                 *overlap_basesel += mcv->items[i].base_frequency;
    2169             :             }
    2170             :         }
    2171             : 
    2172             :         /* update the OR-matches bitmap for the next clause */
    2173       15276 :         (*or_matches)[i] = (*or_matches)[i] || new_matches[i];
    2174             :     }
    2175             : 
    2176         240 :     pfree(new_matches);
    2177             : 
    2178         240 :     return s;
    2179             : }

Generated by: LCOV version 1.14