LCOV - code coverage report
Current view: top level - src/backend/statistics - mcv.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 533 583 91.4 %
Date: 2024-11-21 08:14:44 Functions: 18 21 85.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * mcv.c
       4             :  *    POSTGRES multivariate MCV lists
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/statistics/mcv.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include <math.h>
      18             : 
      19             : #include "access/htup_details.h"
      20             : #include "catalog/pg_statistic_ext.h"
      21             : #include "catalog/pg_statistic_ext_data.h"
      22             : #include "fmgr.h"
      23             : #include "funcapi.h"
      24             : #include "nodes/nodeFuncs.h"
      25             : #include "statistics/extended_stats_internal.h"
      26             : #include "statistics/statistics.h"
      27             : #include "utils/array.h"
      28             : #include "utils/builtins.h"
      29             : #include "utils/fmgrprotos.h"
      30             : #include "utils/lsyscache.h"
      31             : #include "utils/selfuncs.h"
      32             : #include "utils/syscache.h"
      33             : #include "utils/typcache.h"
      34             : 
      35             : /*
      36             :  * Computes size of a serialized MCV item, depending on the number of
      37             :  * dimensions (columns) the statistic is defined on. The datum values are
      38             :  * stored in a separate array (deduplicated, to minimize the size), and
      39             :  * so the serialized items only store uint16 indexes into that array.
      40             :  *
      41             :  * Each serialized item stores (in this order):
      42             :  *
      43             :  * - indexes to values    (ndim * sizeof(uint16))
      44             :  * - null flags           (ndim * sizeof(bool))
      45             :  * - frequency            (sizeof(double))
      46             :  * - base_frequency       (sizeof(double))
      47             :  *
      48             :  * There is no alignment padding within an MCV item.
      49             :  * So in total each MCV item requires this many bytes:
      50             :  *
      51             :  *   ndim * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double)
      52             :  */
      53             : #define ITEM_SIZE(ndims)    \
      54             :     ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))
      55             : 
      56             : /*
      57             :  * Used to compute size of serialized MCV list representation.
      58             :  */
      59             : #define MinSizeOfMCVList        \
      60             :     (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
      61             : 
      62             : /*
      63             :  * Size of the serialized MCV list, excluding the space needed for
      64             :  * deduplicated per-dimension values. The macro is meant to be used
      65             :  * when it's not yet safe to access the serialized info about amount
      66             :  * of data for each column.
      67             :  */
      68             : #define SizeOfMCVList(ndims,nitems) \
      69             :     ((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
      70             :      ((ndims) * sizeof(DimensionInfo)) + \
      71             :      ((nitems) * ITEM_SIZE(ndims)))
      72             : 
      73             : static MultiSortSupport build_mss(StatsBuildData *data);
      74             : 
      75             : static SortItem *build_distinct_groups(int numrows, SortItem *items,
      76             :                                        MultiSortSupport mss, int *ndistinct);
      77             : 
      78             : static SortItem **build_column_frequencies(SortItem *groups, int ngroups,
      79             :                                            MultiSortSupport mss, int *ncounts);
      80             : 
      81             : static int  count_distinct_groups(int numrows, SortItem *items,
      82             :                                   MultiSortSupport mss);
      83             : 
      84             : /*
      85             :  * Compute new value for bitmap item, considering whether it's used for
      86             :  * clauses connected by AND/OR.
      87             :  */
      88             : #define RESULT_MERGE(value, is_or, match) \
      89             :     ((is_or) ? ((value) || (match)) : ((value) && (match)))
      90             : 
      91             : /*
      92             :  * When processing a list of clauses, the bitmap item may get set to a value
      93             :  * such that additional clauses can't change it. For example, when processing
      94             :  * a list of clauses connected to AND, as soon as the item gets set to 'false'
      95             :  * then it'll remain like that. Similarly clauses connected by OR and 'true'.
      96             :  *
      97             :  * Returns true when the value in the bitmap can't change no matter how the
      98             :  * remaining clauses are evaluated.
      99             :  */
     100             : #define RESULT_IS_FINAL(value, is_or)   ((is_or) ? (value) : (!(value)))
     101             : 
     102             : /*
     103             :  * get_mincount_for_mcv_list
     104             :  *      Determine the minimum number of times a value needs to appear in
     105             :  *      the sample for it to be included in the MCV list.
     106             :  *
     107             :  * We want to keep only values that appear sufficiently often in the
     108             :  * sample that it is reasonable to extrapolate their sample frequencies to
     109             :  * the entire table.  We do this by placing an upper bound on the relative
     110             :  * standard error of the sample frequency, so that any estimates the
     111             :  * planner generates from the MCV statistics can be expected to be
     112             :  * reasonably accurate.
     113             :  *
     114             :  * Since we are sampling without replacement, the sample frequency of a
     115             :  * particular value is described by a hypergeometric distribution.  A
     116             :  * common rule of thumb when estimating errors in this situation is to
     117             :  * require at least 10 instances of the value in the sample, in which case
     118             :  * the distribution can be approximated by a normal distribution, and
     119             :  * standard error analysis techniques can be applied.  Given a sample size
     120             :  * of n, a population size of N, and a sample frequency of p=cnt/n, the
     121             :  * standard error of the proportion p is given by
     122             :  *      SE = sqrt(p*(1-p)/n) * sqrt((N-n)/(N-1))
     123             :  * where the second term is the finite population correction.  To get
     124             :  * reasonably accurate planner estimates, we impose an upper bound on the
     125             :  * relative standard error of 20% -- i.e., SE/p < 0.2.  This 20% relative
     126             :  * error bound is fairly arbitrary, but has been found empirically to work
     127             :  * well.  Rearranging this formula gives a lower bound on the number of
     128             :  * instances of the value seen:
     129             :  *      cnt > n*(N-n) / (N-n+0.04*n*(N-1))
     130             :  * This bound is at most 25, and approaches 0 as n approaches 0 or N. The
     131             :  * case where n approaches 0 cannot happen in practice, since the sample
     132             :  * size is at least 300.  The case where n approaches N corresponds to
     133             :  * sampling the whole table, in which case it is reasonable to keep
     134             :  * the whole MCV list (have no lower bound), so it makes sense to apply
     135             :  * this formula for all inputs, even though the above derivation is
     136             :  * technically only valid when the right hand side is at least around 10.
     137             :  *
     138             :  * An alternative way to look at this formula is as follows -- assume that
     139             :  * the number of instances of the value seen scales up to the entire
     140             :  * table, so that the population count is K=N*cnt/n. Then the distribution
     141             :  * in the sample is a hypergeometric distribution parameterised by N, n
     142             :  * and K, and the bound above is mathematically equivalent to demanding
     143             :  * that the standard deviation of that distribution is less than 20% of
     144             :  * its mean.  Thus the relative errors in any planner estimates produced
     145             :  * from the MCV statistics are likely to be not too large.
     146             :  */
     147             : static double
     148         192 : get_mincount_for_mcv_list(int samplerows, double totalrows)
     149             : {
     150         192 :     double      n = samplerows;
     151         192 :     double      N = totalrows;
     152             :     double      numer,
     153             :                 denom;
     154             : 
     155         192 :     numer = n * (N - n);
     156         192 :     denom = N - n + 0.04 * n * (N - 1);
     157             : 
     158             :     /* Guard against division by zero (possible if n = N = 1) */
     159         192 :     if (denom == 0.0)
     160          12 :         return 0.0;
     161             : 
     162         180 :     return numer / denom;
     163             : }
     164             : 
     165             : /*
     166             :  * Builds MCV list from the set of sampled rows.
     167             :  *
     168             :  * The algorithm is quite simple:
     169             :  *
     170             :  *     (1) sort the data (default collation, '<' for the data type)
     171             :  *
     172             :  *     (2) count distinct groups, decide how many to keep
     173             :  *
     174             :  *     (3) build the MCV list using the threshold determined in (2)
     175             :  *
     176             :  *     (4) remove rows represented by the MCV from the sample
     177             :  *
     178             :  */
     179             : MCVList *
     180         192 : statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
     181             : {
     182             :     int         i,
     183             :                 numattrs,
     184             :                 numrows,
     185             :                 ngroups,
     186             :                 nitems;
     187             :     double      mincount;
     188             :     SortItem   *items;
     189             :     SortItem   *groups;
     190         192 :     MCVList    *mcvlist = NULL;
     191             :     MultiSortSupport mss;
     192             : 
     193             :     /* comparator for all the columns */
     194         192 :     mss = build_mss(data);
     195             : 
     196             :     /* sort the rows */
     197         192 :     items = build_sorted_items(data, &nitems, mss,
     198             :                                data->nattnums, data->attnums);
     199             : 
     200         192 :     if (!items)
     201           0 :         return NULL;
     202             : 
     203             :     /* for convenience */
     204         192 :     numattrs = data->nattnums;
     205         192 :     numrows = data->numrows;
     206             : 
     207             :     /* transform the sorted rows into groups (sorted by frequency) */
     208         192 :     groups = build_distinct_groups(nitems, items, mss, &ngroups);
     209             : 
     210             :     /*
     211             :      * The maximum number of MCV items to store, based on the statistics
     212             :      * target we computed for the statistics object (from the target set for
     213             :      * the object itself, attributes and the system default). In any case, we
     214             :      * can't keep more groups than we have available.
     215             :      */
     216         192 :     nitems = stattarget;
     217         192 :     if (nitems > ngroups)
     218         120 :         nitems = ngroups;
     219             : 
     220             :     /*
     221             :      * Decide how many items to keep in the MCV list. We can't use the same
     222             :      * algorithm as per-column MCV lists, because that only considers the
     223             :      * actual group frequency - but we're primarily interested in how the
     224             :      * actual frequency differs from the base frequency (product of simple
     225             :      * per-column frequencies, as if the columns were independent).
     226             :      *
     227             :      * Using the same algorithm might exclude items that are close to the
     228             :      * "average" frequency of the sample. But that does not say whether the
     229             :      * observed frequency is close to the base frequency or not. We also need
     230             :      * to consider unexpectedly uncommon items (again, compared to the base
     231             :      * frequency), and the single-column algorithm does not have to.
     232             :      *
     233             :      * We simply decide how many items to keep by computing the minimum count
     234             :      * using get_mincount_for_mcv_list() and then keep all items that seem to
     235             :      * be more common than that.
     236             :      */
     237         192 :     mincount = get_mincount_for_mcv_list(numrows, totalrows);
     238             : 
     239             :     /*
     240             :      * Walk the groups until we find the first group with a count below the
     241             :      * mincount threshold (the index of that group is the number of groups we
     242             :      * want to keep).
     243             :      */
     244        8820 :     for (i = 0; i < nitems; i++)
     245             :     {
     246        8628 :         if (groups[i].count < mincount)
     247             :         {
     248           0 :             nitems = i;
     249           0 :             break;
     250             :         }
     251             :     }
     252             : 
     253             :     /*
     254             :      * At this point, we know the number of items for the MCV list. There
     255             :      * might be none (for uniform distribution with many groups), and in that
     256             :      * case, there will be no MCV list. Otherwise, construct the MCV list.
     257             :      */
     258         192 :     if (nitems > 0)
     259             :     {
     260             :         int         j;
     261             :         SortItem    key;
     262             :         MultiSortSupport tmp;
     263             : 
     264             :         /* frequencies for values in each attribute */
     265             :         SortItem  **freqs;
     266             :         int        *nfreqs;
     267             : 
     268             :         /* used to search values */
     269         192 :         tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup)
     270             :                                         + sizeof(SortSupportData));
     271             : 
     272             :         /* compute frequencies for values in each column */
     273         192 :         nfreqs = (int *) palloc0(sizeof(int) * numattrs);
     274         192 :         freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
     275             : 
     276             :         /*
     277             :          * Allocate the MCV list structure, set the global parameters.
     278             :          */
     279         192 :         mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
     280         192 :                                       sizeof(MCVItem) * nitems);
     281             : 
     282         192 :         mcvlist->magic = STATS_MCV_MAGIC;
     283         192 :         mcvlist->type = STATS_MCV_TYPE_BASIC;
     284         192 :         mcvlist->ndimensions = numattrs;
     285         192 :         mcvlist->nitems = nitems;
     286             : 
     287             :         /* store info about data type OIDs */
     288         726 :         for (i = 0; i < numattrs; i++)
     289         534 :             mcvlist->types[i] = data->stats[i]->attrtypid;
     290             : 
     291             :         /* Copy the first chunk of groups into the result. */
     292        8820 :         for (i = 0; i < nitems; i++)
     293             :         {
     294             :             /* just point to the proper place in the list */
     295        8628 :             MCVItem    *item = &mcvlist->items[i];
     296             : 
     297        8628 :             item->values = (Datum *) palloc(sizeof(Datum) * numattrs);
     298        8628 :             item->isnull = (bool *) palloc(sizeof(bool) * numattrs);
     299             : 
     300             :             /* copy values for the group */
     301        8628 :             memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
     302        8628 :             memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
     303             : 
     304             :             /* groups should be sorted by frequency in descending order */
     305             :             Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
     306             : 
     307             :             /* group frequency */
     308        8628 :             item->frequency = (double) groups[i].count / numrows;
     309             : 
     310             :             /* base frequency, if the attributes were independent */
     311        8628 :             item->base_frequency = 1.0;
     312       32958 :             for (j = 0; j < numattrs; j++)
     313             :             {
     314             :                 SortItem   *freq;
     315             : 
     316             :                 /* single dimension */
     317       24330 :                 tmp->ndims = 1;
     318       24330 :                 tmp->ssup[0] = mss->ssup[j];
     319             : 
     320             :                 /* fill search key */
     321       24330 :                 key.values = &groups[i].values[j];
     322       24330 :                 key.isnull = &groups[i].isnull[j];
     323             : 
     324       24330 :                 freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
     325             :                                                 sizeof(SortItem),
     326             :                                                 multi_sort_compare, tmp);
     327             : 
     328       24330 :                 item->base_frequency *= ((double) freq->count) / numrows;
     329             :             }
     330             :         }
     331             : 
     332         192 :         pfree(nfreqs);
     333         192 :         pfree(freqs);
     334             :     }
     335             : 
     336         192 :     pfree(items);
     337         192 :     pfree(groups);
     338             : 
     339         192 :     return mcvlist;
     340             : }
     341             : 
     342             : /*
     343             :  * build_mss
     344             :  *      Build a MultiSortSupport for the given StatsBuildData.
     345             :  */
     346             : static MultiSortSupport
     347         192 : build_mss(StatsBuildData *data)
     348             : {
     349             :     int         i;
     350         192 :     int         numattrs = data->nattnums;
     351             : 
     352             :     /* Sort by multiple columns (using array of SortSupport) */
     353         192 :     MultiSortSupport mss = multi_sort_init(numattrs);
     354             : 
     355             :     /* prepare the sort functions for all the attributes */
     356         726 :     for (i = 0; i < numattrs; i++)
     357             :     {
     358         534 :         VacAttrStats *colstat = data->stats[i];
     359             :         TypeCacheEntry *type;
     360             : 
     361         534 :         type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
     362         534 :         if (type->lt_opr == InvalidOid) /* shouldn't happen */
     363           0 :             elog(ERROR, "cache lookup failed for ordering operator for type %u",
     364             :                  colstat->attrtypid);
     365             : 
     366         534 :         multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
     367             :     }
     368             : 
     369         192 :     return mss;
     370             : }
     371             : 
     372             : /*
     373             :  * count_distinct_groups
     374             :  *      Count distinct combinations of SortItems in the array.
     375             :  *
     376             :  * The array is assumed to be sorted according to the MultiSortSupport.
     377             :  */
     378             : static int
     379         192 : count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
     380             : {
     381             :     int         i;
     382             :     int         ndistinct;
     383             : 
     384         192 :     ndistinct = 1;
     385      459054 :     for (i = 1; i < numrows; i++)
     386             :     {
     387             :         /* make sure the array really is sorted */
     388             :         Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
     389             : 
     390      458862 :         if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
     391       77736 :             ndistinct += 1;
     392             :     }
     393             : 
     394         192 :     return ndistinct;
     395             : }
     396             : 
     397             : /*
     398             :  * compare_sort_item_count
     399             :  *      Comparator for sorting items by count (frequencies) in descending
     400             :  *      order.
     401             :  */
     402             : static int
     403       78780 : compare_sort_item_count(const void *a, const void *b, void *arg)
     404             : {
     405       78780 :     SortItem   *ia = (SortItem *) a;
     406       78780 :     SortItem   *ib = (SortItem *) b;
     407             : 
     408       78780 :     if (ia->count == ib->count)
     409       77832 :         return 0;
     410         948 :     else if (ia->count > ib->count)
     411         630 :         return -1;
     412             : 
     413         318 :     return 1;
     414             : }
     415             : 
     416             : /*
     417             :  * build_distinct_groups
     418             :  *      Build an array of SortItems for distinct groups and counts matching
     419             :  *      items.
     420             :  *
     421             :  * The 'items' array is assumed to be sorted.
     422             :  */
     423             : static SortItem *
     424         192 : build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
     425             :                       int *ndistinct)
     426             : {
     427             :     int         i,
     428             :                 j;
     429         192 :     int         ngroups = count_distinct_groups(numrows, items, mss);
     430             : 
     431         192 :     SortItem   *groups = (SortItem *) palloc(ngroups * sizeof(SortItem));
     432             : 
     433         192 :     j = 0;
     434         192 :     groups[0] = items[0];
     435         192 :     groups[0].count = 1;
     436             : 
     437      459054 :     for (i = 1; i < numrows; i++)
     438             :     {
     439             :         /* Assume sorted in ascending order. */
     440             :         Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
     441             : 
     442             :         /* New distinct group detected. */
     443      458862 :         if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
     444             :         {
     445       77736 :             groups[++j] = items[i];
     446       77736 :             groups[j].count = 0;
     447             :         }
     448             : 
     449      458862 :         groups[j].count++;
     450             :     }
     451             : 
     452             :     /* ensure we filled the expected number of distinct groups */
     453             :     Assert(j + 1 == ngroups);
     454             : 
     455             :     /* Sort the distinct groups by frequency (in descending order). */
     456         192 :     qsort_interruptible(groups, ngroups, sizeof(SortItem),
     457             :                         compare_sort_item_count, NULL);
     458             : 
     459         192 :     *ndistinct = ngroups;
     460         192 :     return groups;
     461             : }
     462             : 
     463             : /* compare sort items (single dimension) */
     464             : static int
     465      834186 : sort_item_compare(const void *a, const void *b, void *arg)
     466             : {
     467      834186 :     SortSupport ssup = (SortSupport) arg;
     468      834186 :     SortItem   *ia = (SortItem *) a;
     469      834186 :     SortItem   *ib = (SortItem *) b;
     470             : 
     471     1668372 :     return ApplySortComparator(ia->values[0], ia->isnull[0],
     472      834186 :                                ib->values[0], ib->isnull[0],
     473             :                                ssup);
     474             : }
     475             : 
     476             : /*
     477             :  * build_column_frequencies
     478             :  *      Compute frequencies of values in each column.
     479             :  *
     480             :  * This returns an array of SortItems for each attribute the MCV is built
     481             :  * on, with a frequency (number of occurrences) for each value. This is
     482             :  * then used to compute "base" frequency of MCV items.
     483             :  *
     484             :  * All the memory is allocated in a single chunk, so that a single pfree
     485             :  * is enough to release it. We do not allocate space for values/isnull
     486             :  * arrays in the SortItems, because we can simply point into the input
     487             :  * groups directly.
     488             :  */
     489             : static SortItem **
     490         192 : build_column_frequencies(SortItem *groups, int ngroups,
     491             :                          MultiSortSupport mss, int *ncounts)
     492             : {
     493             :     int         i,
     494             :                 dim;
     495             :     SortItem  **result;
     496             :     char       *ptr;
     497             : 
     498             :     Assert(groups);
     499             :     Assert(ncounts);
     500             : 
     501             :     /* allocate arrays for all columns as a single chunk */
     502         192 :     ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +
     503         192 :                  mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));
     504             : 
     505             :     /* initial array of pointers */
     506         192 :     result = (SortItem **) ptr;
     507         192 :     ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);
     508             : 
     509         726 :     for (dim = 0; dim < mss->ndims; dim++)
     510             :     {
     511         534 :         SortSupport ssup = &mss->ssup[dim];
     512             : 
     513             :         /* array of values for a single column */
     514         534 :         result[dim] = (SortItem *) ptr;
     515         534 :         ptr += MAXALIGN(sizeof(SortItem) * ngroups);
     516             : 
     517             :         /* extract data for the dimension */
     518      227064 :         for (i = 0; i < ngroups; i++)
     519             :         {
     520             :             /* point into the input groups */
     521      226530 :             result[dim][i].values = &groups[i].values[dim];
     522      226530 :             result[dim][i].isnull = &groups[i].isnull[dim];
     523      226530 :             result[dim][i].count = groups[i].count;
     524             :         }
     525             : 
     526             :         /* sort the values, deduplicate */
     527         534 :         qsort_interruptible(result[dim], ngroups, sizeof(SortItem),
     528             :                             sort_item_compare, ssup);
     529             : 
     530             :         /*
     531             :          * Identify distinct values, compute frequency (there might be
     532             :          * multiple MCV items containing this value, so we need to sum counts
     533             :          * from all of them.
     534             :          */
     535         534 :         ncounts[dim] = 1;
     536      226530 :         for (i = 1; i < ngroups; i++)
     537             :         {
     538      225996 :             if (sort_item_compare(&result[dim][i - 1], &result[dim][i], ssup) == 0)
     539             :             {
     540      136200 :                 result[dim][ncounts[dim] - 1].count += result[dim][i].count;
     541      136200 :                 continue;
     542             :             }
     543             : 
     544       89796 :             result[dim][ncounts[dim]] = result[dim][i];
     545             : 
     546       89796 :             ncounts[dim]++;
     547             :         }
     548             :     }
     549             : 
     550         192 :     return result;
     551             : }
     552             : 
     553             : /*
     554             :  * statext_mcv_load
     555             :  *      Load the MCV list for the indicated pg_statistic_ext_data tuple.
     556             :  */
     557             : MCVList *
     558         480 : statext_mcv_load(Oid mvoid, bool inh)
     559             : {
     560             :     MCVList    *result;
     561             :     bool        isnull;
     562             :     Datum       mcvlist;
     563         480 :     HeapTuple   htup = SearchSysCache2(STATEXTDATASTXOID,
     564             :                                        ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
     565             : 
     566         480 :     if (!HeapTupleIsValid(htup))
     567           0 :         elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
     568             : 
     569         480 :     mcvlist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
     570             :                               Anum_pg_statistic_ext_data_stxdmcv, &isnull);
     571             : 
     572         480 :     if (isnull)
     573           0 :         elog(ERROR,
     574             :              "requested statistics kind \"%c\" is not yet built for statistics object %u",
     575             :              STATS_EXT_MCV, mvoid);
     576             : 
     577         480 :     result = statext_mcv_deserialize(DatumGetByteaP(mcvlist));
     578             : 
     579         480 :     ReleaseSysCache(htup);
     580             : 
     581         480 :     return result;
     582             : }
     583             : 
     584             : 
     585             : /*
     586             :  * statext_mcv_serialize
     587             :  *      Serialize MCV list into a pg_mcv_list value.
     588             :  *
     589             :  * The MCV items may include values of various data types, and it's reasonable
     590             :  * to expect redundancy (values for a given attribute, repeated for multiple
     591             :  * MCV list items). So we deduplicate the values into arrays, and then replace
     592             :  * the values by indexes into those arrays.
     593             :  *
     594             :  * The overall structure of the serialized representation looks like this:
     595             :  *
     596             :  * +---------------+----------------+---------------------+-------+
     597             :  * | header fields | dimension info | deduplicated values | items |
     598             :  * +---------------+----------------+---------------------+-------+
     599             :  *
     600             :  * Where dimension info stores information about the type of the K-th
     601             :  * attribute (e.g. typlen, typbyval and length of deduplicated values).
     602             :  * Deduplicated values store deduplicated values for each attribute.  And
     603             :  * items store the actual MCV list items, with values replaced by indexes into
     604             :  * the arrays.
     605             :  *
     606             :  * When serializing the items, we use uint16 indexes. The number of MCV items
     607             :  * is limited by the statistics target (which is capped to 10k at the moment).
     608             :  * We might increase this to 65k and still fit into uint16, so there's a bit of
     609             :  * slack. Furthermore, this limit is on the number of distinct values per column,
     610             :  * and we usually have few of those (and various combinations of them for the
     611             :  * those MCV list). So uint16 seems fine for now.
     612             :  *
     613             :  * We don't really expect the serialization to save as much space as for
     614             :  * histograms, as we are not doing any bucket splits (which is the source
     615             :  * of high redundancy in histograms).
     616             :  *
     617             :  * TODO: Consider packing boolean flags (NULL) for each item into a single char
     618             :  * (or a longer type) instead of using an array of bool items.
     619             :  */
     620             : bytea *
     621         192 : statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
     622             : {
     623             :     int         i;
     624             :     int         dim;
     625         192 :     int         ndims = mcvlist->ndimensions;
     626             : 
     627             :     SortSupport ssup;
     628             :     DimensionInfo *info;
     629             : 
     630             :     Size        total_length;
     631             : 
     632             :     /* serialized items (indexes into arrays, etc.) */
     633             :     bytea      *raw;
     634             :     char       *ptr;
     635             :     char       *endptr PG_USED_FOR_ASSERTS_ONLY;
     636             : 
     637             :     /* values per dimension (and number of non-NULL values) */
     638         192 :     Datum     **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
     639         192 :     int        *counts = (int *) palloc0(sizeof(int) * ndims);
     640             : 
     641             :     /*
     642             :      * We'll include some rudimentary information about the attribute types
     643             :      * (length, by-val flag), so that we don't have to look them up while
     644             :      * deserializing the MCV list (we already have the type OID in the
     645             :      * header).  This is safe because when changing the type of the attribute
     646             :      * the statistics gets dropped automatically.  We need to store the info
     647             :      * about the arrays of deduplicated values anyway.
     648             :      */
     649         192 :     info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
     650             : 
     651             :     /* sort support data for all attributes included in the MCV list */
     652         192 :     ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
     653             : 
     654             :     /* collect and deduplicate values for each dimension (attribute) */
     655         726 :     for (dim = 0; dim < ndims; dim++)
     656             :     {
     657             :         int         ndistinct;
     658             :         TypeCacheEntry *typentry;
     659             : 
     660             :         /*
     661             :          * Lookup the LT operator (can't get it from stats extra_data, as we
     662             :          * don't know how to interpret that - scalar vs. array etc.).
     663             :          */
     664         534 :         typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
     665             : 
     666             :         /* copy important info about the data type (length, by-value) */
     667         534 :         info[dim].typlen = stats[dim]->attrtype->typlen;
     668         534 :         info[dim].typbyval = stats[dim]->attrtype->typbyval;
     669             : 
     670             :         /* allocate space for values in the attribute and collect them */
     671         534 :         values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
     672             : 
     673       24864 :         for (i = 0; i < mcvlist->nitems; i++)
     674             :         {
     675             :             /* skip NULL values - we don't need to deduplicate those */
     676       24330 :             if (mcvlist->items[i].isnull[dim])
     677          66 :                 continue;
     678             : 
     679             :             /* append the value at the end */
     680       24264 :             values[dim][counts[dim]] = mcvlist->items[i].values[dim];
     681       24264 :             counts[dim] += 1;
     682             :         }
     683             : 
     684             :         /* if there are just NULL values in this dimension, we're done */
     685         534 :         if (counts[dim] == 0)
     686           6 :             continue;
     687             : 
     688             :         /* sort and deduplicate the data */
     689         528 :         ssup[dim].ssup_cxt = CurrentMemoryContext;
     690         528 :         ssup[dim].ssup_collation = stats[dim]->attrcollid;
     691         528 :         ssup[dim].ssup_nulls_first = false;
     692             : 
     693         528 :         PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
     694             : 
     695         528 :         qsort_interruptible(values[dim], counts[dim], sizeof(Datum),
     696         528 :                             compare_scalars_simple, &ssup[dim]);
     697             : 
     698             :         /*
     699             :          * Walk through the array and eliminate duplicate values, but keep the
     700             :          * ordering (so that we can do a binary search later). We know there's
     701             :          * at least one item as (counts[dim] != 0), so we can skip the first
     702             :          * element.
     703             :          */
     704         528 :         ndistinct = 1;          /* number of distinct values */
     705       24264 :         for (i = 1; i < counts[dim]; i++)
     706             :         {
     707             :             /* expect sorted array */
     708             :             Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
     709             : 
     710             :             /* if the value is the same as the previous one, we can skip it */
     711       23736 :             if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
     712        9972 :                 continue;
     713             : 
     714       13764 :             values[dim][ndistinct] = values[dim][i];
     715       13764 :             ndistinct += 1;
     716             :         }
     717             : 
     718             :         /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
     719             :         Assert(ndistinct <= PG_UINT16_MAX);
     720             : 
     721             :         /*
     722             :          * Store additional info about the attribute - number of deduplicated
     723             :          * values, and also size of the serialized data. For fixed-length data
     724             :          * types this is trivial to compute, for varwidth types we need to
     725             :          * actually walk the array and sum the sizes.
     726             :          */
     727         528 :         info[dim].nvalues = ndistinct;
     728             : 
     729         528 :         if (info[dim].typbyval) /* by-value data types */
     730             :         {
     731         384 :             info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
     732             : 
     733             :             /*
     734             :              * We copy the data into the MCV item during deserialization, so
     735             :              * we don't need to allocate any extra space.
     736             :              */
     737         384 :             info[dim].nbytes_aligned = 0;
     738             :         }
     739         144 :         else if (info[dim].typlen > 0)   /* fixed-length by-ref */
     740             :         {
     741             :             /*
     742             :              * We don't care about alignment in the serialized data, so we
     743             :              * pack the data as much as possible. But we also track how much
     744             :              * data will be needed after deserialization, and in that case we
     745             :              * need to account for alignment of each item.
     746             :              *
     747             :              * Note: As the items are fixed-length, we could easily compute
     748             :              * this during deserialization, but we do it here anyway.
     749             :              */
     750          24 :             info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
     751          24 :             info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
     752             :         }
     753         120 :         else if (info[dim].typlen == -1)    /* varlena */
     754             :         {
     755         120 :             info[dim].nbytes = 0;
     756         120 :             info[dim].nbytes_aligned = 0;
     757        2910 :             for (i = 0; i < info[dim].nvalues; i++)
     758             :             {
     759             :                 Size        len;
     760             : 
     761             :                 /*
     762             :                  * For varlena values, we detoast the values and store the
     763             :                  * length and data separately. We don't bother with alignment
     764             :                  * here, which means that during deserialization we need to
     765             :                  * copy the fields and only access the copies.
     766             :                  */
     767        2790 :                 values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
     768             : 
     769             :                 /* serialized length (uint32 length + data) */
     770        2790 :                 len = VARSIZE_ANY_EXHDR(values[dim][i]);
     771        2790 :                 info[dim].nbytes += sizeof(uint32); /* length */
     772        2790 :                 info[dim].nbytes += len;    /* value (no header) */
     773             : 
     774             :                 /*
     775             :                  * During deserialization we'll build regular varlena values
     776             :                  * with full headers, and we need to align them properly.
     777             :                  */
     778        2790 :                 info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
     779             :             }
     780             :         }
     781           0 :         else if (info[dim].typlen == -2)    /* cstring */
     782             :         {
     783           0 :             info[dim].nbytes = 0;
     784           0 :             info[dim].nbytes_aligned = 0;
     785           0 :             for (i = 0; i < info[dim].nvalues; i++)
     786             :             {
     787             :                 Size        len;
     788             : 
     789             :                 /*
     790             :                  * cstring is handled similar to varlena - first we store the
     791             :                  * length as uint32 and then the data. We don't care about
     792             :                  * alignment, which means that during deserialization we need
     793             :                  * to copy the fields and only access the copies.
     794             :                  */
     795             : 
     796             :                 /* c-strings include terminator, so +1 byte */
     797           0 :                 len = strlen(DatumGetCString(values[dim][i])) + 1;
     798           0 :                 info[dim].nbytes += sizeof(uint32); /* length */
     799           0 :                 info[dim].nbytes += len;    /* value */
     800             : 
     801             :                 /* space needed for properly aligned deserialized copies */
     802           0 :                 info[dim].nbytes_aligned += MAXALIGN(len);
     803             :             }
     804             :         }
     805             : 
     806             :         /* we know (count>0) so there must be some data */
     807             :         Assert(info[dim].nbytes > 0);
     808             :     }
     809             : 
     810             :     /*
     811             :      * Now we can finally compute how much space we'll actually need for the
     812             :      * whole serialized MCV list (varlena header, MCV header, dimension info
     813             :      * for each attribute, deduplicated values and items).
     814             :      */
     815         192 :     total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
     816             :         + sizeof(AttrNumber)    /* ndimensions */
     817         192 :         + (ndims * sizeof(Oid));    /* attribute types */
     818             : 
     819             :     /* dimension info */
     820         192 :     total_length += ndims * sizeof(DimensionInfo);
     821             : 
     822             :     /* add space for the arrays of deduplicated values */
     823         726 :     for (i = 0; i < ndims; i++)
     824         534 :         total_length += info[i].nbytes;
     825             : 
     826             :     /*
     827             :      * And finally account for the items (those are fixed-length, thanks to
     828             :      * replacing values with uint16 indexes into the deduplicated arrays).
     829             :      */
     830         192 :     total_length += mcvlist->nitems * ITEM_SIZE(dim);
     831             : 
     832             :     /*
     833             :      * Allocate space for the whole serialized MCV list (we'll skip bytes, so
     834             :      * we set them to zero to make the result more compressible).
     835             :      */
     836         192 :     raw = (bytea *) palloc0(VARHDRSZ + total_length);
     837         192 :     SET_VARSIZE(raw, VARHDRSZ + total_length);
     838             : 
     839         192 :     ptr = VARDATA(raw);
     840         192 :     endptr = ptr + total_length;
     841             : 
     842             :     /* copy the MCV list header fields, one by one */
     843         192 :     memcpy(ptr, &mcvlist->magic, sizeof(uint32));
     844         192 :     ptr += sizeof(uint32);
     845             : 
     846         192 :     memcpy(ptr, &mcvlist->type, sizeof(uint32));
     847         192 :     ptr += sizeof(uint32);
     848             : 
     849         192 :     memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
     850         192 :     ptr += sizeof(uint32);
     851             : 
     852         192 :     memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
     853         192 :     ptr += sizeof(AttrNumber);
     854             : 
     855         192 :     memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
     856         192 :     ptr += (sizeof(Oid) * ndims);
     857             : 
     858             :     /* store information about the attributes (data amounts, ...) */
     859         192 :     memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
     860         192 :     ptr += sizeof(DimensionInfo) * ndims;
     861             : 
     862             :     /* Copy the deduplicated values for all attributes to the output. */
     863         726 :     for (dim = 0; dim < ndims; dim++)
     864             :     {
     865             :         /* remember the starting point for Asserts later */
     866         534 :         char       *start PG_USED_FOR_ASSERTS_ONLY = ptr;
     867             : 
     868       14826 :         for (i = 0; i < info[dim].nvalues; i++)
     869             :         {
     870       14292 :             Datum       value = values[dim][i];
     871             : 
     872       14292 :             if (info[dim].typbyval) /* passed by value */
     873             :             {
     874             :                 Datum       tmp;
     875             : 
     876             :                 /*
     877             :                  * For byval types, we need to copy just the significant bytes
     878             :                  * - we can't use memcpy directly, as that assumes
     879             :                  * little-endian behavior.  store_att_byval does almost what
     880             :                  * we need, but it requires a properly aligned buffer - the
     881             :                  * output buffer does not guarantee that. So we simply use a
     882             :                  * local Datum variable (which guarantees proper alignment),
     883             :                  * and then copy the value from it.
     884             :                  */
     885       10392 :                 store_att_byval(&tmp, value, info[dim].typlen);
     886             : 
     887       10392 :                 memcpy(ptr, &tmp, info[dim].typlen);
     888       10392 :                 ptr += info[dim].typlen;
     889             :             }
     890        3900 :             else if (info[dim].typlen > 0)   /* passed by reference */
     891             :             {
     892             :                 /* no special alignment needed, treated as char array */
     893        1110 :                 memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
     894        1110 :                 ptr += info[dim].typlen;
     895             :             }
     896        2790 :             else if (info[dim].typlen == -1)    /* varlena */
     897             :             {
     898        2790 :                 uint32      len = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
     899             : 
     900             :                 /* copy the length */
     901        2790 :                 memcpy(ptr, &len, sizeof(uint32));
     902        2790 :                 ptr += sizeof(uint32);
     903             : 
     904             :                 /* data from the varlena value (without the header) */
     905        2790 :                 memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
     906        2790 :                 ptr += len;
     907             :             }
     908           0 :             else if (info[dim].typlen == -2)    /* cstring */
     909             :             {
     910           0 :                 uint32      len = (uint32) strlen(DatumGetCString(value)) + 1;
     911             : 
     912             :                 /* copy the length */
     913           0 :                 memcpy(ptr, &len, sizeof(uint32));
     914           0 :                 ptr += sizeof(uint32);
     915             : 
     916             :                 /* value */
     917           0 :                 memcpy(ptr, DatumGetCString(value), len);
     918           0 :                 ptr += len;
     919             :             }
     920             : 
     921             :             /* no underflows or overflows */
     922             :             Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
     923             :         }
     924             : 
     925             :         /* we should get exactly nbytes of data for this dimension */
     926             :         Assert((ptr - start) == info[dim].nbytes);
     927             :     }
     928             : 
     929             :     /* Serialize the items, with uint16 indexes instead of the values. */
     930        8820 :     for (i = 0; i < mcvlist->nitems; i++)
     931             :     {
     932        8628 :         MCVItem    *mcvitem = &mcvlist->items[i];
     933             : 
     934             :         /* don't write beyond the allocated space */
     935             :         Assert(ptr <= (endptr - ITEM_SIZE(dim)));
     936             : 
     937             :         /* copy NULL and frequency flags into the serialized MCV */
     938        8628 :         memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
     939        8628 :         ptr += sizeof(bool) * ndims;
     940             : 
     941        8628 :         memcpy(ptr, &mcvitem->frequency, sizeof(double));
     942        8628 :         ptr += sizeof(double);
     943             : 
     944        8628 :         memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
     945        8628 :         ptr += sizeof(double);
     946             : 
     947             :         /* store the indexes last */
     948       32958 :         for (dim = 0; dim < ndims; dim++)
     949             :         {
     950       24330 :             uint16      index = 0;
     951             :             Datum      *value;
     952             : 
     953             :             /* do the lookup only for non-NULL values */
     954       24330 :             if (!mcvitem->isnull[dim])
     955             :             {
     956       24264 :                 value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
     957       24264 :                                               info[dim].nvalues, sizeof(Datum),
     958       24264 :                                               compare_scalars_simple, &ssup[dim]);
     959             : 
     960             :                 Assert(value != NULL);  /* serialization or deduplication
     961             :                                          * error */
     962             : 
     963             :                 /* compute index within the deduplicated array */
     964       24264 :                 index = (uint16) (value - values[dim]);
     965             : 
     966             :                 /* check the index is within expected bounds */
     967             :                 Assert(index < info[dim].nvalues);
     968             :             }
     969             : 
     970             :             /* copy the index into the serialized MCV */
     971       24330 :             memcpy(ptr, &index, sizeof(uint16));
     972       24330 :             ptr += sizeof(uint16);
     973             :         }
     974             : 
     975             :         /* make sure we don't overflow the allocated value */
     976             :         Assert(ptr <= endptr);
     977             :     }
     978             : 
     979             :     /* at this point we expect to match the total_length exactly */
     980             :     Assert(ptr == endptr);
     981             : 
     982         192 :     pfree(values);
     983         192 :     pfree(counts);
     984             : 
     985         192 :     return raw;
     986             : }
     987             : 
     988             : /*
     989             :  * statext_mcv_deserialize
     990             :  *      Reads serialized MCV list into MCVList structure.
     991             :  *
     992             :  * All the memory needed by the MCV list is allocated as a single chunk, so
     993             :  * it's possible to simply pfree() it at once.
     994             :  */
     995             : MCVList *
     996         504 : statext_mcv_deserialize(bytea *data)
     997             : {
     998             :     int         dim,
     999             :                 i;
    1000             :     Size        expected_size;
    1001             :     MCVList    *mcvlist;
    1002             :     char       *raw;
    1003             :     char       *ptr;
    1004             :     char       *endptr PG_USED_FOR_ASSERTS_ONLY;
    1005             : 
    1006             :     int         ndims,
    1007             :                 nitems;
    1008         504 :     DimensionInfo *info = NULL;
    1009             : 
    1010             :     /* local allocation buffer (used only for deserialization) */
    1011         504 :     Datum     **map = NULL;
    1012             : 
    1013             :     /* MCV list */
    1014             :     Size        mcvlen;
    1015             : 
    1016             :     /* buffer used for the result */
    1017             :     Size        datalen;
    1018             :     char       *dataptr;
    1019             :     char       *valuesptr;
    1020             :     char       *isnullptr;
    1021             : 
    1022         504 :     if (data == NULL)
    1023           0 :         return NULL;
    1024             : 
    1025             :     /*
    1026             :      * We can't possibly deserialize a MCV list if there's not even a complete
    1027             :      * header. We need an explicit formula here, because we serialize the
    1028             :      * header fields one by one, so we need to ignore struct alignment.
    1029             :      */
    1030         504 :     if (VARSIZE_ANY(data) < MinSizeOfMCVList)
    1031           0 :         elog(ERROR, "invalid MCV size %zu (expected at least %zu)",
    1032             :              VARSIZE_ANY(data), MinSizeOfMCVList);
    1033             : 
    1034             :     /* read the MCV list header */
    1035         504 :     mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
    1036             : 
    1037             :     /* pointer to the data part (skip the varlena header) */
    1038         504 :     raw = (char *) data;
    1039         504 :     ptr = VARDATA_ANY(raw);
    1040         504 :     endptr = (char *) raw + VARSIZE_ANY(data);
    1041             : 
    1042             :     /* get the header and perform further sanity checks */
    1043         504 :     memcpy(&mcvlist->magic, ptr, sizeof(uint32));
    1044         504 :     ptr += sizeof(uint32);
    1045             : 
    1046         504 :     memcpy(&mcvlist->type, ptr, sizeof(uint32));
    1047         504 :     ptr += sizeof(uint32);
    1048             : 
    1049         504 :     memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
    1050         504 :     ptr += sizeof(uint32);
    1051             : 
    1052         504 :     memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
    1053         504 :     ptr += sizeof(AttrNumber);
    1054             : 
    1055         504 :     if (mcvlist->magic != STATS_MCV_MAGIC)
    1056           0 :         elog(ERROR, "invalid MCV magic %u (expected %u)",
    1057             :              mcvlist->magic, STATS_MCV_MAGIC);
    1058             : 
    1059         504 :     if (mcvlist->type != STATS_MCV_TYPE_BASIC)
    1060           0 :         elog(ERROR, "invalid MCV type %u (expected %u)",
    1061             :              mcvlist->type, STATS_MCV_TYPE_BASIC);
    1062             : 
    1063         504 :     if (mcvlist->ndimensions == 0)
    1064           0 :         elog(ERROR, "invalid zero-length dimension array in MCVList");
    1065         504 :     else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
    1066         504 :              (mcvlist->ndimensions < 0))
    1067           0 :         elog(ERROR, "invalid length (%d) dimension array in MCVList",
    1068             :              mcvlist->ndimensions);
    1069             : 
    1070         504 :     if (mcvlist->nitems == 0)
    1071           0 :         elog(ERROR, "invalid zero-length item array in MCVList");
    1072         504 :     else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
    1073           0 :         elog(ERROR, "invalid length (%u) item array in MCVList",
    1074             :              mcvlist->nitems);
    1075             : 
    1076         504 :     nitems = mcvlist->nitems;
    1077         504 :     ndims = mcvlist->ndimensions;
    1078             : 
    1079             :     /*
    1080             :      * Check amount of data including DimensionInfo for all dimensions and
    1081             :      * also the serialized items (including uint16 indexes). Also, walk
    1082             :      * through the dimension information and add it to the sum.
    1083             :      */
    1084         504 :     expected_size = SizeOfMCVList(ndims, nitems);
    1085             : 
    1086             :     /*
    1087             :      * Check that we have at least the dimension and info records, along with
    1088             :      * the items. We don't know the size of the serialized values yet. We need
    1089             :      * to do this check first, before accessing the dimension info.
    1090             :      */
    1091         504 :     if (VARSIZE_ANY(data) < expected_size)
    1092           0 :         elog(ERROR, "invalid MCV size %zu (expected %zu)",
    1093             :              VARSIZE_ANY(data), expected_size);
    1094             : 
    1095             :     /* Now copy the array of type Oids. */
    1096         504 :     memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
    1097         504 :     ptr += (sizeof(Oid) * ndims);
    1098             : 
    1099             :     /* Now it's safe to access the dimension info. */
    1100         504 :     info = palloc(ndims * sizeof(DimensionInfo));
    1101             : 
    1102         504 :     memcpy(info, ptr, ndims * sizeof(DimensionInfo));
    1103         504 :     ptr += (ndims * sizeof(DimensionInfo));
    1104             : 
    1105             :     /* account for the value arrays */
    1106        2154 :     for (dim = 0; dim < ndims; dim++)
    1107             :     {
    1108             :         /*
    1109             :          * XXX I wonder if we can/should rely on asserts here. Maybe those
    1110             :          * checks should be done every time?
    1111             :          */
    1112             :         Assert(info[dim].nvalues >= 0);
    1113             :         Assert(info[dim].nbytes >= 0);
    1114             : 
    1115        1650 :         expected_size += info[dim].nbytes;
    1116             :     }
    1117             : 
    1118             :     /*
    1119             :      * Now we know the total expected MCV size, including all the pieces
    1120             :      * (header, dimension info. items and deduplicated data). So do the final
    1121             :      * check on size.
    1122             :      */
    1123         504 :     if (VARSIZE_ANY(data) != expected_size)
    1124           0 :         elog(ERROR, "invalid MCV size %zu (expected %zu)",
    1125             :              VARSIZE_ANY(data), expected_size);
    1126             : 
    1127             :     /*
    1128             :      * We need an array of Datum values for each dimension, so that we can
    1129             :      * easily translate the uint16 indexes later. We also need a top-level
    1130             :      * array of pointers to those per-dimension arrays.
    1131             :      *
    1132             :      * While allocating the arrays for dimensions, compute how much space we
    1133             :      * need for a copy of the by-ref data, as we can't simply point to the
    1134             :      * original values (it might go away).
    1135             :      */
    1136         504 :     datalen = 0;                /* space for by-ref data */
    1137         504 :     map = (Datum **) palloc(ndims * sizeof(Datum *));
    1138             : 
    1139        2154 :     for (dim = 0; dim < ndims; dim++)
    1140             :     {
    1141        1650 :         map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);
    1142             : 
    1143             :         /* space needed for a copy of data for by-ref types */
    1144        1650 :         datalen += info[dim].nbytes_aligned;
    1145             :     }
    1146             : 
    1147             :     /*
    1148             :      * Now resize the MCV list so that the allocation includes all the data.
    1149             :      *
    1150             :      * Allocate space for a copy of the data, as we can't simply reference the
    1151             :      * serialized data - it's not aligned properly, and it may disappear while
    1152             :      * we're still using the MCV list, e.g. due to catcache release.
    1153             :      *
    1154             :      * We do care about alignment here, because we will allocate all the
    1155             :      * pieces at once, but then use pointers to different parts.
    1156             :      */
    1157         504 :     mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
    1158             : 
    1159             :     /* arrays of values and isnull flags for all MCV items */
    1160         504 :     mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
    1161         504 :     mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
    1162             : 
    1163             :     /* we don't quite need to align this, but it makes some asserts easier */
    1164         504 :     mcvlen += MAXALIGN(datalen);
    1165             : 
    1166             :     /* now resize the deserialized MCV list, and compute pointers to parts */
    1167         504 :     mcvlist = repalloc(mcvlist, mcvlen);
    1168             : 
    1169             :     /* pointer to the beginning of values/isnull arrays */
    1170         504 :     valuesptr = (char *) mcvlist
    1171         504 :         + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
    1172             : 
    1173         504 :     isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
    1174             : 
    1175         504 :     dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
    1176             : 
    1177             :     /*
    1178             :      * Build mapping (index => value) for translating the serialized data into
    1179             :      * the in-memory representation.
    1180             :      */
    1181        2154 :     for (dim = 0; dim < ndims; dim++)
    1182             :     {
    1183             :         /* remember start position in the input array */
    1184        1650 :         char       *start PG_USED_FOR_ASSERTS_ONLY = ptr;
    1185             : 
    1186        1650 :         if (info[dim].typbyval)
    1187             :         {
    1188             :             /* for by-val types we simply copy data into the mapping */
    1189       43968 :             for (i = 0; i < info[dim].nvalues; i++)
    1190             :             {
    1191       42888 :                 Datum       v = 0;
    1192             : 
    1193       42888 :                 memcpy(&v, ptr, info[dim].typlen);
    1194       42888 :                 ptr += info[dim].typlen;
    1195             : 
    1196       42888 :                 map[dim][i] = fetch_att(&v, true, info[dim].typlen);
    1197             : 
    1198             :                 /* no under/overflow of input array */
    1199             :                 Assert(ptr <= (start + info[dim].nbytes));
    1200             :             }
    1201             :         }
    1202             :         else
    1203             :         {
    1204             :             /* for by-ref types we need to also make a copy of the data */
    1205             : 
    1206             :             /* passed by reference, but fixed length (name, tid, ...) */
    1207         570 :             if (info[dim].typlen > 0)
    1208             :             {
    1209        2202 :                 for (i = 0; i < info[dim].nvalues; i++)
    1210             :                 {
    1211        2160 :                     memcpy(dataptr, ptr, info[dim].typlen);
    1212        2160 :                     ptr += info[dim].typlen;
    1213             : 
    1214             :                     /* just point into the array */
    1215        2160 :                     map[dim][i] = PointerGetDatum(dataptr);
    1216        2160 :                     dataptr += MAXALIGN(info[dim].typlen);
    1217             :                 }
    1218             :             }
    1219         528 :             else if (info[dim].typlen == -1)
    1220             :             {
    1221             :                 /* varlena */
    1222       15024 :                 for (i = 0; i < info[dim].nvalues; i++)
    1223             :                 {
    1224             :                     uint32      len;
    1225             : 
    1226             :                     /* read the uint32 length */
    1227       14496 :                     memcpy(&len, ptr, sizeof(uint32));
    1228       14496 :                     ptr += sizeof(uint32);
    1229             : 
    1230             :                     /* the length is data-only */
    1231       14496 :                     SET_VARSIZE(dataptr, len + VARHDRSZ);
    1232       14496 :                     memcpy(VARDATA(dataptr), ptr, len);
    1233       14496 :                     ptr += len;
    1234             : 
    1235             :                     /* just point into the array */
    1236       14496 :                     map[dim][i] = PointerGetDatum(dataptr);
    1237             : 
    1238             :                     /* skip to place of the next deserialized value */
    1239       14496 :                     dataptr += MAXALIGN(len + VARHDRSZ);
    1240             :                 }
    1241             :             }
    1242           0 :             else if (info[dim].typlen == -2)
    1243             :             {
    1244             :                 /* cstring */
    1245           0 :                 for (i = 0; i < info[dim].nvalues; i++)
    1246             :                 {
    1247             :                     uint32      len;
    1248             : 
    1249           0 :                     memcpy(&len, ptr, sizeof(uint32));
    1250           0 :                     ptr += sizeof(uint32);
    1251             : 
    1252           0 :                     memcpy(dataptr, ptr, len);
    1253           0 :                     ptr += len;
    1254             : 
    1255             :                     /* just point into the array */
    1256           0 :                     map[dim][i] = PointerGetDatum(dataptr);
    1257           0 :                     dataptr += MAXALIGN(len);
    1258             :                 }
    1259             :             }
    1260             : 
    1261             :             /* no under/overflow of input array */
    1262             :             Assert(ptr <= (start + info[dim].nbytes));
    1263             : 
    1264             :             /* no overflow of the output mcv value */
    1265             :             Assert(dataptr <= ((char *) mcvlist + mcvlen));
    1266             :         }
    1267             : 
    1268             :         /* check we consumed input data for this dimension exactly */
    1269             :         Assert(ptr == (start + info[dim].nbytes));
    1270             :     }
    1271             : 
    1272             :     /* we should have also filled the MCV list exactly */
    1273             :     Assert(dataptr == ((char *) mcvlist + mcvlen));
    1274             : 
    1275             :     /* deserialize the MCV items and translate the indexes to Datums */
    1276       30486 :     for (i = 0; i < nitems; i++)
    1277             :     {
    1278       29982 :         MCVItem    *item = &mcvlist->items[i];
    1279             : 
    1280       29982 :         item->values = (Datum *) valuesptr;
    1281       29982 :         valuesptr += MAXALIGN(sizeof(Datum) * ndims);
    1282             : 
    1283       29982 :         item->isnull = (bool *) isnullptr;
    1284       29982 :         isnullptr += MAXALIGN(sizeof(bool) * ndims);
    1285             : 
    1286       29982 :         memcpy(item->isnull, ptr, sizeof(bool) * ndims);
    1287       29982 :         ptr += sizeof(bool) * ndims;
    1288             : 
    1289       29982 :         memcpy(&item->frequency, ptr, sizeof(double));
    1290       29982 :         ptr += sizeof(double);
    1291             : 
    1292       29982 :         memcpy(&item->base_frequency, ptr, sizeof(double));
    1293       29982 :         ptr += sizeof(double);
    1294             : 
    1295             :         /* finally translate the indexes (for non-NULL only) */
    1296      133896 :         for (dim = 0; dim < ndims; dim++)
    1297             :         {
    1298             :             uint16      index;
    1299             : 
    1300      103914 :             memcpy(&index, ptr, sizeof(uint16));
    1301      103914 :             ptr += sizeof(uint16);
    1302             : 
    1303      103914 :             if (item->isnull[dim])
    1304         306 :                 continue;
    1305             : 
    1306      103608 :             item->values[dim] = map[dim][index];
    1307             :         }
    1308             : 
    1309             :         /* check we're not overflowing the input */
    1310             :         Assert(ptr <= endptr);
    1311             :     }
    1312             : 
    1313             :     /* check that we processed all the data */
    1314             :     Assert(ptr == endptr);
    1315             : 
    1316             :     /* release the buffers used for mapping */
    1317        2154 :     for (dim = 0; dim < ndims; dim++)
    1318        1650 :         pfree(map[dim]);
    1319             : 
    1320         504 :     pfree(map);
    1321             : 
    1322         504 :     return mcvlist;
    1323             : }
    1324             : 
    1325             : /*
    1326             :  * SRF with details about buckets of a histogram:
    1327             :  *
    1328             :  * - item ID (0...nitems)
    1329             :  * - values (string array)
    1330             :  * - nulls only (boolean array)
    1331             :  * - frequency (double precision)
    1332             :  * - base_frequency (double precision)
    1333             :  *
    1334             :  * The input is the OID of the statistics, and there are no rows returned if
    1335             :  * the statistics contains no histogram.
    1336             :  */
    1337             : Datum
    1338          78 : pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)
    1339             : {
    1340             :     FuncCallContext *funcctx;
    1341             : 
    1342             :     /* stuff done only on the first call of the function */
    1343          78 :     if (SRF_IS_FIRSTCALL())
    1344             :     {
    1345             :         MemoryContext oldcontext;
    1346             :         MCVList    *mcvlist;
    1347             :         TupleDesc   tupdesc;
    1348             : 
    1349             :         /* create a function context for cross-call persistence */
    1350          24 :         funcctx = SRF_FIRSTCALL_INIT();
    1351             : 
    1352             :         /* switch to memory context appropriate for multiple function calls */
    1353          24 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
    1354             : 
    1355          24 :         mcvlist = statext_mcv_deserialize(PG_GETARG_BYTEA_P(0));
    1356             : 
    1357          24 :         funcctx->user_fctx = mcvlist;
    1358             : 
    1359             :         /* total number of tuples to be returned */
    1360          24 :         funcctx->max_calls = 0;
    1361          24 :         if (funcctx->user_fctx != NULL)
    1362          24 :             funcctx->max_calls = mcvlist->nitems;
    1363             : 
    1364             :         /* Build a tuple descriptor for our result type */
    1365          24 :         if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
    1366           0 :             ereport(ERROR,
    1367             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1368             :                      errmsg("function returning record called in context "
    1369             :                             "that cannot accept type record")));
    1370          24 :         tupdesc = BlessTupleDesc(tupdesc);
    1371             : 
    1372             :         /*
    1373             :          * generate attribute metadata needed later to produce tuples from raw
    1374             :          * C strings
    1375             :          */
    1376          24 :         funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
    1377             : 
    1378          24 :         MemoryContextSwitchTo(oldcontext);
    1379             :     }
    1380             : 
    1381             :     /* stuff done on every call of the function */
    1382          78 :     funcctx = SRF_PERCALL_SETUP();
    1383             : 
    1384          78 :     if (funcctx->call_cntr < funcctx->max_calls)   /* do when there is more
    1385             :                                                      * left to send */
    1386             :     {
    1387             :         Datum       values[5];
    1388             :         bool        nulls[5];
    1389             :         HeapTuple   tuple;
    1390             :         Datum       result;
    1391          54 :         ArrayBuildState *astate_values = NULL;
    1392          54 :         ArrayBuildState *astate_nulls = NULL;
    1393             : 
    1394             :         int         i;
    1395             :         MCVList    *mcvlist;
    1396             :         MCVItem    *item;
    1397             : 
    1398          54 :         mcvlist = (MCVList *) funcctx->user_fctx;
    1399             : 
    1400             :         Assert(funcctx->call_cntr < mcvlist->nitems);
    1401             : 
    1402          54 :         item = &mcvlist->items[funcctx->call_cntr];
    1403             : 
    1404         180 :         for (i = 0; i < mcvlist->ndimensions; i++)
    1405             :         {
    1406             : 
    1407         126 :             astate_nulls = accumArrayResult(astate_nulls,
    1408         126 :                                             BoolGetDatum(item->isnull[i]),
    1409             :                                             false,
    1410             :                                             BOOLOID,
    1411             :                                             CurrentMemoryContext);
    1412             : 
    1413         126 :             if (!item->isnull[i])
    1414             :             {
    1415             :                 bool        isvarlena;
    1416             :                 Oid         outfunc;
    1417             :                 FmgrInfo    fmgrinfo;
    1418             :                 Datum       val;
    1419             :                 text       *txt;
    1420             : 
    1421             :                 /* lookup output func for the type */
    1422         102 :                 getTypeOutputInfo(mcvlist->types[i], &outfunc, &isvarlena);
    1423         102 :                 fmgr_info(outfunc, &fmgrinfo);
    1424             : 
    1425         102 :                 val = FunctionCall1(&fmgrinfo, item->values[i]);
    1426         102 :                 txt = cstring_to_text(DatumGetPointer(val));
    1427             : 
    1428         102 :                 astate_values = accumArrayResult(astate_values,
    1429             :                                                  PointerGetDatum(txt),
    1430             :                                                  false,
    1431             :                                                  TEXTOID,
    1432             :                                                  CurrentMemoryContext);
    1433             :             }
    1434             :             else
    1435          24 :                 astate_values = accumArrayResult(astate_values,
    1436             :                                                  (Datum) 0,
    1437             :                                                  true,
    1438             :                                                  TEXTOID,
    1439             :                                                  CurrentMemoryContext);
    1440             :         }
    1441             : 
    1442          54 :         values[0] = Int32GetDatum(funcctx->call_cntr);
    1443          54 :         values[1] = makeArrayResult(astate_values, CurrentMemoryContext);
    1444          54 :         values[2] = makeArrayResult(astate_nulls, CurrentMemoryContext);
    1445          54 :         values[3] = Float8GetDatum(item->frequency);
    1446          54 :         values[4] = Float8GetDatum(item->base_frequency);
    1447             : 
    1448             :         /* no NULLs in the tuple */
    1449          54 :         memset(nulls, 0, sizeof(nulls));
    1450             : 
    1451             :         /* build a tuple */
    1452          54 :         tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls);
    1453             : 
    1454             :         /* make the tuple into a datum */
    1455          54 :         result = HeapTupleGetDatum(tuple);
    1456             : 
    1457          54 :         SRF_RETURN_NEXT(funcctx, result);
    1458             :     }
    1459             :     else                        /* do when there is no more left */
    1460             :     {
    1461          24 :         SRF_RETURN_DONE(funcctx);
    1462             :     }
    1463             : }
    1464             : 
    1465             : /*
    1466             :  * pg_mcv_list_in       - input routine for type pg_mcv_list.
    1467             :  *
    1468             :  * pg_mcv_list is real enough to be a table column, but it has no operations
    1469             :  * of its own, and disallows input too
    1470             :  */
    1471             : Datum
    1472           0 : pg_mcv_list_in(PG_FUNCTION_ARGS)
    1473             : {
    1474             :     /*
    1475             :      * pg_mcv_list stores the data in binary form and parsing text input is
    1476             :      * not needed, so disallow this.
    1477             :      */
    1478           0 :     ereport(ERROR,
    1479             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1480             :              errmsg("cannot accept a value of type %s", "pg_mcv_list")));
    1481             : 
    1482             :     PG_RETURN_VOID();           /* keep compiler quiet */
    1483             : }
    1484             : 
    1485             : 
    1486             : /*
    1487             :  * pg_mcv_list_out      - output routine for type pg_mcv_list.
    1488             :  *
    1489             :  * MCV lists are serialized into a bytea value, so we simply call byteaout()
    1490             :  * to serialize the value into text. But it'd be nice to serialize that into
    1491             :  * a meaningful representation (e.g. for inspection by people).
    1492             :  *
    1493             :  * XXX This should probably return something meaningful, similar to what
    1494             :  * pg_dependencies_out does. Not sure how to deal with the deduplicated
    1495             :  * values, though - do we want to expand that or not?
    1496             :  */
    1497             : Datum
    1498          12 : pg_mcv_list_out(PG_FUNCTION_ARGS)
    1499             : {
    1500          12 :     return byteaout(fcinfo);
    1501             : }
    1502             : 
    1503             : /*
    1504             :  * pg_mcv_list_recv     - binary input routine for type pg_mcv_list.
    1505             :  */
    1506             : Datum
    1507           0 : pg_mcv_list_recv(PG_FUNCTION_ARGS)
    1508             : {
    1509           0 :     ereport(ERROR,
    1510             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1511             :              errmsg("cannot accept a value of type %s", "pg_mcv_list")));
    1512             : 
    1513             :     PG_RETURN_VOID();           /* keep compiler quiet */
    1514             : }
    1515             : 
    1516             : /*
    1517             :  * pg_mcv_list_send     - binary output routine for type pg_mcv_list.
    1518             :  *
    1519             :  * MCV lists are serialized in a bytea value (although the type is named
    1520             :  * differently), so let's just send that.
    1521             :  */
    1522             : Datum
    1523           0 : pg_mcv_list_send(PG_FUNCTION_ARGS)
    1524             : {
    1525           0 :     return byteasend(fcinfo);
    1526             : }
    1527             : 
    1528             : /*
    1529             :  * match the attribute/expression to a dimension of the statistic
    1530             :  *
    1531             :  * Returns the zero-based index of the matching statistics dimension.
    1532             :  * Optionally determines the collation.
    1533             :  */
    1534             : static int
    1535        1152 : mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
    1536             : {
    1537             :     int         idx;
    1538             : 
    1539        1152 :     if (IsA(expr, Var))
    1540             :     {
    1541             :         /* simple Var, so just lookup using varattno */
    1542         906 :         Var        *var = (Var *) expr;
    1543             : 
    1544         906 :         if (collid)
    1545         840 :             *collid = var->varcollid;
    1546             : 
    1547         906 :         idx = bms_member_index(keys, var->varattno);
    1548             : 
    1549         906 :         if (idx < 0)
    1550           0 :             elog(ERROR, "variable not found in statistics object");
    1551             :     }
    1552             :     else
    1553             :     {
    1554             :         /* expression - lookup in stats expressions */
    1555             :         ListCell   *lc;
    1556             : 
    1557         246 :         if (collid)
    1558         240 :             *collid = exprCollation(expr);
    1559             : 
    1560             :         /* expressions are stored after the simple columns */
    1561         246 :         idx = bms_num_members(keys);
    1562         456 :         foreach(lc, exprs)
    1563             :         {
    1564         456 :             Node       *stat_expr = (Node *) lfirst(lc);
    1565             : 
    1566         456 :             if (equal(expr, stat_expr))
    1567         246 :                 break;
    1568             : 
    1569         210 :             idx++;
    1570             :         }
    1571             : 
    1572         246 :         if (lc == NULL)
    1573           0 :             elog(ERROR, "expression not found in statistics object");
    1574             :     }
    1575             : 
    1576        1152 :     return idx;
    1577             : }
    1578             : 
    1579             : /*
    1580             :  * mcv_get_match_bitmap
    1581             :  *  Evaluate clauses using the MCV list, and update the match bitmap.
    1582             :  *
    1583             :  * A match bitmap keeps match/mismatch status for each MCV item, and we
    1584             :  * update it based on additional clauses. We also use it to skip items
    1585             :  * that can't possibly match (e.g. item marked as "mismatch" can't change
    1586             :  * to "match" when evaluating AND clause list).
    1587             :  *
    1588             :  * The function also returns a flag indicating whether there was an
    1589             :  * equality condition for all attributes, the minimum frequency in the MCV
    1590             :  * list, and a total MCV frequency (sum of frequencies for all items).
    1591             :  *
    1592             :  * XXX Currently the match bitmap uses a bool for each MCV item, which is
    1593             :  * somewhat wasteful as we could do with just a single bit, thus reducing
    1594             :  * the size to ~1/8. It would also allow us to combine bitmaps simply using
    1595             :  * & and |, which should be faster than min/max. The bitmaps are fairly
    1596             :  * small, though (thanks to the cap on the MCV list size).
    1597             :  */
    1598             : static bool *
    1599         714 : mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
    1600             :                      Bitmapset *keys, List *exprs,
    1601             :                      MCVList *mcvlist, bool is_or)
    1602             : {
    1603             :     ListCell   *l;
    1604             :     bool       *matches;
    1605             : 
    1606             :     /* The bitmap may be partially built. */
    1607             :     Assert(clauses != NIL);
    1608             :     Assert(mcvlist != NULL);
    1609             :     Assert(mcvlist->nitems > 0);
    1610             :     Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS);
    1611             : 
    1612         714 :     matches = palloc(sizeof(bool) * mcvlist->nitems);
    1613         714 :     memset(matches, !is_or, sizeof(bool) * mcvlist->nitems);
    1614             : 
    1615             :     /*
    1616             :      * Loop through the list of clauses, and for each of them evaluate all the
    1617             :      * MCV items not yet eliminated by the preceding clauses.
    1618             :      */
    1619        2034 :     foreach(l, clauses)
    1620             :     {
    1621        1320 :         Node       *clause = (Node *) lfirst(l);
    1622             : 
    1623             :         /* if it's a RestrictInfo, then extract the clause */
    1624        1320 :         if (IsA(clause, RestrictInfo))
    1625        1218 :             clause = (Node *) ((RestrictInfo *) clause)->clause;
    1626             : 
    1627             :         /*
    1628             :          * Handle the various types of clauses - OpClause, NullTest and
    1629             :          * AND/OR/NOT
    1630             :          */
    1631        1320 :         if (is_opclause(clause))
    1632             :         {
    1633         876 :             OpExpr     *expr = (OpExpr *) clause;
    1634             :             FmgrInfo    opproc;
    1635             : 
    1636             :             /* valid only after examine_opclause_args returns true */
    1637             :             Node       *clause_expr;
    1638             :             Const      *cst;
    1639             :             bool        expronleft;
    1640             :             int         idx;
    1641             :             Oid         collid;
    1642             : 
    1643         876 :             fmgr_info(get_opcode(expr->opno), &opproc);
    1644             : 
    1645             :             /* extract the var/expr and const from the expression */
    1646         876 :             if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
    1647           0 :                 elog(ERROR, "incompatible clause");
    1648             : 
    1649             :             /* match the attribute/expression to a dimension of the statistic */
    1650         876 :             idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
    1651             : 
    1652             :             /*
    1653             :              * Walk through the MCV items and evaluate the current clause. We
    1654             :              * can skip items that were already ruled out, and terminate if
    1655             :              * there are no remaining MCV items that might possibly match.
    1656             :              */
    1657       58632 :             for (int i = 0; i < mcvlist->nitems; i++)
    1658             :             {
    1659       57756 :                 bool        match = true;
    1660       57756 :                 MCVItem    *item = &mcvlist->items[i];
    1661             : 
    1662             :                 Assert(idx >= 0);
    1663             : 
    1664             :                 /*
    1665             :                  * When the MCV item or the Const value is NULL we can treat
    1666             :                  * this as a mismatch. We must not call the operator because
    1667             :                  * of strictness.
    1668             :                  */
    1669       57756 :                 if (item->isnull[idx] || cst->constisnull)
    1670             :                 {
    1671          48 :                     matches[i] = RESULT_MERGE(matches[i], is_or, false);
    1672          48 :                     continue;
    1673             :                 }
    1674             : 
    1675             :                 /*
    1676             :                  * Skip MCV items that can't change result in the bitmap. Once
    1677             :                  * the value gets false for AND-lists, or true for OR-lists,
    1678             :                  * we don't need to look at more clauses.
    1679             :                  */
    1680       57708 :                 if (RESULT_IS_FINAL(matches[i], is_or))
    1681       26964 :                     continue;
    1682             : 
    1683             :                 /*
    1684             :                  * First check whether the constant is below the lower
    1685             :                  * boundary (in that case we can skip the bucket, because
    1686             :                  * there's no overlap).
    1687             :                  *
    1688             :                  * We don't store collations used to build the statistics, but
    1689             :                  * we can use the collation for the attribute itself, as
    1690             :                  * stored in varcollid. We do reset the statistics after a
    1691             :                  * type change (including collation change), so this is OK.
    1692             :                  * For expressions, we use the collation extracted from the
    1693             :                  * expression itself.
    1694             :                  */
    1695       30744 :                 if (expronleft)
    1696       28128 :                     match = DatumGetBool(FunctionCall2Coll(&opproc,
    1697             :                                                            collid,
    1698       28128 :                                                            item->values[idx],
    1699       28128 :                                                            cst->constvalue));
    1700             :                 else
    1701        2616 :                     match = DatumGetBool(FunctionCall2Coll(&opproc,
    1702             :                                                            collid,
    1703        2616 :                                                            cst->constvalue,
    1704        2616 :                                                            item->values[idx]));
    1705             : 
    1706             :                 /* update the match bitmap with the result */
    1707       30744 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1708             :             }
    1709             :         }
    1710         444 :         else if (IsA(clause, ScalarArrayOpExpr))
    1711             :         {
    1712         204 :             ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
    1713             :             FmgrInfo    opproc;
    1714             : 
    1715             :             /* valid only after examine_opclause_args returns true */
    1716             :             Node       *clause_expr;
    1717             :             Const      *cst;
    1718             :             bool        expronleft;
    1719             :             Oid         collid;
    1720             :             int         idx;
    1721             : 
    1722             :             /* array evaluation */
    1723             :             ArrayType  *arrayval;
    1724             :             int16       elmlen;
    1725             :             bool        elmbyval;
    1726             :             char        elmalign;
    1727             :             int         num_elems;
    1728             :             Datum      *elem_values;
    1729             :             bool       *elem_nulls;
    1730             : 
    1731         204 :             fmgr_info(get_opcode(expr->opno), &opproc);
    1732             : 
    1733             :             /* extract the var/expr and const from the expression */
    1734         204 :             if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
    1735           0 :                 elog(ERROR, "incompatible clause");
    1736             : 
    1737             :             /* We expect Var on left */
    1738         204 :             if (!expronleft)
    1739           0 :                 elog(ERROR, "incompatible clause");
    1740             : 
    1741             :             /*
    1742             :              * Deconstruct the array constant, unless it's NULL (we'll cover
    1743             :              * that case below)
    1744             :              */
    1745         204 :             if (!cst->constisnull)
    1746             :             {
    1747         204 :                 arrayval = DatumGetArrayTypeP(cst->constvalue);
    1748         204 :                 get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
    1749             :                                      &elmlen, &elmbyval, &elmalign);
    1750         204 :                 deconstruct_array(arrayval,
    1751             :                                   ARR_ELEMTYPE(arrayval),
    1752             :                                   elmlen, elmbyval, elmalign,
    1753             :                                   &elem_values, &elem_nulls, &num_elems);
    1754             :             }
    1755             : 
    1756             :             /* match the attribute/expression to a dimension of the statistic */
    1757         204 :             idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
    1758             : 
    1759             :             /*
    1760             :              * Walk through the MCV items and evaluate the current clause. We
    1761             :              * can skip items that were already ruled out, and terminate if
    1762             :              * there are no remaining MCV items that might possibly match.
    1763             :              */
    1764       15696 :             for (int i = 0; i < mcvlist->nitems; i++)
    1765             :             {
    1766             :                 int         j;
    1767       15492 :                 bool        match = !expr->useOr;
    1768       15492 :                 MCVItem    *item = &mcvlist->items[i];
    1769             : 
    1770             :                 /*
    1771             :                  * When the MCV item or the Const value is NULL we can treat
    1772             :                  * this as a mismatch. We must not call the operator because
    1773             :                  * of strictness.
    1774             :                  */
    1775       15492 :                 if (item->isnull[idx] || cst->constisnull)
    1776             :                 {
    1777          18 :                     matches[i] = RESULT_MERGE(matches[i], is_or, false);
    1778          18 :                     continue;
    1779             :                 }
    1780             : 
    1781             :                 /*
    1782             :                  * Skip MCV items that can't change result in the bitmap. Once
    1783             :                  * the value gets false for AND-lists, or true for OR-lists,
    1784             :                  * we don't need to look at more clauses.
    1785             :                  */
    1786       15474 :                 if (RESULT_IS_FINAL(matches[i], is_or))
    1787        8016 :                     continue;
    1788             : 
    1789       28338 :                 for (j = 0; j < num_elems; j++)
    1790             :                 {
    1791       23634 :                     Datum       elem_value = elem_values[j];
    1792       23634 :                     bool        elem_isnull = elem_nulls[j];
    1793             :                     bool        elem_match;
    1794             : 
    1795             :                     /* NULL values always evaluate as not matching. */
    1796       23634 :                     if (elem_isnull)
    1797             :                     {
    1798        2112 :                         match = RESULT_MERGE(match, expr->useOr, false);
    1799        2112 :                         continue;
    1800             :                     }
    1801             : 
    1802             :                     /*
    1803             :                      * Stop evaluating the array elements once we reach a
    1804             :                      * matching value that can't change - ALL() is the same as
    1805             :                      * AND-list, ANY() is the same as OR-list.
    1806             :                      */
    1807       21522 :                     if (RESULT_IS_FINAL(match, expr->useOr))
    1808        2754 :                         break;
    1809             : 
    1810       18768 :                     elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
    1811             :                                                                 collid,
    1812       18768 :                                                                 item->values[idx],
    1813             :                                                                 elem_value));
    1814             : 
    1815       18768 :                     match = RESULT_MERGE(match, expr->useOr, elem_match);
    1816             :                 }
    1817             : 
    1818             :                 /* update the match bitmap with the result */
    1819        7458 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1820             :             }
    1821             :         }
    1822         240 :         else if (IsA(clause, NullTest))
    1823             :         {
    1824          66 :             NullTest   *expr = (NullTest *) clause;
    1825          66 :             Node       *clause_expr = (Node *) (expr->arg);
    1826             : 
    1827             :             /* match the attribute/expression to a dimension of the statistic */
    1828          66 :             int         idx = mcv_match_expression(clause_expr, keys, exprs, NULL);
    1829             : 
    1830             :             /*
    1831             :              * Walk through the MCV items and evaluate the current clause. We
    1832             :              * can skip items that were already ruled out, and terminate if
    1833             :              * there are no remaining MCV items that might possibly match.
    1834             :              */
    1835        6078 :             for (int i = 0; i < mcvlist->nitems; i++)
    1836             :             {
    1837        6012 :                 bool        match = false;  /* assume mismatch */
    1838        6012 :                 MCVItem    *item = &mcvlist->items[i];
    1839             : 
    1840             :                 /* if the clause mismatches the MCV item, update the bitmap */
    1841        6012 :                 switch (expr->nulltesttype)
    1842             :                 {
    1843        4212 :                     case IS_NULL:
    1844        4212 :                         match = (item->isnull[idx]) ? true : match;
    1845        4212 :                         break;
    1846             : 
    1847        1800 :                     case IS_NOT_NULL:
    1848        1800 :                         match = (!item->isnull[idx]) ? true : match;
    1849        1800 :                         break;
    1850             :                 }
    1851             : 
    1852             :                 /* now, update the match bitmap, depending on OR/AND type */
    1853        6012 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1854             :             }
    1855             :         }
    1856         174 :         else if (is_orclause(clause) || is_andclause(clause))
    1857          60 :         {
    1858             :             /* AND/OR clause, with all subclauses being compatible */
    1859             : 
    1860             :             int         i;
    1861          60 :             BoolExpr   *bool_clause = ((BoolExpr *) clause);
    1862          60 :             List       *bool_clauses = bool_clause->args;
    1863             : 
    1864             :             /* match/mismatch bitmap for each MCV item */
    1865          60 :             bool       *bool_matches = NULL;
    1866             : 
    1867             :             Assert(bool_clauses != NIL);
    1868             :             Assert(list_length(bool_clauses) >= 2);
    1869             : 
    1870             :             /* build the match bitmap for the OR-clauses */
    1871          60 :             bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs,
    1872          60 :                                                 mcvlist, is_orclause(clause));
    1873             : 
    1874             :             /*
    1875             :              * Merge the bitmap produced by mcv_get_match_bitmap into the
    1876             :              * current one. We need to consider if we're evaluating AND or OR
    1877             :              * condition when merging the results.
    1878             :              */
    1879        3756 :             for (i = 0; i < mcvlist->nitems; i++)
    1880        3696 :                 matches[i] = RESULT_MERGE(matches[i], is_or, bool_matches[i]);
    1881             : 
    1882          60 :             pfree(bool_matches);
    1883             :         }
    1884         114 :         else if (is_notclause(clause))
    1885             :         {
    1886             :             /* NOT clause, with all subclauses compatible */
    1887             : 
    1888             :             int         i;
    1889          30 :             BoolExpr   *not_clause = ((BoolExpr *) clause);
    1890          30 :             List       *not_args = not_clause->args;
    1891             : 
    1892             :             /* match/mismatch bitmap for each MCV item */
    1893          30 :             bool       *not_matches = NULL;
    1894             : 
    1895             :             Assert(not_args != NIL);
    1896             :             Assert(list_length(not_args) == 1);
    1897             : 
    1898             :             /* build the match bitmap for the NOT-clause */
    1899          30 :             not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs,
    1900             :                                                mcvlist, false);
    1901             : 
    1902             :             /*
    1903             :              * Merge the bitmap produced by mcv_get_match_bitmap into the
    1904             :              * current one. We're handling a NOT clause, so invert the result
    1905             :              * before merging it into the global bitmap.
    1906             :              */
    1907         150 :             for (i = 0; i < mcvlist->nitems; i++)
    1908         120 :                 matches[i] = RESULT_MERGE(matches[i], is_or, !not_matches[i]);
    1909             : 
    1910          30 :             pfree(not_matches);
    1911             :         }
    1912          84 :         else if (IsA(clause, Var))
    1913             :         {
    1914             :             /* Var (has to be a boolean Var, possibly from below NOT) */
    1915             : 
    1916          78 :             Var        *var = (Var *) (clause);
    1917             : 
    1918             :             /* match the attribute to a dimension of the statistic */
    1919          78 :             int         idx = bms_member_index(keys, var->varattno);
    1920             : 
    1921             :             Assert(var->vartype == BOOLOID);
    1922             : 
    1923             :             /*
    1924             :              * Walk through the MCV items and evaluate the current clause. We
    1925             :              * can skip items that were already ruled out, and terminate if
    1926             :              * there are no remaining MCV items that might possibly match.
    1927             :              */
    1928         378 :             for (int i = 0; i < mcvlist->nitems; i++)
    1929             :             {
    1930         300 :                 MCVItem    *item = &mcvlist->items[i];
    1931         300 :                 bool        match = false;
    1932             : 
    1933             :                 /* if the item is NULL, it's a mismatch */
    1934         300 :                 if (!item->isnull[idx] && DatumGetBool(item->values[idx]))
    1935         150 :                     match = true;
    1936             : 
    1937             :                 /* update the result bitmap */
    1938         300 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1939             :             }
    1940             :         }
    1941             :         else
    1942             :         {
    1943             :             /* Otherwise, it must be a bare boolean-returning expression */
    1944             :             int         idx;
    1945             : 
    1946             :             /* match the expression to a dimension of the statistic */
    1947           6 :             idx = mcv_match_expression(clause, keys, exprs, NULL);
    1948             : 
    1949             :             /*
    1950             :              * Walk through the MCV items and evaluate the current clause. We
    1951             :              * can skip items that were already ruled out, and terminate if
    1952             :              * there are no remaining MCV items that might possibly match.
    1953             :              */
    1954         222 :             for (int i = 0; i < mcvlist->nitems; i++)
    1955             :             {
    1956             :                 bool        match;
    1957         216 :                 MCVItem    *item = &mcvlist->items[i];
    1958             : 
    1959             :                 /* "match" just means it's bool TRUE */
    1960         216 :                 match = !item->isnull[idx] && DatumGetBool(item->values[idx]);
    1961             : 
    1962             :                 /* now, update the match bitmap, depending on OR/AND type */
    1963         216 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1964             :             }
    1965             :         }
    1966             :     }
    1967             : 
    1968         714 :     return matches;
    1969             : }
    1970             : 
    1971             : 
    1972             : /*
    1973             :  * mcv_combine_selectivities
    1974             :  *      Combine per-column and multi-column MCV selectivity estimates.
    1975             :  *
    1976             :  * simple_sel is a "simple" selectivity estimate (produced without using any
    1977             :  * extended statistics, essentially assuming independence of columns/clauses).
    1978             :  *
    1979             :  * mcv_sel and mcv_basesel are sums of the frequencies and base frequencies of
    1980             :  * all matching MCV items.  The difference (mcv_sel - mcv_basesel) is then
    1981             :  * essentially interpreted as a correction to be added to simple_sel, as
    1982             :  * described below.
    1983             :  *
    1984             :  * mcv_totalsel is the sum of the frequencies of all MCV items (not just the
    1985             :  * matching ones).  This is used as an upper bound on the portion of the
    1986             :  * selectivity estimates not covered by the MCV statistics.
    1987             :  *
    1988             :  * Note: While simple and base selectivities are defined in a quite similar
    1989             :  * way, the values are computed differently and are not therefore equal. The
    1990             :  * simple selectivity is computed as a product of per-clause estimates, while
    1991             :  * the base selectivity is computed by adding up base frequencies of matching
    1992             :  * items of the multi-column MCV list. So the values may differ for two main
    1993             :  * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
    1994             :  * the MCV items did not match the estimated clauses.
    1995             :  *
    1996             :  * As both (a) and (b) reduce the base selectivity value, it generally holds
    1997             :  * that (simple_sel >= mcv_basesel). If the MCV list covers all the data, the
    1998             :  * values may be equal.
    1999             :  *
    2000             :  * So, other_sel = (simple_sel - mcv_basesel) is an estimate for the part not
    2001             :  * covered by the MCV list, and (mcv_sel - mcv_basesel) may be seen as a
    2002             :  * correction for the part covered by the MCV list. Those two statements are
    2003             :  * actually equivalent.
    2004             :  */
    2005             : Selectivity
    2006         672 : mcv_combine_selectivities(Selectivity simple_sel,
    2007             :                           Selectivity mcv_sel,
    2008             :                           Selectivity mcv_basesel,
    2009             :                           Selectivity mcv_totalsel)
    2010             : {
    2011             :     Selectivity other_sel;
    2012             :     Selectivity sel;
    2013             : 
    2014             :     /* estimated selectivity of values not covered by MCV matches */
    2015         672 :     other_sel = simple_sel - mcv_basesel;
    2016         672 :     CLAMP_PROBABILITY(other_sel);
    2017             : 
    2018             :     /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
    2019         672 :     if (other_sel > 1.0 - mcv_totalsel)
    2020         444 :         other_sel = 1.0 - mcv_totalsel;
    2021             : 
    2022             :     /* overall selectivity is the sum of the MCV and non-MCV parts */
    2023         672 :     sel = mcv_sel + other_sel;
    2024         672 :     CLAMP_PROBABILITY(sel);
    2025             : 
    2026         672 :     return sel;
    2027             : }
    2028             : 
    2029             : 
    2030             : /*
    2031             :  * mcv_clauselist_selectivity
    2032             :  *      Use MCV statistics to estimate the selectivity of an implicitly-ANDed
    2033             :  *      list of clauses.
    2034             :  *
    2035             :  * This determines which MCV items match every clause in the list and returns
    2036             :  * the sum of the frequencies of those items.
    2037             :  *
    2038             :  * In addition, it returns the sum of the base frequencies of each of those
    2039             :  * items (that is the sum of the selectivities that each item would have if
    2040             :  * the columns were independent of one another), and the total selectivity of
    2041             :  * all the MCV items (not just the matching ones).  These are expected to be
    2042             :  * used together with a "simple" selectivity estimate (one based only on
    2043             :  * per-column statistics) to produce an overall selectivity estimate that
    2044             :  * makes use of both per-column and multi-column statistics --- see
    2045             :  * mcv_combine_selectivities().
    2046             :  */
    2047             : Selectivity
    2048         384 : mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
    2049             :                            List *clauses, int varRelid,
    2050             :                            JoinType jointype, SpecialJoinInfo *sjinfo,
    2051             :                            RelOptInfo *rel,
    2052             :                            Selectivity *basesel, Selectivity *totalsel)
    2053             : {
    2054             :     int         i;
    2055             :     MCVList    *mcv;
    2056         384 :     Selectivity s = 0.0;
    2057         384 :     RangeTblEntry *rte = root->simple_rte_array[rel->relid];
    2058             : 
    2059             :     /* match/mismatch bitmap for each MCV item */
    2060         384 :     bool       *matches = NULL;
    2061             : 
    2062             :     /* load the MCV list stored in the statistics object */
    2063         384 :     mcv = statext_mcv_load(stat->statOid, rte->inh);
    2064             : 
    2065             :     /* build a match bitmap for the clauses */
    2066         384 :     matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
    2067             :                                    mcv, false);
    2068             : 
    2069             :     /* sum frequencies for all the matching MCV items */
    2070         384 :     *basesel = 0.0;
    2071         384 :     *totalsel = 0.0;
    2072       25104 :     for (i = 0; i < mcv->nitems; i++)
    2073             :     {
    2074       24720 :         *totalsel += mcv->items[i].frequency;
    2075             : 
    2076       24720 :         if (matches[i] != false)
    2077             :         {
    2078         462 :             *basesel += mcv->items[i].base_frequency;
    2079         462 :             s += mcv->items[i].frequency;
    2080             :         }
    2081             :     }
    2082             : 
    2083         384 :     return s;
    2084             : }
    2085             : 
    2086             : 
    2087             : /*
    2088             :  * mcv_clause_selectivity_or
    2089             :  *      Use MCV statistics to estimate the selectivity of a clause that
    2090             :  *      appears in an ORed list of clauses.
    2091             :  *
    2092             :  * As with mcv_clauselist_selectivity() this determines which MCV items match
    2093             :  * the clause and returns both the sum of the frequencies and the sum of the
    2094             :  * base frequencies of those items, as well as the sum of the frequencies of
    2095             :  * all MCV items (not just the matching ones) so that this information can be
    2096             :  * used by mcv_combine_selectivities() to produce a selectivity estimate that
    2097             :  * makes use of both per-column and multi-column statistics.
    2098             :  *
    2099             :  * Additionally, we return information to help compute the overall selectivity
    2100             :  * of the ORed list of clauses assumed to contain this clause.  This function
    2101             :  * is intended to be called for each clause in the ORed list of clauses,
    2102             :  * allowing the overall selectivity to be computed using the following
    2103             :  * algorithm:
    2104             :  *
    2105             :  * Suppose P[n] = P(C[1] OR C[2] OR ... OR C[n]) is the combined selectivity
    2106             :  * of the first n clauses in the list.  Then the combined selectivity taking
    2107             :  * into account the next clause C[n+1] can be written as
    2108             :  *
    2109             :  *      P[n+1] = P[n] + P(C[n+1]) - P((C[1] OR ... OR C[n]) AND C[n+1])
    2110             :  *
    2111             :  * The final term above represents the overlap between the clauses examined so
    2112             :  * far and the (n+1)'th clause.  To estimate its selectivity, we track the
    2113             :  * match bitmap for the ORed list of clauses examined so far and examine its
    2114             :  * intersection with the match bitmap for the (n+1)'th clause.
    2115             :  *
    2116             :  * We then also return the sums of the MCV item frequencies and base
    2117             :  * frequencies for the match bitmap intersection corresponding to the overlap
    2118             :  * term above, so that they can be combined with a simple selectivity estimate
    2119             :  * for that term.
    2120             :  *
    2121             :  * The parameter "or_matches" is an in/out parameter tracking the match bitmap
    2122             :  * for the clauses examined so far.  The caller is expected to set it to NULL
    2123             :  * the first time it calls this function.
    2124             :  */
    2125             : Selectivity
    2126         240 : mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat,
    2127             :                           MCVList *mcv, Node *clause, bool **or_matches,
    2128             :                           Selectivity *basesel, Selectivity *overlap_mcvsel,
    2129             :                           Selectivity *overlap_basesel, Selectivity *totalsel)
    2130             : {
    2131         240 :     Selectivity s = 0.0;
    2132             :     bool       *new_matches;
    2133             :     int         i;
    2134             : 
    2135             :     /* build the OR-matches bitmap, if not built already */
    2136         240 :     if (*or_matches == NULL)
    2137          96 :         *or_matches = palloc0(sizeof(bool) * mcv->nitems);
    2138             : 
    2139             :     /* build the match bitmap for the new clause */
    2140         240 :     new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
    2141             :                                        stat->exprs, mcv, false);
    2142             : 
    2143             :     /*
    2144             :      * Sum the frequencies for all the MCV items matching this clause and also
    2145             :      * those matching the overlap between this clause and any of the preceding
    2146             :      * clauses as described above.
    2147             :      */
    2148         240 :     *basesel = 0.0;
    2149         240 :     *overlap_mcvsel = 0.0;
    2150         240 :     *overlap_basesel = 0.0;
    2151         240 :     *totalsel = 0.0;
    2152       15516 :     for (i = 0; i < mcv->nitems; i++)
    2153             :     {
    2154       15276 :         *totalsel += mcv->items[i].frequency;
    2155             : 
    2156       15276 :         if (new_matches[i])
    2157             :         {
    2158         336 :             s += mcv->items[i].frequency;
    2159         336 :             *basesel += mcv->items[i].base_frequency;
    2160             : 
    2161         336 :             if ((*or_matches)[i])
    2162             :             {
    2163         144 :                 *overlap_mcvsel += mcv->items[i].frequency;
    2164         144 :                 *overlap_basesel += mcv->items[i].base_frequency;
    2165             :             }
    2166             :         }
    2167             : 
    2168             :         /* update the OR-matches bitmap for the next clause */
    2169       15276 :         (*or_matches)[i] = (*or_matches)[i] || new_matches[i];
    2170             :     }
    2171             : 
    2172         240 :     pfree(new_matches);
    2173             : 
    2174         240 :     return s;
    2175             : }

Generated by: LCOV version 1.14