LCOV - code coverage report
Current view: top level - src/backend/statistics - mcv.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 579 645 89.8 %
Date: 2026-02-01 08:17:44 Functions: 19 23 82.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * mcv.c
       4             :  *    POSTGRES multivariate MCV lists
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/statistics/mcv.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/htup_details.h"
      18             : #include "catalog/pg_statistic_ext.h"
      19             : #include "catalog/pg_statistic_ext_data.h"
      20             : #include "fmgr.h"
      21             : #include "funcapi.h"
      22             : #include "nodes/nodeFuncs.h"
      23             : #include "statistics/extended_stats_internal.h"
      24             : #include "statistics/statistics.h"
      25             : #include "utils/array.h"
      26             : #include "utils/builtins.h"
      27             : #include "utils/fmgrprotos.h"
      28             : #include "utils/lsyscache.h"
      29             : #include "utils/selfuncs.h"
      30             : #include "utils/syscache.h"
      31             : #include "utils/typcache.h"
      32             : 
      33             : /*
      34             :  * Computes size of a serialized MCV item, depending on the number of
      35             :  * dimensions (columns) the statistic is defined on. The datum values are
      36             :  * stored in a separate array (deduplicated, to minimize the size), and
      37             :  * so the serialized items only store uint16 indexes into that array.
      38             :  *
      39             :  * Each serialized item stores (in this order):
      40             :  *
      41             :  * - indexes to values    (ndim * sizeof(uint16))
      42             :  * - null flags           (ndim * sizeof(bool))
      43             :  * - frequency            (sizeof(double))
      44             :  * - base_frequency       (sizeof(double))
      45             :  *
      46             :  * There is no alignment padding within an MCV item.
      47             :  * So in total each MCV item requires this many bytes:
      48             :  *
      49             :  *   ndim * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double)
      50             :  */
      51             : #define ITEM_SIZE(ndims)    \
      52             :     ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))
      53             : 
      54             : /*
      55             :  * Used to compute size of serialized MCV list representation.
      56             :  */
      57             : #define MinSizeOfMCVList        \
      58             :     (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
      59             : 
      60             : /*
      61             :  * Size of the serialized MCV list, excluding the space needed for
      62             :  * deduplicated per-dimension values. The macro is meant to be used
      63             :  * when it's not yet safe to access the serialized info about amount
      64             :  * of data for each column.
      65             :  */
      66             : #define SizeOfMCVList(ndims,nitems) \
      67             :     ((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
      68             :      ((ndims) * sizeof(DimensionInfo)) + \
      69             :      ((nitems) * ITEM_SIZE(ndims)))
      70             : 
      71             : static MultiSortSupport build_mss(StatsBuildData *data);
      72             : 
      73             : static SortItem *build_distinct_groups(int numrows, SortItem *items,
      74             :                                        MultiSortSupport mss, int *ndistinct);
      75             : 
      76             : static SortItem **build_column_frequencies(SortItem *groups, int ngroups,
      77             :                                            MultiSortSupport mss, int *ncounts);
      78             : 
      79             : static int  count_distinct_groups(int numrows, SortItem *items,
      80             :                                   MultiSortSupport mss);
      81             : 
      82             : /*
      83             :  * Compute new value for bitmap item, considering whether it's used for
      84             :  * clauses connected by AND/OR.
      85             :  */
      86             : #define RESULT_MERGE(value, is_or, match) \
      87             :     ((is_or) ? ((value) || (match)) : ((value) && (match)))
      88             : 
      89             : /*
      90             :  * When processing a list of clauses, the bitmap item may get set to a value
      91             :  * such that additional clauses can't change it. For example, when processing
      92             :  * a list of clauses connected to AND, as soon as the item gets set to 'false'
      93             :  * then it'll remain like that. Similarly clauses connected by OR and 'true'.
      94             :  *
      95             :  * Returns true when the value in the bitmap can't change no matter how the
      96             :  * remaining clauses are evaluated.
      97             :  */
      98             : #define RESULT_IS_FINAL(value, is_or)   ((is_or) ? (value) : (!(value)))
      99             : 
     100             : /*
     101             :  * get_mincount_for_mcv_list
     102             :  *      Determine the minimum number of times a value needs to appear in
     103             :  *      the sample for it to be included in the MCV list.
     104             :  *
     105             :  * We want to keep only values that appear sufficiently often in the
     106             :  * sample that it is reasonable to extrapolate their sample frequencies to
     107             :  * the entire table.  We do this by placing an upper bound on the relative
     108             :  * standard error of the sample frequency, so that any estimates the
     109             :  * planner generates from the MCV statistics can be expected to be
     110             :  * reasonably accurate.
     111             :  *
     112             :  * Since we are sampling without replacement, the sample frequency of a
     113             :  * particular value is described by a hypergeometric distribution.  A
     114             :  * common rule of thumb when estimating errors in this situation is to
     115             :  * require at least 10 instances of the value in the sample, in which case
     116             :  * the distribution can be approximated by a normal distribution, and
     117             :  * standard error analysis techniques can be applied.  Given a sample size
     118             :  * of n, a population size of N, and a sample frequency of p=cnt/n, the
     119             :  * standard error of the proportion p is given by
     120             :  *      SE = sqrt(p*(1-p)/n) * sqrt((N-n)/(N-1))
     121             :  * where the second term is the finite population correction.  To get
     122             :  * reasonably accurate planner estimates, we impose an upper bound on the
     123             :  * relative standard error of 20% -- i.e., SE/p < 0.2.  This 20% relative
     124             :  * error bound is fairly arbitrary, but has been found empirically to work
     125             :  * well.  Rearranging this formula gives a lower bound on the number of
     126             :  * instances of the value seen:
     127             :  *      cnt > n*(N-n) / (N-n+0.04*n*(N-1))
     128             :  * This bound is at most 25, and approaches 0 as n approaches 0 or N. The
     129             :  * case where n approaches 0 cannot happen in practice, since the sample
     130             :  * size is at least 300.  The case where n approaches N corresponds to
     131             :  * sampling the whole table, in which case it is reasonable to keep
     132             :  * the whole MCV list (have no lower bound), so it makes sense to apply
     133             :  * this formula for all inputs, even though the above derivation is
     134             :  * technically only valid when the right hand side is at least around 10.
     135             :  *
     136             :  * An alternative way to look at this formula is as follows -- assume that
     137             :  * the number of instances of the value seen scales up to the entire
     138             :  * table, so that the population count is K=N*cnt/n. Then the distribution
     139             :  * in the sample is a hypergeometric distribution parameterised by N, n
     140             :  * and K, and the bound above is mathematically equivalent to demanding
     141             :  * that the standard deviation of that distribution is less than 20% of
     142             :  * its mean.  Thus the relative errors in any planner estimates produced
     143             :  * from the MCV statistics are likely to be not too large.
     144             :  */
     145             : static double
     146         268 : get_mincount_for_mcv_list(int samplerows, double totalrows)
     147             : {
     148         268 :     double      n = samplerows;
     149         268 :     double      N = totalrows;
     150             :     double      numer,
     151             :                 denom;
     152             : 
     153         268 :     numer = n * (N - n);
     154         268 :     denom = N - n + 0.04 * n * (N - 1);
     155             : 
     156             :     /* Guard against division by zero (possible if n = N = 1) */
     157         268 :     if (denom == 0.0)
     158          12 :         return 0.0;
     159             : 
     160         256 :     return numer / denom;
     161             : }
     162             : 
     163             : /*
     164             :  * Builds MCV list from the set of sampled rows.
     165             :  *
     166             :  * The algorithm is quite simple:
     167             :  *
     168             :  *     (1) sort the data (default collation, '<' for the data type)
     169             :  *
     170             :  *     (2) count distinct groups, decide how many to keep
     171             :  *
     172             :  *     (3) build the MCV list using the threshold determined in (2)
     173             :  *
     174             :  *     (4) remove rows represented by the MCV from the sample
     175             :  *
     176             :  */
     177             : MCVList *
     178         268 : statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
     179             : {
     180             :     int         i,
     181             :                 numattrs,
     182             :                 numrows,
     183             :                 ngroups,
     184             :                 nitems;
     185             :     double      mincount;
     186             :     SortItem   *items;
     187             :     SortItem   *groups;
     188         268 :     MCVList    *mcvlist = NULL;
     189             :     MultiSortSupport mss;
     190             : 
     191             :     /* comparator for all the columns */
     192         268 :     mss = build_mss(data);
     193             : 
     194             :     /* sort the rows */
     195         268 :     items = build_sorted_items(data, &nitems, mss,
     196             :                                data->nattnums, data->attnums);
     197             : 
     198         268 :     if (!items)
     199           0 :         return NULL;
     200             : 
     201             :     /* for convenience */
     202         268 :     numattrs = data->nattnums;
     203         268 :     numrows = data->numrows;
     204             : 
     205             :     /* transform the sorted rows into groups (sorted by frequency) */
     206         268 :     groups = build_distinct_groups(nitems, items, mss, &ngroups);
     207             : 
     208             :     /*
     209             :      * The maximum number of MCV items to store, based on the statistics
     210             :      * target we computed for the statistics object (from the target set for
     211             :      * the object itself, attributes and the system default). In any case, we
     212             :      * can't keep more groups than we have available.
     213             :      */
     214         268 :     nitems = stattarget;
     215         268 :     if (nitems > ngroups)
     216         172 :         nitems = ngroups;
     217             : 
     218             :     /*
     219             :      * Decide how many items to keep in the MCV list. We can't use the same
     220             :      * algorithm as per-column MCV lists, because that only considers the
     221             :      * actual group frequency - but we're primarily interested in how the
     222             :      * actual frequency differs from the base frequency (product of simple
     223             :      * per-column frequencies, as if the columns were independent).
     224             :      *
     225             :      * Using the same algorithm might exclude items that are close to the
     226             :      * "average" frequency of the sample. But that does not say whether the
     227             :      * observed frequency is close to the base frequency or not. We also need
     228             :      * to consider unexpectedly uncommon items (again, compared to the base
     229             :      * frequency), and the single-column algorithm does not have to.
     230             :      *
     231             :      * We simply decide how many items to keep by computing the minimum count
     232             :      * using get_mincount_for_mcv_list() and then keep all items that seem to
     233             :      * be more common than that.
     234             :      */
     235         268 :     mincount = get_mincount_for_mcv_list(numrows, totalrows);
     236             : 
     237             :     /*
     238             :      * Walk the groups until we find the first group with a count below the
     239             :      * mincount threshold (the index of that group is the number of groups we
     240             :      * want to keep).
     241             :      */
     242       10844 :     for (i = 0; i < nitems; i++)
     243             :     {
     244       10576 :         if (groups[i].count < mincount)
     245             :         {
     246           0 :             nitems = i;
     247           0 :             break;
     248             :         }
     249             :     }
     250             : 
     251             :     /*
     252             :      * At this point, we know the number of items for the MCV list. There
     253             :      * might be none (for uniform distribution with many groups), and in that
     254             :      * case, there will be no MCV list. Otherwise, construct the MCV list.
     255             :      */
     256         268 :     if (nitems > 0)
     257             :     {
     258             :         int         j;
     259             :         SortItem    key;
     260             :         MultiSortSupport tmp;
     261             : 
     262             :         /* frequencies for values in each attribute */
     263             :         SortItem  **freqs;
     264             :         int        *nfreqs;
     265             : 
     266             :         /* used to search values */
     267         268 :         tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup)
     268             :                                         + sizeof(SortSupportData));
     269             : 
     270             :         /* compute frequencies for values in each column */
     271         268 :         nfreqs = palloc0_array(int, numattrs);
     272         268 :         freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
     273             : 
     274             :         /*
     275             :          * Allocate the MCV list structure, set the global parameters.
     276             :          */
     277         268 :         mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
     278         268 :                                       sizeof(MCVItem) * nitems);
     279             : 
     280         268 :         mcvlist->magic = STATS_MCV_MAGIC;
     281         268 :         mcvlist->type = STATS_MCV_TYPE_BASIC;
     282         268 :         mcvlist->ndimensions = numattrs;
     283         268 :         mcvlist->nitems = nitems;
     284             : 
     285             :         /* store info about data type OIDs */
     286         978 :         for (i = 0; i < numattrs; i++)
     287         710 :             mcvlist->types[i] = data->stats[i]->attrtypid;
     288             : 
     289             :         /* Copy the first chunk of groups into the result. */
     290       10844 :         for (i = 0; i < nitems; i++)
     291             :         {
     292             :             /* just point to the proper place in the list */
     293       10576 :             MCVItem    *item = &mcvlist->items[i];
     294             : 
     295       10576 :             item->values = palloc_array(Datum, numattrs);
     296       10576 :             item->isnull = palloc_array(bool, numattrs);
     297             : 
     298             :             /* copy values for the group */
     299       10576 :             memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
     300       10576 :             memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
     301             : 
     302             :             /* groups should be sorted by frequency in descending order */
     303             :             Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
     304             : 
     305             :             /* group frequency */
     306       10576 :             item->frequency = (double) groups[i].count / numrows;
     307             : 
     308             :             /* base frequency, if the attributes were independent */
     309       10576 :             item->base_frequency = 1.0;
     310       38898 :             for (j = 0; j < numattrs; j++)
     311             :             {
     312             :                 SortItem   *freq;
     313             : 
     314             :                 /* single dimension */
     315       28322 :                 tmp->ndims = 1;
     316       28322 :                 tmp->ssup[0] = mss->ssup[j];
     317             : 
     318             :                 /* fill search key */
     319       28322 :                 key.values = &groups[i].values[j];
     320       28322 :                 key.isnull = &groups[i].isnull[j];
     321             : 
     322       28322 :                 freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
     323             :                                                 sizeof(SortItem),
     324             :                                                 multi_sort_compare, tmp);
     325             : 
     326       28322 :                 item->base_frequency *= ((double) freq->count) / numrows;
     327             :             }
     328             :         }
     329             : 
     330         268 :         pfree(nfreqs);
     331         268 :         pfree(freqs);
     332             :     }
     333             : 
     334         268 :     pfree(items);
     335         268 :     pfree(groups);
     336             : 
     337         268 :     return mcvlist;
     338             : }
     339             : 
     340             : /*
     341             :  * build_mss
     342             :  *      Build a MultiSortSupport for the given StatsBuildData.
     343             :  */
     344             : static MultiSortSupport
     345         268 : build_mss(StatsBuildData *data)
     346             : {
     347             :     int         i;
     348         268 :     int         numattrs = data->nattnums;
     349             : 
     350             :     /* Sort by multiple columns (using array of SortSupport) */
     351         268 :     MultiSortSupport mss = multi_sort_init(numattrs);
     352             : 
     353             :     /* prepare the sort functions for all the attributes */
     354         978 :     for (i = 0; i < numattrs; i++)
     355             :     {
     356         710 :         VacAttrStats *colstat = data->stats[i];
     357             :         TypeCacheEntry *type;
     358             : 
     359         710 :         type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
     360         710 :         if (type->lt_opr == InvalidOid) /* shouldn't happen */
     361           0 :             elog(ERROR, "cache lookup failed for ordering operator for type %u",
     362             :                  colstat->attrtypid);
     363             : 
     364         710 :         multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
     365             :     }
     366             : 
     367         268 :     return mss;
     368             : }
     369             : 
     370             : /*
     371             :  * count_distinct_groups
     372             :  *      Count distinct combinations of SortItems in the array.
     373             :  *
     374             :  * The array is assumed to be sorted according to the MultiSortSupport.
     375             :  */
     376             : static int
     377         268 : count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
     378             : {
     379             :     int         i;
     380             :     int         ndistinct;
     381             : 
     382         268 :     ndistinct = 1;
     383      483846 :     for (i = 1; i < numrows; i++)
     384             :     {
     385             :         /* make sure the array really is sorted */
     386             :         Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
     387             : 
     388      483578 :         if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
     389       90456 :             ndistinct += 1;
     390             :     }
     391             : 
     392         268 :     return ndistinct;
     393             : }
     394             : 
     395             : /*
     396             :  * compare_sort_item_count
     397             :  *      Comparator for sorting items by count (frequencies) in descending
     398             :  *      order.
     399             :  */
     400             : static int
     401       97566 : compare_sort_item_count(const void *a, const void *b, void *arg)
     402             : {
     403       97566 :     const SortItem *ia = a;
     404       97566 :     const SortItem *ib = b;
     405             : 
     406       97566 :     if (ia->count == ib->count)
     407       96586 :         return 0;
     408         980 :     else if (ia->count > ib->count)
     409         650 :         return -1;
     410             : 
     411         330 :     return 1;
     412             : }
     413             : 
     414             : /*
     415             :  * build_distinct_groups
     416             :  *      Build an array of SortItems for distinct groups and counts matching
     417             :  *      items.
     418             :  *
     419             :  * The 'items' array is assumed to be sorted.
     420             :  */
     421             : static SortItem *
     422         268 : build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
     423             :                       int *ndistinct)
     424             : {
     425             :     int         i,
     426             :                 j;
     427         268 :     int         ngroups = count_distinct_groups(numrows, items, mss);
     428             : 
     429         268 :     SortItem   *groups = (SortItem *) palloc(ngroups * sizeof(SortItem));
     430             : 
     431         268 :     j = 0;
     432         268 :     groups[0] = items[0];
     433         268 :     groups[0].count = 1;
     434             : 
     435      483846 :     for (i = 1; i < numrows; i++)
     436             :     {
     437             :         /* Assume sorted in ascending order. */
     438             :         Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
     439             : 
     440             :         /* New distinct group detected. */
     441      483578 :         if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
     442             :         {
     443       90456 :             groups[++j] = items[i];
     444       90456 :             groups[j].count = 0;
     445             :         }
     446             : 
     447      483578 :         groups[j].count++;
     448             :     }
     449             : 
     450             :     /* ensure we filled the expected number of distinct groups */
     451             :     Assert(j + 1 == ngroups);
     452             : 
     453             :     /* Sort the distinct groups by frequency (in descending order). */
     454         268 :     qsort_interruptible(groups, ngroups, sizeof(SortItem),
     455             :                         compare_sort_item_count, NULL);
     456             : 
     457         268 :     *ndistinct = ngroups;
     458         268 :     return groups;
     459             : }
     460             : 
     461             : /* compare sort items (single dimension) */
     462             : static int
     463     1032554 : sort_item_compare(const void *a, const void *b, void *arg)
     464             : {
     465     1032554 :     SortSupport ssup = (SortSupport) arg;
     466     1032554 :     const SortItem *ia = a;
     467     1032554 :     const SortItem *ib = b;
     468             : 
     469     2065108 :     return ApplySortComparator(ia->values[0], ia->isnull[0],
     470     1032554 :                                ib->values[0], ib->isnull[0],
     471             :                                ssup);
     472             : }
     473             : 
     474             : /*
     475             :  * build_column_frequencies
     476             :  *      Compute frequencies of values in each column.
     477             :  *
     478             :  * This returns an array of SortItems for each attribute the MCV is built
     479             :  * on, with a frequency (number of occurrences) for each value. This is
     480             :  * then used to compute "base" frequency of MCV items.
     481             :  *
     482             :  * All the memory is allocated in a single chunk, so that a single pfree
     483             :  * is enough to release it. We do not allocate space for values/isnull
     484             :  * arrays in the SortItems, because we can simply point into the input
     485             :  * groups directly.
     486             :  */
     487             : static SortItem **
     488         268 : build_column_frequencies(SortItem *groups, int ngroups,
     489             :                          MultiSortSupport mss, int *ncounts)
     490             : {
     491             :     int         i,
     492             :                 dim;
     493             :     SortItem  **result;
     494             :     char       *ptr;
     495             : 
     496             :     Assert(groups);
     497             :     Assert(ncounts);
     498             : 
     499             :     /* allocate arrays for all columns as a single chunk */
     500         268 :     ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +
     501         268 :                  mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));
     502             : 
     503             :     /* initial array of pointers */
     504         268 :     result = (SortItem **) ptr;
     505         268 :     ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);
     506             : 
     507         978 :     for (dim = 0; dim < mss->ndims; dim++)
     508             :     {
     509         710 :         SortSupport ssup = &mss->ssup[dim];
     510             : 
     511             :         /* array of values for a single column */
     512         710 :         result[dim] = (SortItem *) ptr;
     513         710 :         ptr += MAXALIGN(sizeof(SortItem) * ngroups);
     514             : 
     515             :         /* extract data for the dimension */
     516      252928 :         for (i = 0; i < ngroups; i++)
     517             :         {
     518             :             /* point into the input groups */
     519      252218 :             result[dim][i].values = &groups[i].values[dim];
     520      252218 :             result[dim][i].isnull = &groups[i].isnull[dim];
     521      252218 :             result[dim][i].count = groups[i].count;
     522             :         }
     523             : 
     524             :         /* sort the values, deduplicate */
     525         710 :         qsort_interruptible(result[dim], ngroups, sizeof(SortItem),
     526             :                             sort_item_compare, ssup);
     527             : 
     528             :         /*
     529             :          * Identify distinct values, compute frequency (there might be
     530             :          * multiple MCV items containing this value, so we need to sum counts
     531             :          * from all of them.
     532             :          */
     533         710 :         ncounts[dim] = 1;
     534      252218 :         for (i = 1; i < ngroups; i++)
     535             :         {
     536      251508 :             if (sort_item_compare(&result[dim][i - 1], &result[dim][i], ssup) == 0)
     537             :             {
     538      148664 :                 result[dim][ncounts[dim] - 1].count += result[dim][i].count;
     539      148664 :                 continue;
     540             :             }
     541             : 
     542      102844 :             result[dim][ncounts[dim]] = result[dim][i];
     543             : 
     544      102844 :             ncounts[dim]++;
     545             :         }
     546             :     }
     547             : 
     548         268 :     return result;
     549             : }
     550             : 
     551             : /*
     552             :  * statext_mcv_load
     553             :  *      Load the MCV list for the indicated pg_statistic_ext_data tuple.
     554             :  */
     555             : MCVList *
     556         606 : statext_mcv_load(Oid mvoid, bool inh)
     557             : {
     558             :     MCVList    *result;
     559             :     bool        isnull;
     560             :     Datum       mcvlist;
     561         606 :     HeapTuple   htup = SearchSysCache2(STATEXTDATASTXOID,
     562             :                                        ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
     563             : 
     564         606 :     if (!HeapTupleIsValid(htup))
     565           0 :         elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
     566             : 
     567         606 :     mcvlist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
     568             :                               Anum_pg_statistic_ext_data_stxdmcv, &isnull);
     569             : 
     570         606 :     if (isnull)
     571           0 :         elog(ERROR,
     572             :              "requested statistics kind \"%c\" is not yet built for statistics object %u",
     573             :              STATS_EXT_MCV, mvoid);
     574             : 
     575         606 :     result = statext_mcv_deserialize(DatumGetByteaP(mcvlist));
     576             : 
     577         606 :     ReleaseSysCache(htup);
     578             : 
     579         606 :     return result;
     580             : }
     581             : 
     582             : 
     583             : /*
     584             :  * statext_mcv_serialize
     585             :  *      Serialize MCV list into a pg_mcv_list value.
     586             :  *
     587             :  * The MCV items may include values of various data types, and it's reasonable
     588             :  * to expect redundancy (values for a given attribute, repeated for multiple
     589             :  * MCV list items). So we deduplicate the values into arrays, and then replace
     590             :  * the values by indexes into those arrays.
     591             :  *
     592             :  * The overall structure of the serialized representation looks like this:
     593             :  *
     594             :  * +---------------+----------------+---------------------+-------+
     595             :  * | header fields | dimension info | deduplicated values | items |
     596             :  * +---------------+----------------+---------------------+-------+
     597             :  *
     598             :  * Where dimension info stores information about the type of the K-th
     599             :  * attribute (e.g. typlen, typbyval and length of deduplicated values).
     600             :  * Deduplicated values store deduplicated values for each attribute.  And
     601             :  * items store the actual MCV list items, with values replaced by indexes into
     602             :  * the arrays.
     603             :  *
     604             :  * When serializing the items, we use uint16 indexes. The number of MCV items
     605             :  * is limited by the statistics target (which is capped to 10k at the moment).
     606             :  * We might increase this to 65k and still fit into uint16, so there's a bit of
     607             :  * slack. Furthermore, this limit is on the number of distinct values per column,
     608             :  * and we usually have few of those (and various combinations of them for the
     609             :  * those MCV list). So uint16 seems fine for now.
     610             :  *
     611             :  * We don't really expect the serialization to save as much space as for
     612             :  * histograms, as we are not doing any bucket splits (which is the source
     613             :  * of high redundancy in histograms).
     614             :  *
     615             :  * TODO: Consider packing boolean flags (NULL) for each item into a single char
     616             :  * (or a longer type) instead of using an array of bool items.
     617             :  */
     618             : bytea *
     619         298 : statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
     620             : {
     621             :     int         i;
     622             :     int         dim;
     623         298 :     int         ndims = mcvlist->ndimensions;
     624             : 
     625             :     SortSupport ssup;
     626             :     DimensionInfo *info;
     627             : 
     628             :     Size        total_length;
     629             : 
     630             :     /* serialized items (indexes into arrays, etc.) */
     631             :     bytea      *raw;
     632             :     char       *ptr;
     633             :     char       *endptr PG_USED_FOR_ASSERTS_ONLY;
     634             : 
     635             :     /* values per dimension (and number of non-NULL values) */
     636         298 :     Datum     **values = palloc0_array(Datum *, ndims);
     637         298 :     int        *counts = palloc0_array(int, ndims);
     638             : 
     639             :     /*
     640             :      * We'll include some rudimentary information about the attribute types
     641             :      * (length, by-val flag), so that we don't have to look them up while
     642             :      * deserializing the MCV list (we already have the type OID in the
     643             :      * header).  This is safe because when changing the type of the attribute
     644             :      * the statistics gets dropped automatically.  We need to store the info
     645             :      * about the arrays of deduplicated values anyway.
     646             :      */
     647         298 :     info = palloc0_array(DimensionInfo, ndims);
     648             : 
     649             :     /* sort support data for all attributes included in the MCV list */
     650         298 :     ssup = palloc0_array(SortSupportData, ndims);
     651             : 
     652             :     /* collect and deduplicate values for each dimension (attribute) */
     653        1092 :     for (dim = 0; dim < ndims; dim++)
     654             :     {
     655             :         int         ndistinct;
     656             :         TypeCacheEntry *typentry;
     657             : 
     658             :         /*
     659             :          * Lookup the LT operator (can't get it from stats extra_data, as we
     660             :          * don't know how to interpret that - scalar vs. array etc.).
     661             :          */
     662         794 :         typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
     663             : 
     664             :         /* copy important info about the data type (length, by-value) */
     665         794 :         info[dim].typlen = stats[dim]->attrtype->typlen;
     666         794 :         info[dim].typbyval = stats[dim]->attrtype->typbyval;
     667             : 
     668             :         /* allocate space for values in the attribute and collect them */
     669         794 :         values[dim] = palloc0_array(Datum, mcvlist->nitems);
     670             : 
     671       29704 :         for (i = 0; i < mcvlist->nitems; i++)
     672             :         {
     673             :             /* skip NULL values - we don't need to deduplicate those */
     674       28910 :             if (mcvlist->items[i].isnull[dim])
     675         140 :                 continue;
     676             : 
     677             :             /* append the value at the end */
     678       28770 :             values[dim][counts[dim]] = mcvlist->items[i].values[dim];
     679       28770 :             counts[dim] += 1;
     680             :         }
     681             : 
     682             :         /* if there are just NULL values in this dimension, we're done */
     683         794 :         if (counts[dim] == 0)
     684           8 :             continue;
     685             : 
     686             :         /* sort and deduplicate the data */
     687         786 :         ssup[dim].ssup_cxt = CurrentMemoryContext;
     688         786 :         ssup[dim].ssup_collation = stats[dim]->attrcollid;
     689         786 :         ssup[dim].ssup_nulls_first = false;
     690             : 
     691         786 :         PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
     692             : 
     693         786 :         qsort_interruptible(values[dim], counts[dim], sizeof(Datum),
     694         786 :                             compare_scalars_simple, &ssup[dim]);
     695             : 
     696             :         /*
     697             :          * Walk through the array and eliminate duplicate values, but keep the
     698             :          * ordering (so that we can do a binary search later). We know there's
     699             :          * at least one item as (counts[dim] != 0), so we can skip the first
     700             :          * element.
     701             :          */
     702         786 :         ndistinct = 1;          /* number of distinct values */
     703       28770 :         for (i = 1; i < counts[dim]; i++)
     704             :         {
     705             :             /* expect sorted array */
     706             :             Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
     707             : 
     708             :             /* if the value is the same as the previous one, we can skip it */
     709       27984 :             if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
     710       11660 :                 continue;
     711             : 
     712       16324 :             values[dim][ndistinct] = values[dim][i];
     713       16324 :             ndistinct += 1;
     714             :         }
     715             : 
     716             :         /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
     717             :         Assert(ndistinct <= PG_UINT16_MAX);
     718             : 
     719             :         /*
     720             :          * Store additional info about the attribute - number of deduplicated
     721             :          * values, and also size of the serialized data. For fixed-length data
     722             :          * types this is trivial to compute, for varwidth types we need to
     723             :          * actually walk the array and sum the sizes.
     724             :          */
     725         786 :         info[dim].nvalues = ndistinct;
     726             : 
     727         786 :         if (info[dim].typbyval) /* by-value data types */
     728             :         {
     729         506 :             info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
     730             : 
     731             :             /*
     732             :              * We copy the data into the MCV item during deserialization, so
     733             :              * we don't need to allocate any extra space.
     734             :              */
     735         506 :             info[dim].nbytes_aligned = 0;
     736             :         }
     737         280 :         else if (info[dim].typlen > 0)   /* fixed-length by-ref */
     738             :         {
     739             :             /*
     740             :              * We don't care about alignment in the serialized data, so we
     741             :              * pack the data as much as possible. But we also track how much
     742             :              * data will be needed after deserialization, and in that case we
     743             :              * need to account for alignment of each item.
     744             :              *
     745             :              * Note: As the items are fixed-length, we could easily compute
     746             :              * this during deserialization, but we do it here anyway.
     747             :              */
     748          24 :             info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
     749          24 :             info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
     750             :         }
     751         256 :         else if (info[dim].typlen == -1)    /* varlena */
     752             :         {
     753         256 :             info[dim].nbytes = 0;
     754         256 :             info[dim].nbytes_aligned = 0;
     755        3782 :             for (i = 0; i < info[dim].nvalues; i++)
     756             :             {
     757             :                 Size        len;
     758             : 
     759             :                 /*
     760             :                  * For varlena values, we detoast the values and store the
     761             :                  * length and data separately. We don't bother with alignment
     762             :                  * here, which means that during deserialization we need to
     763             :                  * copy the fields and only access the copies.
     764             :                  */
     765        3526 :                 values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
     766             : 
     767             :                 /* serialized length (uint32 length + data) */
     768        3526 :                 len = VARSIZE_ANY_EXHDR(DatumGetPointer(values[dim][i]));
     769        3526 :                 info[dim].nbytes += sizeof(uint32); /* length */
     770        3526 :                 info[dim].nbytes += len;    /* value (no header) */
     771             : 
     772             :                 /*
     773             :                  * During deserialization we'll build regular varlena values
     774             :                  * with full headers, and we need to align them properly.
     775             :                  */
     776        3526 :                 info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
     777             :             }
     778             :         }
     779           0 :         else if (info[dim].typlen == -2)    /* cstring */
     780             :         {
     781           0 :             info[dim].nbytes = 0;
     782           0 :             info[dim].nbytes_aligned = 0;
     783           0 :             for (i = 0; i < info[dim].nvalues; i++)
     784             :             {
     785             :                 Size        len;
     786             : 
     787             :                 /*
     788             :                  * cstring is handled similar to varlena - first we store the
     789             :                  * length as uint32 and then the data. We don't care about
     790             :                  * alignment, which means that during deserialization we need
     791             :                  * to copy the fields and only access the copies.
     792             :                  */
     793             : 
     794             :                 /* c-strings include terminator, so +1 byte */
     795           0 :                 len = strlen(DatumGetCString(values[dim][i])) + 1;
     796           0 :                 info[dim].nbytes += sizeof(uint32); /* length */
     797           0 :                 info[dim].nbytes += len;    /* value */
     798             : 
     799             :                 /* space needed for properly aligned deserialized copies */
     800           0 :                 info[dim].nbytes_aligned += MAXALIGN(len);
     801             :             }
     802             :         }
     803             : 
     804             :         /* we know (count>0) so there must be some data */
     805             :         Assert(info[dim].nbytes > 0);
     806             :     }
     807             : 
     808             :     /*
     809             :      * Now we can finally compute how much space we'll actually need for the
     810             :      * whole serialized MCV list (varlena header, MCV header, dimension info
     811             :      * for each attribute, deduplicated values and items).
     812             :      */
     813         298 :     total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
     814             :         + sizeof(AttrNumber)    /* ndimensions */
     815         298 :         + (ndims * sizeof(Oid));    /* attribute types */
     816             : 
     817             :     /* dimension info */
     818         298 :     total_length += ndims * sizeof(DimensionInfo);
     819             : 
     820             :     /* add space for the arrays of deduplicated values */
     821        1092 :     for (i = 0; i < ndims; i++)
     822         794 :         total_length += info[i].nbytes;
     823             : 
     824             :     /*
     825             :      * And finally account for the items (those are fixed-length, thanks to
     826             :      * replacing values with uint16 indexes into the deduplicated arrays).
     827             :      */
     828         298 :     total_length += mcvlist->nitems * ITEM_SIZE(dim);
     829             : 
     830             :     /*
     831             :      * Allocate space for the whole serialized MCV list (we'll skip bytes, so
     832             :      * we set them to zero to make the result more compressible).
     833             :      */
     834         298 :     raw = (bytea *) palloc0(VARHDRSZ + total_length);
     835         298 :     SET_VARSIZE(raw, VARHDRSZ + total_length);
     836             : 
     837         298 :     ptr = VARDATA(raw);
     838         298 :     endptr = ptr + total_length;
     839             : 
     840             :     /* copy the MCV list header fields, one by one */
     841         298 :     memcpy(ptr, &mcvlist->magic, sizeof(uint32));
     842         298 :     ptr += sizeof(uint32);
     843             : 
     844         298 :     memcpy(ptr, &mcvlist->type, sizeof(uint32));
     845         298 :     ptr += sizeof(uint32);
     846             : 
     847         298 :     memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
     848         298 :     ptr += sizeof(uint32);
     849             : 
     850         298 :     memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
     851         298 :     ptr += sizeof(AttrNumber);
     852             : 
     853         298 :     memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
     854         298 :     ptr += (sizeof(Oid) * ndims);
     855             : 
     856             :     /* store information about the attributes (data amounts, ...) */
     857         298 :     memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
     858         298 :     ptr += sizeof(DimensionInfo) * ndims;
     859             : 
     860             :     /* Copy the deduplicated values for all attributes to the output. */
     861        1092 :     for (dim = 0; dim < ndims; dim++)
     862             :     {
     863             :         /* remember the starting point for Asserts later */
     864         794 :         char       *start PG_USED_FOR_ASSERTS_ONLY = ptr;
     865             : 
     866       17904 :         for (i = 0; i < info[dim].nvalues; i++)
     867             :         {
     868       17110 :             Datum       value = values[dim][i];
     869             : 
     870       17110 :             if (info[dim].typbyval) /* passed by value */
     871             :             {
     872             :                 Datum       tmp;
     873             : 
     874             :                 /*
     875             :                  * For byval types, we need to copy just the significant bytes
     876             :                  * - we can't use memcpy directly, as that assumes
     877             :                  * little-endian behavior.  store_att_byval does almost what
     878             :                  * we need, but it requires a properly aligned buffer - the
     879             :                  * output buffer does not guarantee that. So we simply use a
     880             :                  * local Datum variable (which guarantees proper alignment),
     881             :                  * and then copy the value from it.
     882             :                  */
     883       12474 :                 store_att_byval(&tmp, value, info[dim].typlen);
     884             : 
     885       12474 :                 memcpy(ptr, &tmp, info[dim].typlen);
     886       12474 :                 ptr += info[dim].typlen;
     887             :             }
     888        4636 :             else if (info[dim].typlen > 0)   /* passed by reference */
     889             :             {
     890             :                 /* no special alignment needed, treated as char array */
     891        1110 :                 memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
     892        1110 :                 ptr += info[dim].typlen;
     893             :             }
     894        3526 :             else if (info[dim].typlen == -1)    /* varlena */
     895             :             {
     896        3526 :                 uint32      len = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
     897             : 
     898             :                 /* copy the length */
     899        3526 :                 memcpy(ptr, &len, sizeof(uint32));
     900        3526 :                 ptr += sizeof(uint32);
     901             : 
     902             :                 /* data from the varlena value (without the header) */
     903        3526 :                 memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
     904        3526 :                 ptr += len;
     905             :             }
     906           0 :             else if (info[dim].typlen == -2)    /* cstring */
     907             :             {
     908           0 :                 uint32      len = (uint32) strlen(DatumGetCString(value)) + 1;
     909             : 
     910             :                 /* copy the length */
     911           0 :                 memcpy(ptr, &len, sizeof(uint32));
     912           0 :                 ptr += sizeof(uint32);
     913             : 
     914             :                 /* value */
     915           0 :                 memcpy(ptr, DatumGetCString(value), len);
     916           0 :                 ptr += len;
     917             :             }
     918             : 
     919             :             /* no underflows or overflows */
     920             :             Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
     921             :         }
     922             : 
     923             :         /* we should get exactly nbytes of data for this dimension */
     924             :         Assert((ptr - start) == info[dim].nbytes);
     925             :     }
     926             : 
     927             :     /* Serialize the items, with uint16 indexes instead of the values. */
     928       11078 :     for (i = 0; i < mcvlist->nitems; i++)
     929             :     {
     930       10780 :         MCVItem    *mcvitem = &mcvlist->items[i];
     931             : 
     932             :         /* don't write beyond the allocated space */
     933             :         Assert(ptr <= (endptr - ITEM_SIZE(dim)));
     934             : 
     935             :         /* copy NULL and frequency flags into the serialized MCV */
     936       10780 :         memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
     937       10780 :         ptr += sizeof(bool) * ndims;
     938             : 
     939       10780 :         memcpy(ptr, &mcvitem->frequency, sizeof(double));
     940       10780 :         ptr += sizeof(double);
     941             : 
     942       10780 :         memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
     943       10780 :         ptr += sizeof(double);
     944             : 
     945             :         /* store the indexes last */
     946       39690 :         for (dim = 0; dim < ndims; dim++)
     947             :         {
     948       28910 :             uint16      index = 0;
     949             :             Datum      *value;
     950             : 
     951             :             /* do the lookup only for non-NULL values */
     952       28910 :             if (!mcvitem->isnull[dim])
     953             :             {
     954       28770 :                 value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
     955       28770 :                                               info[dim].nvalues, sizeof(Datum),
     956       28770 :                                               compare_scalars_simple, &ssup[dim]);
     957             : 
     958             :                 Assert(value != NULL);  /* serialization or deduplication
     959             :                                          * error */
     960             : 
     961             :                 /* compute index within the deduplicated array */
     962       28770 :                 index = (uint16) (value - values[dim]);
     963             : 
     964             :                 /* check the index is within expected bounds */
     965             :                 Assert(index < info[dim].nvalues);
     966             :             }
     967             : 
     968             :             /* copy the index into the serialized MCV */
     969       28910 :             memcpy(ptr, &index, sizeof(uint16));
     970       28910 :             ptr += sizeof(uint16);
     971             :         }
     972             : 
     973             :         /* make sure we don't overflow the allocated value */
     974             :         Assert(ptr <= endptr);
     975             :     }
     976             : 
     977             :     /* at this point we expect to match the total_length exactly */
     978             :     Assert(ptr == endptr);
     979             : 
     980         298 :     pfree(values);
     981         298 :     pfree(counts);
     982             : 
     983         298 :     return raw;
     984             : }
     985             : 
     986             : /*
     987             :  * statext_mcv_deserialize
     988             :  *      Reads serialized MCV list into MCVList structure.
     989             :  *
     990             :  * All the memory needed by the MCV list is allocated as a single chunk, so
     991             :  * it's possible to simply pfree() it at once.
     992             :  */
     993             : MCVList *
     994         748 : statext_mcv_deserialize(bytea *data)
     995             : {
     996             :     int         dim,
     997             :                 i;
     998             :     Size        expected_size;
     999             :     MCVList    *mcvlist;
    1000             :     char       *raw;
    1001             :     char       *ptr;
    1002             :     char       *endptr PG_USED_FOR_ASSERTS_ONLY;
    1003             : 
    1004             :     int         ndims,
    1005             :                 nitems;
    1006         748 :     DimensionInfo *info = NULL;
    1007             : 
    1008             :     /* local allocation buffer (used only for deserialization) */
    1009         748 :     Datum     **map = NULL;
    1010             : 
    1011             :     /* MCV list */
    1012             :     Size        mcvlen;
    1013             : 
    1014             :     /* buffer used for the result */
    1015             :     Size        datalen;
    1016             :     char       *dataptr;
    1017             :     char       *valuesptr;
    1018             :     char       *isnullptr;
    1019             : 
    1020         748 :     if (data == NULL)
    1021           0 :         return NULL;
    1022             : 
    1023             :     /*
    1024             :      * We can't possibly deserialize a MCV list if there's not even a complete
    1025             :      * header. We need an explicit formula here, because we serialize the
    1026             :      * header fields one by one, so we need to ignore struct alignment.
    1027             :      */
    1028         748 :     if (VARSIZE_ANY(data) < MinSizeOfMCVList)
    1029           0 :         elog(ERROR, "invalid MCV size %zu (expected at least %zu)",
    1030             :              VARSIZE_ANY(data), MinSizeOfMCVList);
    1031             : 
    1032             :     /* read the MCV list header */
    1033         748 :     mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
    1034             : 
    1035             :     /* pointer to the data part (skip the varlena header) */
    1036         748 :     raw = (char *) data;
    1037         748 :     ptr = VARDATA_ANY(raw);
    1038         748 :     endptr = raw + VARSIZE_ANY(data);
    1039             : 
    1040             :     /* get the header and perform further sanity checks */
    1041         748 :     memcpy(&mcvlist->magic, ptr, sizeof(uint32));
    1042         748 :     ptr += sizeof(uint32);
    1043             : 
    1044         748 :     memcpy(&mcvlist->type, ptr, sizeof(uint32));
    1045         748 :     ptr += sizeof(uint32);
    1046             : 
    1047         748 :     memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
    1048         748 :     ptr += sizeof(uint32);
    1049             : 
    1050         748 :     memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
    1051         748 :     ptr += sizeof(AttrNumber);
    1052             : 
    1053         748 :     if (mcvlist->magic != STATS_MCV_MAGIC)
    1054           0 :         elog(ERROR, "invalid MCV magic %u (expected %u)",
    1055             :              mcvlist->magic, STATS_MCV_MAGIC);
    1056             : 
    1057         748 :     if (mcvlist->type != STATS_MCV_TYPE_BASIC)
    1058           0 :         elog(ERROR, "invalid MCV type %u (expected %u)",
    1059             :              mcvlist->type, STATS_MCV_TYPE_BASIC);
    1060             : 
    1061         748 :     if (mcvlist->ndimensions == 0)
    1062           0 :         elog(ERROR, "invalid zero-length dimension array in MCVList");
    1063         748 :     else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
    1064         748 :              (mcvlist->ndimensions < 0))
    1065           0 :         elog(ERROR, "invalid length (%d) dimension array in MCVList",
    1066             :              mcvlist->ndimensions);
    1067             : 
    1068         748 :     if (mcvlist->nitems == 0)
    1069           0 :         elog(ERROR, "invalid zero-length item array in MCVList");
    1070         748 :     else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
    1071           0 :         elog(ERROR, "invalid length (%u) item array in MCVList",
    1072             :              mcvlist->nitems);
    1073             : 
    1074         748 :     nitems = mcvlist->nitems;
    1075         748 :     ndims = mcvlist->ndimensions;
    1076             : 
    1077             :     /*
    1078             :      * Check amount of data including DimensionInfo for all dimensions and
    1079             :      * also the serialized items (including uint16 indexes). Also, walk
    1080             :      * through the dimension information and add it to the sum.
    1081             :      */
    1082         748 :     expected_size = SizeOfMCVList(ndims, nitems);
    1083             : 
    1084             :     /*
    1085             :      * Check that we have at least the dimension and info records, along with
    1086             :      * the items. We don't know the size of the serialized values yet. We need
    1087             :      * to do this check first, before accessing the dimension info.
    1088             :      */
    1089         748 :     if (VARSIZE_ANY(data) < expected_size)
    1090           0 :         elog(ERROR, "invalid MCV size %zu (expected %zu)",
    1091             :              VARSIZE_ANY(data), expected_size);
    1092             : 
    1093             :     /* Now copy the array of type Oids. */
    1094         748 :     memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
    1095         748 :     ptr += (sizeof(Oid) * ndims);
    1096             : 
    1097             :     /* Now it's safe to access the dimension info. */
    1098         748 :     info = palloc(ndims * sizeof(DimensionInfo));
    1099             : 
    1100         748 :     memcpy(info, ptr, ndims * sizeof(DimensionInfo));
    1101         748 :     ptr += (ndims * sizeof(DimensionInfo));
    1102             : 
    1103             :     /* account for the value arrays */
    1104        2958 :     for (dim = 0; dim < ndims; dim++)
    1105             :     {
    1106             :         /*
    1107             :          * XXX I wonder if we can/should rely on asserts here. Maybe those
    1108             :          * checks should be done every time?
    1109             :          */
    1110             :         Assert(info[dim].nvalues >= 0);
    1111             :         Assert(info[dim].nbytes >= 0);
    1112             : 
    1113        2210 :         expected_size += info[dim].nbytes;
    1114             :     }
    1115             : 
    1116             :     /*
    1117             :      * Now we know the total expected MCV size, including all the pieces
    1118             :      * (header, dimension info. items and deduplicated data). So do the final
    1119             :      * check on size.
    1120             :      */
    1121         748 :     if (VARSIZE_ANY(data) != expected_size)
    1122           0 :         elog(ERROR, "invalid MCV size %zu (expected %zu)",
    1123             :              VARSIZE_ANY(data), expected_size);
    1124             : 
    1125             :     /*
    1126             :      * We need an array of Datum values for each dimension, so that we can
    1127             :      * easily translate the uint16 indexes later. We also need a top-level
    1128             :      * array of pointers to those per-dimension arrays.
    1129             :      *
    1130             :      * While allocating the arrays for dimensions, compute how much space we
    1131             :      * need for a copy of the by-ref data, as we can't simply point to the
    1132             :      * original values (it might go away).
    1133             :      */
    1134         748 :     datalen = 0;                /* space for by-ref data */
    1135         748 :     map = palloc_array(Datum *, ndims);
    1136             : 
    1137        2958 :     for (dim = 0; dim < ndims; dim++)
    1138             :     {
    1139        2210 :         map[dim] = palloc_array(Datum, info[dim].nvalues);
    1140             : 
    1141             :         /* space needed for a copy of data for by-ref types */
    1142        2210 :         datalen += info[dim].nbytes_aligned;
    1143             :     }
    1144             : 
    1145             :     /*
    1146             :      * Now resize the MCV list so that the allocation includes all the data.
    1147             :      *
    1148             :      * Allocate space for a copy of the data, as we can't simply reference the
    1149             :      * serialized data - it's not aligned properly, and it may disappear while
    1150             :      * we're still using the MCV list, e.g. due to catcache release.
    1151             :      *
    1152             :      * We do care about alignment here, because we will allocate all the
    1153             :      * pieces at once, but then use pointers to different parts.
    1154             :      */
    1155         748 :     mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
    1156             : 
    1157             :     /* arrays of values and isnull flags for all MCV items */
    1158         748 :     mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
    1159         748 :     mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
    1160             : 
    1161             :     /* we don't quite need to align this, but it makes some asserts easier */
    1162         748 :     mcvlen += MAXALIGN(datalen);
    1163             : 
    1164             :     /* now resize the deserialized MCV list, and compute pointers to parts */
    1165         748 :     mcvlist = repalloc(mcvlist, mcvlen);
    1166             : 
    1167             :     /* pointer to the beginning of values/isnull arrays */
    1168         748 :     valuesptr = (char *) mcvlist
    1169         748 :         + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
    1170             : 
    1171         748 :     isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
    1172             : 
    1173         748 :     dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
    1174             : 
    1175             :     /*
    1176             :      * Build mapping (index => value) for translating the serialized data into
    1177             :      * the in-memory representation.
    1178             :      */
    1179        2958 :     for (dim = 0; dim < ndims; dim++)
    1180             :     {
    1181             :         /* remember start position in the input array */
    1182        2210 :         char       *start PG_USED_FOR_ASSERTS_ONLY = ptr;
    1183             : 
    1184        2210 :         if (info[dim].typbyval)
    1185             :         {
    1186             :             /* for by-val types we simply copy data into the mapping */
    1187       63832 :             for (i = 0; i < info[dim].nvalues; i++)
    1188             :             {
    1189       62304 :                 Datum       v = 0;
    1190             : 
    1191       62304 :                 memcpy(&v, ptr, info[dim].typlen);
    1192       62304 :                 ptr += info[dim].typlen;
    1193             : 
    1194       62304 :                 map[dim][i] = fetch_att(&v, true, info[dim].typlen);
    1195             : 
    1196             :                 /* no under/overflow of input array */
    1197             :                 Assert(ptr <= (start + info[dim].nbytes));
    1198             :             }
    1199             :         }
    1200             :         else
    1201             :         {
    1202             :             /* for by-ref types we need to also make a copy of the data */
    1203             : 
    1204             :             /* passed by reference, but fixed length (name, tid, ...) */
    1205         682 :             if (info[dim].typlen > 0)
    1206             :             {
    1207        2202 :                 for (i = 0; i < info[dim].nvalues; i++)
    1208             :                 {
    1209        2160 :                     memcpy(dataptr, ptr, info[dim].typlen);
    1210        2160 :                     ptr += info[dim].typlen;
    1211             : 
    1212             :                     /* just point into the array */
    1213        2160 :                     map[dim][i] = PointerGetDatum(dataptr);
    1214        2160 :                     dataptr += MAXALIGN(info[dim].typlen);
    1215             :                 }
    1216             :             }
    1217         640 :             else if (info[dim].typlen == -1)
    1218             :             {
    1219             :                 /* varlena */
    1220       15776 :                 for (i = 0; i < info[dim].nvalues; i++)
    1221             :                 {
    1222             :                     uint32      len;
    1223             : 
    1224             :                     /* read the uint32 length */
    1225       15136 :                     memcpy(&len, ptr, sizeof(uint32));
    1226       15136 :                     ptr += sizeof(uint32);
    1227             : 
    1228             :                     /* the length is data-only */
    1229       15136 :                     SET_VARSIZE(dataptr, len + VARHDRSZ);
    1230       15136 :                     memcpy(VARDATA(dataptr), ptr, len);
    1231       15136 :                     ptr += len;
    1232             : 
    1233             :                     /* just point into the array */
    1234       15136 :                     map[dim][i] = PointerGetDatum(dataptr);
    1235             : 
    1236             :                     /* skip to place of the next deserialized value */
    1237       15136 :                     dataptr += MAXALIGN(len + VARHDRSZ);
    1238             :                 }
    1239             :             }
    1240           0 :             else if (info[dim].typlen == -2)
    1241             :             {
    1242             :                 /* cstring */
    1243           0 :                 for (i = 0; i < info[dim].nvalues; i++)
    1244             :                 {
    1245             :                     uint32      len;
    1246             : 
    1247           0 :                     memcpy(&len, ptr, sizeof(uint32));
    1248           0 :                     ptr += sizeof(uint32);
    1249             : 
    1250           0 :                     memcpy(dataptr, ptr, len);
    1251           0 :                     ptr += len;
    1252             : 
    1253             :                     /* just point into the array */
    1254           0 :                     map[dim][i] = PointerGetDatum(dataptr);
    1255           0 :                     dataptr += MAXALIGN(len);
    1256             :                 }
    1257             :             }
    1258             : 
    1259             :             /* no under/overflow of input array */
    1260             :             Assert(ptr <= (start + info[dim].nbytes));
    1261             : 
    1262             :             /* no overflow of the output mcv value */
    1263             :             Assert(dataptr <= ((char *) mcvlist + mcvlen));
    1264             :         }
    1265             : 
    1266             :         /* check we consumed input data for this dimension exactly */
    1267             :         Assert(ptr == (start + info[dim].nbytes));
    1268             :     }
    1269             : 
    1270             :     /* we should have also filled the MCV list exactly */
    1271             :     Assert(dataptr == ((char *) mcvlist + mcvlen));
    1272             : 
    1273             :     /* deserialize the MCV items and translate the indexes to Datums */
    1274       50030 :     for (i = 0; i < nitems; i++)
    1275             :     {
    1276       49282 :         MCVItem    *item = &mcvlist->items[i];
    1277             : 
    1278       49282 :         item->values = (Datum *) valuesptr;
    1279       49282 :         valuesptr += MAXALIGN(sizeof(Datum) * ndims);
    1280             : 
    1281       49282 :         item->isnull = (bool *) isnullptr;
    1282       49282 :         isnullptr += MAXALIGN(sizeof(bool) * ndims);
    1283             : 
    1284       49282 :         memcpy(item->isnull, ptr, sizeof(bool) * ndims);
    1285       49282 :         ptr += sizeof(bool) * ndims;
    1286             : 
    1287       49282 :         memcpy(&item->frequency, ptr, sizeof(double));
    1288       49282 :         ptr += sizeof(double);
    1289             : 
    1290       49282 :         memcpy(&item->base_frequency, ptr, sizeof(double));
    1291       49282 :         ptr += sizeof(double);
    1292             : 
    1293             :         /* finally translate the indexes (for non-NULL only) */
    1294      192168 :         for (dim = 0; dim < ndims; dim++)
    1295             :         {
    1296             :             uint16      index;
    1297             : 
    1298      142886 :             memcpy(&index, ptr, sizeof(uint16));
    1299      142886 :             ptr += sizeof(uint16);
    1300             : 
    1301      142886 :             if (item->isnull[dim])
    1302         392 :                 continue;
    1303             : 
    1304      142494 :             item->values[dim] = map[dim][index];
    1305             :         }
    1306             : 
    1307             :         /* check we're not overflowing the input */
    1308             :         Assert(ptr <= endptr);
    1309             :     }
    1310             : 
    1311             :     /* check that we processed all the data */
    1312             :     Assert(ptr == endptr);
    1313             : 
    1314             :     /* release the buffers used for mapping */
    1315        2958 :     for (dim = 0; dim < ndims; dim++)
    1316        2210 :         pfree(map[dim]);
    1317             : 
    1318         748 :     pfree(map);
    1319             : 
    1320         748 :     return mcvlist;
    1321             : }
    1322             : 
    1323             : /*
    1324             :  * SRF with details about buckets of a histogram:
    1325             :  *
    1326             :  * - item ID (0...nitems)
    1327             :  * - values (string array)
    1328             :  * - nulls only (boolean array)
    1329             :  * - frequency (double precision)
    1330             :  * - base_frequency (double precision)
    1331             :  *
    1332             :  * The input is the OID of the statistics, and there are no rows returned if
    1333             :  * the statistics contains no histogram.
    1334             :  */
    1335             : Datum
    1336        6896 : pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)
    1337             : {
    1338             :     FuncCallContext *funcctx;
    1339             : 
    1340             :     /* stuff done only on the first call of the function */
    1341        6896 :     if (SRF_IS_FIRSTCALL())
    1342             :     {
    1343             :         MemoryContext oldcontext;
    1344             :         MCVList    *mcvlist;
    1345             :         TupleDesc   tupdesc;
    1346             : 
    1347             :         /* create a function context for cross-call persistence */
    1348         142 :         funcctx = SRF_FIRSTCALL_INIT();
    1349             : 
    1350             :         /* switch to memory context appropriate for multiple function calls */
    1351         142 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
    1352             : 
    1353         142 :         mcvlist = statext_mcv_deserialize(PG_GETARG_BYTEA_P(0));
    1354             : 
    1355         142 :         funcctx->user_fctx = mcvlist;
    1356             : 
    1357             :         /* total number of tuples to be returned */
    1358         142 :         funcctx->max_calls = 0;
    1359         142 :         if (funcctx->user_fctx != NULL)
    1360         142 :             funcctx->max_calls = mcvlist->nitems;
    1361             : 
    1362             :         /* Build a tuple descriptor for our result type */
    1363         142 :         if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
    1364           0 :             ereport(ERROR,
    1365             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1366             :                      errmsg("function returning record called in context "
    1367             :                             "that cannot accept type record")));
    1368         142 :         tupdesc = BlessTupleDesc(tupdesc);
    1369             : 
    1370             :         /*
    1371             :          * generate attribute metadata needed later to produce tuples from raw
    1372             :          * C strings
    1373             :          */
    1374         142 :         funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
    1375             : 
    1376         142 :         MemoryContextSwitchTo(oldcontext);
    1377             :     }
    1378             : 
    1379             :     /* stuff done on every call of the function */
    1380        6896 :     funcctx = SRF_PERCALL_SETUP();
    1381             : 
    1382        6896 :     if (funcctx->call_cntr < funcctx->max_calls)   /* do when there is more
    1383             :                                                      * left to send */
    1384             :     {
    1385             :         Datum       values[5];
    1386             :         bool        nulls[5];
    1387             :         HeapTuple   tuple;
    1388             :         Datum       result;
    1389        6754 :         ArrayBuildState *astate_values = NULL;
    1390        6754 :         ArrayBuildState *astate_nulls = NULL;
    1391             : 
    1392             :         int         i;
    1393             :         MCVList    *mcvlist;
    1394             :         MCVItem    *item;
    1395             : 
    1396        6754 :         mcvlist = (MCVList *) funcctx->user_fctx;
    1397             : 
    1398             :         Assert(funcctx->call_cntr < mcvlist->nitems);
    1399             : 
    1400        6754 :         item = &mcvlist->items[funcctx->call_cntr];
    1401             : 
    1402       20652 :         for (i = 0; i < mcvlist->ndimensions; i++)
    1403             :         {
    1404             : 
    1405       13898 :             astate_nulls = accumArrayResult(astate_nulls,
    1406       13898 :                                             BoolGetDatum(item->isnull[i]),
    1407             :                                             false,
    1408             :                                             BOOLOID,
    1409             :                                             CurrentMemoryContext);
    1410             : 
    1411       13898 :             if (!item->isnull[i])
    1412             :             {
    1413             :                 bool        isvarlena;
    1414             :                 Oid         outfunc;
    1415             :                 FmgrInfo    fmgrinfo;
    1416             :                 Datum       val;
    1417             :                 text       *txt;
    1418             : 
    1419             :                 /* lookup output func for the type */
    1420       13788 :                 getTypeOutputInfo(mcvlist->types[i], &outfunc, &isvarlena);
    1421       13788 :                 fmgr_info(outfunc, &fmgrinfo);
    1422             : 
    1423       13788 :                 val = FunctionCall1(&fmgrinfo, item->values[i]);
    1424       13788 :                 txt = cstring_to_text(DatumGetPointer(val));
    1425             : 
    1426       13788 :                 astate_values = accumArrayResult(astate_values,
    1427             :                                                  PointerGetDatum(txt),
    1428             :                                                  false,
    1429             :                                                  TEXTOID,
    1430             :                                                  CurrentMemoryContext);
    1431             :             }
    1432             :             else
    1433         110 :                 astate_values = accumArrayResult(astate_values,
    1434             :                                                  (Datum) 0,
    1435             :                                                  true,
    1436             :                                                  TEXTOID,
    1437             :                                                  CurrentMemoryContext);
    1438             :         }
    1439             : 
    1440        6754 :         values[0] = Int32GetDatum(funcctx->call_cntr);
    1441        6754 :         values[1] = makeArrayResult(astate_values, CurrentMemoryContext);
    1442        6754 :         values[2] = makeArrayResult(astate_nulls, CurrentMemoryContext);
    1443        6754 :         values[3] = Float8GetDatum(item->frequency);
    1444        6754 :         values[4] = Float8GetDatum(item->base_frequency);
    1445             : 
    1446             :         /* no NULLs in the tuple */
    1447        6754 :         memset(nulls, 0, sizeof(nulls));
    1448             : 
    1449             :         /* build a tuple */
    1450        6754 :         tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls);
    1451             : 
    1452             :         /* make the tuple into a datum */
    1453        6754 :         result = HeapTupleGetDatum(tuple);
    1454             : 
    1455        6754 :         SRF_RETURN_NEXT(funcctx, result);
    1456             :     }
    1457             :     else                        /* do when there is no more left */
    1458             :     {
    1459         142 :         SRF_RETURN_DONE(funcctx);
    1460             :     }
    1461             : }
    1462             : 
    1463             : /*
    1464             :  * pg_mcv_list_in       - input routine for type pg_mcv_list.
    1465             :  *
    1466             :  * pg_mcv_list is real enough to be a table column, but it has no operations
    1467             :  * of its own, and disallows input too
    1468             :  */
    1469             : Datum
    1470           0 : pg_mcv_list_in(PG_FUNCTION_ARGS)
    1471             : {
    1472             :     /*
    1473             :      * pg_mcv_list stores the data in binary form and parsing text input is
    1474             :      * not needed, so disallow this.
    1475             :      */
    1476           0 :     ereport(ERROR,
    1477             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1478             :              errmsg("cannot accept a value of type %s", "pg_mcv_list")));
    1479             : 
    1480             :     PG_RETURN_VOID();           /* keep compiler quiet */
    1481             : }
    1482             : 
    1483             : 
    1484             : /*
    1485             :  * pg_mcv_list_out      - output routine for type pg_mcv_list.
    1486             :  *
    1487             :  * MCV lists are serialized into a bytea value, so we simply call byteaout()
    1488             :  * to serialize the value into text. But it'd be nice to serialize that into
    1489             :  * a meaningful representation (e.g. for inspection by people).
    1490             :  *
    1491             :  * XXX This should probably return something meaningful, similar to what
    1492             :  * pg_dependencies_out does. Not sure how to deal with the deduplicated
    1493             :  * values, though - do we want to expand that or not?
    1494             :  */
    1495             : Datum
    1496          22 : pg_mcv_list_out(PG_FUNCTION_ARGS)
    1497             : {
    1498          22 :     return byteaout(fcinfo);
    1499             : }
    1500             : 
    1501             : /*
    1502             :  * pg_mcv_list_recv     - binary input routine for type pg_mcv_list.
    1503             :  */
    1504             : Datum
    1505           0 : pg_mcv_list_recv(PG_FUNCTION_ARGS)
    1506             : {
    1507           0 :     ereport(ERROR,
    1508             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1509             :              errmsg("cannot accept a value of type %s", "pg_mcv_list")));
    1510             : 
    1511             :     PG_RETURN_VOID();           /* keep compiler quiet */
    1512             : }
    1513             : 
    1514             : /*
    1515             :  * pg_mcv_list_send     - binary output routine for type pg_mcv_list.
    1516             :  *
    1517             :  * MCV lists are serialized in a bytea value (although the type is named
    1518             :  * differently), so let's just send that.
    1519             :  */
    1520             : Datum
    1521           0 : pg_mcv_list_send(PG_FUNCTION_ARGS)
    1522             : {
    1523           0 :     return byteasend(fcinfo);
    1524             : }
    1525             : 
    1526             : /*
    1527             :  * match the attribute/expression to a dimension of the statistic
    1528             :  *
    1529             :  * Returns the zero-based index of the matching statistics dimension.
    1530             :  * Optionally determines the collation.
    1531             :  */
    1532             : static int
    1533        1410 : mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
    1534             : {
    1535             :     int         idx;
    1536             : 
    1537        1410 :     if (IsA(expr, Var))
    1538             :     {
    1539             :         /* simple Var, so just lookup using varattno */
    1540        1164 :         Var        *var = (Var *) expr;
    1541             : 
    1542        1164 :         if (collid)
    1543        1098 :             *collid = var->varcollid;
    1544             : 
    1545        1164 :         idx = bms_member_index(keys, var->varattno);
    1546             : 
    1547        1164 :         if (idx < 0)
    1548           0 :             elog(ERROR, "variable not found in statistics object");
    1549             :     }
    1550             :     else
    1551             :     {
    1552             :         /* expression - lookup in stats expressions */
    1553             :         ListCell   *lc;
    1554             : 
    1555         246 :         if (collid)
    1556         240 :             *collid = exprCollation(expr);
    1557             : 
    1558             :         /* expressions are stored after the simple columns */
    1559         246 :         idx = bms_num_members(keys);
    1560         456 :         foreach(lc, exprs)
    1561             :         {
    1562         456 :             Node       *stat_expr = (Node *) lfirst(lc);
    1563             : 
    1564         456 :             if (equal(expr, stat_expr))
    1565         246 :                 break;
    1566             : 
    1567         210 :             idx++;
    1568             :         }
    1569             : 
    1570         246 :         if (lc == NULL)
    1571           0 :             elog(ERROR, "expression not found in statistics object");
    1572             :     }
    1573             : 
    1574        1410 :     return idx;
    1575             : }
    1576             : 
    1577             : /*
    1578             :  * mcv_get_match_bitmap
    1579             :  *  Evaluate clauses using the MCV list, and update the match bitmap.
    1580             :  *
    1581             :  * A match bitmap keeps match/mismatch status for each MCV item, and we
    1582             :  * update it based on additional clauses. We also use it to skip items
    1583             :  * that can't possibly match (e.g. item marked as "mismatch" can't change
    1584             :  * to "match" when evaluating AND clause list).
    1585             :  *
    1586             :  * The function also returns a flag indicating whether there was an
    1587             :  * equality condition for all attributes, the minimum frequency in the MCV
    1588             :  * list, and a total MCV frequency (sum of frequencies for all items).
    1589             :  *
    1590             :  * XXX Currently the match bitmap uses a bool for each MCV item, which is
    1591             :  * somewhat wasteful as we could do with just a single bit, thus reducing
    1592             :  * the size to ~1/8. It would also allow us to combine bitmaps simply using
    1593             :  * & and |, which should be faster than min/max. The bitmaps are fairly
    1594             :  * small, though (thanks to the cap on the MCV list size).
    1595             :  */
    1596             : static bool *
    1597         846 : mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
    1598             :                      Bitmapset *keys, List *exprs,
    1599             :                      MCVList *mcvlist, bool is_or)
    1600             : {
    1601             :     ListCell   *l;
    1602             :     bool       *matches;
    1603             : 
    1604             :     /* The bitmap may be partially built. */
    1605             :     Assert(clauses != NIL);
    1606             :     Assert(mcvlist != NULL);
    1607             :     Assert(mcvlist->nitems > 0);
    1608             :     Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS);
    1609             : 
    1610         846 :     matches = palloc_array(bool, mcvlist->nitems);
    1611         846 :     memset(matches, !is_or, sizeof(bool) * mcvlist->nitems);
    1612             : 
    1613             :     /*
    1614             :      * Loop through the list of clauses, and for each of them evaluate all the
    1615             :      * MCV items not yet eliminated by the preceding clauses.
    1616             :      */
    1617        2430 :     foreach(l, clauses)
    1618             :     {
    1619        1584 :         Node       *clause = (Node *) lfirst(l);
    1620             : 
    1621             :         /* if it's a RestrictInfo, then extract the clause */
    1622        1584 :         if (IsA(clause, RestrictInfo))
    1623        1470 :             clause = (Node *) ((RestrictInfo *) clause)->clause;
    1624             : 
    1625             :         /*
    1626             :          * Handle the various types of clauses - OpClause, NullTest and
    1627             :          * AND/OR/NOT
    1628             :          */
    1629        1584 :         if (is_opclause(clause))
    1630             :         {
    1631        1062 :             OpExpr     *expr = (OpExpr *) clause;
    1632             :             FmgrInfo    opproc;
    1633             : 
    1634             :             /* valid only after examine_opclause_args returns true */
    1635             :             Node       *clause_expr;
    1636             :             Const      *cst;
    1637             :             bool        expronleft;
    1638             :             int         idx;
    1639             :             Oid         collid;
    1640             : 
    1641        1062 :             fmgr_info(get_opcode(expr->opno), &opproc);
    1642             : 
    1643             :             /* extract the var/expr and const from the expression */
    1644        1062 :             if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
    1645           0 :                 elog(ERROR, "incompatible clause");
    1646             : 
    1647             :             /* match the attribute/expression to a dimension of the statistic */
    1648        1062 :             idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
    1649             : 
    1650             :             /*
    1651             :              * Walk through the MCV items and evaluate the current clause. We
    1652             :              * can skip items that were already ruled out, and terminate if
    1653             :              * there are no remaining MCV items that might possibly match.
    1654             :              */
    1655       77418 :             for (int i = 0; i < mcvlist->nitems; i++)
    1656             :             {
    1657       76356 :                 bool        match = true;
    1658       76356 :                 MCVItem    *item = &mcvlist->items[i];
    1659             : 
    1660             :                 Assert(idx >= 0);
    1661             : 
    1662             :                 /*
    1663             :                  * When the MCV item or the Const value is NULL we can treat
    1664             :                  * this as a mismatch. We must not call the operator because
    1665             :                  * of strictness.
    1666             :                  */
    1667       76356 :                 if (item->isnull[idx] || cst->constisnull)
    1668             :                 {
    1669          48 :                     matches[i] = RESULT_MERGE(matches[i], is_or, false);
    1670          48 :                     continue;
    1671             :                 }
    1672             : 
    1673             :                 /*
    1674             :                  * Skip MCV items that can't change result in the bitmap. Once
    1675             :                  * the value gets false for AND-lists, or true for OR-lists,
    1676             :                  * we don't need to look at more clauses.
    1677             :                  */
    1678       76308 :                 if (RESULT_IS_FINAL(matches[i], is_or))
    1679       30522 :                     continue;
    1680             : 
    1681             :                 /*
    1682             :                  * First check whether the constant is below the lower
    1683             :                  * boundary (in that case we can skip the bucket, because
    1684             :                  * there's no overlap).
    1685             :                  *
    1686             :                  * We don't store collations used to build the statistics, but
    1687             :                  * we can use the collation for the attribute itself, as
    1688             :                  * stored in varcollid. We do reset the statistics after a
    1689             :                  * type change (including collation change), so this is OK.
    1690             :                  * For expressions, we use the collation extracted from the
    1691             :                  * expression itself.
    1692             :                  */
    1693       45786 :                 if (expronleft)
    1694       43170 :                     match = DatumGetBool(FunctionCall2Coll(&opproc,
    1695             :                                                            collid,
    1696       43170 :                                                            item->values[idx],
    1697       43170 :                                                            cst->constvalue));
    1698             :                 else
    1699        2616 :                     match = DatumGetBool(FunctionCall2Coll(&opproc,
    1700             :                                                            collid,
    1701        2616 :                                                            cst->constvalue,
    1702        2616 :                                                            item->values[idx]));
    1703             : 
    1704             :                 /* update the match bitmap with the result */
    1705       45786 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1706             :             }
    1707             :         }
    1708         522 :         else if (IsA(clause, ScalarArrayOpExpr))
    1709             :         {
    1710         276 :             ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
    1711             :             FmgrInfo    opproc;
    1712             : 
    1713             :             /* valid only after examine_opclause_args returns true */
    1714             :             Node       *clause_expr;
    1715             :             Const      *cst;
    1716             :             bool        expronleft;
    1717             :             Oid         collid;
    1718             :             int         idx;
    1719             : 
    1720             :             /* array evaluation */
    1721             :             ArrayType  *arrayval;
    1722             :             int16       elmlen;
    1723             :             bool        elmbyval;
    1724             :             char        elmalign;
    1725             :             int         num_elems;
    1726             :             Datum      *elem_values;
    1727             :             bool       *elem_nulls;
    1728             : 
    1729         276 :             fmgr_info(get_opcode(expr->opno), &opproc);
    1730             : 
    1731             :             /* extract the var/expr and const from the expression */
    1732         276 :             if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
    1733           0 :                 elog(ERROR, "incompatible clause");
    1734             : 
    1735             :             /* We expect Var on left */
    1736         276 :             if (!expronleft)
    1737           0 :                 elog(ERROR, "incompatible clause");
    1738             : 
    1739             :             /*
    1740             :              * Deconstruct the array constant, unless it's NULL (we'll cover
    1741             :              * that case below)
    1742             :              */
    1743         276 :             if (!cst->constisnull)
    1744             :             {
    1745         276 :                 arrayval = DatumGetArrayTypeP(cst->constvalue);
    1746         276 :                 get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
    1747             :                                      &elmlen, &elmbyval, &elmalign);
    1748         276 :                 deconstruct_array(arrayval,
    1749             :                                   ARR_ELEMTYPE(arrayval),
    1750             :                                   elmlen, elmbyval, elmalign,
    1751             :                                   &elem_values, &elem_nulls, &num_elems);
    1752             :             }
    1753             : 
    1754             :             /* match the attribute/expression to a dimension of the statistic */
    1755         276 :             idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
    1756             : 
    1757             :             /*
    1758             :              * Walk through the MCV items and evaluate the current clause. We
    1759             :              * can skip items that were already ruled out, and terminate if
    1760             :              * there are no remaining MCV items that might possibly match.
    1761             :              */
    1762       22968 :             for (int i = 0; i < mcvlist->nitems; i++)
    1763             :             {
    1764             :                 int         j;
    1765       22692 :                 bool        match = !expr->useOr;
    1766       22692 :                 MCVItem    *item = &mcvlist->items[i];
    1767             : 
    1768             :                 /*
    1769             :                  * When the MCV item or the Const value is NULL we can treat
    1770             :                  * this as a mismatch. We must not call the operator because
    1771             :                  * of strictness.
    1772             :                  */
    1773       22692 :                 if (item->isnull[idx] || cst->constisnull)
    1774             :                 {
    1775          18 :                     matches[i] = RESULT_MERGE(matches[i], is_or, false);
    1776          18 :                     continue;
    1777             :                 }
    1778             : 
    1779             :                 /*
    1780             :                  * Skip MCV items that can't change result in the bitmap. Once
    1781             :                  * the value gets false for AND-lists, or true for OR-lists,
    1782             :                  * we don't need to look at more clauses.
    1783             :                  */
    1784       22674 :                 if (RESULT_IS_FINAL(matches[i], is_or))
    1785       11544 :                     continue;
    1786             : 
    1787       35754 :                 for (j = 0; j < num_elems; j++)
    1788             :                 {
    1789       30978 :                     Datum       elem_value = elem_values[j];
    1790       30978 :                     bool        elem_isnull = elem_nulls[j];
    1791             :                     bool        elem_match;
    1792             : 
    1793             :                     /* NULL values always evaluate as not matching. */
    1794       30978 :                     if (elem_isnull)
    1795             :                     {
    1796        2112 :                         match = RESULT_MERGE(match, expr->useOr, false);
    1797        2112 :                         continue;
    1798             :                     }
    1799             : 
    1800             :                     /*
    1801             :                      * Stop evaluating the array elements once we reach a
    1802             :                      * matching value that can't change - ALL() is the same as
    1803             :                      * AND-list, ANY() is the same as OR-list.
    1804             :                      */
    1805       28866 :                     if (RESULT_IS_FINAL(match, expr->useOr))
    1806        6354 :                         break;
    1807             : 
    1808       22512 :                     elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
    1809             :                                                                 collid,
    1810       22512 :                                                                 item->values[idx],
    1811             :                                                                 elem_value));
    1812             : 
    1813       22512 :                     match = RESULT_MERGE(match, expr->useOr, elem_match);
    1814             :                 }
    1815             : 
    1816             :                 /* update the match bitmap with the result */
    1817       11130 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1818             :             }
    1819             :         }
    1820         246 :         else if (IsA(clause, NullTest))
    1821             :         {
    1822          66 :             NullTest   *expr = (NullTest *) clause;
    1823          66 :             Node       *clause_expr = (Node *) (expr->arg);
    1824             : 
    1825             :             /* match the attribute/expression to a dimension of the statistic */
    1826          66 :             int         idx = mcv_match_expression(clause_expr, keys, exprs, NULL);
    1827             : 
    1828             :             /*
    1829             :              * Walk through the MCV items and evaluate the current clause. We
    1830             :              * can skip items that were already ruled out, and terminate if
    1831             :              * there are no remaining MCV items that might possibly match.
    1832             :              */
    1833        6078 :             for (int i = 0; i < mcvlist->nitems; i++)
    1834             :             {
    1835        6012 :                 bool        match = false;  /* assume mismatch */
    1836        6012 :                 MCVItem    *item = &mcvlist->items[i];
    1837             : 
    1838             :                 /* if the clause mismatches the MCV item, update the bitmap */
    1839        6012 :                 switch (expr->nulltesttype)
    1840             :                 {
    1841        4212 :                     case IS_NULL:
    1842        4212 :                         match = (item->isnull[idx]) ? true : match;
    1843        4212 :                         break;
    1844             : 
    1845        1800 :                     case IS_NOT_NULL:
    1846        1800 :                         match = (!item->isnull[idx]) ? true : match;
    1847        1800 :                         break;
    1848             :                 }
    1849             : 
    1850             :                 /* now, update the match bitmap, depending on OR/AND type */
    1851        6012 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1852             :             }
    1853             :         }
    1854         180 :         else if (is_orclause(clause) || is_andclause(clause))
    1855          66 :         {
    1856             :             /* AND/OR clause, with all subclauses being compatible */
    1857             : 
    1858             :             int         i;
    1859          66 :             BoolExpr   *bool_clause = ((BoolExpr *) clause);
    1860          66 :             List       *bool_clauses = bool_clause->args;
    1861             : 
    1862             :             /* match/mismatch bitmap for each MCV item */
    1863          66 :             bool       *bool_matches = NULL;
    1864             : 
    1865             :             Assert(bool_clauses != NIL);
    1866             :             Assert(list_length(bool_clauses) >= 2);
    1867             : 
    1868             :             /* build the match bitmap for the OR-clauses */
    1869          66 :             bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs,
    1870          66 :                                                 mcvlist, is_orclause(clause));
    1871             : 
    1872             :             /*
    1873             :              * Merge the bitmap produced by mcv_get_match_bitmap into the
    1874             :              * current one. We need to consider if we're evaluating AND or OR
    1875             :              * condition when merging the results.
    1876             :              */
    1877        4362 :             for (i = 0; i < mcvlist->nitems; i++)
    1878        4296 :                 matches[i] = RESULT_MERGE(matches[i], is_or, bool_matches[i]);
    1879             : 
    1880          66 :             pfree(bool_matches);
    1881             :         }
    1882         114 :         else if (is_notclause(clause))
    1883             :         {
    1884             :             /* NOT clause, with all subclauses compatible */
    1885             : 
    1886             :             int         i;
    1887          30 :             BoolExpr   *not_clause = ((BoolExpr *) clause);
    1888          30 :             List       *not_args = not_clause->args;
    1889             : 
    1890             :             /* match/mismatch bitmap for each MCV item */
    1891          30 :             bool       *not_matches = NULL;
    1892             : 
    1893             :             Assert(not_args != NIL);
    1894             :             Assert(list_length(not_args) == 1);
    1895             : 
    1896             :             /* build the match bitmap for the NOT-clause */
    1897          30 :             not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs,
    1898             :                                                mcvlist, false);
    1899             : 
    1900             :             /*
    1901             :              * Merge the bitmap produced by mcv_get_match_bitmap into the
    1902             :              * current one. We're handling a NOT clause, so invert the result
    1903             :              * before merging it into the global bitmap.
    1904             :              */
    1905         150 :             for (i = 0; i < mcvlist->nitems; i++)
    1906         120 :                 matches[i] = RESULT_MERGE(matches[i], is_or, !not_matches[i]);
    1907             : 
    1908          30 :             pfree(not_matches);
    1909             :         }
    1910          84 :         else if (IsA(clause, Var))
    1911             :         {
    1912             :             /* Var (has to be a boolean Var, possibly from below NOT) */
    1913             : 
    1914          78 :             Var        *var = (Var *) (clause);
    1915             : 
    1916             :             /* match the attribute to a dimension of the statistic */
    1917          78 :             int         idx = bms_member_index(keys, var->varattno);
    1918             : 
    1919             :             Assert(var->vartype == BOOLOID);
    1920             : 
    1921             :             /*
    1922             :              * Walk through the MCV items and evaluate the current clause. We
    1923             :              * can skip items that were already ruled out, and terminate if
    1924             :              * there are no remaining MCV items that might possibly match.
    1925             :              */
    1926         378 :             for (int i = 0; i < mcvlist->nitems; i++)
    1927             :             {
    1928         300 :                 MCVItem    *item = &mcvlist->items[i];
    1929         300 :                 bool        match = false;
    1930             : 
    1931             :                 /* if the item is NULL, it's a mismatch */
    1932         300 :                 if (!item->isnull[idx] && DatumGetBool(item->values[idx]))
    1933         150 :                     match = true;
    1934             : 
    1935             :                 /* update the result bitmap */
    1936         300 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1937             :             }
    1938             :         }
    1939             :         else
    1940             :         {
    1941             :             /* Otherwise, it must be a bare boolean-returning expression */
    1942             :             int         idx;
    1943             : 
    1944             :             /* match the expression to a dimension of the statistic */
    1945           6 :             idx = mcv_match_expression(clause, keys, exprs, NULL);
    1946             : 
    1947             :             /*
    1948             :              * Walk through the MCV items and evaluate the current clause. We
    1949             :              * can skip items that were already ruled out, and terminate if
    1950             :              * there are no remaining MCV items that might possibly match.
    1951             :              */
    1952         222 :             for (int i = 0; i < mcvlist->nitems; i++)
    1953             :             {
    1954             :                 bool        match;
    1955         216 :                 MCVItem    *item = &mcvlist->items[i];
    1956             : 
    1957             :                 /* "match" just means it's bool TRUE */
    1958         216 :                 match = !item->isnull[idx] && DatumGetBool(item->values[idx]);
    1959             : 
    1960             :                 /* now, update the match bitmap, depending on OR/AND type */
    1961         216 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1962             :             }
    1963             :         }
    1964             :     }
    1965             : 
    1966         846 :     return matches;
    1967             : }
    1968             : 
    1969             : 
    1970             : /*
    1971             :  * mcv_combine_selectivities
    1972             :  *      Combine per-column and multi-column MCV selectivity estimates.
    1973             :  *
    1974             :  * simple_sel is a "simple" selectivity estimate (produced without using any
    1975             :  * extended statistics, essentially assuming independence of columns/clauses).
    1976             :  *
    1977             :  * mcv_sel and mcv_basesel are sums of the frequencies and base frequencies of
    1978             :  * all matching MCV items.  The difference (mcv_sel - mcv_basesel) is then
    1979             :  * essentially interpreted as a correction to be added to simple_sel, as
    1980             :  * described below.
    1981             :  *
    1982             :  * mcv_totalsel is the sum of the frequencies of all MCV items (not just the
    1983             :  * matching ones).  This is used as an upper bound on the portion of the
    1984             :  * selectivity estimates not covered by the MCV statistics.
    1985             :  *
    1986             :  * Note: While simple and base selectivities are defined in a quite similar
    1987             :  * way, the values are computed differently and are not therefore equal. The
    1988             :  * simple selectivity is computed as a product of per-clause estimates, while
    1989             :  * the base selectivity is computed by adding up base frequencies of matching
    1990             :  * items of the multi-column MCV list. So the values may differ for two main
    1991             :  * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
    1992             :  * the MCV items did not match the estimated clauses.
    1993             :  *
    1994             :  * As both (a) and (b) reduce the base selectivity value, it generally holds
    1995             :  * that (simple_sel >= mcv_basesel). If the MCV list covers all the data, the
    1996             :  * values may be equal.
    1997             :  *
    1998             :  * So, other_sel = (simple_sel - mcv_basesel) is an estimate for the part not
    1999             :  * covered by the MCV list, and (mcv_sel - mcv_basesel) may be seen as a
    2000             :  * correction for the part covered by the MCV list. Those two statements are
    2001             :  * actually equivalent.
    2002             :  */
    2003             : Selectivity
    2004         798 : mcv_combine_selectivities(Selectivity simple_sel,
    2005             :                           Selectivity mcv_sel,
    2006             :                           Selectivity mcv_basesel,
    2007             :                           Selectivity mcv_totalsel)
    2008             : {
    2009             :     Selectivity other_sel;
    2010             :     Selectivity sel;
    2011             : 
    2012             :     /* estimated selectivity of values not covered by MCV matches */
    2013         798 :     other_sel = simple_sel - mcv_basesel;
    2014         798 :     CLAMP_PROBABILITY(other_sel);
    2015             : 
    2016             :     /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
    2017         798 :     if (other_sel > 1.0 - mcv_totalsel)
    2018         444 :         other_sel = 1.0 - mcv_totalsel;
    2019             : 
    2020             :     /* overall selectivity is the sum of the MCV and non-MCV parts */
    2021         798 :     sel = mcv_sel + other_sel;
    2022         798 :     CLAMP_PROBABILITY(sel);
    2023             : 
    2024         798 :     return sel;
    2025             : }
    2026             : 
    2027             : 
    2028             : /*
    2029             :  * mcv_clauselist_selectivity
    2030             :  *      Use MCV statistics to estimate the selectivity of an implicitly-ANDed
    2031             :  *      list of clauses.
    2032             :  *
    2033             :  * This determines which MCV items match every clause in the list and returns
    2034             :  * the sum of the frequencies of those items.
    2035             :  *
    2036             :  * In addition, it returns the sum of the base frequencies of each of those
    2037             :  * items (that is the sum of the selectivities that each item would have if
    2038             :  * the columns were independent of one another), and the total selectivity of
    2039             :  * all the MCV items (not just the matching ones).  These are expected to be
    2040             :  * used together with a "simple" selectivity estimate (one based only on
    2041             :  * per-column statistics) to produce an overall selectivity estimate that
    2042             :  * makes use of both per-column and multi-column statistics --- see
    2043             :  * mcv_combine_selectivities().
    2044             :  */
    2045             : Selectivity
    2046         510 : mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
    2047             :                            List *clauses, int varRelid,
    2048             :                            JoinType jointype, SpecialJoinInfo *sjinfo,
    2049             :                            RelOptInfo *rel,
    2050             :                            Selectivity *basesel, Selectivity *totalsel)
    2051             : {
    2052             :     int         i;
    2053             :     MCVList    *mcv;
    2054         510 :     Selectivity s = 0.0;
    2055         510 :     RangeTblEntry *rte = root->simple_rte_array[rel->relid];
    2056             : 
    2057             :     /* match/mismatch bitmap for each MCV item */
    2058         510 :     bool       *matches = NULL;
    2059             : 
    2060             :     /* load the MCV list stored in the statistics object */
    2061         510 :     mcv = statext_mcv_load(stat->statOid, rte->inh);
    2062             : 
    2063             :     /* build a match bitmap for the clauses */
    2064         510 :     matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
    2065             :                                    mcv, false);
    2066             : 
    2067             :     /* sum frequencies for all the matching MCV items */
    2068         510 :     *basesel = 0.0;
    2069         510 :     *totalsel = 0.0;
    2070       37830 :     for (i = 0; i < mcv->nitems; i++)
    2071             :     {
    2072       37320 :         *totalsel += mcv->items[i].frequency;
    2073             : 
    2074       37320 :         if (matches[i] != false)
    2075             :         {
    2076         666 :             *basesel += mcv->items[i].base_frequency;
    2077         666 :             s += mcv->items[i].frequency;
    2078             :         }
    2079             :     }
    2080             : 
    2081         510 :     return s;
    2082             : }
    2083             : 
    2084             : 
    2085             : /*
    2086             :  * mcv_clause_selectivity_or
    2087             :  *      Use MCV statistics to estimate the selectivity of a clause that
    2088             :  *      appears in an ORed list of clauses.
    2089             :  *
    2090             :  * As with mcv_clauselist_selectivity() this determines which MCV items match
    2091             :  * the clause and returns both the sum of the frequencies and the sum of the
    2092             :  * base frequencies of those items, as well as the sum of the frequencies of
    2093             :  * all MCV items (not just the matching ones) so that this information can be
    2094             :  * used by mcv_combine_selectivities() to produce a selectivity estimate that
    2095             :  * makes use of both per-column and multi-column statistics.
    2096             :  *
    2097             :  * Additionally, we return information to help compute the overall selectivity
    2098             :  * of the ORed list of clauses assumed to contain this clause.  This function
    2099             :  * is intended to be called for each clause in the ORed list of clauses,
    2100             :  * allowing the overall selectivity to be computed using the following
    2101             :  * algorithm:
    2102             :  *
    2103             :  * Suppose P[n] = P(C[1] OR C[2] OR ... OR C[n]) is the combined selectivity
    2104             :  * of the first n clauses in the list.  Then the combined selectivity taking
    2105             :  * into account the next clause C[n+1] can be written as
    2106             :  *
    2107             :  *      P[n+1] = P[n] + P(C[n+1]) - P((C[1] OR ... OR C[n]) AND C[n+1])
    2108             :  *
    2109             :  * The final term above represents the overlap between the clauses examined so
    2110             :  * far and the (n+1)'th clause.  To estimate its selectivity, we track the
    2111             :  * match bitmap for the ORed list of clauses examined so far and examine its
    2112             :  * intersection with the match bitmap for the (n+1)'th clause.
    2113             :  *
    2114             :  * We then also return the sums of the MCV item frequencies and base
    2115             :  * frequencies for the match bitmap intersection corresponding to the overlap
    2116             :  * term above, so that they can be combined with a simple selectivity estimate
    2117             :  * for that term.
    2118             :  *
    2119             :  * The parameter "or_matches" is an in/out parameter tracking the match bitmap
    2120             :  * for the clauses examined so far.  The caller is expected to set it to NULL
    2121             :  * the first time it calls this function.
    2122             :  */
    2123             : Selectivity
    2124         240 : mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat,
    2125             :                           MCVList *mcv, Node *clause, bool **or_matches,
    2126             :                           Selectivity *basesel, Selectivity *overlap_mcvsel,
    2127             :                           Selectivity *overlap_basesel, Selectivity *totalsel)
    2128             : {
    2129         240 :     Selectivity s = 0.0;
    2130             :     bool       *new_matches;
    2131             :     int         i;
    2132             : 
    2133             :     /* build the OR-matches bitmap, if not built already */
    2134         240 :     if (*or_matches == NULL)
    2135          96 :         *or_matches = palloc0_array(bool, mcv->nitems);
    2136             : 
    2137             :     /* build the match bitmap for the new clause */
    2138         240 :     new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
    2139             :                                        stat->exprs, mcv, false);
    2140             : 
    2141             :     /*
    2142             :      * Sum the frequencies for all the MCV items matching this clause and also
    2143             :      * those matching the overlap between this clause and any of the preceding
    2144             :      * clauses as described above.
    2145             :      */
    2146         240 :     *basesel = 0.0;
    2147         240 :     *overlap_mcvsel = 0.0;
    2148         240 :     *overlap_basesel = 0.0;
    2149         240 :     *totalsel = 0.0;
    2150       15516 :     for (i = 0; i < mcv->nitems; i++)
    2151             :     {
    2152       15276 :         *totalsel += mcv->items[i].frequency;
    2153             : 
    2154       15276 :         if (new_matches[i])
    2155             :         {
    2156         336 :             s += mcv->items[i].frequency;
    2157         336 :             *basesel += mcv->items[i].base_frequency;
    2158             : 
    2159         336 :             if ((*or_matches)[i])
    2160             :             {
    2161         144 :                 *overlap_mcvsel += mcv->items[i].frequency;
    2162         144 :                 *overlap_basesel += mcv->items[i].base_frequency;
    2163             :             }
    2164             :         }
    2165             : 
    2166             :         /* update the OR-matches bitmap for the next clause */
    2167       15276 :         (*or_matches)[i] = (*or_matches)[i] || new_matches[i];
    2168             :     }
    2169             : 
    2170         240 :     pfree(new_matches);
    2171             : 
    2172         240 :     return s;
    2173             : }
    2174             : 
    2175             : /*
    2176             :  * Free allocations of a MCVList.
    2177             :  */
    2178             : void
    2179           0 : statext_mcv_free(MCVList *mcvlist)
    2180             : {
    2181           0 :     for (int i = 0; i < mcvlist->nitems; i++)
    2182             :     {
    2183           0 :         MCVItem    *item = &mcvlist->items[i];
    2184             : 
    2185           0 :         pfree(item->values);
    2186           0 :         pfree(item->isnull);
    2187             :     }
    2188           0 :     pfree(mcvlist);
    2189           0 : }
    2190             : 
    2191             : /*
    2192             :  * Create the MCV composite datum, which is a serialization of an array of
    2193             :  * MCVItems.
    2194             :  *
    2195             :  * The inputs consist of four separate arrays of equal length "numitems"
    2196             :  * (mcv_elems, mcv_nulls, freqs and base_freqs) that form the basics of
    2197             :  * what is stored in the catalogs.  These form an array of composite
    2198             :  * records defined by the three atttypX arrays of equal length "numattrs".
    2199             :  *
    2200             :  * If any data element fails to convert to the input type specified for that
    2201             :  * attribute, then function will return a NULL Datum if elevel < ERROR.
    2202             :  */
    2203             : Datum
    2204          30 : statext_mcv_import(int elevel, int numattrs,
    2205             :                    Oid *atttypids, int32 *atttypmods, Oid *atttypcolls,
    2206             :                    int nitems, Datum *mcv_elems, bool *mcv_nulls,
    2207             :                    float8 *freqs, float8 *base_freqs)
    2208             : {
    2209             :     MCVList    *mcvlist;
    2210             :     bytea      *bytes;
    2211             :     VacAttrStats **vastats;
    2212             : 
    2213             :     /*
    2214             :      * Allocate the MCV list structure, set the global parameters.
    2215             :      */
    2216          30 :     mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
    2217          30 :                                   (sizeof(MCVItem) * nitems));
    2218             : 
    2219          30 :     mcvlist->magic = STATS_MCV_MAGIC;
    2220          30 :     mcvlist->type = STATS_MCV_TYPE_BASIC;
    2221          30 :     mcvlist->ndimensions = numattrs;
    2222          30 :     mcvlist->nitems = nitems;
    2223             : 
    2224             :     /* Set the values for the 1-D arrays and allocate space for the 2-D arrays */
    2225         234 :     for (int i = 0; i < nitems; i++)
    2226             :     {
    2227         204 :         MCVItem    *item = &mcvlist->items[i];
    2228             : 
    2229         204 :         item->frequency = freqs[i];
    2230         204 :         item->base_frequency = base_freqs[i];
    2231         204 :         item->values = (Datum *) palloc0_array(Datum, numattrs);
    2232         204 :         item->isnull = (bool *) palloc0_array(bool, numattrs);
    2233             :     }
    2234             : 
    2235             :     /*
    2236             :      * Walk through each dimension, determine the input function for that
    2237             :      * type, and then attempt to convert all values in that column via that
    2238             :      * function.  We approach this column-wise because it is simpler to deal
    2239             :      * with one input function at time, and possibly more cache-friendly.
    2240             :      */
    2241         114 :     for (int j = 0; j < numattrs; j++)
    2242             :     {
    2243             :         FmgrInfo    finfo;
    2244             :         Oid         ioparam;
    2245             :         Oid         infunc;
    2246          84 :         int         index = j;
    2247             : 
    2248          84 :         getTypeInputInfo(atttypids[j], &infunc, &ioparam);
    2249          84 :         fmgr_info(infunc, &finfo);
    2250             : 
    2251             :         /* store info about data type OIDs */
    2252          84 :         mcvlist->types[j] = atttypids[j];
    2253             : 
    2254         672 :         for (int i = 0; i < nitems; i++)
    2255             :         {
    2256         588 :             MCVItem    *item = &mcvlist->items[i];
    2257             : 
    2258         588 :             if (mcv_nulls[index])
    2259             :             {
    2260             :                 /* NULL value detected, hence no input to process */
    2261          38 :                 item->values[j] = (Datum) 0;
    2262          38 :                 item->isnull[j] = true;
    2263             :             }
    2264             :             else
    2265             :             {
    2266         550 :                 char       *s = TextDatumGetCString(mcv_elems[index]);
    2267         550 :                 ErrorSaveContext escontext = {T_ErrorSaveContext};
    2268             : 
    2269         550 :                 if (!InputFunctionCallSafe(&finfo, s, ioparam, atttypmods[j],
    2270         550 :                                            (Node *) &escontext, &item->values[j]))
    2271             :                 {
    2272           0 :                     ereport(elevel,
    2273             :                             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2274             :                              errmsg("could not parse MCV element \"%s\": incorrect value", s)));
    2275           0 :                     pfree(s);
    2276           0 :                     goto error;
    2277             :                 }
    2278             : 
    2279         550 :                 pfree(s);
    2280             :             }
    2281             : 
    2282         588 :             index += numattrs;
    2283             :         }
    2284             :     }
    2285             : 
    2286             :     /*
    2287             :      * The function statext_mcv_serialize() requires an array of pointers to
    2288             :      * VacAttrStats records, but only a few fields within those records have
    2289             :      * to be filled out.
    2290             :      */
    2291          30 :     vastats = (VacAttrStats **) palloc0_array(VacAttrStats *, numattrs);
    2292             : 
    2293         114 :     for (int i = 0; i < numattrs; i++)
    2294             :     {
    2295          84 :         Oid         typid = atttypids[i];
    2296             :         HeapTuple   typtuple;
    2297             : 
    2298          84 :         typtuple = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(typid));
    2299             : 
    2300          84 :         if (!HeapTupleIsValid(typtuple))
    2301           0 :             elog(ERROR, "cache lookup failed for type %u", typid);
    2302             : 
    2303          84 :         vastats[i] = palloc0_object(VacAttrStats);
    2304             : 
    2305          84 :         vastats[i]->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
    2306          84 :         vastats[i]->attrtypid = typid;
    2307          84 :         vastats[i]->attrcollid = atttypcolls[i];
    2308             :     }
    2309             : 
    2310          30 :     bytes = statext_mcv_serialize(mcvlist, vastats);
    2311             : 
    2312         114 :     for (int i = 0; i < numattrs; i++)
    2313             :     {
    2314          84 :         pfree(vastats[i]);
    2315             :     }
    2316          30 :     pfree((void *) vastats);
    2317             : 
    2318          30 :     pfree(mcv_elems);
    2319          30 :     pfree(mcv_nulls);
    2320             : 
    2321          30 :     if (bytes == NULL)
    2322             :     {
    2323           0 :         ereport(elevel,
    2324             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2325             :                  errmsg("could not import MCV list")));
    2326           0 :         goto error;
    2327             :     }
    2328             : 
    2329          30 :     return PointerGetDatum(bytes);
    2330             : 
    2331           0 : error:
    2332           0 :     statext_mcv_free(mcvlist);
    2333           0 :     return (Datum) 0;
    2334             : }

Generated by: LCOV version 1.16