LCOV - code coverage report
Current view: top level - src/backend/statistics - mcv.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 89.8 % 645 579
Test Date: 2026-03-13 08:14:54 Functions: 82.6 % 23 19
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * mcv.c
       4              :  *    POSTGRES multivariate MCV lists
       5              :  *
       6              :  *
       7              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8              :  * Portions Copyright (c) 1994, Regents of the University of California
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/statistics/mcv.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : #include "postgres.h"
      16              : 
      17              : #include "access/htup_details.h"
      18              : #include "catalog/pg_statistic_ext.h"
      19              : #include "catalog/pg_statistic_ext_data.h"
      20              : #include "fmgr.h"
      21              : #include "funcapi.h"
      22              : #include "nodes/nodeFuncs.h"
      23              : #include "statistics/extended_stats_internal.h"
      24              : #include "statistics/statistics.h"
      25              : #include "utils/array.h"
      26              : #include "utils/builtins.h"
      27              : #include "utils/fmgrprotos.h"
      28              : #include "utils/lsyscache.h"
      29              : #include "utils/selfuncs.h"
      30              : #include "utils/syscache.h"
      31              : #include "utils/typcache.h"
      32              : 
      33              : /*
      34              :  * Computes size of a serialized MCV item, depending on the number of
      35              :  * dimensions (columns) the statistic is defined on. The datum values are
      36              :  * stored in a separate array (deduplicated, to minimize the size), and
      37              :  * so the serialized items only store uint16 indexes into that array.
      38              :  *
      39              :  * Each serialized item stores (in this order):
      40              :  *
      41              :  * - indexes to values    (ndim * sizeof(uint16))
      42              :  * - null flags           (ndim * sizeof(bool))
      43              :  * - frequency            (sizeof(double))
      44              :  * - base_frequency       (sizeof(double))
      45              :  *
      46              :  * There is no alignment padding within an MCV item.
      47              :  * So in total each MCV item requires this many bytes:
      48              :  *
      49              :  *   ndim * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double)
      50              :  */
      51              : #define ITEM_SIZE(ndims)    \
      52              :     ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))
      53              : 
      54              : /*
      55              :  * Used to compute size of serialized MCV list representation.
      56              :  */
      57              : #define MinSizeOfMCVList        \
      58              :     (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
      59              : 
      60              : /*
      61              :  * Size of the serialized MCV list, excluding the space needed for
      62              :  * deduplicated per-dimension values. The macro is meant to be used
      63              :  * when it's not yet safe to access the serialized info about amount
      64              :  * of data for each column.
      65              :  */
      66              : #define SizeOfMCVList(ndims,nitems) \
      67              :     ((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
      68              :      ((ndims) * sizeof(DimensionInfo)) + \
      69              :      ((nitems) * ITEM_SIZE(ndims)))
      70              : 
      71              : static MultiSortSupport build_mss(StatsBuildData *data);
      72              : 
      73              : static SortItem *build_distinct_groups(int numrows, SortItem *items,
      74              :                                        MultiSortSupport mss, int *ndistinct);
      75              : 
      76              : static SortItem **build_column_frequencies(SortItem *groups, int ngroups,
      77              :                                            MultiSortSupport mss, int *ncounts);
      78              : 
      79              : static int  count_distinct_groups(int numrows, SortItem *items,
      80              :                                   MultiSortSupport mss);
      81              : 
      82              : /*
      83              :  * Compute new value for bitmap item, considering whether it's used for
      84              :  * clauses connected by AND/OR.
      85              :  */
      86              : #define RESULT_MERGE(value, is_or, match) \
      87              :     ((is_or) ? ((value) || (match)) : ((value) && (match)))
      88              : 
      89              : /*
      90              :  * When processing a list of clauses, the bitmap item may get set to a value
      91              :  * such that additional clauses can't change it. For example, when processing
      92              :  * a list of clauses connected to AND, as soon as the item gets set to 'false'
      93              :  * then it'll remain like that. Similarly clauses connected by OR and 'true'.
      94              :  *
      95              :  * Returns true when the value in the bitmap can't change no matter how the
      96              :  * remaining clauses are evaluated.
      97              :  */
      98              : #define RESULT_IS_FINAL(value, is_or)   ((is_or) ? (value) : (!(value)))
      99              : 
     100              : /*
     101              :  * get_mincount_for_mcv_list
     102              :  *      Determine the minimum number of times a value needs to appear in
     103              :  *      the sample for it to be included in the MCV list.
     104              :  *
     105              :  * We want to keep only values that appear sufficiently often in the
     106              :  * sample that it is reasonable to extrapolate their sample frequencies to
     107              :  * the entire table.  We do this by placing an upper bound on the relative
     108              :  * standard error of the sample frequency, so that any estimates the
     109              :  * planner generates from the MCV statistics can be expected to be
     110              :  * reasonably accurate.
     111              :  *
     112              :  * Since we are sampling without replacement, the sample frequency of a
     113              :  * particular value is described by a hypergeometric distribution.  A
     114              :  * common rule of thumb when estimating errors in this situation is to
     115              :  * require at least 10 instances of the value in the sample, in which case
     116              :  * the distribution can be approximated by a normal distribution, and
     117              :  * standard error analysis techniques can be applied.  Given a sample size
     118              :  * of n, a population size of N, and a sample frequency of p=cnt/n, the
     119              :  * standard error of the proportion p is given by
     120              :  *      SE = sqrt(p*(1-p)/n) * sqrt((N-n)/(N-1))
     121              :  * where the second term is the finite population correction.  To get
     122              :  * reasonably accurate planner estimates, we impose an upper bound on the
     123              :  * relative standard error of 20% -- i.e., SE/p < 0.2.  This 20% relative
     124              :  * error bound is fairly arbitrary, but has been found empirically to work
     125              :  * well.  Rearranging this formula gives a lower bound on the number of
     126              :  * instances of the value seen:
     127              :  *      cnt > n*(N-n) / (N-n+0.04*n*(N-1))
     128              :  * This bound is at most 25, and approaches 0 as n approaches 0 or N. The
     129              :  * case where n approaches 0 cannot happen in practice, since the sample
     130              :  * size is at least 300.  The case where n approaches N corresponds to
     131              :  * sampling the whole table, in which case it is reasonable to keep
     132              :  * the whole MCV list (have no lower bound), so it makes sense to apply
     133              :  * this formula for all inputs, even though the above derivation is
     134              :  * technically only valid when the right hand side is at least around 10.
     135              :  *
     136              :  * An alternative way to look at this formula is as follows -- assume that
     137              :  * the number of instances of the value seen scales up to the entire
     138              :  * table, so that the population count is K=N*cnt/n. Then the distribution
     139              :  * in the sample is a hypergeometric distribution parameterised by N, n
     140              :  * and K, and the bound above is mathematically equivalent to demanding
     141              :  * that the standard deviation of that distribution is less than 20% of
     142              :  * its mean.  Thus the relative errors in any planner estimates produced
     143              :  * from the MCV statistics are likely to be not too large.
     144              :  */
     145              : static double
     146          151 : get_mincount_for_mcv_list(int samplerows, double totalrows)
     147              : {
     148          151 :     double      n = samplerows;
     149          151 :     double      N = totalrows;
     150              :     double      numer,
     151              :                 denom;
     152              : 
     153          151 :     numer = n * (N - n);
     154          151 :     denom = N - n + 0.04 * n * (N - 1);
     155              : 
     156              :     /* Guard against division by zero (possible if n = N = 1) */
     157          151 :     if (denom == 0.0)
     158            6 :         return 0.0;
     159              : 
     160          145 :     return numer / denom;
     161              : }
     162              : 
     163              : /*
     164              :  * Builds MCV list from the set of sampled rows.
     165              :  *
     166              :  * The algorithm is quite simple:
     167              :  *
     168              :  *     (1) sort the data (default collation, '<' for the data type)
     169              :  *
     170              :  *     (2) count distinct groups, decide how many to keep
     171              :  *
     172              :  *     (3) build the MCV list using the threshold determined in (2)
     173              :  *
     174              :  *     (4) remove rows represented by the MCV from the sample
     175              :  *
     176              :  */
     177              : MCVList *
     178          151 : statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
     179              : {
     180              :     int         i,
     181              :                 numattrs,
     182              :                 numrows,
     183              :                 ngroups,
     184              :                 nitems;
     185              :     double      mincount;
     186              :     SortItem   *items;
     187              :     SortItem   *groups;
     188          151 :     MCVList    *mcvlist = NULL;
     189              :     MultiSortSupport mss;
     190              : 
     191              :     /* comparator for all the columns */
     192          151 :     mss = build_mss(data);
     193              : 
     194              :     /* sort the rows */
     195          151 :     items = build_sorted_items(data, &nitems, mss,
     196              :                                data->nattnums, data->attnums);
     197              : 
     198          151 :     if (!items)
     199            0 :         return NULL;
     200              : 
     201              :     /* for convenience */
     202          151 :     numattrs = data->nattnums;
     203          151 :     numrows = data->numrows;
     204              : 
     205              :     /* transform the sorted rows into groups (sorted by frequency) */
     206          151 :     groups = build_distinct_groups(nitems, items, mss, &ngroups);
     207              : 
     208              :     /*
     209              :      * The maximum number of MCV items to store, based on the statistics
     210              :      * target we computed for the statistics object (from the target set for
     211              :      * the object itself, attributes and the system default). In any case, we
     212              :      * can't keep more groups than we have available.
     213              :      */
     214          151 :     nitems = stattarget;
     215          151 :     if (nitems > ngroups)
     216          103 :         nitems = ngroups;
     217              : 
     218              :     /*
     219              :      * Decide how many items to keep in the MCV list. We can't use the same
     220              :      * algorithm as per-column MCV lists, because that only considers the
     221              :      * actual group frequency - but we're primarily interested in how the
     222              :      * actual frequency differs from the base frequency (product of simple
     223              :      * per-column frequencies, as if the columns were independent).
     224              :      *
     225              :      * Using the same algorithm might exclude items that are close to the
     226              :      * "average" frequency of the sample. But that does not say whether the
     227              :      * observed frequency is close to the base frequency or not. We also need
     228              :      * to consider unexpectedly uncommon items (again, compared to the base
     229              :      * frequency), and the single-column algorithm does not have to.
     230              :      *
     231              :      * We simply decide how many items to keep by computing the minimum count
     232              :      * using get_mincount_for_mcv_list() and then keep all items that seem to
     233              :      * be more common than that.
     234              :      */
     235          151 :     mincount = get_mincount_for_mcv_list(numrows, totalrows);
     236              : 
     237              :     /*
     238              :      * Walk the groups until we find the first group with a count below the
     239              :      * mincount threshold (the index of that group is the number of groups we
     240              :      * want to keep).
     241              :      */
     242         5697 :     for (i = 0; i < nitems; i++)
     243              :     {
     244         5546 :         if (groups[i].count < mincount)
     245              :         {
     246            0 :             nitems = i;
     247            0 :             break;
     248              :         }
     249              :     }
     250              : 
     251              :     /*
     252              :      * At this point, we know the number of items for the MCV list. There
     253              :      * might be none (for uniform distribution with many groups), and in that
     254              :      * case, there will be no MCV list. Otherwise, construct the MCV list.
     255              :      */
     256          151 :     if (nitems > 0)
     257              :     {
     258              :         int         j;
     259              :         SortItem    key;
     260              :         MultiSortSupport tmp;
     261              : 
     262              :         /* frequencies for values in each attribute */
     263              :         SortItem  **freqs;
     264              :         int        *nfreqs;
     265              : 
     266              :         /* used to search values */
     267          151 :         tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup)
     268              :                                         + sizeof(SortSupportData));
     269              : 
     270              :         /* compute frequencies for values in each column */
     271          151 :         nfreqs = palloc0_array(int, numattrs);
     272          151 :         freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
     273              : 
     274              :         /*
     275              :          * Allocate the MCV list structure, set the global parameters.
     276              :          */
     277          151 :         mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
     278          151 :                                       sizeof(MCVItem) * nitems);
     279              : 
     280          151 :         mcvlist->magic = STATS_MCV_MAGIC;
     281          151 :         mcvlist->type = STATS_MCV_TYPE_BASIC;
     282          151 :         mcvlist->ndimensions = numattrs;
     283          151 :         mcvlist->nitems = nitems;
     284              : 
     285              :         /* store info about data type OIDs */
     286          543 :         for (i = 0; i < numattrs; i++)
     287          392 :             mcvlist->types[i] = data->stats[i]->attrtypid;
     288              : 
     289              :         /* Copy the first chunk of groups into the result. */
     290         5697 :         for (i = 0; i < nitems; i++)
     291              :         {
     292              :             /* just point to the proper place in the list */
     293         5546 :             MCVItem    *item = &mcvlist->items[i];
     294              : 
     295         5546 :             item->values = palloc_array(Datum, numattrs);
     296         5546 :             item->isnull = palloc_array(bool, numattrs);
     297              : 
     298              :             /* copy values for the group */
     299         5546 :             memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
     300         5546 :             memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
     301              : 
     302              :             /* groups should be sorted by frequency in descending order */
     303              :             Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
     304              : 
     305              :             /* group frequency */
     306         5546 :             item->frequency = (double) groups[i].count / numrows;
     307              : 
     308              :             /* base frequency, if the attributes were independent */
     309         5546 :             item->base_frequency = 1.0;
     310        20232 :             for (j = 0; j < numattrs; j++)
     311              :             {
     312              :                 SortItem   *freq;
     313              : 
     314              :                 /* single dimension */
     315        14686 :                 tmp->ndims = 1;
     316        14686 :                 tmp->ssup[0] = mss->ssup[j];
     317              : 
     318              :                 /* fill search key */
     319        14686 :                 key.values = &groups[i].values[j];
     320        14686 :                 key.isnull = &groups[i].isnull[j];
     321              : 
     322        14686 :                 freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
     323              :                                                 sizeof(SortItem),
     324              :                                                 multi_sort_compare, tmp);
     325              : 
     326        14686 :                 item->base_frequency *= ((double) freq->count) / numrows;
     327              :             }
     328              :         }
     329              : 
     330          151 :         pfree(nfreqs);
     331          151 :         pfree(freqs);
     332              :     }
     333              : 
     334          151 :     pfree(items);
     335          151 :     pfree(groups);
     336              : 
     337          151 :     return mcvlist;
     338              : }
     339              : 
     340              : /*
     341              :  * build_mss
     342              :  *      Build a MultiSortSupport for the given StatsBuildData.
     343              :  */
     344              : static MultiSortSupport
     345          151 : build_mss(StatsBuildData *data)
     346              : {
     347              :     int         i;
     348          151 :     int         numattrs = data->nattnums;
     349              : 
     350              :     /* Sort by multiple columns (using array of SortSupport) */
     351          151 :     MultiSortSupport mss = multi_sort_init(numattrs);
     352              : 
     353              :     /* prepare the sort functions for all the attributes */
     354          543 :     for (i = 0; i < numattrs; i++)
     355              :     {
     356          392 :         VacAttrStats *colstat = data->stats[i];
     357              :         TypeCacheEntry *type;
     358              : 
     359          392 :         type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
     360          392 :         if (type->lt_opr == InvalidOid) /* shouldn't happen */
     361            0 :             elog(ERROR, "cache lookup failed for ordering operator for type %u",
     362              :                  colstat->attrtypid);
     363              : 
     364          392 :         multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
     365              :     }
     366              : 
     367          151 :     return mss;
     368              : }
     369              : 
     370              : /*
     371              :  * count_distinct_groups
     372              :  *      Count distinct combinations of SortItems in the array.
     373              :  *
     374              :  * The array is assumed to be sorted according to the MultiSortSupport.
     375              :  */
     376              : static int
     377          151 : count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
     378              : {
     379              :     int         i;
     380              :     int         ndistinct;
     381              : 
     382          151 :     ndistinct = 1;
     383       242271 :     for (i = 1; i < numrows; i++)
     384              :     {
     385              :         /* make sure the array really is sorted */
     386              :         Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
     387              : 
     388       242120 :         if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
     389        45469 :             ndistinct += 1;
     390              :     }
     391              : 
     392          151 :     return ndistinct;
     393              : }
     394              : 
     395              : /*
     396              :  * compare_sort_item_count
     397              :  *      Comparator for sorting items by count (frequencies) in descending
     398              :  *      order.
     399              :  */
     400              : static int
     401        49108 : compare_sort_item_count(const void *a, const void *b, void *arg)
     402              : {
     403        49108 :     const SortItem *ia = a;
     404        49108 :     const SortItem *ib = b;
     405              : 
     406        49108 :     if (ia->count == ib->count)
     407        48570 :         return 0;
     408          538 :     else if (ia->count > ib->count)
     409          367 :         return -1;
     410              : 
     411          171 :     return 1;
     412              : }
     413              : 
     414              : /*
     415              :  * build_distinct_groups
     416              :  *      Build an array of SortItems for distinct groups and counts matching
     417              :  *      items.
     418              :  *
     419              :  * The 'items' array is assumed to be sorted.
     420              :  */
     421              : static SortItem *
     422          151 : build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
     423              :                       int *ndistinct)
     424              : {
     425              :     int         i,
     426              :                 j;
     427          151 :     int         ngroups = count_distinct_groups(numrows, items, mss);
     428              : 
     429          151 :     SortItem   *groups = (SortItem *) palloc(ngroups * sizeof(SortItem));
     430              : 
     431          151 :     j = 0;
     432          151 :     groups[0] = items[0];
     433          151 :     groups[0].count = 1;
     434              : 
     435       242271 :     for (i = 1; i < numrows; i++)
     436              :     {
     437              :         /* Assume sorted in ascending order. */
     438              :         Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
     439              : 
     440              :         /* New distinct group detected. */
     441       242120 :         if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
     442              :         {
     443        45469 :             groups[++j] = items[i];
     444        45469 :             groups[j].count = 0;
     445              :         }
     446              : 
     447       242120 :         groups[j].count++;
     448              :     }
     449              : 
     450              :     /* ensure we filled the expected number of distinct groups */
     451              :     Assert(j + 1 == ngroups);
     452              : 
     453              :     /* Sort the distinct groups by frequency (in descending order). */
     454          151 :     qsort_interruptible(groups, ngroups, sizeof(SortItem),
     455              :                         compare_sort_item_count, NULL);
     456              : 
     457          151 :     *ndistinct = ngroups;
     458          151 :     return groups;
     459              : }
     460              : 
     461              : /* compare sort items (single dimension) */
     462              : static int
     463       519092 : sort_item_compare(const void *a, const void *b, void *arg)
     464              : {
     465       519092 :     SortSupport ssup = (SortSupport) arg;
     466       519092 :     const SortItem *ia = a;
     467       519092 :     const SortItem *ib = b;
     468              : 
     469      1038184 :     return ApplySortComparator(ia->values[0], ia->isnull[0],
     470       519092 :                                ib->values[0], ib->isnull[0],
     471              :                                ssup);
     472              : }
     473              : 
     474              : /*
     475              :  * build_column_frequencies
     476              :  *      Compute frequencies of values in each column.
     477              :  *
     478              :  * This returns an array of SortItems for each attribute the MCV is built
     479              :  * on, with a frequency (number of occurrences) for each value. This is
     480              :  * then used to compute "base" frequency of MCV items.
     481              :  *
     482              :  * All the memory is allocated in a single chunk, so that a single pfree
     483              :  * is enough to release it. We do not allocate space for values/isnull
     484              :  * arrays in the SortItems, because we can simply point into the input
     485              :  * groups directly.
     486              :  */
     487              : static SortItem **
     488          151 : build_column_frequencies(SortItem *groups, int ngroups,
     489              :                          MultiSortSupport mss, int *ncounts)
     490              : {
     491              :     int         i,
     492              :                 dim;
     493              :     SortItem  **result;
     494              :     char       *ptr;
     495              : 
     496              :     Assert(groups);
     497              :     Assert(ncounts);
     498              : 
     499              :     /* allocate arrays for all columns as a single chunk */
     500          151 :     ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +
     501          151 :                  mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));
     502              : 
     503              :     /* initial array of pointers */
     504          151 :     result = (SortItem **) ptr;
     505          151 :     ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);
     506              : 
     507          543 :     for (dim = 0; dim < mss->ndims; dim++)
     508              :     {
     509          392 :         SortSupport ssup = &mss->ssup[dim];
     510              : 
     511              :         /* array of values for a single column */
     512          392 :         result[dim] = (SortItem *) ptr;
     513          392 :         ptr += MAXALIGN(sizeof(SortItem) * ngroups);
     514              : 
     515              :         /* extract data for the dimension */
     516       127026 :         for (i = 0; i < ngroups; i++)
     517              :         {
     518              :             /* point into the input groups */
     519       126634 :             result[dim][i].values = &groups[i].values[dim];
     520       126634 :             result[dim][i].isnull = &groups[i].isnull[dim];
     521       126634 :             result[dim][i].count = groups[i].count;
     522              :         }
     523              : 
     524              :         /* sort the values, deduplicate */
     525          392 :         qsort_interruptible(result[dim], ngroups, sizeof(SortItem),
     526              :                             sort_item_compare, ssup);
     527              : 
     528              :         /*
     529              :          * Identify distinct values, compute frequency (there might be
     530              :          * multiple MCV items containing this value, so we need to sum counts
     531              :          * from all of them.
     532              :          */
     533          392 :         ncounts[dim] = 1;
     534       126634 :         for (i = 1; i < ngroups; i++)
     535              :         {
     536       126242 :             if (sort_item_compare(&result[dim][i - 1], &result[dim][i], ssup) == 0)
     537              :             {
     538        74551 :                 result[dim][ncounts[dim] - 1].count += result[dim][i].count;
     539        74551 :                 continue;
     540              :             }
     541              : 
     542        51691 :             result[dim][ncounts[dim]] = result[dim][i];
     543              : 
     544        51691 :             ncounts[dim]++;
     545              :         }
     546              :     }
     547              : 
     548          151 :     return result;
     549              : }
     550              : 
     551              : /*
     552              :  * statext_mcv_load
     553              :  *      Load the MCV list for the indicated pg_statistic_ext_data tuple.
     554              :  */
     555              : MCVList *
     556          303 : statext_mcv_load(Oid mvoid, bool inh)
     557              : {
     558              :     MCVList    *result;
     559              :     bool        isnull;
     560              :     Datum       mcvlist;
     561          303 :     HeapTuple   htup = SearchSysCache2(STATEXTDATASTXOID,
     562              :                                        ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
     563              : 
     564          303 :     if (!HeapTupleIsValid(htup))
     565            0 :         elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
     566              : 
     567          303 :     mcvlist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
     568              :                               Anum_pg_statistic_ext_data_stxdmcv, &isnull);
     569              : 
     570          303 :     if (isnull)
     571            0 :         elog(ERROR,
     572              :              "requested statistics kind \"%c\" is not yet built for statistics object %u",
     573              :              STATS_EXT_MCV, mvoid);
     574              : 
     575          303 :     result = statext_mcv_deserialize(DatumGetByteaP(mcvlist));
     576              : 
     577          303 :     ReleaseSysCache(htup);
     578              : 
     579          303 :     return result;
     580              : }
     581              : 
     582              : 
     583              : /*
     584              :  * statext_mcv_serialize
     585              :  *      Serialize MCV list into a pg_mcv_list value.
     586              :  *
     587              :  * The MCV items may include values of various data types, and it's reasonable
     588              :  * to expect redundancy (values for a given attribute, repeated for multiple
     589              :  * MCV list items). So we deduplicate the values into arrays, and then replace
     590              :  * the values by indexes into those arrays.
     591              :  *
     592              :  * The overall structure of the serialized representation looks like this:
     593              :  *
     594              :  * +---------------+----------------+---------------------+-------+
     595              :  * | header fields | dimension info | deduplicated values | items |
     596              :  * +---------------+----------------+---------------------+-------+
     597              :  *
     598              :  * Where dimension info stores information about the type of the K-th
     599              :  * attribute (e.g. typlen, typbyval and length of deduplicated values).
     600              :  * Deduplicated values store deduplicated values for each attribute.  And
     601              :  * items store the actual MCV list items, with values replaced by indexes into
     602              :  * the arrays.
     603              :  *
     604              :  * When serializing the items, we use uint16 indexes. The number of MCV items
     605              :  * is limited by the statistics target (which is capped to 10k at the moment).
     606              :  * We might increase this to 65k and still fit into uint16, so there's a bit of
     607              :  * slack. Furthermore, this limit is on the number of distinct values per column,
     608              :  * and we usually have few of those (and various combinations of them for the
     609              :  * those MCV list). So uint16 seems fine for now.
     610              :  *
     611              :  * We don't really expect the serialization to save as much space as for
     612              :  * histograms, as we are not doing any bucket splits (which is the source
     613              :  * of high redundancy in histograms).
     614              :  *
     615              :  * TODO: Consider packing boolean flags (NULL) for each item into a single char
     616              :  * (or a longer type) instead of using an array of bool items.
     617              :  */
     618              : bytea *
     619          169 : statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
     620              : {
     621              :     int         i;
     622              :     int         dim;
     623          169 :     int         ndims = mcvlist->ndimensions;
     624              : 
     625              :     SortSupport ssup;
     626              :     DimensionInfo *info;
     627              : 
     628              :     Size        total_length;
     629              : 
     630              :     /* serialized items (indexes into arrays, etc.) */
     631              :     bytea      *raw;
     632              :     char       *ptr;
     633              :     char       *endptr PG_USED_FOR_ASSERTS_ONLY;
     634              : 
     635              :     /* values per dimension (and number of non-NULL values) */
     636          169 :     Datum     **values = palloc0_array(Datum *, ndims);
     637          169 :     int        *counts = palloc0_array(int, ndims);
     638              : 
     639              :     /*
     640              :      * We'll include some rudimentary information about the attribute types
     641              :      * (length, by-val flag), so that we don't have to look them up while
     642              :      * deserializing the MCV list (we already have the type OID in the
     643              :      * header).  This is safe because when changing the type of the attribute
     644              :      * the statistics gets dropped automatically.  We need to store the info
     645              :      * about the arrays of deduplicated values anyway.
     646              :      */
     647          169 :     info = palloc0_array(DimensionInfo, ndims);
     648              : 
     649              :     /* sort support data for all attributes included in the MCV list */
     650          169 :     ssup = palloc0_array(SortSupportData, ndims);
     651              : 
     652              :     /* collect and deduplicate values for each dimension (attribute) */
     653          612 :     for (dim = 0; dim < ndims; dim++)
     654              :     {
     655              :         int         ndistinct;
     656              :         TypeCacheEntry *typentry;
     657              : 
     658              :         /*
     659              :          * Lookup the LT operator (can't get it from stats extra_data, as we
     660              :          * don't know how to interpret that - scalar vs. array etc.).
     661              :          */
     662          443 :         typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
     663              : 
     664              :         /* copy important info about the data type (length, by-value) */
     665          443 :         info[dim].typlen = stats[dim]->attrtype->typlen;
     666          443 :         info[dim].typbyval = stats[dim]->attrtype->typbyval;
     667              : 
     668              :         /* allocate space for values in the attribute and collect them */
     669          443 :         values[dim] = palloc0_array(Datum, mcvlist->nitems);
     670              : 
     671        15450 :         for (i = 0; i < mcvlist->nitems; i++)
     672              :         {
     673              :             /* skip NULL values - we don't need to deduplicate those */
     674        15007 :             if (mcvlist->items[i].isnull[dim])
     675           70 :                 continue;
     676              : 
     677              :             /* append the value at the end */
     678        14937 :             values[dim][counts[dim]] = mcvlist->items[i].values[dim];
     679        14937 :             counts[dim] += 1;
     680              :         }
     681              : 
     682              :         /* if there are just NULL values in this dimension, we're done */
     683          443 :         if (counts[dim] == 0)
     684            4 :             continue;
     685              : 
     686              :         /* sort and deduplicate the data */
     687          439 :         ssup[dim].ssup_cxt = CurrentMemoryContext;
     688          439 :         ssup[dim].ssup_collation = stats[dim]->attrcollid;
     689          439 :         ssup[dim].ssup_nulls_first = false;
     690              : 
     691          439 :         PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
     692              : 
     693          439 :         qsort_interruptible(values[dim], counts[dim], sizeof(Datum),
     694          439 :                             compare_scalars_simple, &ssup[dim]);
     695              : 
     696              :         /*
     697              :          * Walk through the array and eliminate duplicate values, but keep the
     698              :          * ordering (so that we can do a binary search later). We know there's
     699              :          * at least one item as (counts[dim] != 0), so we can skip the first
     700              :          * element.
     701              :          */
     702          439 :         ndistinct = 1;          /* number of distinct values */
     703        14937 :         for (i = 1; i < counts[dim]; i++)
     704              :         {
     705              :             /* expect sorted array */
     706              :             Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
     707              : 
     708              :             /* if the value is the same as the previous one, we can skip it */
     709        14498 :             if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
     710         6055 :                 continue;
     711              : 
     712         8443 :             values[dim][ndistinct] = values[dim][i];
     713         8443 :             ndistinct += 1;
     714              :         }
     715              : 
     716              :         /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
     717              :         Assert(ndistinct <= PG_UINT16_MAX);
     718              : 
     719              :         /*
     720              :          * Store additional info about the attribute - number of deduplicated
     721              :          * values, and also size of the serialized data. For fixed-length data
     722              :          * types this is trivial to compute, for varwidth types we need to
     723              :          * actually walk the array and sum the sizes.
     724              :          */
     725          439 :         info[dim].nvalues = ndistinct;
     726              : 
     727          439 :         if (info[dim].typbyval) /* by-value data types */
     728              :         {
     729          260 :             info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
     730              : 
     731              :             /*
     732              :              * We copy the data into the MCV item during deserialization, so
     733              :              * we don't need to allocate any extra space.
     734              :              */
     735          260 :             info[dim].nbytes_aligned = 0;
     736              :         }
     737          179 :         else if (info[dim].typlen > 0)   /* fixed-length by-ref */
     738              :         {
     739              :             /*
     740              :              * We don't care about alignment in the serialized data, so we
     741              :              * pack the data as much as possible. But we also track how much
     742              :              * data will be needed after deserialization, and in that case we
     743              :              * need to account for alignment of each item.
     744              :              *
     745              :              * Note: As the items are fixed-length, we could easily compute
     746              :              * this during deserialization, but we do it here anyway.
     747              :              */
     748           12 :             info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
     749           12 :             info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
     750              :         }
     751          167 :         else if (info[dim].typlen == -1)    /* varlena */
     752              :         {
     753          167 :             info[dim].nbytes = 0;
     754          167 :             info[dim].nbytes_aligned = 0;
     755         2239 :             for (i = 0; i < info[dim].nvalues; i++)
     756              :             {
     757              :                 Size        len;
     758              : 
     759              :                 /*
     760              :                  * For varlena values, we detoast the values and store the
     761              :                  * length and data separately. We don't bother with alignment
     762              :                  * here, which means that during deserialization we need to
     763              :                  * copy the fields and only access the copies.
     764              :                  */
     765         2072 :                 values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
     766              : 
     767              :                 /* serialized length (uint32 length + data) */
     768         2072 :                 len = VARSIZE_ANY_EXHDR(DatumGetPointer(values[dim][i]));
     769         2072 :                 info[dim].nbytes += sizeof(uint32); /* length */
     770         2072 :                 info[dim].nbytes += len;    /* value (no header) */
     771              : 
     772              :                 /*
     773              :                  * During deserialization we'll build regular varlena values
     774              :                  * with full headers, and we need to align them properly.
     775              :                  */
     776         2072 :                 info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
     777              :             }
     778              :         }
     779            0 :         else if (info[dim].typlen == -2)    /* cstring */
     780              :         {
     781            0 :             info[dim].nbytes = 0;
     782            0 :             info[dim].nbytes_aligned = 0;
     783            0 :             for (i = 0; i < info[dim].nvalues; i++)
     784              :             {
     785              :                 Size        len;
     786              : 
     787              :                 /*
     788              :                  * cstring is handled similar to varlena - first we store the
     789              :                  * length as uint32 and then the data. We don't care about
     790              :                  * alignment, which means that during deserialization we need
     791              :                  * to copy the fields and only access the copies.
     792              :                  */
     793              : 
     794              :                 /* c-strings include terminator, so +1 byte */
     795            0 :                 len = strlen(DatumGetCString(values[dim][i])) + 1;
     796            0 :                 info[dim].nbytes += sizeof(uint32); /* length */
     797            0 :                 info[dim].nbytes += len;    /* value */
     798              : 
     799              :                 /* space needed for properly aligned deserialized copies */
     800            0 :                 info[dim].nbytes_aligned += MAXALIGN(len);
     801              :             }
     802              :         }
     803              : 
     804              :         /* we know (count>0) so there must be some data */
     805              :         Assert(info[dim].nbytes > 0);
     806              :     }
     807              : 
     808              :     /*
     809              :      * Now we can finally compute how much space we'll actually need for the
     810              :      * whole serialized MCV list (varlena header, MCV header, dimension info
     811              :      * for each attribute, deduplicated values and items).
     812              :      */
     813          169 :     total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
     814              :         + sizeof(AttrNumber)    /* ndimensions */
     815          169 :         + (ndims * sizeof(Oid));    /* attribute types */
     816              : 
     817              :     /* dimension info */
     818          169 :     total_length += ndims * sizeof(DimensionInfo);
     819              : 
     820              :     /* add space for the arrays of deduplicated values */
     821          612 :     for (i = 0; i < ndims; i++)
     822          443 :         total_length += info[i].nbytes;
     823              : 
     824              :     /*
     825              :      * And finally account for the items (those are fixed-length, thanks to
     826              :      * replacing values with uint16 indexes into the deduplicated arrays).
     827              :      */
     828          169 :     total_length += mcvlist->nitems * ITEM_SIZE(dim);
     829              : 
     830              :     /*
     831              :      * Allocate space for the whole serialized MCV list (we'll skip bytes, so
     832              :      * we set them to zero to make the result more compressible).
     833              :      */
     834          169 :     raw = (bytea *) palloc0(VARHDRSZ + total_length);
     835          169 :     SET_VARSIZE(raw, VARHDRSZ + total_length);
     836              : 
     837          169 :     ptr = VARDATA(raw);
     838          169 :     endptr = ptr + total_length;
     839              : 
     840              :     /* copy the MCV list header fields, one by one */
     841          169 :     memcpy(ptr, &mcvlist->magic, sizeof(uint32));
     842          169 :     ptr += sizeof(uint32);
     843              : 
     844          169 :     memcpy(ptr, &mcvlist->type, sizeof(uint32));
     845          169 :     ptr += sizeof(uint32);
     846              : 
     847          169 :     memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
     848          169 :     ptr += sizeof(uint32);
     849              : 
     850          169 :     memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
     851          169 :     ptr += sizeof(AttrNumber);
     852              : 
     853          169 :     memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
     854          169 :     ptr += (sizeof(Oid) * ndims);
     855              : 
     856              :     /* store information about the attributes (data amounts, ...) */
     857          169 :     memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
     858          169 :     ptr += sizeof(DimensionInfo) * ndims;
     859              : 
     860              :     /* Copy the deduplicated values for all attributes to the output. */
     861          612 :     for (dim = 0; dim < ndims; dim++)
     862              :     {
     863              :         /* remember the starting point for Asserts later */
     864          443 :         char       *start PG_USED_FOR_ASSERTS_ONLY = ptr;
     865              : 
     866         9325 :         for (i = 0; i < info[dim].nvalues; i++)
     867              :         {
     868         8882 :             Datum       value = values[dim][i];
     869              : 
     870         8882 :             if (info[dim].typbyval) /* passed by value */
     871              :             {
     872              :                 Datum       tmp;
     873              : 
     874              :                 /*
     875              :                  * For byval types, we need to copy just the significant bytes
     876              :                  * - we can't use memcpy directly, as that assumes
     877              :                  * little-endian behavior.  store_att_byval does almost what
     878              :                  * we need, but it requires a properly aligned buffer - the
     879              :                  * output buffer does not guarantee that. So we simply use a
     880              :                  * local Datum variable (which guarantees proper alignment),
     881              :                  * and then copy the value from it.
     882              :                  */
     883         6255 :                 store_att_byval(&tmp, value, info[dim].typlen);
     884              : 
     885         6255 :                 memcpy(ptr, &tmp, info[dim].typlen);
     886         6255 :                 ptr += info[dim].typlen;
     887              :             }
     888         2627 :             else if (info[dim].typlen > 0)   /* passed by reference */
     889              :             {
     890              :                 /* no special alignment needed, treated as char array */
     891          555 :                 memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
     892          555 :                 ptr += info[dim].typlen;
     893              :             }
     894         2072 :             else if (info[dim].typlen == -1)    /* varlena */
     895              :             {
     896         2072 :                 uint32      len = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
     897              : 
     898              :                 /* copy the length */
     899         2072 :                 memcpy(ptr, &len, sizeof(uint32));
     900         2072 :                 ptr += sizeof(uint32);
     901              : 
     902              :                 /* data from the varlena value (without the header) */
     903         2072 :                 memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
     904         2072 :                 ptr += len;
     905              :             }
     906            0 :             else if (info[dim].typlen == -2)    /* cstring */
     907              :             {
     908            0 :                 uint32      len = (uint32) strlen(DatumGetCString(value)) + 1;
     909              : 
     910              :                 /* copy the length */
     911            0 :                 memcpy(ptr, &len, sizeof(uint32));
     912            0 :                 ptr += sizeof(uint32);
     913              : 
     914              :                 /* value */
     915            0 :                 memcpy(ptr, DatumGetCString(value), len);
     916            0 :                 ptr += len;
     917              :             }
     918              : 
     919              :             /* no underflows or overflows */
     920              :             Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
     921              :         }
     922              : 
     923              :         /* we should get exactly nbytes of data for this dimension */
     924              :         Assert((ptr - start) == info[dim].nbytes);
     925              :     }
     926              : 
     927              :     /* Serialize the items, with uint16 indexes instead of the values. */
     928         5826 :     for (i = 0; i < mcvlist->nitems; i++)
     929              :     {
     930         5657 :         MCVItem    *mcvitem = &mcvlist->items[i];
     931              : 
     932              :         /* don't write beyond the allocated space */
     933              :         Assert(ptr <= (endptr - ITEM_SIZE(dim)));
     934              : 
     935              :         /* copy NULL and frequency flags into the serialized MCV */
     936         5657 :         memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
     937         5657 :         ptr += sizeof(bool) * ndims;
     938              : 
     939         5657 :         memcpy(ptr, &mcvitem->frequency, sizeof(double));
     940         5657 :         ptr += sizeof(double);
     941              : 
     942         5657 :         memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
     943         5657 :         ptr += sizeof(double);
     944              : 
     945              :         /* store the indexes last */
     946        20664 :         for (dim = 0; dim < ndims; dim++)
     947              :         {
     948        15007 :             uint16      index = 0;
     949              :             Datum      *value;
     950              : 
     951              :             /* do the lookup only for non-NULL values */
     952        15007 :             if (!mcvitem->isnull[dim])
     953              :             {
     954        14937 :                 value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
     955        14937 :                                               info[dim].nvalues, sizeof(Datum),
     956        14937 :                                               compare_scalars_simple, &ssup[dim]);
     957              : 
     958              :                 Assert(value != NULL);  /* serialization or deduplication
     959              :                                          * error */
     960              : 
     961              :                 /* compute index within the deduplicated array */
     962        14937 :                 index = (uint16) (value - values[dim]);
     963              : 
     964              :                 /* check the index is within expected bounds */
     965              :                 Assert(index < info[dim].nvalues);
     966              :             }
     967              : 
     968              :             /* copy the index into the serialized MCV */
     969        15007 :             memcpy(ptr, &index, sizeof(uint16));
     970        15007 :             ptr += sizeof(uint16);
     971              :         }
     972              : 
     973              :         /* make sure we don't overflow the allocated value */
     974              :         Assert(ptr <= endptr);
     975              :     }
     976              : 
     977              :     /* at this point we expect to match the total_length exactly */
     978              :     Assert(ptr == endptr);
     979              : 
     980          169 :     pfree(values);
     981          169 :     pfree(counts);
     982              : 
     983          169 :     return raw;
     984              : }
     985              : 
     986              : /*
     987              :  * statext_mcv_deserialize
     988              :  *      Reads serialized MCV list into MCVList structure.
     989              :  *
     990              :  * All the memory needed by the MCV list is allocated as a single chunk, so
     991              :  * it's possible to simply pfree() it at once.
     992              :  */
     993              : MCVList *
     994          380 : statext_mcv_deserialize(bytea *data)
     995              : {
     996              :     int         dim,
     997              :                 i;
     998              :     Size        expected_size;
     999              :     MCVList    *mcvlist;
    1000              :     char       *raw;
    1001              :     char       *ptr;
    1002              :     char       *endptr PG_USED_FOR_ASSERTS_ONLY;
    1003              : 
    1004              :     int         ndims,
    1005              :                 nitems;
    1006          380 :     DimensionInfo *info = NULL;
    1007              : 
    1008              :     /* local allocation buffer (used only for deserialization) */
    1009          380 :     Datum     **map = NULL;
    1010              : 
    1011              :     /* MCV list */
    1012              :     Size        mcvlen;
    1013              : 
    1014              :     /* buffer used for the result */
    1015              :     Size        datalen;
    1016              :     char       *dataptr;
    1017              :     char       *valuesptr;
    1018              :     char       *isnullptr;
    1019              : 
    1020          380 :     if (data == NULL)
    1021            0 :         return NULL;
    1022              : 
    1023              :     /*
    1024              :      * We can't possibly deserialize a MCV list if there's not even a complete
    1025              :      * header. We need an explicit formula here, because we serialize the
    1026              :      * header fields one by one, so we need to ignore struct alignment.
    1027              :      */
    1028          380 :     if (VARSIZE_ANY(data) < MinSizeOfMCVList)
    1029            0 :         elog(ERROR, "invalid MCV size %zu (expected at least %zu)",
    1030              :              VARSIZE_ANY(data), MinSizeOfMCVList);
    1031              : 
    1032              :     /* read the MCV list header */
    1033          380 :     mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
    1034              : 
    1035              :     /* pointer to the data part (skip the varlena header) */
    1036          380 :     raw = (char *) data;
    1037          380 :     ptr = VARDATA_ANY(raw);
    1038          380 :     endptr = raw + VARSIZE_ANY(data);
    1039              : 
    1040              :     /* get the header and perform further sanity checks */
    1041          380 :     memcpy(&mcvlist->magic, ptr, sizeof(uint32));
    1042          380 :     ptr += sizeof(uint32);
    1043              : 
    1044          380 :     memcpy(&mcvlist->type, ptr, sizeof(uint32));
    1045          380 :     ptr += sizeof(uint32);
    1046              : 
    1047          380 :     memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
    1048          380 :     ptr += sizeof(uint32);
    1049              : 
    1050          380 :     memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
    1051          380 :     ptr += sizeof(AttrNumber);
    1052              : 
    1053          380 :     if (mcvlist->magic != STATS_MCV_MAGIC)
    1054            0 :         elog(ERROR, "invalid MCV magic %u (expected %u)",
    1055              :              mcvlist->magic, STATS_MCV_MAGIC);
    1056              : 
    1057          380 :     if (mcvlist->type != STATS_MCV_TYPE_BASIC)
    1058            0 :         elog(ERROR, "invalid MCV type %u (expected %u)",
    1059              :              mcvlist->type, STATS_MCV_TYPE_BASIC);
    1060              : 
    1061          380 :     if (mcvlist->ndimensions == 0)
    1062            0 :         elog(ERROR, "invalid zero-length dimension array in MCVList");
    1063          380 :     else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
    1064          380 :              (mcvlist->ndimensions < 0))
    1065            0 :         elog(ERROR, "invalid length (%d) dimension array in MCVList",
    1066              :              mcvlist->ndimensions);
    1067              : 
    1068          380 :     if (mcvlist->nitems == 0)
    1069            0 :         elog(ERROR, "invalid zero-length item array in MCVList");
    1070          380 :     else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
    1071            0 :         elog(ERROR, "invalid length (%u) item array in MCVList",
    1072              :              mcvlist->nitems);
    1073              : 
    1074          380 :     nitems = mcvlist->nitems;
    1075          380 :     ndims = mcvlist->ndimensions;
    1076              : 
    1077              :     /*
    1078              :      * Check amount of data including DimensionInfo for all dimensions and
    1079              :      * also the serialized items (including uint16 indexes). Also, walk
    1080              :      * through the dimension information and add it to the sum.
    1081              :      */
    1082          380 :     expected_size = SizeOfMCVList(ndims, nitems);
    1083              : 
    1084              :     /*
    1085              :      * Check that we have at least the dimension and info records, along with
    1086              :      * the items. We don't know the size of the serialized values yet. We need
    1087              :      * to do this check first, before accessing the dimension info.
    1088              :      */
    1089          380 :     if (VARSIZE_ANY(data) < expected_size)
    1090            0 :         elog(ERROR, "invalid MCV size %zu (expected %zu)",
    1091              :              VARSIZE_ANY(data), expected_size);
    1092              : 
    1093              :     /* Now copy the array of type Oids. */
    1094          380 :     memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
    1095          380 :     ptr += (sizeof(Oid) * ndims);
    1096              : 
    1097              :     /* Now it's safe to access the dimension info. */
    1098          380 :     info = palloc(ndims * sizeof(DimensionInfo));
    1099              : 
    1100          380 :     memcpy(info, ptr, ndims * sizeof(DimensionInfo));
    1101          380 :     ptr += (ndims * sizeof(DimensionInfo));
    1102              : 
    1103              :     /* account for the value arrays */
    1104         1494 :     for (dim = 0; dim < ndims; dim++)
    1105              :     {
    1106              :         /*
    1107              :          * XXX I wonder if we can/should rely on asserts here. Maybe those
    1108              :          * checks should be done every time?
    1109              :          */
    1110              :         Assert(info[dim].nvalues >= 0);
    1111              :         Assert(info[dim].nbytes >= 0);
    1112              : 
    1113         1114 :         expected_size += info[dim].nbytes;
    1114              :     }
    1115              : 
    1116              :     /*
    1117              :      * Now we know the total expected MCV size, including all the pieces
    1118              :      * (header, dimension info. items and deduplicated data). So do the final
    1119              :      * check on size.
    1120              :      */
    1121          380 :     if (VARSIZE_ANY(data) != expected_size)
    1122            0 :         elog(ERROR, "invalid MCV size %zu (expected %zu)",
    1123              :              VARSIZE_ANY(data), expected_size);
    1124              : 
    1125              :     /*
    1126              :      * We need an array of Datum values for each dimension, so that we can
    1127              :      * easily translate the uint16 indexes later. We also need a top-level
    1128              :      * array of pointers to those per-dimension arrays.
    1129              :      *
    1130              :      * While allocating the arrays for dimensions, compute how much space we
    1131              :      * need for a copy of the by-ref data, as we can't simply point to the
    1132              :      * original values (it might go away).
    1133              :      */
    1134          380 :     datalen = 0;                /* space for by-ref data */
    1135          380 :     map = palloc_array(Datum *, ndims);
    1136              : 
    1137         1494 :     for (dim = 0; dim < ndims; dim++)
    1138              :     {
    1139         1114 :         map[dim] = palloc_array(Datum, info[dim].nvalues);
    1140              : 
    1141              :         /* space needed for a copy of data for by-ref types */
    1142         1114 :         datalen += info[dim].nbytes_aligned;
    1143              :     }
    1144              : 
    1145              :     /*
    1146              :      * Now resize the MCV list so that the allocation includes all the data.
    1147              :      *
    1148              :      * Allocate space for a copy of the data, as we can't simply reference the
    1149              :      * serialized data - it's not aligned properly, and it may disappear while
    1150              :      * we're still using the MCV list, e.g. due to catcache release.
    1151              :      *
    1152              :      * We do care about alignment here, because we will allocate all the
    1153              :      * pieces at once, but then use pointers to different parts.
    1154              :      */
    1155          380 :     mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
    1156              : 
    1157              :     /* arrays of values and isnull flags for all MCV items */
    1158          380 :     mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
    1159          380 :     mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
    1160              : 
    1161              :     /* we don't quite need to align this, but it makes some asserts easier */
    1162          380 :     mcvlen += MAXALIGN(datalen);
    1163              : 
    1164              :     /* now resize the deserialized MCV list, and compute pointers to parts */
    1165          380 :     mcvlist = repalloc(mcvlist, mcvlen);
    1166              : 
    1167              :     /* pointer to the beginning of values/isnull arrays */
    1168          380 :     valuesptr = (char *) mcvlist
    1169          380 :         + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
    1170              : 
    1171          380 :     isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
    1172              : 
    1173          380 :     dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
    1174              : 
    1175              :     /*
    1176              :      * Build mapping (index => value) for translating the serialized data into
    1177              :      * the in-memory representation.
    1178              :      */
    1179         1494 :     for (dim = 0; dim < ndims; dim++)
    1180              :     {
    1181              :         /* remember start position in the input array */
    1182         1114 :         char       *start PG_USED_FOR_ASSERTS_ONLY = ptr;
    1183              : 
    1184         1114 :         if (info[dim].typbyval)
    1185              :         {
    1186              :             /* for by-val types we simply copy data into the mapping */
    1187        31874 :             for (i = 0; i < info[dim].nvalues; i++)
    1188              :             {
    1189        31122 :                 Datum       v = 0;
    1190              : 
    1191        31122 :                 memcpy(&v, ptr, info[dim].typlen);
    1192        31122 :                 ptr += info[dim].typlen;
    1193              : 
    1194        31122 :                 map[dim][i] = fetch_att(&v, true, info[dim].typlen);
    1195              : 
    1196              :                 /* no under/overflow of input array */
    1197              :                 Assert(ptr <= (start + info[dim].nbytes));
    1198              :             }
    1199              :         }
    1200              :         else
    1201              :         {
    1202              :             /* for by-ref types we need to also make a copy of the data */
    1203              : 
    1204              :             /* passed by reference, but fixed length (name, tid, ...) */
    1205          362 :             if (info[dim].typlen > 0)
    1206              :             {
    1207         1101 :                 for (i = 0; i < info[dim].nvalues; i++)
    1208              :                 {
    1209         1080 :                     memcpy(dataptr, ptr, info[dim].typlen);
    1210         1080 :                     ptr += info[dim].typlen;
    1211              : 
    1212              :                     /* just point into the array */
    1213         1080 :                     map[dim][i] = PointerGetDatum(dataptr);
    1214         1080 :                     dataptr += MAXALIGN(info[dim].typlen);
    1215              :                 }
    1216              :             }
    1217          341 :             else if (info[dim].typlen == -1)
    1218              :             {
    1219              :                 /* varlena */
    1220         7948 :                 for (i = 0; i < info[dim].nvalues; i++)
    1221              :                 {
    1222              :                     uint32      len;
    1223              : 
    1224              :                     /* read the uint32 length */
    1225         7607 :                     memcpy(&len, ptr, sizeof(uint32));
    1226         7607 :                     ptr += sizeof(uint32);
    1227              : 
    1228              :                     /* the length is data-only */
    1229         7607 :                     SET_VARSIZE(dataptr, len + VARHDRSZ);
    1230         7607 :                     memcpy(VARDATA(dataptr), ptr, len);
    1231         7607 :                     ptr += len;
    1232              : 
    1233              :                     /* just point into the array */
    1234         7607 :                     map[dim][i] = PointerGetDatum(dataptr);
    1235              : 
    1236              :                     /* skip to place of the next deserialized value */
    1237         7607 :                     dataptr += MAXALIGN(len + VARHDRSZ);
    1238              :                 }
    1239              :             }
    1240            0 :             else if (info[dim].typlen == -2)
    1241              :             {
    1242              :                 /* cstring */
    1243            0 :                 for (i = 0; i < info[dim].nvalues; i++)
    1244              :                 {
    1245              :                     uint32      len;
    1246              : 
    1247            0 :                     memcpy(&len, ptr, sizeof(uint32));
    1248            0 :                     ptr += sizeof(uint32);
    1249              : 
    1250            0 :                     memcpy(dataptr, ptr, len);
    1251            0 :                     ptr += len;
    1252              : 
    1253              :                     /* just point into the array */
    1254            0 :                     map[dim][i] = PointerGetDatum(dataptr);
    1255            0 :                     dataptr += MAXALIGN(len);
    1256              :                 }
    1257              :             }
    1258              : 
    1259              :             /* no under/overflow of input array */
    1260              :             Assert(ptr <= (start + info[dim].nbytes));
    1261              : 
    1262              :             /* no overflow of the output mcv value */
    1263              :             Assert(dataptr <= ((char *) mcvlist + mcvlen));
    1264              :         }
    1265              : 
    1266              :         /* check we consumed input data for this dimension exactly */
    1267              :         Assert(ptr == (start + info[dim].nbytes));
    1268              :     }
    1269              : 
    1270              :     /* we should have also filled the MCV list exactly */
    1271              :     Assert(dataptr == ((char *) mcvlist + mcvlen));
    1272              : 
    1273              :     /* deserialize the MCV items and translate the indexes to Datums */
    1274        25033 :     for (i = 0; i < nitems; i++)
    1275              :     {
    1276        24653 :         MCVItem    *item = &mcvlist->items[i];
    1277              : 
    1278        24653 :         item->values = (Datum *) valuesptr;
    1279        24653 :         valuesptr += MAXALIGN(sizeof(Datum) * ndims);
    1280              : 
    1281        24653 :         item->isnull = (bool *) isnullptr;
    1282        24653 :         isnullptr += MAXALIGN(sizeof(bool) * ndims);
    1283              : 
    1284        24653 :         memcpy(item->isnull, ptr, sizeof(bool) * ndims);
    1285        24653 :         ptr += sizeof(bool) * ndims;
    1286              : 
    1287        24653 :         memcpy(&item->frequency, ptr, sizeof(double));
    1288        24653 :         ptr += sizeof(double);
    1289              : 
    1290        24653 :         memcpy(&item->base_frequency, ptr, sizeof(double));
    1291        24653 :         ptr += sizeof(double);
    1292              : 
    1293              :         /* finally translate the indexes (for non-NULL only) */
    1294        96099 :         for (dim = 0; dim < ndims; dim++)
    1295              :         {
    1296              :             uint16      index;
    1297              : 
    1298        71446 :             memcpy(&index, ptr, sizeof(uint16));
    1299        71446 :             ptr += sizeof(uint16);
    1300              : 
    1301        71446 :             if (item->isnull[dim])
    1302          184 :                 continue;
    1303              : 
    1304        71262 :             item->values[dim] = map[dim][index];
    1305              :         }
    1306              : 
    1307              :         /* check we're not overflowing the input */
    1308              :         Assert(ptr <= endptr);
    1309              :     }
    1310              : 
    1311              :     /* check that we processed all the data */
    1312              :     Assert(ptr == endptr);
    1313              : 
    1314              :     /* release the buffers used for mapping */
    1315         1494 :     for (dim = 0; dim < ndims; dim++)
    1316         1114 :         pfree(map[dim]);
    1317              : 
    1318          380 :     pfree(map);
    1319              : 
    1320          380 :     return mcvlist;
    1321              : }
    1322              : 
    1323              : /*
    1324              :  * SRF with details about buckets of a histogram:
    1325              :  *
    1326              :  * - item ID (0...nitems)
    1327              :  * - values (string array)
    1328              :  * - nulls only (boolean array)
    1329              :  * - frequency (double precision)
    1330              :  * - base_frequency (double precision)
    1331              :  *
    1332              :  * The input is the OID of the statistics, and there are no rows returned if
    1333              :  * the statistics contains no histogram.
    1334              :  */
    1335              : Datum
    1336         3466 : pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)
    1337              : {
    1338              :     FuncCallContext *funcctx;
    1339              : 
    1340              :     /* stuff done only on the first call of the function */
    1341         3466 :     if (SRF_IS_FIRSTCALL())
    1342              :     {
    1343              :         MemoryContext oldcontext;
    1344              :         MCVList    *mcvlist;
    1345              :         TupleDesc   tupdesc;
    1346              : 
    1347              :         /* create a function context for cross-call persistence */
    1348           77 :         funcctx = SRF_FIRSTCALL_INIT();
    1349              : 
    1350              :         /* switch to memory context appropriate for multiple function calls */
    1351           77 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
    1352              : 
    1353           77 :         mcvlist = statext_mcv_deserialize(PG_GETARG_BYTEA_P(0));
    1354              : 
    1355           77 :         funcctx->user_fctx = mcvlist;
    1356              : 
    1357              :         /* total number of tuples to be returned */
    1358           77 :         funcctx->max_calls = 0;
    1359           77 :         if (funcctx->user_fctx != NULL)
    1360           77 :             funcctx->max_calls = mcvlist->nitems;
    1361              : 
    1362              :         /* Build a tuple descriptor for our result type */
    1363           77 :         if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
    1364            0 :             ereport(ERROR,
    1365              :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1366              :                      errmsg("function returning record called in context "
    1367              :                             "that cannot accept type record")));
    1368           77 :         tupdesc = BlessTupleDesc(tupdesc);
    1369              : 
    1370              :         /*
    1371              :          * generate attribute metadata needed later to produce tuples from raw
    1372              :          * C strings
    1373              :          */
    1374           77 :         funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
    1375              : 
    1376           77 :         MemoryContextSwitchTo(oldcontext);
    1377              :     }
    1378              : 
    1379              :     /* stuff done on every call of the function */
    1380         3466 :     funcctx = SRF_PERCALL_SETUP();
    1381              : 
    1382         3466 :     if (funcctx->call_cntr < funcctx->max_calls)   /* do when there is more
    1383              :                                                      * left to send */
    1384              :     {
    1385              :         Datum       values[5];
    1386              :         bool        nulls[5];
    1387              :         HeapTuple   tuple;
    1388              :         Datum       result;
    1389         3389 :         ArrayBuildState *astate_values = NULL;
    1390         3389 :         ArrayBuildState *astate_nulls = NULL;
    1391              : 
    1392              :         int         i;
    1393              :         MCVList    *mcvlist;
    1394              :         MCVItem    *item;
    1395              : 
    1396         3389 :         mcvlist = (MCVList *) funcctx->user_fctx;
    1397              : 
    1398              :         Assert(funcctx->call_cntr < mcvlist->nitems);
    1399              : 
    1400         3389 :         item = &mcvlist->items[funcctx->call_cntr];
    1401              : 
    1402        10341 :         for (i = 0; i < mcvlist->ndimensions; i++)
    1403              :         {
    1404              : 
    1405         6952 :             astate_nulls = accumArrayResult(astate_nulls,
    1406         6952 :                                             BoolGetDatum(item->isnull[i]),
    1407              :                                             false,
    1408              :                                             BOOLOID,
    1409              :                                             CurrentMemoryContext);
    1410              : 
    1411         6952 :             if (!item->isnull[i])
    1412              :             {
    1413              :                 bool        isvarlena;
    1414              :                 Oid         outfunc;
    1415              :                 FmgrInfo    fmgrinfo;
    1416              :                 Datum       val;
    1417              :                 text       *txt;
    1418              : 
    1419              :                 /* lookup output func for the type */
    1420         6909 :                 getTypeOutputInfo(mcvlist->types[i], &outfunc, &isvarlena);
    1421         6909 :                 fmgr_info(outfunc, &fmgrinfo);
    1422              : 
    1423         6909 :                 val = FunctionCall1(&fmgrinfo, item->values[i]);
    1424         6909 :                 txt = cstring_to_text(DatumGetPointer(val));
    1425              : 
    1426         6909 :                 astate_values = accumArrayResult(astate_values,
    1427              :                                                  PointerGetDatum(txt),
    1428              :                                                  false,
    1429              :                                                  TEXTOID,
    1430              :                                                  CurrentMemoryContext);
    1431              :             }
    1432              :             else
    1433           43 :                 astate_values = accumArrayResult(astate_values,
    1434              :                                                  (Datum) 0,
    1435              :                                                  true,
    1436              :                                                  TEXTOID,
    1437              :                                                  CurrentMemoryContext);
    1438              :         }
    1439              : 
    1440         3389 :         values[0] = Int32GetDatum(funcctx->call_cntr);
    1441         3389 :         values[1] = makeArrayResult(astate_values, CurrentMemoryContext);
    1442         3389 :         values[2] = makeArrayResult(astate_nulls, CurrentMemoryContext);
    1443         3389 :         values[3] = Float8GetDatum(item->frequency);
    1444         3389 :         values[4] = Float8GetDatum(item->base_frequency);
    1445              : 
    1446              :         /* no NULLs in the tuple */
    1447         3389 :         memset(nulls, 0, sizeof(nulls));
    1448              : 
    1449              :         /* build a tuple */
    1450         3389 :         tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls);
    1451              : 
    1452              :         /* make the tuple into a datum */
    1453         3389 :         result = HeapTupleGetDatum(tuple);
    1454              : 
    1455         3389 :         SRF_RETURN_NEXT(funcctx, result);
    1456              :     }
    1457              :     else                        /* do when there is no more left */
    1458              :     {
    1459           77 :         SRF_RETURN_DONE(funcctx);
    1460              :     }
    1461              : }
    1462              : 
    1463              : /*
    1464              :  * pg_mcv_list_in       - input routine for type pg_mcv_list.
    1465              :  *
    1466              :  * pg_mcv_list is real enough to be a table column, but it has no operations
    1467              :  * of its own, and disallows input too
    1468              :  */
    1469              : Datum
    1470            0 : pg_mcv_list_in(PG_FUNCTION_ARGS)
    1471              : {
    1472              :     /*
    1473              :      * pg_mcv_list stores the data in binary form and parsing text input is
    1474              :      * not needed, so disallow this.
    1475              :      */
    1476            0 :     ereport(ERROR,
    1477              :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1478              :              errmsg("cannot accept a value of type %s", "pg_mcv_list")));
    1479              : 
    1480              :     PG_RETURN_VOID();           /* keep compiler quiet */
    1481              : }
    1482              : 
    1483              : 
    1484              : /*
    1485              :  * pg_mcv_list_out      - output routine for type pg_mcv_list.
    1486              :  *
    1487              :  * MCV lists are serialized into a bytea value, so we simply call byteaout()
    1488              :  * to serialize the value into text. But it'd be nice to serialize that into
    1489              :  * a meaningful representation (e.g. for inspection by people).
    1490              :  *
    1491              :  * XXX This should probably return something meaningful, similar to what
    1492              :  * pg_dependencies_out does. Not sure how to deal with the deduplicated
    1493              :  * values, though - do we want to expand that or not?
    1494              :  */
    1495              : Datum
    1496           11 : pg_mcv_list_out(PG_FUNCTION_ARGS)
    1497              : {
    1498           11 :     return byteaout(fcinfo);
    1499              : }
    1500              : 
    1501              : /*
    1502              :  * pg_mcv_list_recv     - binary input routine for type pg_mcv_list.
    1503              :  */
    1504              : Datum
    1505            0 : pg_mcv_list_recv(PG_FUNCTION_ARGS)
    1506              : {
    1507            0 :     ereport(ERROR,
    1508              :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1509              :              errmsg("cannot accept a value of type %s", "pg_mcv_list")));
    1510              : 
    1511              :     PG_RETURN_VOID();           /* keep compiler quiet */
    1512              : }
    1513              : 
    1514              : /*
    1515              :  * pg_mcv_list_send     - binary output routine for type pg_mcv_list.
    1516              :  *
    1517              :  * MCV lists are serialized in a bytea value (although the type is named
    1518              :  * differently), so let's just send that.
    1519              :  */
    1520              : Datum
    1521            0 : pg_mcv_list_send(PG_FUNCTION_ARGS)
    1522              : {
    1523            0 :     return byteasend(fcinfo);
    1524              : }
    1525              : 
    1526              : /*
    1527              :  * match the attribute/expression to a dimension of the statistic
    1528              :  *
    1529              :  * Returns the zero-based index of the matching statistics dimension.
    1530              :  * Optionally determines the collation.
    1531              :  */
    1532              : static int
    1533          705 : mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
    1534              : {
    1535              :     int         idx;
    1536              : 
    1537          705 :     if (IsA(expr, Var))
    1538              :     {
    1539              :         /* simple Var, so just lookup using varattno */
    1540          582 :         Var        *var = (Var *) expr;
    1541              : 
    1542          582 :         if (collid)
    1543          549 :             *collid = var->varcollid;
    1544              : 
    1545          582 :         idx = bms_member_index(keys, var->varattno);
    1546              : 
    1547          582 :         if (idx < 0)
    1548            0 :             elog(ERROR, "variable not found in statistics object");
    1549              :     }
    1550              :     else
    1551              :     {
    1552              :         /* expression - lookup in stats expressions */
    1553              :         ListCell   *lc;
    1554              : 
    1555          123 :         if (collid)
    1556          120 :             *collid = exprCollation(expr);
    1557              : 
    1558              :         /* expressions are stored after the simple columns */
    1559          123 :         idx = bms_num_members(keys);
    1560          228 :         foreach(lc, exprs)
    1561              :         {
    1562          228 :             Node       *stat_expr = (Node *) lfirst(lc);
    1563              : 
    1564          228 :             if (equal(expr, stat_expr))
    1565          123 :                 break;
    1566              : 
    1567          105 :             idx++;
    1568              :         }
    1569              : 
    1570          123 :         if (lc == NULL)
    1571            0 :             elog(ERROR, "expression not found in statistics object");
    1572              :     }
    1573              : 
    1574          705 :     return idx;
    1575              : }
    1576              : 
    1577              : /*
    1578              :  * mcv_get_match_bitmap
    1579              :  *  Evaluate clauses using the MCV list, and update the match bitmap.
    1580              :  *
    1581              :  * A match bitmap keeps match/mismatch status for each MCV item, and we
    1582              :  * update it based on additional clauses. We also use it to skip items
    1583              :  * that can't possibly match (e.g. item marked as "mismatch" can't change
    1584              :  * to "match" when evaluating AND clause list).
    1585              :  *
    1586              :  * The function also returns a flag indicating whether there was an
    1587              :  * equality condition for all attributes, the minimum frequency in the MCV
    1588              :  * list, and a total MCV frequency (sum of frequencies for all items).
    1589              :  *
    1590              :  * XXX Currently the match bitmap uses a bool for each MCV item, which is
    1591              :  * somewhat wasteful as we could do with just a single bit, thus reducing
    1592              :  * the size to ~1/8. It would also allow us to combine bitmaps simply using
    1593              :  * & and |, which should be faster than min/max. The bitmaps are fairly
    1594              :  * small, though (thanks to the cap on the MCV list size).
    1595              :  */
    1596              : static bool *
    1597          423 : mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
    1598              :                      Bitmapset *keys, List *exprs,
    1599              :                      MCVList *mcvlist, bool is_or)
    1600              : {
    1601              :     ListCell   *l;
    1602              :     bool       *matches;
    1603              : 
    1604              :     /* The bitmap may be partially built. */
    1605              :     Assert(clauses != NIL);
    1606              :     Assert(mcvlist != NULL);
    1607              :     Assert(mcvlist->nitems > 0);
    1608              :     Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS);
    1609              : 
    1610          423 :     matches = palloc_array(bool, mcvlist->nitems);
    1611          423 :     memset(matches, !is_or, sizeof(bool) * mcvlist->nitems);
    1612              : 
    1613              :     /*
    1614              :      * Loop through the list of clauses, and for each of them evaluate all the
    1615              :      * MCV items not yet eliminated by the preceding clauses.
    1616              :      */
    1617         1215 :     foreach(l, clauses)
    1618              :     {
    1619          792 :         Node       *clause = (Node *) lfirst(l);
    1620              : 
    1621              :         /* if it's a RestrictInfo, then extract the clause */
    1622          792 :         if (IsA(clause, RestrictInfo))
    1623          735 :             clause = (Node *) ((RestrictInfo *) clause)->clause;
    1624              : 
    1625              :         /*
    1626              :          * Handle the various types of clauses - OpClause, NullTest and
    1627              :          * AND/OR/NOT
    1628              :          */
    1629          792 :         if (is_opclause(clause))
    1630              :         {
    1631          531 :             OpExpr     *expr = (OpExpr *) clause;
    1632              :             FmgrInfo    opproc;
    1633              : 
    1634              :             /* valid only after examine_opclause_args returns true */
    1635              :             Node       *clause_expr;
    1636              :             Const      *cst;
    1637              :             bool        expronleft;
    1638              :             int         idx;
    1639              :             Oid         collid;
    1640              : 
    1641          531 :             fmgr_info(get_opcode(expr->opno), &opproc);
    1642              : 
    1643              :             /* extract the var/expr and const from the expression */
    1644          531 :             if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
    1645            0 :                 elog(ERROR, "incompatible clause");
    1646              : 
    1647              :             /* match the attribute/expression to a dimension of the statistic */
    1648          531 :             idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
    1649              : 
    1650              :             /*
    1651              :              * Walk through the MCV items and evaluate the current clause. We
    1652              :              * can skip items that were already ruled out, and terminate if
    1653              :              * there are no remaining MCV items that might possibly match.
    1654              :              */
    1655        38709 :             for (int i = 0; i < mcvlist->nitems; i++)
    1656              :             {
    1657        38178 :                 bool        match = true;
    1658        38178 :                 MCVItem    *item = &mcvlist->items[i];
    1659              : 
    1660              :                 Assert(idx >= 0);
    1661              : 
    1662              :                 /*
    1663              :                  * When the MCV item or the Const value is NULL we can treat
    1664              :                  * this as a mismatch. We must not call the operator because
    1665              :                  * of strictness.
    1666              :                  */
    1667        38178 :                 if (item->isnull[idx] || cst->constisnull)
    1668              :                 {
    1669           24 :                     matches[i] = RESULT_MERGE(matches[i], is_or, false);
    1670           24 :                     continue;
    1671              :                 }
    1672              : 
    1673              :                 /*
    1674              :                  * Skip MCV items that can't change result in the bitmap. Once
    1675              :                  * the value gets false for AND-lists, or true for OR-lists,
    1676              :                  * we don't need to look at more clauses.
    1677              :                  */
    1678        38154 :                 if (RESULT_IS_FINAL(matches[i], is_or))
    1679        15261 :                     continue;
    1680              : 
    1681              :                 /*
    1682              :                  * First check whether the constant is below the lower
    1683              :                  * boundary (in that case we can skip the bucket, because
    1684              :                  * there's no overlap).
    1685              :                  *
    1686              :                  * We don't store collations used to build the statistics, but
    1687              :                  * we can use the collation for the attribute itself, as
    1688              :                  * stored in varcollid. We do reset the statistics after a
    1689              :                  * type change (including collation change), so this is OK.
    1690              :                  * For expressions, we use the collation extracted from the
    1691              :                  * expression itself.
    1692              :                  */
    1693        22893 :                 if (expronleft)
    1694        21585 :                     match = DatumGetBool(FunctionCall2Coll(&opproc,
    1695              :                                                            collid,
    1696        21585 :                                                            item->values[idx],
    1697        21585 :                                                            cst->constvalue));
    1698              :                 else
    1699         1308 :                     match = DatumGetBool(FunctionCall2Coll(&opproc,
    1700              :                                                            collid,
    1701         1308 :                                                            cst->constvalue,
    1702         1308 :                                                            item->values[idx]));
    1703              : 
    1704              :                 /* update the match bitmap with the result */
    1705        22893 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1706              :             }
    1707              :         }
    1708          261 :         else if (IsA(clause, ScalarArrayOpExpr))
    1709              :         {
    1710          138 :             ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
    1711              :             FmgrInfo    opproc;
    1712              : 
    1713              :             /* valid only after examine_opclause_args returns true */
    1714              :             Node       *clause_expr;
    1715              :             Const      *cst;
    1716              :             bool        expronleft;
    1717              :             Oid         collid;
    1718              :             int         idx;
    1719              : 
    1720              :             /* array evaluation */
    1721              :             ArrayType  *arrayval;
    1722              :             int16       elmlen;
    1723              :             bool        elmbyval;
    1724              :             char        elmalign;
    1725              :             int         num_elems;
    1726              :             Datum      *elem_values;
    1727              :             bool       *elem_nulls;
    1728              : 
    1729          138 :             fmgr_info(get_opcode(expr->opno), &opproc);
    1730              : 
    1731              :             /* extract the var/expr and const from the expression */
    1732          138 :             if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
    1733            0 :                 elog(ERROR, "incompatible clause");
    1734              : 
    1735              :             /* We expect Var on left */
    1736          138 :             if (!expronleft)
    1737            0 :                 elog(ERROR, "incompatible clause");
    1738              : 
    1739              :             /*
    1740              :              * Deconstruct the array constant, unless it's NULL (we'll cover
    1741              :              * that case below)
    1742              :              */
    1743          138 :             if (!cst->constisnull)
    1744              :             {
    1745          138 :                 arrayval = DatumGetArrayTypeP(cst->constvalue);
    1746          138 :                 get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
    1747              :                                      &elmlen, &elmbyval, &elmalign);
    1748          138 :                 deconstruct_array(arrayval,
    1749              :                                   ARR_ELEMTYPE(arrayval),
    1750              :                                   elmlen, elmbyval, elmalign,
    1751              :                                   &elem_values, &elem_nulls, &num_elems);
    1752              :             }
    1753              : 
    1754              :             /* match the attribute/expression to a dimension of the statistic */
    1755          138 :             idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
    1756              : 
    1757              :             /*
    1758              :              * Walk through the MCV items and evaluate the current clause. We
    1759              :              * can skip items that were already ruled out, and terminate if
    1760              :              * there are no remaining MCV items that might possibly match.
    1761              :              */
    1762        11484 :             for (int i = 0; i < mcvlist->nitems; i++)
    1763              :             {
    1764              :                 int         j;
    1765        11346 :                 bool        match = !expr->useOr;
    1766        11346 :                 MCVItem    *item = &mcvlist->items[i];
    1767              : 
    1768              :                 /*
    1769              :                  * When the MCV item or the Const value is NULL we can treat
    1770              :                  * this as a mismatch. We must not call the operator because
    1771              :                  * of strictness.
    1772              :                  */
    1773        11346 :                 if (item->isnull[idx] || cst->constisnull)
    1774              :                 {
    1775            9 :                     matches[i] = RESULT_MERGE(matches[i], is_or, false);
    1776            9 :                     continue;
    1777              :                 }
    1778              : 
    1779              :                 /*
    1780              :                  * Skip MCV items that can't change result in the bitmap. Once
    1781              :                  * the value gets false for AND-lists, or true for OR-lists,
    1782              :                  * we don't need to look at more clauses.
    1783              :                  */
    1784        11337 :                 if (RESULT_IS_FINAL(matches[i], is_or))
    1785         5772 :                     continue;
    1786              : 
    1787        17877 :                 for (j = 0; j < num_elems; j++)
    1788              :                 {
    1789        15489 :                     Datum       elem_value = elem_values[j];
    1790        15489 :                     bool        elem_isnull = elem_nulls[j];
    1791              :                     bool        elem_match;
    1792              : 
    1793              :                     /* NULL values always evaluate as not matching. */
    1794        15489 :                     if (elem_isnull)
    1795              :                     {
    1796         1056 :                         match = RESULT_MERGE(match, expr->useOr, false);
    1797         1056 :                         continue;
    1798              :                     }
    1799              : 
    1800              :                     /*
    1801              :                      * Stop evaluating the array elements once we reach a
    1802              :                      * matching value that can't change - ALL() is the same as
    1803              :                      * AND-list, ANY() is the same as OR-list.
    1804              :                      */
    1805        14433 :                     if (RESULT_IS_FINAL(match, expr->useOr))
    1806         3177 :                         break;
    1807              : 
    1808        11256 :                     elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
    1809              :                                                                 collid,
    1810        11256 :                                                                 item->values[idx],
    1811              :                                                                 elem_value));
    1812              : 
    1813        11256 :                     match = RESULT_MERGE(match, expr->useOr, elem_match);
    1814              :                 }
    1815              : 
    1816              :                 /* update the match bitmap with the result */
    1817         5565 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1818              :             }
    1819              :         }
    1820          123 :         else if (IsA(clause, NullTest))
    1821              :         {
    1822           33 :             NullTest   *expr = (NullTest *) clause;
    1823           33 :             Node       *clause_expr = (Node *) (expr->arg);
    1824              : 
    1825              :             /* match the attribute/expression to a dimension of the statistic */
    1826           33 :             int         idx = mcv_match_expression(clause_expr, keys, exprs, NULL);
    1827              : 
    1828              :             /*
    1829              :              * Walk through the MCV items and evaluate the current clause. We
    1830              :              * can skip items that were already ruled out, and terminate if
    1831              :              * there are no remaining MCV items that might possibly match.
    1832              :              */
    1833         3039 :             for (int i = 0; i < mcvlist->nitems; i++)
    1834              :             {
    1835         3006 :                 bool        match = false;  /* assume mismatch */
    1836         3006 :                 MCVItem    *item = &mcvlist->items[i];
    1837              : 
    1838              :                 /* if the clause mismatches the MCV item, update the bitmap */
    1839         3006 :                 switch (expr->nulltesttype)
    1840              :                 {
    1841         2106 :                     case IS_NULL:
    1842         2106 :                         match = (item->isnull[idx]) ? true : match;
    1843         2106 :                         break;
    1844              : 
    1845          900 :                     case IS_NOT_NULL:
    1846          900 :                         match = (!item->isnull[idx]) ? true : match;
    1847          900 :                         break;
    1848              :                 }
    1849              : 
    1850              :                 /* now, update the match bitmap, depending on OR/AND type */
    1851         3006 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1852              :             }
    1853              :         }
    1854           90 :         else if (is_orclause(clause) || is_andclause(clause))
    1855           33 :         {
    1856              :             /* AND/OR clause, with all subclauses being compatible */
    1857              : 
    1858              :             int         i;
    1859           33 :             BoolExpr   *bool_clause = ((BoolExpr *) clause);
    1860           33 :             List       *bool_clauses = bool_clause->args;
    1861              : 
    1862              :             /* match/mismatch bitmap for each MCV item */
    1863           33 :             bool       *bool_matches = NULL;
    1864              : 
    1865              :             Assert(bool_clauses != NIL);
    1866              :             Assert(list_length(bool_clauses) >= 2);
    1867              : 
    1868              :             /* build the match bitmap for the OR-clauses */
    1869           33 :             bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs,
    1870           33 :                                                 mcvlist, is_orclause(clause));
    1871              : 
    1872              :             /*
    1873              :              * Merge the bitmap produced by mcv_get_match_bitmap into the
    1874              :              * current one. We need to consider if we're evaluating AND or OR
    1875              :              * condition when merging the results.
    1876              :              */
    1877         2181 :             for (i = 0; i < mcvlist->nitems; i++)
    1878         2148 :                 matches[i] = RESULT_MERGE(matches[i], is_or, bool_matches[i]);
    1879              : 
    1880           33 :             pfree(bool_matches);
    1881              :         }
    1882           57 :         else if (is_notclause(clause))
    1883              :         {
    1884              :             /* NOT clause, with all subclauses compatible */
    1885              : 
    1886              :             int         i;
    1887           15 :             BoolExpr   *not_clause = ((BoolExpr *) clause);
    1888           15 :             List       *not_args = not_clause->args;
    1889              : 
    1890              :             /* match/mismatch bitmap for each MCV item */
    1891           15 :             bool       *not_matches = NULL;
    1892              : 
    1893              :             Assert(not_args != NIL);
    1894              :             Assert(list_length(not_args) == 1);
    1895              : 
    1896              :             /* build the match bitmap for the NOT-clause */
    1897           15 :             not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs,
    1898              :                                                mcvlist, false);
    1899              : 
    1900              :             /*
    1901              :              * Merge the bitmap produced by mcv_get_match_bitmap into the
    1902              :              * current one. We're handling a NOT clause, so invert the result
    1903              :              * before merging it into the global bitmap.
    1904              :              */
    1905           75 :             for (i = 0; i < mcvlist->nitems; i++)
    1906           60 :                 matches[i] = RESULT_MERGE(matches[i], is_or, !not_matches[i]);
    1907              : 
    1908           15 :             pfree(not_matches);
    1909              :         }
    1910           42 :         else if (IsA(clause, Var))
    1911              :         {
    1912              :             /* Var (has to be a boolean Var, possibly from below NOT) */
    1913              : 
    1914           39 :             Var        *var = (Var *) (clause);
    1915              : 
    1916              :             /* match the attribute to a dimension of the statistic */
    1917           39 :             int         idx = bms_member_index(keys, var->varattno);
    1918              : 
    1919              :             Assert(var->vartype == BOOLOID);
    1920              : 
    1921              :             /*
    1922              :              * Walk through the MCV items and evaluate the current clause. We
    1923              :              * can skip items that were already ruled out, and terminate if
    1924              :              * there are no remaining MCV items that might possibly match.
    1925              :              */
    1926          189 :             for (int i = 0; i < mcvlist->nitems; i++)
    1927              :             {
    1928          150 :                 MCVItem    *item = &mcvlist->items[i];
    1929          150 :                 bool        match = false;
    1930              : 
    1931              :                 /* if the item is NULL, it's a mismatch */
    1932          150 :                 if (!item->isnull[idx] && DatumGetBool(item->values[idx]))
    1933           75 :                     match = true;
    1934              : 
    1935              :                 /* update the result bitmap */
    1936          150 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1937              :             }
    1938              :         }
    1939              :         else
    1940              :         {
    1941              :             /* Otherwise, it must be a bare boolean-returning expression */
    1942              :             int         idx;
    1943              : 
    1944              :             /* match the expression to a dimension of the statistic */
    1945            3 :             idx = mcv_match_expression(clause, keys, exprs, NULL);
    1946              : 
    1947              :             /*
    1948              :              * Walk through the MCV items and evaluate the current clause. We
    1949              :              * can skip items that were already ruled out, and terminate if
    1950              :              * there are no remaining MCV items that might possibly match.
    1951              :              */
    1952          111 :             for (int i = 0; i < mcvlist->nitems; i++)
    1953              :             {
    1954              :                 bool        match;
    1955          108 :                 MCVItem    *item = &mcvlist->items[i];
    1956              : 
    1957              :                 /* "match" just means it's bool TRUE */
    1958          108 :                 match = !item->isnull[idx] && DatumGetBool(item->values[idx]);
    1959              : 
    1960              :                 /* now, update the match bitmap, depending on OR/AND type */
    1961          108 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1962              :             }
    1963              :         }
    1964              :     }
    1965              : 
    1966          423 :     return matches;
    1967              : }
    1968              : 
    1969              : 
    1970              : /*
    1971              :  * mcv_combine_selectivities
    1972              :  *      Combine per-column and multi-column MCV selectivity estimates.
    1973              :  *
    1974              :  * simple_sel is a "simple" selectivity estimate (produced without using any
    1975              :  * extended statistics, essentially assuming independence of columns/clauses).
    1976              :  *
    1977              :  * mcv_sel and mcv_basesel are sums of the frequencies and base frequencies of
    1978              :  * all matching MCV items.  The difference (mcv_sel - mcv_basesel) is then
    1979              :  * essentially interpreted as a correction to be added to simple_sel, as
    1980              :  * described below.
    1981              :  *
    1982              :  * mcv_totalsel is the sum of the frequencies of all MCV items (not just the
    1983              :  * matching ones).  This is used as an upper bound on the portion of the
    1984              :  * selectivity estimates not covered by the MCV statistics.
    1985              :  *
    1986              :  * Note: While simple and base selectivities are defined in a quite similar
    1987              :  * way, the values are computed differently and are not therefore equal. The
    1988              :  * simple selectivity is computed as a product of per-clause estimates, while
    1989              :  * the base selectivity is computed by adding up base frequencies of matching
    1990              :  * items of the multi-column MCV list. So the values may differ for two main
    1991              :  * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
    1992              :  * the MCV items did not match the estimated clauses.
    1993              :  *
    1994              :  * As both (a) and (b) reduce the base selectivity value, it generally holds
    1995              :  * that (simple_sel >= mcv_basesel). If the MCV list covers all the data, the
    1996              :  * values may be equal.
    1997              :  *
    1998              :  * So, other_sel = (simple_sel - mcv_basesel) is an estimate for the part not
    1999              :  * covered by the MCV list, and (mcv_sel - mcv_basesel) may be seen as a
    2000              :  * correction for the part covered by the MCV list. Those two statements are
    2001              :  * actually equivalent.
    2002              :  */
    2003              : Selectivity
    2004          399 : mcv_combine_selectivities(Selectivity simple_sel,
    2005              :                           Selectivity mcv_sel,
    2006              :                           Selectivity mcv_basesel,
    2007              :                           Selectivity mcv_totalsel)
    2008              : {
    2009              :     Selectivity other_sel;
    2010              :     Selectivity sel;
    2011              : 
    2012              :     /* estimated selectivity of values not covered by MCV matches */
    2013          399 :     other_sel = simple_sel - mcv_basesel;
    2014          399 :     CLAMP_PROBABILITY(other_sel);
    2015              : 
    2016              :     /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
    2017          399 :     if (other_sel > 1.0 - mcv_totalsel)
    2018          222 :         other_sel = 1.0 - mcv_totalsel;
    2019              : 
    2020              :     /* overall selectivity is the sum of the MCV and non-MCV parts */
    2021          399 :     sel = mcv_sel + other_sel;
    2022          399 :     CLAMP_PROBABILITY(sel);
    2023              : 
    2024          399 :     return sel;
    2025              : }
    2026              : 
    2027              : 
    2028              : /*
    2029              :  * mcv_clauselist_selectivity
    2030              :  *      Use MCV statistics to estimate the selectivity of an implicitly-ANDed
    2031              :  *      list of clauses.
    2032              :  *
    2033              :  * This determines which MCV items match every clause in the list and returns
    2034              :  * the sum of the frequencies of those items.
    2035              :  *
    2036              :  * In addition, it returns the sum of the base frequencies of each of those
    2037              :  * items (that is the sum of the selectivities that each item would have if
    2038              :  * the columns were independent of one another), and the total selectivity of
    2039              :  * all the MCV items (not just the matching ones).  These are expected to be
    2040              :  * used together with a "simple" selectivity estimate (one based only on
    2041              :  * per-column statistics) to produce an overall selectivity estimate that
    2042              :  * makes use of both per-column and multi-column statistics --- see
    2043              :  * mcv_combine_selectivities().
    2044              :  */
    2045              : Selectivity
    2046          255 : mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
    2047              :                            List *clauses, int varRelid,
    2048              :                            JoinType jointype, SpecialJoinInfo *sjinfo,
    2049              :                            RelOptInfo *rel,
    2050              :                            Selectivity *basesel, Selectivity *totalsel)
    2051              : {
    2052              :     int         i;
    2053              :     MCVList    *mcv;
    2054          255 :     Selectivity s = 0.0;
    2055          255 :     RangeTblEntry *rte = root->simple_rte_array[rel->relid];
    2056              : 
    2057              :     /* match/mismatch bitmap for each MCV item */
    2058          255 :     bool       *matches = NULL;
    2059              : 
    2060              :     /* load the MCV list stored in the statistics object */
    2061          255 :     mcv = statext_mcv_load(stat->statOid, rte->inh);
    2062              : 
    2063              :     /* build a match bitmap for the clauses */
    2064          255 :     matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
    2065              :                                    mcv, false);
    2066              : 
    2067              :     /* sum frequencies for all the matching MCV items */
    2068          255 :     *basesel = 0.0;
    2069          255 :     *totalsel = 0.0;
    2070        18915 :     for (i = 0; i < mcv->nitems; i++)
    2071              :     {
    2072        18660 :         *totalsel += mcv->items[i].frequency;
    2073              : 
    2074        18660 :         if (matches[i] != false)
    2075              :         {
    2076          333 :             *basesel += mcv->items[i].base_frequency;
    2077          333 :             s += mcv->items[i].frequency;
    2078              :         }
    2079              :     }
    2080              : 
    2081          255 :     return s;
    2082              : }
    2083              : 
    2084              : 
    2085              : /*
    2086              :  * mcv_clause_selectivity_or
    2087              :  *      Use MCV statistics to estimate the selectivity of a clause that
    2088              :  *      appears in an ORed list of clauses.
    2089              :  *
    2090              :  * As with mcv_clauselist_selectivity() this determines which MCV items match
    2091              :  * the clause and returns both the sum of the frequencies and the sum of the
    2092              :  * base frequencies of those items, as well as the sum of the frequencies of
    2093              :  * all MCV items (not just the matching ones) so that this information can be
    2094              :  * used by mcv_combine_selectivities() to produce a selectivity estimate that
    2095              :  * makes use of both per-column and multi-column statistics.
    2096              :  *
    2097              :  * Additionally, we return information to help compute the overall selectivity
    2098              :  * of the ORed list of clauses assumed to contain this clause.  This function
    2099              :  * is intended to be called for each clause in the ORed list of clauses,
    2100              :  * allowing the overall selectivity to be computed using the following
    2101              :  * algorithm:
    2102              :  *
    2103              :  * Suppose P[n] = P(C[1] OR C[2] OR ... OR C[n]) is the combined selectivity
    2104              :  * of the first n clauses in the list.  Then the combined selectivity taking
    2105              :  * into account the next clause C[n+1] can be written as
    2106              :  *
    2107              :  *      P[n+1] = P[n] + P(C[n+1]) - P((C[1] OR ... OR C[n]) AND C[n+1])
    2108              :  *
    2109              :  * The final term above represents the overlap between the clauses examined so
    2110              :  * far and the (n+1)'th clause.  To estimate its selectivity, we track the
    2111              :  * match bitmap for the ORed list of clauses examined so far and examine its
    2112              :  * intersection with the match bitmap for the (n+1)'th clause.
    2113              :  *
    2114              :  * We then also return the sums of the MCV item frequencies and base
    2115              :  * frequencies for the match bitmap intersection corresponding to the overlap
    2116              :  * term above, so that they can be combined with a simple selectivity estimate
    2117              :  * for that term.
    2118              :  *
    2119              :  * The parameter "or_matches" is an in/out parameter tracking the match bitmap
    2120              :  * for the clauses examined so far.  The caller is expected to set it to NULL
    2121              :  * the first time it calls this function.
    2122              :  */
    2123              : Selectivity
    2124          120 : mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat,
    2125              :                           MCVList *mcv, Node *clause, bool **or_matches,
    2126              :                           Selectivity *basesel, Selectivity *overlap_mcvsel,
    2127              :                           Selectivity *overlap_basesel, Selectivity *totalsel)
    2128              : {
    2129          120 :     Selectivity s = 0.0;
    2130              :     bool       *new_matches;
    2131              :     int         i;
    2132              : 
    2133              :     /* build the OR-matches bitmap, if not built already */
    2134          120 :     if (*or_matches == NULL)
    2135           48 :         *or_matches = palloc0_array(bool, mcv->nitems);
    2136              : 
    2137              :     /* build the match bitmap for the new clause */
    2138          120 :     new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
    2139              :                                        stat->exprs, mcv, false);
    2140              : 
    2141              :     /*
    2142              :      * Sum the frequencies for all the MCV items matching this clause and also
    2143              :      * those matching the overlap between this clause and any of the preceding
    2144              :      * clauses as described above.
    2145              :      */
    2146          120 :     *basesel = 0.0;
    2147          120 :     *overlap_mcvsel = 0.0;
    2148          120 :     *overlap_basesel = 0.0;
    2149          120 :     *totalsel = 0.0;
    2150         7758 :     for (i = 0; i < mcv->nitems; i++)
    2151              :     {
    2152         7638 :         *totalsel += mcv->items[i].frequency;
    2153              : 
    2154         7638 :         if (new_matches[i])
    2155              :         {
    2156          168 :             s += mcv->items[i].frequency;
    2157          168 :             *basesel += mcv->items[i].base_frequency;
    2158              : 
    2159          168 :             if ((*or_matches)[i])
    2160              :             {
    2161           72 :                 *overlap_mcvsel += mcv->items[i].frequency;
    2162           72 :                 *overlap_basesel += mcv->items[i].base_frequency;
    2163              :             }
    2164              :         }
    2165              : 
    2166              :         /* update the OR-matches bitmap for the next clause */
    2167         7638 :         (*or_matches)[i] = (*or_matches)[i] || new_matches[i];
    2168              :     }
    2169              : 
    2170          120 :     pfree(new_matches);
    2171              : 
    2172          120 :     return s;
    2173              : }
    2174              : 
    2175              : /*
    2176              :  * Free allocations of a MCVList.
    2177              :  */
    2178              : void
    2179            0 : statext_mcv_free(MCVList *mcvlist)
    2180              : {
    2181            0 :     for (int i = 0; i < mcvlist->nitems; i++)
    2182              :     {
    2183            0 :         MCVItem    *item = &mcvlist->items[i];
    2184              : 
    2185            0 :         pfree(item->values);
    2186            0 :         pfree(item->isnull);
    2187              :     }
    2188            0 :     pfree(mcvlist);
    2189            0 : }
    2190              : 
    2191              : /*
    2192              :  * Create the MCV composite datum, which is a serialization of an array of
    2193              :  * MCVItems.
    2194              :  *
    2195              :  * The inputs consist of four separate arrays of equal length "numitems"
    2196              :  * (mcv_elems, mcv_nulls, freqs and base_freqs) that form the basics of
    2197              :  * what is stored in the catalogs.  These form an array of composite
    2198              :  * records defined by the three atttypX arrays of equal length "numattrs".
    2199              :  *
    2200              :  * If any data element fails to convert to the input type specified for that
    2201              :  * attribute, then function will return a NULL Datum if elevel < ERROR.
    2202              :  */
    2203              : Datum
    2204           18 : statext_mcv_import(int elevel, int numattrs,
    2205              :                    Oid *atttypids, int32 *atttypmods, Oid *atttypcolls,
    2206              :                    int nitems, Datum *mcv_elems, bool *mcv_nulls,
    2207              :                    float8 *freqs, float8 *base_freqs)
    2208              : {
    2209              :     MCVList    *mcvlist;
    2210              :     bytea      *bytes;
    2211              :     VacAttrStats **vastats;
    2212              : 
    2213              :     /*
    2214              :      * Allocate the MCV list structure, set the global parameters.
    2215              :      */
    2216           18 :     mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
    2217           18 :                                   (sizeof(MCVItem) * nitems));
    2218              : 
    2219           18 :     mcvlist->magic = STATS_MCV_MAGIC;
    2220           18 :     mcvlist->type = STATS_MCV_TYPE_BASIC;
    2221           18 :     mcvlist->ndimensions = numattrs;
    2222           18 :     mcvlist->nitems = nitems;
    2223              : 
    2224              :     /* Set the values for the 1-D arrays and allocate space for the 2-D arrays */
    2225          129 :     for (int i = 0; i < nitems; i++)
    2226              :     {
    2227          111 :         MCVItem    *item = &mcvlist->items[i];
    2228              : 
    2229          111 :         item->frequency = freqs[i];
    2230          111 :         item->base_frequency = base_freqs[i];
    2231          111 :         item->values = (Datum *) palloc0_array(Datum, numattrs);
    2232          111 :         item->isnull = (bool *) palloc0_array(bool, numattrs);
    2233              :     }
    2234              : 
    2235              :     /*
    2236              :      * Walk through each dimension, determine the input function for that
    2237              :      * type, and then attempt to convert all values in that column via that
    2238              :      * function.  We approach this column-wise because it is simpler to deal
    2239              :      * with one input function at time, and possibly more cache-friendly.
    2240              :      */
    2241           69 :     for (int j = 0; j < numattrs; j++)
    2242              :     {
    2243              :         FmgrInfo    finfo;
    2244              :         Oid         ioparam;
    2245              :         Oid         infunc;
    2246           51 :         int         index = j;
    2247              : 
    2248           51 :         getTypeInputInfo(atttypids[j], &infunc, &ioparam);
    2249           51 :         fmgr_info(infunc, &finfo);
    2250              : 
    2251              :         /* store info about data type OIDs */
    2252           51 :         mcvlist->types[j] = atttypids[j];
    2253              : 
    2254          372 :         for (int i = 0; i < nitems; i++)
    2255              :         {
    2256          321 :             MCVItem    *item = &mcvlist->items[i];
    2257              : 
    2258          321 :             if (mcv_nulls[index])
    2259              :             {
    2260              :                 /* NULL value detected, hence no input to process */
    2261           19 :                 item->values[j] = (Datum) 0;
    2262           19 :                 item->isnull[j] = true;
    2263              :             }
    2264              :             else
    2265              :             {
    2266          302 :                 char       *s = TextDatumGetCString(mcv_elems[index]);
    2267          302 :                 ErrorSaveContext escontext = {T_ErrorSaveContext};
    2268              : 
    2269          302 :                 if (!InputFunctionCallSafe(&finfo, s, ioparam, atttypmods[j],
    2270          302 :                                            (Node *) &escontext, &item->values[j]))
    2271              :                 {
    2272            0 :                     ereport(elevel,
    2273              :                             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2274              :                              errmsg("could not parse MCV element \"%s\": incorrect value", s)));
    2275            0 :                     pfree(s);
    2276            0 :                     goto error;
    2277              :                 }
    2278              : 
    2279          302 :                 pfree(s);
    2280              :             }
    2281              : 
    2282          321 :             index += numattrs;
    2283              :         }
    2284              :     }
    2285              : 
    2286              :     /*
    2287              :      * The function statext_mcv_serialize() requires an array of pointers to
    2288              :      * VacAttrStats records, but only a few fields within those records have
    2289              :      * to be filled out.
    2290              :      */
    2291           18 :     vastats = (VacAttrStats **) palloc0_array(VacAttrStats *, numattrs);
    2292              : 
    2293           69 :     for (int i = 0; i < numattrs; i++)
    2294              :     {
    2295           51 :         Oid         typid = atttypids[i];
    2296              :         HeapTuple   typtuple;
    2297              : 
    2298           51 :         typtuple = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(typid));
    2299              : 
    2300           51 :         if (!HeapTupleIsValid(typtuple))
    2301            0 :             elog(ERROR, "cache lookup failed for type %u", typid);
    2302              : 
    2303           51 :         vastats[i] = palloc0_object(VacAttrStats);
    2304              : 
    2305           51 :         vastats[i]->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
    2306           51 :         vastats[i]->attrtypid = typid;
    2307           51 :         vastats[i]->attrcollid = atttypcolls[i];
    2308              :     }
    2309              : 
    2310           18 :     bytes = statext_mcv_serialize(mcvlist, vastats);
    2311              : 
    2312           69 :     for (int i = 0; i < numattrs; i++)
    2313              :     {
    2314           51 :         pfree(vastats[i]);
    2315              :     }
    2316           18 :     pfree((void *) vastats);
    2317              : 
    2318           18 :     pfree(mcv_elems);
    2319           18 :     pfree(mcv_nulls);
    2320              : 
    2321           18 :     if (bytes == NULL)
    2322              :     {
    2323            0 :         ereport(elevel,
    2324              :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2325              :                  errmsg("could not import MCV list")));
    2326            0 :         goto error;
    2327              :     }
    2328              : 
    2329           18 :     return PointerGetDatum(bytes);
    2330              : 
    2331            0 : error:
    2332            0 :     statext_mcv_free(mcvlist);
    2333            0 :     return (Datum) 0;
    2334              : }
        

Generated by: LCOV version 2.0-1