LCOV - code coverage report
Current view: top level - src/backend/statistics - mvdistinct.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 188 205 91.7 %
Date: 2019-11-13 23:06:49 Functions: 14 17 82.4 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * mvdistinct.c
       4             :  *    POSTGRES multivariate ndistinct coefficients
       5             :  *
       6             :  * Estimating number of groups in a combination of columns (e.g. for GROUP BY)
       7             :  * is tricky, and the estimation error is often significant.
       8             : 
       9             :  * The multivariate ndistinct coefficients address this by storing ndistinct
      10             :  * estimates for combinations of the user-specified columns.  So for example
      11             :  * given a statistics object on three columns (a,b,c), this module estimates
      12             :  * and stores n-distinct for (a,b), (a,c), (b,c) and (a,b,c).  The per-column
      13             :  * estimates are already available in pg_statistic.
      14             :  *
      15             :  *
      16             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
      17             :  * Portions Copyright (c) 1994, Regents of the University of California
      18             :  *
      19             :  * IDENTIFICATION
      20             :  *    src/backend/statistics/mvdistinct.c
      21             :  *
      22             :  *-------------------------------------------------------------------------
      23             :  */
      24             : #include "postgres.h"
      25             : 
      26             : #include <math.h>
      27             : 
      28             : #include "access/htup_details.h"
      29             : #include "catalog/pg_statistic_ext.h"
      30             : #include "catalog/pg_statistic_ext_data.h"
      31             : #include "lib/stringinfo.h"
      32             : #include "statistics/extended_stats_internal.h"
      33             : #include "statistics/statistics.h"
      34             : #include "utils/fmgrprotos.h"
      35             : #include "utils/lsyscache.h"
      36             : #include "utils/syscache.h"
      37             : #include "utils/typcache.h"
      38             : 
      39             : static double ndistinct_for_combination(double totalrows, int numrows,
      40             :                                         HeapTuple *rows, VacAttrStats **stats,
      41             :                                         int k, int *combination);
      42             : static double estimate_ndistinct(double totalrows, int numrows, int d, int f1);
      43             : static int  n_choose_k(int n, int k);
      44             : static int  num_combinations(int n);
      45             : 
      46             : /* size of the struct header fields (magic, type, nitems) */
      47             : #define SizeOfHeader        (3 * sizeof(uint32))
      48             : 
      49             : /* size of a serialized ndistinct item (coefficient, natts, atts) */
      50             : #define SizeOfItem(natts) \
      51             :     (sizeof(double) + sizeof(int) + (natts) * sizeof(AttrNumber))
      52             : 
      53             : /* minimal size of a ndistinct item (with two attributes) */
      54             : #define MinSizeOfItem   SizeOfItem(2)
      55             : 
      56             : /* minimal size of mvndistinct, when all items are minimal */
      57             : #define MinSizeOfItems(nitems)  \
      58             :     (SizeOfHeader + (nitems) * MinSizeOfItem)
      59             : 
      60             : /* Combination generator API */
      61             : 
      62             : /* internal state for generator of k-combinations of n elements */
      63             : typedef struct CombinationGenerator
      64             : {
      65             :     int         k;              /* size of the combination */
      66             :     int         n;              /* total number of elements */
      67             :     int         current;        /* index of the next combination to return */
      68             :     int         ncombinations;  /* number of combinations (size of array) */
      69             :     int        *combinations;   /* array of pre-built combinations */
      70             : } CombinationGenerator;
      71             : 
      72             : static CombinationGenerator *generator_init(int n, int k);
      73             : static void generator_free(CombinationGenerator *state);
      74             : static int *generator_next(CombinationGenerator *state);
      75             : static void generate_combinations(CombinationGenerator *state);
      76             : 
      77             : 
      78             : /*
      79             :  * statext_ndistinct_build
      80             :  *      Compute ndistinct coefficient for the combination of attributes.
      81             :  *
      82             :  * This computes the ndistinct estimate using the same estimator used
      83             :  * in analyze.c and then computes the coefficient.
      84             :  */
      85             : MVNDistinct *
      86          16 : statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows,
      87             :                         Bitmapset *attrs, VacAttrStats **stats)
      88             : {
      89             :     MVNDistinct *result;
      90             :     int         k;
      91             :     int         itemcnt;
      92          16 :     int         numattrs = bms_num_members(attrs);
      93          16 :     int         numcombs = num_combinations(numattrs);
      94             : 
      95          16 :     result = palloc(offsetof(MVNDistinct, items) +
      96             :                     numcombs * sizeof(MVNDistinctItem));
      97          16 :     result->magic = STATS_NDISTINCT_MAGIC;
      98          16 :     result->type = STATS_NDISTINCT_TYPE_BASIC;
      99          16 :     result->nitems = numcombs;
     100             : 
     101          16 :     itemcnt = 0;
     102          40 :     for (k = 2; k <= numattrs; k++)
     103             :     {
     104             :         int        *combination;
     105             :         CombinationGenerator *generator;
     106             : 
     107             :         /* generate combinations of K out of N elements */
     108          24 :         generator = generator_init(numattrs, k);
     109             : 
     110          88 :         while ((combination = generator_next(generator)))
     111             :         {
     112          40 :             MVNDistinctItem *item = &result->items[itemcnt];
     113             :             int         j;
     114             : 
     115          40 :             item->attrs = NULL;
     116         128 :             for (j = 0; j < k; j++)
     117          88 :                 item->attrs = bms_add_member(item->attrs,
     118          88 :                                              stats[combination[j]]->attr->attnum);
     119          40 :             item->ndistinct =
     120          40 :                 ndistinct_for_combination(totalrows, numrows, rows,
     121             :                                           stats, k, combination);
     122             : 
     123          40 :             itemcnt++;
     124             :             Assert(itemcnt <= result->nitems);
     125             :         }
     126             : 
     127          24 :         generator_free(generator);
     128             :     }
     129             : 
     130             :     /* must consume exactly the whole output array */
     131             :     Assert(itemcnt == result->nitems);
     132             : 
     133          16 :     return result;
     134             : }
     135             : 
     136             : /*
     137             :  * statext_ndistinct_load
     138             :  *      Load the ndistinct value for the indicated pg_statistic_ext tuple
     139             :  */
     140             : MVNDistinct *
     141          36 : statext_ndistinct_load(Oid mvoid)
     142             : {
     143             :     MVNDistinct *result;
     144             :     bool        isnull;
     145             :     Datum       ndist;
     146             :     HeapTuple   htup;
     147             : 
     148          36 :     htup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(mvoid));
     149          36 :     if (!HeapTupleIsValid(htup))
     150           0 :         elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
     151             : 
     152          36 :     ndist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
     153             :                             Anum_pg_statistic_ext_data_stxdndistinct, &isnull);
     154          36 :     if (isnull)
     155           0 :         elog(ERROR,
     156             :              "requested statistic kind \"%c\" is not yet built for statistics object %u",
     157             :              STATS_EXT_NDISTINCT, mvoid);
     158             : 
     159          36 :     result = statext_ndistinct_deserialize(DatumGetByteaPP(ndist));
     160             : 
     161          36 :     ReleaseSysCache(htup);
     162             : 
     163          36 :     return result;
     164             : }
     165             : 
     166             : /*
     167             :  * statext_ndistinct_serialize
     168             :  *      serialize ndistinct to the on-disk bytea format
     169             :  */
     170             : bytea *
     171          16 : statext_ndistinct_serialize(MVNDistinct *ndistinct)
     172             : {
     173             :     int         i;
     174             :     bytea      *output;
     175             :     char       *tmp;
     176             :     Size        len;
     177             : 
     178             :     Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
     179             :     Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);
     180             : 
     181             :     /*
     182             :      * Base size is size of scalar fields in the struct, plus one base struct
     183             :      * for each item, including number of items for each.
     184             :      */
     185          16 :     len = VARHDRSZ + SizeOfHeader;
     186             : 
     187             :     /* and also include space for the actual attribute numbers */
     188          56 :     for (i = 0; i < ndistinct->nitems; i++)
     189             :     {
     190             :         int         nmembers;
     191             : 
     192          40 :         nmembers = bms_num_members(ndistinct->items[i].attrs);
     193             :         Assert(nmembers >= 2);
     194             : 
     195          40 :         len += SizeOfItem(nmembers);
     196             :     }
     197             : 
     198          16 :     output = (bytea *) palloc(len);
     199          16 :     SET_VARSIZE(output, len);
     200             : 
     201          16 :     tmp = VARDATA(output);
     202             : 
     203             :     /* Store the base struct values (magic, type, nitems) */
     204          16 :     memcpy(tmp, &ndistinct->magic, sizeof(uint32));
     205          16 :     tmp += sizeof(uint32);
     206          16 :     memcpy(tmp, &ndistinct->type, sizeof(uint32));
     207          16 :     tmp += sizeof(uint32);
     208          16 :     memcpy(tmp, &ndistinct->nitems, sizeof(uint32));
     209          16 :     tmp += sizeof(uint32);
     210             : 
     211             :     /*
     212             :      * store number of attributes and attribute numbers for each entry
     213             :      */
     214          56 :     for (i = 0; i < ndistinct->nitems; i++)
     215             :     {
     216          40 :         MVNDistinctItem item = ndistinct->items[i];
     217          40 :         int         nmembers = bms_num_members(item.attrs);
     218             :         int         x;
     219             : 
     220          40 :         memcpy(tmp, &item.ndistinct, sizeof(double));
     221          40 :         tmp += sizeof(double);
     222          40 :         memcpy(tmp, &nmembers, sizeof(int));
     223          40 :         tmp += sizeof(int);
     224             : 
     225          40 :         x = -1;
     226         168 :         while ((x = bms_next_member(item.attrs, x)) >= 0)
     227             :         {
     228          88 :             AttrNumber  value = (AttrNumber) x;
     229             : 
     230          88 :             memcpy(tmp, &value, sizeof(AttrNumber));
     231          88 :             tmp += sizeof(AttrNumber);
     232             :         }
     233             : 
     234             :         /* protect against overflows */
     235             :         Assert(tmp <= ((char *) output + len));
     236             :     }
     237             : 
     238             :     /* check we used exactly the expected space */
     239             :     Assert(tmp == ((char *) output + len));
     240             : 
     241          16 :     return output;
     242             : }
     243             : 
     244             : /*
     245             :  * statext_ndistinct_deserialize
     246             :  *      Read an on-disk bytea format MVNDistinct to in-memory format
     247             :  */
     248             : MVNDistinct *
     249          44 : statext_ndistinct_deserialize(bytea *data)
     250             : {
     251             :     int         i;
     252             :     Size        minimum_size;
     253             :     MVNDistinct ndist;
     254             :     MVNDistinct *ndistinct;
     255             :     char       *tmp;
     256             : 
     257          44 :     if (data == NULL)
     258           0 :         return NULL;
     259             : 
     260             :     /* we expect at least the basic fields of MVNDistinct struct */
     261          44 :     if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
     262           0 :         elog(ERROR, "invalid MVNDistinct size %zd (expected at least %zd)",
     263             :              VARSIZE_ANY_EXHDR(data), SizeOfHeader);
     264             : 
     265             :     /* initialize pointer to the data part (skip the varlena header) */
     266          44 :     tmp = VARDATA_ANY(data);
     267             : 
     268             :     /* read the header fields and perform basic sanity checks */
     269          44 :     memcpy(&ndist.magic, tmp, sizeof(uint32));
     270          44 :     tmp += sizeof(uint32);
     271          44 :     memcpy(&ndist.type, tmp, sizeof(uint32));
     272          44 :     tmp += sizeof(uint32);
     273          44 :     memcpy(&ndist.nitems, tmp, sizeof(uint32));
     274          44 :     tmp += sizeof(uint32);
     275             : 
     276          44 :     if (ndist.magic != STATS_NDISTINCT_MAGIC)
     277           0 :         elog(ERROR, "invalid ndistinct magic %08x (expected %08x)",
     278             :              ndist.magic, STATS_NDISTINCT_MAGIC);
     279          44 :     if (ndist.type != STATS_NDISTINCT_TYPE_BASIC)
     280           0 :         elog(ERROR, "invalid ndistinct type %d (expected %d)",
     281             :              ndist.type, STATS_NDISTINCT_TYPE_BASIC);
     282          44 :     if (ndist.nitems == 0)
     283           0 :         elog(ERROR, "invalid zero-length item array in MVNDistinct");
     284             : 
     285             :     /* what minimum bytea size do we expect for those parameters */
     286          44 :     minimum_size = MinSizeOfItems(ndist.nitems);
     287          44 :     if (VARSIZE_ANY_EXHDR(data) < minimum_size)
     288           0 :         elog(ERROR, "invalid MVNDistinct size %zd (expected at least %zd)",
     289             :              VARSIZE_ANY_EXHDR(data), minimum_size);
     290             : 
     291             :     /*
     292             :      * Allocate space for the ndistinct items (no space for each item's
     293             :      * attnos: those live in bitmapsets allocated separately)
     294             :      */
     295          44 :     ndistinct = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) +
     296          44 :                         (ndist.nitems * sizeof(MVNDistinctItem)));
     297          44 :     ndistinct->magic = ndist.magic;
     298          44 :     ndistinct->type = ndist.type;
     299          44 :     ndistinct->nitems = ndist.nitems;
     300             : 
     301         220 :     for (i = 0; i < ndistinct->nitems; i++)
     302             :     {
     303         176 :         MVNDistinctItem *item = &ndistinct->items[i];
     304             :         int         nelems;
     305             : 
     306         176 :         item->attrs = NULL;
     307             : 
     308             :         /* ndistinct value */
     309         176 :         memcpy(&item->ndistinct, tmp, sizeof(double));
     310         176 :         tmp += sizeof(double);
     311             : 
     312             :         /* number of attributes */
     313         176 :         memcpy(&nelems, tmp, sizeof(int));
     314         176 :         tmp += sizeof(int);
     315             :         Assert((nelems >= 2) && (nelems <= STATS_MAX_DIMENSIONS));
     316             : 
     317         748 :         while (nelems-- > 0)
     318             :         {
     319             :             AttrNumber  attno;
     320             : 
     321         396 :             memcpy(&attno, tmp, sizeof(AttrNumber));
     322         396 :             tmp += sizeof(AttrNumber);
     323         396 :             item->attrs = bms_add_member(item->attrs, attno);
     324             :         }
     325             : 
     326             :         /* still within the bytea */
     327             :         Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
     328             :     }
     329             : 
     330             :     /* we should have consumed the whole bytea exactly */
     331             :     Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
     332             : 
     333          44 :     return ndistinct;
     334             : }
     335             : 
     336             : /*
     337             :  * pg_ndistinct_in
     338             :  *      input routine for type pg_ndistinct
     339             :  *
     340             :  * pg_ndistinct is real enough to be a table column, but it has no
     341             :  * operations of its own, and disallows input (just like pg_node_tree).
     342             :  */
     343             : Datum
     344           0 : pg_ndistinct_in(PG_FUNCTION_ARGS)
     345             : {
     346           0 :     ereport(ERROR,
     347             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     348             :              errmsg("cannot accept a value of type %s", "pg_ndistinct")));
     349             : 
     350             :     PG_RETURN_VOID();           /* keep compiler quiet */
     351             : }
     352             : 
     353             : /*
     354             :  * pg_ndistinct
     355             :  *      output routine for type pg_ndistinct
     356             :  *
     357             :  * Produces a human-readable representation of the value.
     358             :  */
     359             : Datum
     360           8 : pg_ndistinct_out(PG_FUNCTION_ARGS)
     361             : {
     362           8 :     bytea      *data = PG_GETARG_BYTEA_PP(0);
     363           8 :     MVNDistinct *ndist = statext_ndistinct_deserialize(data);
     364             :     int         i;
     365             :     StringInfoData str;
     366             : 
     367           8 :     initStringInfo(&str);
     368           8 :     appendStringInfoChar(&str, '{');
     369             : 
     370          40 :     for (i = 0; i < ndist->nitems; i++)
     371             :     {
     372          32 :         MVNDistinctItem item = ndist->items[i];
     373          32 :         int         x = -1;
     374          32 :         bool        first = true;
     375             : 
     376          32 :         if (i > 0)
     377          24 :             appendStringInfoString(&str, ", ");
     378             : 
     379         136 :         while ((x = bms_next_member(item.attrs, x)) >= 0)
     380             :         {
     381          72 :             appendStringInfo(&str, "%s%d", first ? "\"" : ", ", x);
     382          72 :             first = false;
     383             :         }
     384          32 :         appendStringInfo(&str, "\": %d", (int) item.ndistinct);
     385             :     }
     386             : 
     387           8 :     appendStringInfoChar(&str, '}');
     388             : 
     389           8 :     PG_RETURN_CSTRING(str.data);
     390             : }
     391             : 
     392             : /*
     393             :  * pg_ndistinct_recv
     394             :  *      binary input routine for type pg_ndistinct
     395             :  */
     396             : Datum
     397           0 : pg_ndistinct_recv(PG_FUNCTION_ARGS)
     398             : {
     399           0 :     ereport(ERROR,
     400             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     401             :              errmsg("cannot accept a value of type %s", "pg_ndistinct")));
     402             : 
     403             :     PG_RETURN_VOID();           /* keep compiler quiet */
     404             : }
     405             : 
     406             : /*
     407             :  * pg_ndistinct_send
     408             :  *      binary output routine for type pg_ndistinct
     409             :  *
     410             :  * n-distinct is serialized into a bytea value, so let's send that.
     411             :  */
     412             : Datum
     413           0 : pg_ndistinct_send(PG_FUNCTION_ARGS)
     414             : {
     415           0 :     return byteasend(fcinfo);
     416             : }
     417             : 
     418             : /*
     419             :  * ndistinct_for_combination
     420             :  *      Estimates number of distinct values in a combination of columns.
     421             :  *
     422             :  * This uses the same ndistinct estimator as compute_scalar_stats() in
     423             :  * ANALYZE, i.e.,
     424             :  *      n*d / (n - f1 + f1*n/N)
     425             :  *
     426             :  * except that instead of values in a single column we are dealing with
     427             :  * combination of multiple columns.
     428             :  */
     429             : static double
     430          40 : ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
     431             :                           VacAttrStats **stats, int k, int *combination)
     432             : {
     433             :     int         i,
     434             :                 j;
     435             :     int         f1,
     436             :                 cnt,
     437             :                 d;
     438             :     bool       *isnull;
     439             :     Datum      *values;
     440             :     SortItem   *items;
     441             :     MultiSortSupport mss;
     442             : 
     443          40 :     mss = multi_sort_init(k);
     444             : 
     445             :     /*
     446             :      * In order to determine the number of distinct elements, create separate
     447             :      * values[]/isnull[] arrays with all the data we have, then sort them
     448             :      * using the specified column combination as dimensions.  We could try to
     449             :      * sort in place, but it'd probably be more complex and bug-prone.
     450             :      */
     451          40 :     items = (SortItem *) palloc(numrows * sizeof(SortItem));
     452          40 :     values = (Datum *) palloc0(sizeof(Datum) * numrows * k);
     453          40 :     isnull = (bool *) palloc0(sizeof(bool) * numrows * k);
     454             : 
     455      100044 :     for (i = 0; i < numrows; i++)
     456             :     {
     457      100004 :         items[i].values = &values[i * k];
     458      100004 :         items[i].isnull = &isnull[i * k];
     459             :     }
     460             : 
     461             :     /*
     462             :      * For each dimension, set up sort-support and fill in the values from the
     463             :      * sample data.
     464             :      *
     465             :      * We use the column data types' default sort operators and collations;
     466             :      * perhaps at some point it'd be worth using column-specific collations?
     467             :      */
     468         128 :     for (i = 0; i < k; i++)
     469             :     {
     470          88 :         VacAttrStats *colstat = stats[combination[i]];
     471             :         TypeCacheEntry *type;
     472             : 
     473          88 :         type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
     474          88 :         if (type->lt_opr == InvalidOid) /* shouldn't happen */
     475           0 :             elog(ERROR, "cache lookup failed for ordering operator for type %u",
     476             :                  colstat->attrtypid);
     477             : 
     478             :         /* prepare the sort function for this dimension */
     479          88 :         multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
     480             : 
     481             :         /* accumulate all the data for this dimension into the arrays */
     482      224096 :         for (j = 0; j < numrows; j++)
     483             :         {
     484      448016 :             items[j].values[i] =
     485      224008 :                 heap_getattr(rows[j],
     486             :                              colstat->attr->attnum,
     487             :                              colstat->tupDesc,
     488             :                              &items[j].isnull[i]);
     489             :         }
     490             :     }
     491             : 
     492             :     /* We can sort the array now ... */
     493          40 :     qsort_arg((void *) items, numrows, sizeof(SortItem),
     494             :               multi_sort_compare, mss);
     495             : 
     496             :     /* ... and count the number of distinct combinations */
     497             : 
     498          40 :     f1 = 0;
     499          40 :     cnt = 1;
     500          40 :     d = 1;
     501      100004 :     for (i = 1; i < numrows; i++)
     502             :     {
     503       99964 :         if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
     504             :         {
     505       44068 :             if (cnt == 1)
     506       24388 :                 f1 += 1;
     507             : 
     508       44068 :             d++;
     509       44068 :             cnt = 0;
     510             :         }
     511             : 
     512       99964 :         cnt += 1;
     513             :     }
     514             : 
     515          40 :     if (cnt == 1)
     516          32 :         f1 += 1;
     517             : 
     518          40 :     return estimate_ndistinct(totalrows, numrows, d, f1);
     519             : }
     520             : 
     521             : /* The Duj1 estimator (already used in analyze.c). */
     522             : static double
     523          40 : estimate_ndistinct(double totalrows, int numrows, int d, int f1)
     524             : {
     525             :     double      numer,
     526             :                 denom,
     527             :                 ndistinct;
     528             : 
     529          40 :     numer = (double) numrows * (double) d;
     530             : 
     531          80 :     denom = (double) (numrows - f1) +
     532          40 :         (double) f1 * (double) numrows / totalrows;
     533             : 
     534          40 :     ndistinct = numer / denom;
     535             : 
     536             :     /* Clamp to sane range in case of roundoff error */
     537          40 :     if (ndistinct < (double) d)
     538           0 :         ndistinct = (double) d;
     539             : 
     540          40 :     if (ndistinct > totalrows)
     541           0 :         ndistinct = totalrows;
     542             : 
     543          40 :     return floor(ndistinct + 0.5);
     544             : }
     545             : 
     546             : /*
     547             :  * n_choose_k
     548             :  *      computes binomial coefficients using an algorithm that is both
     549             :  *      efficient and prevents overflows
     550             :  */
     551             : static int
     552          24 : n_choose_k(int n, int k)
     553             : {
     554             :     int         d,
     555             :                 r;
     556             : 
     557             :     Assert((k > 0) && (n >= k));
     558             : 
     559             :     /* use symmetry of the binomial coefficients */
     560          24 :     k = Min(k, n - k);
     561             : 
     562          24 :     r = 1;
     563          32 :     for (d = 1; d <= k; ++d)
     564             :     {
     565           8 :         r *= n--;
     566           8 :         r /= d;
     567             :     }
     568             : 
     569          24 :     return r;
     570             : }
     571             : 
     572             : /*
     573             :  * num_combinations
     574             :  *      number of combinations, excluding single-value combinations
     575             :  */
     576             : static int
     577          16 : num_combinations(int n)
     578             : {
     579             :     int         k;
     580          16 :     int         ncombs = 1;
     581             : 
     582          56 :     for (k = 1; k <= n; k++)
     583          40 :         ncombs *= 2;
     584             : 
     585          16 :     ncombs -= (n + 1);
     586             : 
     587          16 :     return ncombs;
     588             : }
     589             : 
     590             : /*
     591             :  * generator_init
     592             :  *      initialize the generator of combinations
     593             :  *
     594             :  * The generator produces combinations of K elements in the interval (0..N).
     595             :  * We prebuild all the combinations in this method, which is simpler than
     596             :  * generating them on the fly.
     597             :  */
     598             : static CombinationGenerator *
     599          24 : generator_init(int n, int k)
     600             : {
     601             :     CombinationGenerator *state;
     602             : 
     603             :     Assert((n >= k) && (k > 0));
     604             : 
     605             :     /* allocate the generator state as a single chunk of memory */
     606          24 :     state = (CombinationGenerator *) palloc(sizeof(CombinationGenerator));
     607             : 
     608          24 :     state->ncombinations = n_choose_k(n, k);
     609             : 
     610             :     /* pre-allocate space for all combinations */
     611          24 :     state->combinations = (int *) palloc(sizeof(int) * k * state->ncombinations);
     612             : 
     613          24 :     state->current = 0;
     614          24 :     state->k = k;
     615          24 :     state->n = n;
     616             : 
     617             :     /* now actually pre-generate all the combinations of K elements */
     618          24 :     generate_combinations(state);
     619             : 
     620             :     /* make sure we got the expected number of combinations */
     621             :     Assert(state->current == state->ncombinations);
     622             : 
     623             :     /* reset the number, so we start with the first one */
     624          24 :     state->current = 0;
     625             : 
     626          24 :     return state;
     627             : }
     628             : 
     629             : /*
     630             :  * generator_next
     631             :  *      returns the next combination from the prebuilt list
     632             :  *
     633             :  * Returns a combination of K array indexes (0 .. N), as specified to
     634             :  * generator_init), or NULL when there are no more combination.
     635             :  */
     636             : static int *
     637          64 : generator_next(CombinationGenerator *state)
     638             : {
     639          64 :     if (state->current == state->ncombinations)
     640          24 :         return NULL;
     641             : 
     642          40 :     return &state->combinations[state->k * state->current++];
     643             : }
     644             : 
     645             : /*
     646             :  * generator_free
     647             :  *      free the internal state of the generator
     648             :  *
     649             :  * Releases the generator internal state (pre-built combinations).
     650             :  */
     651             : static void
     652          24 : generator_free(CombinationGenerator *state)
     653             : {
     654          24 :     pfree(state->combinations);
     655          24 :     pfree(state);
     656          24 : }
     657             : 
     658             : /*
     659             :  * generate_combinations_recurse
     660             :  *      given a prefix, generate all possible combinations
     661             :  *
     662             :  * Given a prefix (first few elements of the combination), generate following
     663             :  * elements recursively. We generate the combinations in lexicographic order,
     664             :  * which eliminates permutations of the same combination.
     665             :  */
     666             : static void
     667         152 : generate_combinations_recurse(CombinationGenerator *state,
     668             :                               int index, int start, int *current)
     669             : {
     670             :     /* If we haven't filled all the elements, simply recurse. */
     671         152 :     if (index < state->k)
     672             :     {
     673             :         int         i;
     674             : 
     675             :         /*
     676             :          * The values have to be in ascending order, so make sure we start
     677             :          * with the value passed by parameter.
     678             :          */
     679             : 
     680         240 :         for (i = start; i < state->n; i++)
     681             :         {
     682         128 :             current[index] = i;
     683         128 :             generate_combinations_recurse(state, (index + 1), (i + 1), current);
     684             :         }
     685             : 
     686         112 :         return;
     687             :     }
     688             :     else
     689             :     {
     690             :         /* we got a valid combination, add it to the array */
     691          40 :         memcpy(&state->combinations[(state->k * state->current)],
     692          40 :                current, state->k * sizeof(int));
     693          40 :         state->current++;
     694             :     }
     695             : }
     696             : 
     697             : /*
     698             :  * generate_combinations
     699             :  *      generate all k-combinations of N elements
     700             :  */
     701             : static void
     702          24 : generate_combinations(CombinationGenerator *state)
     703             : {
     704          24 :     int        *current = (int *) palloc0(sizeof(int) * state->k);
     705             : 
     706          24 :     generate_combinations_recurse(state, 0, 0, current);
     707             : 
     708          24 :     pfree(current);
     709          24 : }

Generated by: LCOV version 1.13