LCOV - code coverage report
Current view: top level - src/backend/statistics - mvdistinct.c (source / functions) Hit Total Coverage
Test: PostgreSQL 16beta1 Lines: 182 199 91.5 %
Date: 2023-06-01 13:12:25 Functions: 14 17 82.4 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * mvdistinct.c
       4             :  *    POSTGRES multivariate ndistinct coefficients
       5             :  *
       6             :  * Estimating number of groups in a combination of columns (e.g. for GROUP BY)
       7             :  * is tricky, and the estimation error is often significant.
       8             : 
       9             :  * The multivariate ndistinct coefficients address this by storing ndistinct
      10             :  * estimates for combinations of the user-specified columns.  So for example
      11             :  * given a statistics object on three columns (a,b,c), this module estimates
      12             :  * and stores n-distinct for (a,b), (a,c), (b,c) and (a,b,c).  The per-column
      13             :  * estimates are already available in pg_statistic.
      14             :  *
      15             :  *
      16             :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      17             :  * Portions Copyright (c) 1994, Regents of the University of California
      18             :  *
      19             :  * IDENTIFICATION
      20             :  *    src/backend/statistics/mvdistinct.c
      21             :  *
      22             :  *-------------------------------------------------------------------------
      23             :  */
      24             : #include "postgres.h"
      25             : 
      26             : #include <math.h>
      27             : 
      28             : #include "access/htup_details.h"
      29             : #include "catalog/pg_statistic_ext.h"
      30             : #include "catalog/pg_statistic_ext_data.h"
      31             : #include "lib/stringinfo.h"
      32             : #include "statistics/extended_stats_internal.h"
      33             : #include "statistics/statistics.h"
      34             : #include "utils/fmgrprotos.h"
      35             : #include "utils/lsyscache.h"
      36             : #include "utils/syscache.h"
      37             : #include "utils/typcache.h"
      38             : 
      39             : static double ndistinct_for_combination(double totalrows, StatsBuildData *data,
      40             :                                         int k, int *combination);
      41             : static double estimate_ndistinct(double totalrows, int numrows, int d, int f1);
      42             : static int  n_choose_k(int n, int k);
      43             : static int  num_combinations(int n);
      44             : 
      45             : /* size of the struct header fields (magic, type, nitems) */
      46             : #define SizeOfHeader        (3 * sizeof(uint32))
      47             : 
      48             : /* size of a serialized ndistinct item (coefficient, natts, atts) */
      49             : #define SizeOfItem(natts) \
      50             :     (sizeof(double) + sizeof(int) + (natts) * sizeof(AttrNumber))
      51             : 
      52             : /* minimal size of a ndistinct item (with two attributes) */
      53             : #define MinSizeOfItem   SizeOfItem(2)
      54             : 
      55             : /* minimal size of mvndistinct, when all items are minimal */
      56             : #define MinSizeOfItems(nitems)  \
      57             :     (SizeOfHeader + (nitems) * MinSizeOfItem)
      58             : 
      59             : /* Combination generator API */
      60             : 
      61             : /* internal state for generator of k-combinations of n elements */
      62             : typedef struct CombinationGenerator
      63             : {
      64             :     int         k;              /* size of the combination */
      65             :     int         n;              /* total number of elements */
      66             :     int         current;        /* index of the next combination to return */
      67             :     int         ncombinations;  /* number of combinations (size of array) */
      68             :     int        *combinations;   /* array of pre-built combinations */
      69             : } CombinationGenerator;
      70             : 
      71             : static CombinationGenerator *generator_init(int n, int k);
      72             : static void generator_free(CombinationGenerator *state);
      73             : static int *generator_next(CombinationGenerator *state);
      74             : static void generate_combinations(CombinationGenerator *state);
      75             : 
      76             : 
      77             : /*
      78             :  * statext_ndistinct_build
      79             :  *      Compute ndistinct coefficient for the combination of attributes.
      80             :  *
      81             :  * This computes the ndistinct estimate using the same estimator used
      82             :  * in analyze.c and then computes the coefficient.
      83             :  *
      84             :  * To handle expressions easily, we treat them as system attributes with
      85             :  * negative attnums, and offset everything by number of expressions to
      86             :  * allow using Bitmapsets.
      87             :  */
      88             : MVNDistinct *
      89         156 : statext_ndistinct_build(double totalrows, StatsBuildData *data)
      90             : {
      91             :     MVNDistinct *result;
      92             :     int         k;
      93             :     int         itemcnt;
      94         156 :     int         numattrs = data->nattnums;
      95         156 :     int         numcombs = num_combinations(numattrs);
      96             : 
      97         156 :     result = palloc(offsetof(MVNDistinct, items) +
      98         156 :                     numcombs * sizeof(MVNDistinctItem));
      99         156 :     result->magic = STATS_NDISTINCT_MAGIC;
     100         156 :     result->type = STATS_NDISTINCT_TYPE_BASIC;
     101         156 :     result->nitems = numcombs;
     102             : 
     103         156 :     itemcnt = 0;
     104         396 :     for (k = 2; k <= numattrs; k++)
     105             :     {
     106             :         int        *combination;
     107             :         CombinationGenerator *generator;
     108             : 
     109             :         /* generate combinations of K out of N elements */
     110         240 :         generator = generator_init(numattrs, k);
     111             : 
     112         744 :         while ((combination = generator_next(generator)))
     113             :         {
     114         504 :             MVNDistinctItem *item = &result->items[itemcnt];
     115             :             int         j;
     116             : 
     117         504 :             item->attributes = palloc(sizeof(AttrNumber) * k);
     118         504 :             item->nattributes = k;
     119             : 
     120             :             /* translate the indexes to attnums */
     121        1692 :             for (j = 0; j < k; j++)
     122             :             {
     123        1188 :                 item->attributes[j] = data->attnums[combination[j]];
     124             : 
     125             :                 Assert(AttributeNumberIsValid(item->attributes[j]));
     126             :             }
     127             : 
     128         504 :             item->ndistinct =
     129         504 :                 ndistinct_for_combination(totalrows, data, k, combination);
     130             : 
     131         504 :             itemcnt++;
     132             :             Assert(itemcnt <= result->nitems);
     133             :         }
     134             : 
     135         240 :         generator_free(generator);
     136             :     }
     137             : 
     138             :     /* must consume exactly the whole output array */
     139             :     Assert(itemcnt == result->nitems);
     140             : 
     141         156 :     return result;
     142             : }
     143             : 
     144             : /*
     145             :  * statext_ndistinct_load
     146             :  *      Load the ndistinct value for the indicated pg_statistic_ext tuple
     147             :  */
     148             : MVNDistinct *
     149         402 : statext_ndistinct_load(Oid mvoid, bool inh)
     150             : {
     151             :     MVNDistinct *result;
     152             :     bool        isnull;
     153             :     Datum       ndist;
     154             :     HeapTuple   htup;
     155             : 
     156         402 :     htup = SearchSysCache2(STATEXTDATASTXOID,
     157             :                            ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
     158         402 :     if (!HeapTupleIsValid(htup))
     159           0 :         elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
     160             : 
     161         402 :     ndist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
     162             :                             Anum_pg_statistic_ext_data_stxdndistinct, &isnull);
     163         402 :     if (isnull)
     164           0 :         elog(ERROR,
     165             :              "requested statistics kind \"%c\" is not yet built for statistics object %u",
     166             :              STATS_EXT_NDISTINCT, mvoid);
     167             : 
     168         402 :     result = statext_ndistinct_deserialize(DatumGetByteaPP(ndist));
     169             : 
     170         402 :     ReleaseSysCache(htup);
     171             : 
     172         402 :     return result;
     173             : }
     174             : 
     175             : /*
     176             :  * statext_ndistinct_serialize
     177             :  *      serialize ndistinct to the on-disk bytea format
     178             :  */
     179             : bytea *
     180         156 : statext_ndistinct_serialize(MVNDistinct *ndistinct)
     181             : {
     182             :     int         i;
     183             :     bytea      *output;
     184             :     char       *tmp;
     185             :     Size        len;
     186             : 
     187             :     Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
     188             :     Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);
     189             : 
     190             :     /*
     191             :      * Base size is size of scalar fields in the struct, plus one base struct
     192             :      * for each item, including number of items for each.
     193             :      */
     194         156 :     len = VARHDRSZ + SizeOfHeader;
     195             : 
     196             :     /* and also include space for the actual attribute numbers */
     197         660 :     for (i = 0; i < ndistinct->nitems; i++)
     198             :     {
     199             :         int         nmembers;
     200             : 
     201         504 :         nmembers = ndistinct->items[i].nattributes;
     202             :         Assert(nmembers >= 2);
     203             : 
     204         504 :         len += SizeOfItem(nmembers);
     205             :     }
     206             : 
     207         156 :     output = (bytea *) palloc(len);
     208         156 :     SET_VARSIZE(output, len);
     209             : 
     210         156 :     tmp = VARDATA(output);
     211             : 
     212             :     /* Store the base struct values (magic, type, nitems) */
     213         156 :     memcpy(tmp, &ndistinct->magic, sizeof(uint32));
     214         156 :     tmp += sizeof(uint32);
     215         156 :     memcpy(tmp, &ndistinct->type, sizeof(uint32));
     216         156 :     tmp += sizeof(uint32);
     217         156 :     memcpy(tmp, &ndistinct->nitems, sizeof(uint32));
     218         156 :     tmp += sizeof(uint32);
     219             : 
     220             :     /*
     221             :      * store number of attributes and attribute numbers for each entry
     222             :      */
     223         660 :     for (i = 0; i < ndistinct->nitems; i++)
     224             :     {
     225         504 :         MVNDistinctItem item = ndistinct->items[i];
     226         504 :         int         nmembers = item.nattributes;
     227             : 
     228         504 :         memcpy(tmp, &item.ndistinct, sizeof(double));
     229         504 :         tmp += sizeof(double);
     230         504 :         memcpy(tmp, &nmembers, sizeof(int));
     231         504 :         tmp += sizeof(int);
     232             : 
     233         504 :         memcpy(tmp, item.attributes, sizeof(AttrNumber) * nmembers);
     234         504 :         tmp += nmembers * sizeof(AttrNumber);
     235             : 
     236             :         /* protect against overflows */
     237             :         Assert(tmp <= ((char *) output + len));
     238             :     }
     239             : 
     240             :     /* check we used exactly the expected space */
     241             :     Assert(tmp == ((char *) output + len));
     242             : 
     243         156 :     return output;
     244             : }
     245             : 
     246             : /*
     247             :  * statext_ndistinct_deserialize
     248             :  *      Read an on-disk bytea format MVNDistinct to in-memory format
     249             :  */
     250             : MVNDistinct *
     251         426 : statext_ndistinct_deserialize(bytea *data)
     252             : {
     253             :     int         i;
     254             :     Size        minimum_size;
     255             :     MVNDistinct ndist;
     256             :     MVNDistinct *ndistinct;
     257             :     char       *tmp;
     258             : 
     259         426 :     if (data == NULL)
     260           0 :         return NULL;
     261             : 
     262             :     /* we expect at least the basic fields of MVNDistinct struct */
     263         426 :     if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
     264           0 :         elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
     265             :              VARSIZE_ANY_EXHDR(data), SizeOfHeader);
     266             : 
     267             :     /* initialize pointer to the data part (skip the varlena header) */
     268         426 :     tmp = VARDATA_ANY(data);
     269             : 
     270             :     /* read the header fields and perform basic sanity checks */
     271         426 :     memcpy(&ndist.magic, tmp, sizeof(uint32));
     272         426 :     tmp += sizeof(uint32);
     273         426 :     memcpy(&ndist.type, tmp, sizeof(uint32));
     274         426 :     tmp += sizeof(uint32);
     275         426 :     memcpy(&ndist.nitems, tmp, sizeof(uint32));
     276         426 :     tmp += sizeof(uint32);
     277             : 
     278         426 :     if (ndist.magic != STATS_NDISTINCT_MAGIC)
     279           0 :         elog(ERROR, "invalid ndistinct magic %08x (expected %08x)",
     280             :              ndist.magic, STATS_NDISTINCT_MAGIC);
     281         426 :     if (ndist.type != STATS_NDISTINCT_TYPE_BASIC)
     282           0 :         elog(ERROR, "invalid ndistinct type %d (expected %d)",
     283             :              ndist.type, STATS_NDISTINCT_TYPE_BASIC);
     284         426 :     if (ndist.nitems == 0)
     285           0 :         elog(ERROR, "invalid zero-length item array in MVNDistinct");
     286             : 
     287             :     /* what minimum bytea size do we expect for those parameters */
     288         426 :     minimum_size = MinSizeOfItems(ndist.nitems);
     289         426 :     if (VARSIZE_ANY_EXHDR(data) < minimum_size)
     290           0 :         elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
     291             :              VARSIZE_ANY_EXHDR(data), minimum_size);
     292             : 
     293             :     /*
     294             :      * Allocate space for the ndistinct items (no space for each item's
     295             :      * attnos: those live in bitmapsets allocated separately)
     296             :      */
     297         426 :     ndistinct = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) +
     298         426 :                         (ndist.nitems * sizeof(MVNDistinctItem)));
     299         426 :     ndistinct->magic = ndist.magic;
     300         426 :     ndistinct->type = ndist.type;
     301         426 :     ndistinct->nitems = ndist.nitems;
     302             : 
     303        2310 :     for (i = 0; i < ndistinct->nitems; i++)
     304             :     {
     305        1884 :         MVNDistinctItem *item = &ndistinct->items[i];
     306             : 
     307             :         /* ndistinct value */
     308        1884 :         memcpy(&item->ndistinct, tmp, sizeof(double));
     309        1884 :         tmp += sizeof(double);
     310             : 
     311             :         /* number of attributes */
     312        1884 :         memcpy(&item->nattributes, tmp, sizeof(int));
     313        1884 :         tmp += sizeof(int);
     314             :         Assert((item->nattributes >= 2) && (item->nattributes <= STATS_MAX_DIMENSIONS));
     315             : 
     316             :         item->attributes
     317        1884 :             = (AttrNumber *) palloc(item->nattributes * sizeof(AttrNumber));
     318             : 
     319        1884 :         memcpy(item->attributes, tmp, sizeof(AttrNumber) * item->nattributes);
     320        1884 :         tmp += sizeof(AttrNumber) * item->nattributes;
     321             : 
     322             :         /* still within the bytea */
     323             :         Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
     324             :     }
     325             : 
     326             :     /* we should have consumed the whole bytea exactly */
     327             :     Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
     328             : 
     329         426 :     return ndistinct;
     330             : }
     331             : 
     332             : /*
     333             :  * pg_ndistinct_in
     334             :  *      input routine for type pg_ndistinct
     335             :  *
     336             :  * pg_ndistinct is real enough to be a table column, but it has no
     337             :  * operations of its own, and disallows input (just like pg_node_tree).
     338             :  */
     339             : Datum
     340           0 : pg_ndistinct_in(PG_FUNCTION_ARGS)
     341             : {
     342           0 :     ereport(ERROR,
     343             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     344             :              errmsg("cannot accept a value of type %s", "pg_ndistinct")));
     345             : 
     346             :     PG_RETURN_VOID();           /* keep compiler quiet */
     347             : }
     348             : 
     349             : /*
     350             :  * pg_ndistinct
     351             :  *      output routine for type pg_ndistinct
     352             :  *
     353             :  * Produces a human-readable representation of the value.
     354             :  */
     355             : Datum
     356          24 : pg_ndistinct_out(PG_FUNCTION_ARGS)
     357             : {
     358          24 :     bytea      *data = PG_GETARG_BYTEA_PP(0);
     359          24 :     MVNDistinct *ndist = statext_ndistinct_deserialize(data);
     360             :     int         i;
     361             :     StringInfoData str;
     362             : 
     363          24 :     initStringInfo(&str);
     364          24 :     appendStringInfoChar(&str, '{');
     365             : 
     366         120 :     for (i = 0; i < ndist->nitems; i++)
     367             :     {
     368             :         int         j;
     369          96 :         MVNDistinctItem item = ndist->items[i];
     370             : 
     371          96 :         if (i > 0)
     372          72 :             appendStringInfoString(&str, ", ");
     373             : 
     374         312 :         for (j = 0; j < item.nattributes; j++)
     375             :         {
     376         216 :             AttrNumber  attnum = item.attributes[j];
     377             : 
     378         216 :             appendStringInfo(&str, "%s%d", (j == 0) ? "\"" : ", ", attnum);
     379             :         }
     380          96 :         appendStringInfo(&str, "\": %d", (int) item.ndistinct);
     381             :     }
     382             : 
     383          24 :     appendStringInfoChar(&str, '}');
     384             : 
     385          24 :     PG_RETURN_CSTRING(str.data);
     386             : }
     387             : 
     388             : /*
     389             :  * pg_ndistinct_recv
     390             :  *      binary input routine for type pg_ndistinct
     391             :  */
     392             : Datum
     393           0 : pg_ndistinct_recv(PG_FUNCTION_ARGS)
     394             : {
     395           0 :     ereport(ERROR,
     396             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     397             :              errmsg("cannot accept a value of type %s", "pg_ndistinct")));
     398             : 
     399             :     PG_RETURN_VOID();           /* keep compiler quiet */
     400             : }
     401             : 
     402             : /*
     403             :  * pg_ndistinct_send
     404             :  *      binary output routine for type pg_ndistinct
     405             :  *
     406             :  * n-distinct is serialized into a bytea value, so let's send that.
     407             :  */
     408             : Datum
     409           0 : pg_ndistinct_send(PG_FUNCTION_ARGS)
     410             : {
     411           0 :     return byteasend(fcinfo);
     412             : }
     413             : 
     414             : /*
     415             :  * ndistinct_for_combination
     416             :  *      Estimates number of distinct values in a combination of columns.
     417             :  *
     418             :  * This uses the same ndistinct estimator as compute_scalar_stats() in
     419             :  * ANALYZE, i.e.,
     420             :  *      n*d / (n - f1 + f1*n/N)
     421             :  *
     422             :  * except that instead of values in a single column we are dealing with
     423             :  * combination of multiple columns.
     424             :  */
     425             : static double
     426         504 : ndistinct_for_combination(double totalrows, StatsBuildData *data,
     427             :                           int k, int *combination)
     428             : {
     429             :     int         i,
     430             :                 j;
     431             :     int         f1,
     432             :                 cnt,
     433             :                 d;
     434             :     bool       *isnull;
     435             :     Datum      *values;
     436             :     SortItem   *items;
     437             :     MultiSortSupport mss;
     438         504 :     int         numrows = data->numrows;
     439             : 
     440         504 :     mss = multi_sort_init(k);
     441             : 
     442             :     /*
     443             :      * In order to determine the number of distinct elements, create separate
     444             :      * values[]/isnull[] arrays with all the data we have, then sort them
     445             :      * using the specified column combination as dimensions.  We could try to
     446             :      * sort in place, but it'd probably be more complex and bug-prone.
     447             :      */
     448         504 :     items = (SortItem *) palloc(numrows * sizeof(SortItem));
     449         504 :     values = (Datum *) palloc0(sizeof(Datum) * numrows * k);
     450         504 :     isnull = (bool *) palloc0(sizeof(bool) * numrows * k);
     451             : 
     452      483504 :     for (i = 0; i < numrows; i++)
     453             :     {
     454      483000 :         items[i].values = &values[i * k];
     455      483000 :         items[i].isnull = &isnull[i * k];
     456             :     }
     457             : 
     458             :     /*
     459             :      * For each dimension, set up sort-support and fill in the values from the
     460             :      * sample data.
     461             :      *
     462             :      * We use the column data types' default sort operators and collations;
     463             :      * perhaps at some point it'd be worth using column-specific collations?
     464             :      */
     465        1692 :     for (i = 0; i < k; i++)
     466             :     {
     467             :         Oid         typid;
     468             :         TypeCacheEntry *type;
     469        1188 :         Oid         collid = InvalidOid;
     470        1188 :         VacAttrStats *colstat = data->stats[combination[i]];
     471             : 
     472        1188 :         typid = colstat->attrtypid;
     473        1188 :         collid = colstat->attrcollid;
     474             : 
     475        1188 :         type = lookup_type_cache(typid, TYPECACHE_LT_OPR);
     476        1188 :         if (type->lt_opr == InvalidOid) /* shouldn't happen */
     477           0 :             elog(ERROR, "cache lookup failed for ordering operator for type %u",
     478             :                  typid);
     479             : 
     480             :         /* prepare the sort function for this dimension */
     481        1188 :         multi_sort_add_dimension(mss, i, type->lt_opr, collid);
     482             : 
     483             :         /* accumulate all the data for this dimension into the arrays */
     484     1147182 :         for (j = 0; j < numrows; j++)
     485             :         {
     486     1145994 :             items[j].values[i] = data->values[combination[i]][j];
     487     1145994 :             items[j].isnull[i] = data->nulls[combination[i]][j];
     488             :         }
     489             :     }
     490             : 
     491             :     /* We can sort the array now ... */
     492         504 :     qsort_interruptible(items, numrows, sizeof(SortItem),
     493             :                         multi_sort_compare, mss);
     494             : 
     495             :     /* ... and count the number of distinct combinations */
     496             : 
     497         504 :     f1 = 0;
     498         504 :     cnt = 1;
     499         504 :     d = 1;
     500      483000 :     for (i = 1; i < numrows; i++)
     501             :     {
     502      482496 :         if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
     503             :         {
     504       46686 :             if (cnt == 1)
     505       25818 :                 f1 += 1;
     506             : 
     507       46686 :             d++;
     508       46686 :             cnt = 0;
     509             :         }
     510             : 
     511      482496 :         cnt += 1;
     512             :     }
     513             : 
     514         504 :     if (cnt == 1)
     515          90 :         f1 += 1;
     516             : 
     517         504 :     return estimate_ndistinct(totalrows, numrows, d, f1);
     518             : }
     519             : 
     520             : /* The Duj1 estimator (already used in analyze.c). */
     521             : static double
     522         504 : estimate_ndistinct(double totalrows, int numrows, int d, int f1)
     523             : {
     524             :     double      numer,
     525             :                 denom,
     526             :                 ndistinct;
     527             : 
     528         504 :     numer = (double) numrows * (double) d;
     529             : 
     530         504 :     denom = (double) (numrows - f1) +
     531         504 :         (double) f1 * (double) numrows / totalrows;
     532             : 
     533         504 :     ndistinct = numer / denom;
     534             : 
     535             :     /* Clamp to sane range in case of roundoff error */
     536         504 :     if (ndistinct < (double) d)
     537           0 :         ndistinct = (double) d;
     538             : 
     539         504 :     if (ndistinct > totalrows)
     540           0 :         ndistinct = totalrows;
     541             : 
     542         504 :     return floor(ndistinct + 0.5);
     543             : }
     544             : 
     545             : /*
     546             :  * n_choose_k
     547             :  *      computes binomial coefficients using an algorithm that is both
     548             :  *      efficient and prevents overflows
     549             :  */
     550             : static int
     551         240 : n_choose_k(int n, int k)
     552             : {
     553             :     int         d,
     554             :                 r;
     555             : 
     556             :     Assert((k > 0) && (n >= k));
     557             : 
     558             :     /* use symmetry of the binomial coefficients */
     559         240 :     k = Min(k, n - k);
     560             : 
     561         240 :     r = 1;
     562         348 :     for (d = 1; d <= k; ++d)
     563             :     {
     564         108 :         r *= n--;
     565         108 :         r /= d;
     566             :     }
     567             : 
     568         240 :     return r;
     569             : }
     570             : 
     571             : /*
     572             :  * num_combinations
     573             :  *      number of combinations, excluding single-value combinations
     574             :  */
     575             : static int
     576         156 : num_combinations(int n)
     577             : {
     578         156 :     return (1 << n) - (n + 1);
     579             : }
     580             : 
     581             : /*
     582             :  * generator_init
     583             :  *      initialize the generator of combinations
     584             :  *
     585             :  * The generator produces combinations of K elements in the interval (0..N).
     586             :  * We prebuild all the combinations in this method, which is simpler than
     587             :  * generating them on the fly.
     588             :  */
     589             : static CombinationGenerator *
     590         240 : generator_init(int n, int k)
     591             : {
     592             :     CombinationGenerator *state;
     593             : 
     594             :     Assert((n >= k) && (k > 0));
     595             : 
     596             :     /* allocate the generator state as a single chunk of memory */
     597         240 :     state = (CombinationGenerator *) palloc(sizeof(CombinationGenerator));
     598             : 
     599         240 :     state->ncombinations = n_choose_k(n, k);
     600             : 
     601             :     /* pre-allocate space for all combinations */
     602         240 :     state->combinations = (int *) palloc(sizeof(int) * k * state->ncombinations);
     603             : 
     604         240 :     state->current = 0;
     605         240 :     state->k = k;
     606         240 :     state->n = n;
     607             : 
     608             :     /* now actually pre-generate all the combinations of K elements */
     609         240 :     generate_combinations(state);
     610             : 
     611             :     /* make sure we got the expected number of combinations */
     612             :     Assert(state->current == state->ncombinations);
     613             : 
     614             :     /* reset the number, so we start with the first one */
     615         240 :     state->current = 0;
     616             : 
     617         240 :     return state;
     618             : }
     619             : 
     620             : /*
     621             :  * generator_next
     622             :  *      returns the next combination from the prebuilt list
     623             :  *
     624             :  * Returns a combination of K array indexes (0 .. N), as specified to
     625             :  * generator_init), or NULL when there are no more combination.
     626             :  */
     627             : static int *
     628         744 : generator_next(CombinationGenerator *state)
     629             : {
     630         744 :     if (state->current == state->ncombinations)
     631         240 :         return NULL;
     632             : 
     633         504 :     return &state->combinations[state->k * state->current++];
     634             : }
     635             : 
     636             : /*
     637             :  * generator_free
     638             :  *      free the internal state of the generator
     639             :  *
     640             :  * Releases the generator internal state (pre-built combinations).
     641             :  */
     642             : static void
     643         240 : generator_free(CombinationGenerator *state)
     644             : {
     645         240 :     pfree(state->combinations);
     646         240 :     pfree(state);
     647         240 : }
     648             : 
     649             : /*
     650             :  * generate_combinations_recurse
     651             :  *      given a prefix, generate all possible combinations
     652             :  *
     653             :  * Given a prefix (first few elements of the combination), generate following
     654             :  * elements recursively. We generate the combinations in lexicographic order,
     655             :  * which eliminates permutations of the same combination.
     656             :  */
     657             : static void
     658        1932 : generate_combinations_recurse(CombinationGenerator *state,
     659             :                               int index, int start, int *current)
     660             : {
     661             :     /* If we haven't filled all the elements, simply recurse. */
     662        1932 :     if (index < state->k)
     663             :     {
     664             :         int         i;
     665             : 
     666             :         /*
     667             :          * The values have to be in ascending order, so make sure we start
     668             :          * with the value passed by parameter.
     669             :          */
     670             : 
     671        3120 :         for (i = start; i < state->n; i++)
     672             :         {
     673        1692 :             current[index] = i;
     674        1692 :             generate_combinations_recurse(state, (index + 1), (i + 1), current);
     675             :         }
     676             : 
     677        1428 :         return;
     678             :     }
     679             :     else
     680             :     {
     681             :         /* we got a valid combination, add it to the array */
     682         504 :         memcpy(&state->combinations[(state->k * state->current)],
     683         504 :                current, state->k * sizeof(int));
     684         504 :         state->current++;
     685             :     }
     686             : }
     687             : 
     688             : /*
     689             :  * generate_combinations
     690             :  *      generate all k-combinations of N elements
     691             :  */
     692             : static void
     693         240 : generate_combinations(CombinationGenerator *state)
     694             : {
     695         240 :     int        *current = (int *) palloc0(sizeof(int) * state->k);
     696             : 
     697         240 :     generate_combinations_recurse(state, 0, 0, current);
     698             : 
     699         240 :     pfree(current);
     700         240 : }

Generated by: LCOV version 1.14