LCOV - code coverage report
Current view: top level - src/backend/statistics - dependencies.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13beta1 Lines: 336 374 89.8 %
Date: 2020-05-29 01:06:25 Functions: 16 19 84.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * dependencies.c
       4             :  *    POSTGRES functional dependencies
       5             :  *
       6             :  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/statistics/dependencies.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #include "postgres.h"
      15             : 
      16             : #include "access/htup_details.h"
      17             : #include "access/sysattr.h"
      18             : #include "catalog/pg_operator.h"
      19             : #include "catalog/pg_statistic_ext.h"
      20             : #include "catalog/pg_statistic_ext_data.h"
      21             : #include "lib/stringinfo.h"
      22             : #include "nodes/nodeFuncs.h"
      23             : #include "nodes/nodes.h"
      24             : #include "nodes/pathnodes.h"
      25             : #include "optimizer/clauses.h"
      26             : #include "optimizer/optimizer.h"
      27             : #include "statistics/extended_stats_internal.h"
      28             : #include "statistics/statistics.h"
      29             : #include "utils/bytea.h"
      30             : #include "utils/fmgroids.h"
      31             : #include "utils/fmgrprotos.h"
      32             : #include "utils/lsyscache.h"
      33             : #include "utils/selfuncs.h"
      34             : #include "utils/syscache.h"
      35             : #include "utils/typcache.h"
      36             : 
      37             : /* size of the struct header fields (magic, type, ndeps) */
      38             : #define SizeOfHeader        (3 * sizeof(uint32))
      39             : 
      40             : /* size of a serialized dependency (degree, natts, atts) */
      41             : #define SizeOfItem(natts) \
      42             :     (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
      43             : 
      44             : /* minimal size of a dependency (with two attributes) */
      45             : #define MinSizeOfItem   SizeOfItem(2)
      46             : 
      47             : /* minimal size of dependencies, when all deps are minimal */
      48             : #define MinSizeOfItems(ndeps) \
      49             :     (SizeOfHeader + (ndeps) * MinSizeOfItem)
      50             : 
      51             : /*
      52             :  * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
      53             :  * k-permutations of n elements, except that the order does not matter for the
      54             :  * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
      55             :  */
      56             : typedef struct DependencyGeneratorData
      57             : {
      58             :     int         k;              /* size of the dependency */
      59             :     int         n;              /* number of possible attributes */
      60             :     int         current;        /* next dependency to return (index) */
      61             :     AttrNumber  ndependencies;  /* number of dependencies generated */
      62             :     AttrNumber *dependencies;   /* array of pre-generated dependencies  */
      63             : } DependencyGeneratorData;
      64             : 
      65             : typedef DependencyGeneratorData *DependencyGenerator;
      66             : 
      67             : static void generate_dependencies_recurse(DependencyGenerator state,
      68             :                                           int index, AttrNumber start, AttrNumber *current);
      69             : static void generate_dependencies(DependencyGenerator state);
      70             : static DependencyGenerator DependencyGenerator_init(int n, int k);
      71             : static void DependencyGenerator_free(DependencyGenerator state);
      72             : static AttrNumber *DependencyGenerator_next(DependencyGenerator state);
      73             : static double dependency_degree(int numrows, HeapTuple *rows, int k,
      74             :                                 AttrNumber *dependency, VacAttrStats **stats, Bitmapset *attrs);
      75             : static bool dependency_is_fully_matched(MVDependency *dependency,
      76             :                                         Bitmapset *attnums);
      77             : static bool dependency_is_compatible_clause(Node *clause, Index relid,
      78             :                                             AttrNumber *attnum);
      79             : static MVDependency *find_strongest_dependency(MVDependencies **dependencies,
      80             :                                                int ndependencies,
      81             :                                                Bitmapset *attnums);
      82             : static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
      83             :                                                  int varRelid, JoinType jointype,
      84             :                                                  SpecialJoinInfo *sjinfo,
      85             :                                                  MVDependency **dependencies,
      86             :                                                  int ndependencies,
      87             :                                                  AttrNumber *list_attnums,
      88             :                                                  Bitmapset **estimatedclauses);
      89             : 
      90             : static void
      91         334 : generate_dependencies_recurse(DependencyGenerator state, int index,
      92             :                               AttrNumber start, AttrNumber *current)
      93             : {
      94             :     /*
      95             :      * The generator handles the first (k-1) elements differently from the
      96             :      * last element.
      97             :      */
      98         334 :     if (index < (state->k - 1))
      99             :     {
     100             :         AttrNumber  i;
     101             : 
     102             :         /*
     103             :          * The first (k-1) values have to be in ascending order, which we
     104             :          * generate recursively.
     105             :          */
     106             : 
     107         412 :         for (i = start; i < state->n; i++)
     108             :         {
     109         266 :             current[index] = i;
     110         266 :             generate_dependencies_recurse(state, (index + 1), (i + 1), current);
     111             :         }
     112             :     }
     113             :     else
     114             :     {
     115             :         int         i;
     116             : 
     117             :         /*
     118             :          * the last element is the implied value, which does not respect the
     119             :          * ascending order. We just need to check that the value is not in the
     120             :          * first (k-1) elements.
     121             :          */
     122             : 
     123         720 :         for (i = 0; i < state->n; i++)
     124             :         {
     125             :             int         j;
     126         532 :             bool        match = false;
     127             : 
     128         532 :             current[index] = i;
     129             : 
     130         954 :             for (j = 0; j < index; j++)
     131             :             {
     132         688 :                 if (current[j] == i)
     133             :                 {
     134         266 :                     match = true;
     135         266 :                     break;
     136             :                 }
     137             :             }
     138             : 
     139             :             /*
     140             :              * If the value is not found in the first part of the dependency,
     141             :              * we're done.
     142             :              */
     143         532 :             if (!match)
     144             :             {
     145         532 :                 state->dependencies = (AttrNumber *) repalloc(state->dependencies,
     146         266 :                                                               state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
     147         266 :                 memcpy(&state->dependencies[(state->k * state->ndependencies)],
     148         266 :                        current, state->k * sizeof(AttrNumber));
     149         266 :                 state->ndependencies++;
     150             :             }
     151             :         }
     152             :     }
     153         334 : }
     154             : 
     155             : /* generate all dependencies (k-permutations of n elements) */
     156             : static void
     157          68 : generate_dependencies(DependencyGenerator state)
     158             : {
     159          68 :     AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k);
     160             : 
     161          68 :     generate_dependencies_recurse(state, 0, 0, current);
     162             : 
     163          68 :     pfree(current);
     164          68 : }
     165             : 
     166             : /*
     167             :  * initialize the DependencyGenerator of variations, and prebuild the variations
     168             :  *
     169             :  * This pre-builds all the variations. We could also generate them in
     170             :  * DependencyGenerator_next(), but this seems simpler.
     171             :  */
     172             : static DependencyGenerator
     173          68 : DependencyGenerator_init(int n, int k)
     174             : {
     175             :     DependencyGenerator state;
     176             : 
     177             :     Assert((n >= k) && (k > 0));
     178             : 
     179             :     /* allocate the DependencyGenerator state */
     180          68 :     state = (DependencyGenerator) palloc0(sizeof(DependencyGeneratorData));
     181          68 :     state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber));
     182             : 
     183          68 :     state->ndependencies = 0;
     184          68 :     state->current = 0;
     185          68 :     state->k = k;
     186          68 :     state->n = n;
     187             : 
     188             :     /* now actually pre-generate all the variations */
     189          68 :     generate_dependencies(state);
     190             : 
     191          68 :     return state;
     192             : }
     193             : 
     194             : /* free the DependencyGenerator state */
     195             : static void
     196          68 : DependencyGenerator_free(DependencyGenerator state)
     197             : {
     198          68 :     pfree(state->dependencies);
     199          68 :     pfree(state);
     200             : 
     201          68 : }
     202             : 
     203             : /* generate next combination */
     204             : static AttrNumber *
     205         334 : DependencyGenerator_next(DependencyGenerator state)
     206             : {
     207         334 :     if (state->current == state->ndependencies)
     208          68 :         return NULL;
     209             : 
     210         266 :     return &state->dependencies[state->k * state->current++];
     211             : }
     212             : 
     213             : 
     214             : /*
     215             :  * validates functional dependency on the data
     216             :  *
     217             :  * An actual work horse of detecting functional dependencies. Given a variation
     218             :  * of k attributes, it checks that the first (k-1) are sufficient to determine
     219             :  * the last one.
     220             :  */
     221             : static double
     222         266 : dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
     223             :                   VacAttrStats **stats, Bitmapset *attrs)
     224             : {
     225             :     int         i,
     226             :                 nitems;
     227             :     MultiSortSupport mss;
     228             :     SortItem   *items;
     229             :     AttrNumber *attnums;
     230             :     AttrNumber *attnums_dep;
     231             :     int         numattrs;
     232             : 
     233             :     /* counters valid within a group */
     234         266 :     int         group_size = 0;
     235         266 :     int         n_violations = 0;
     236             : 
     237             :     /* total number of rows supporting (consistent with) the dependency */
     238         266 :     int         n_supporting_rows = 0;
     239             : 
     240             :     /* Make sure we have at least two input attributes. */
     241             :     Assert(k >= 2);
     242             : 
     243             :     /* sort info for all attributes columns */
     244         266 :     mss = multi_sort_init(k);
     245             : 
     246             :     /*
     247             :      * Transform the attrs from bitmap to an array to make accessing the i-th
     248             :      * member easier, and then construct a filtered version with only attnums
     249             :      * referenced by the dependency we validate.
     250             :      */
     251         266 :     attnums = build_attnums_array(attrs, &numattrs);
     252             : 
     253         266 :     attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber));
     254         876 :     for (i = 0; i < k; i++)
     255         610 :         attnums_dep[i] = attnums[dependency[i]];
     256             : 
     257             :     /*
     258             :      * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
     259             :      *
     260             :      * (a) sort the data lexicographically
     261             :      *
     262             :      * (b) split the data into groups by first (k-1) columns
     263             :      *
     264             :      * (c) for each group count different values in the last column
     265             :      *
     266             :      * We use the column data types' default sort operators and collations;
     267             :      * perhaps at some point it'd be worth using column-specific collations?
     268             :      */
     269             : 
     270             :     /* prepare the sort function for the dimensions */
     271         876 :     for (i = 0; i < k; i++)
     272             :     {
     273         610 :         VacAttrStats *colstat = stats[dependency[i]];
     274             :         TypeCacheEntry *type;
     275             : 
     276         610 :         type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
     277         610 :         if (type->lt_opr == InvalidOid) /* shouldn't happen */
     278           0 :             elog(ERROR, "cache lookup failed for ordering operator for type %u",
     279             :                  colstat->attrtypid);
     280             : 
     281             :         /* prepare the sort function for this dimension */
     282         610 :         multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
     283             :     }
     284             : 
     285             :     /*
     286             :      * build an array of SortItem(s) sorted using the multi-sort support
     287             :      *
     288             :      * XXX This relies on all stats entries pointing to the same tuple
     289             :      * descriptor.  For now that assumption holds, but it might change in the
     290             :      * future for example if we support statistics on multiple tables.
     291             :      */
     292         266 :     items = build_sorted_items(numrows, &nitems, rows, stats[0]->tupDesc,
     293             :                                mss, k, attnums_dep);
     294             : 
     295             :     /*
     296             :      * Walk through the sorted array, split it into rows according to the
     297             :      * first (k-1) columns. If there's a single value in the last column, we
     298             :      * count the group as 'supporting' the functional dependency. Otherwise we
     299             :      * count it as contradicting.
     300             :      */
     301             : 
     302             :     /* start with the first row forming a group */
     303         266 :     group_size = 1;
     304             : 
     305             :     /* loop 1 beyond the end of the array so that we count the final group */
     306      993674 :     for (i = 1; i <= nitems; i++)
     307             :     {
     308             :         /*
     309             :          * Check if the group ended, which may be either because we processed
     310             :          * all the items (i==nitems), or because the i-th item is not equal to
     311             :          * the preceding one.
     312             :          */
     313     1986550 :         if (i == nitems ||
     314      993142 :             multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
     315             :         {
     316             :             /*
     317             :              * If no violations were found in the group then track the rows of
     318             :              * the group as supporting the functional dependency.
     319             :              */
     320       43788 :             if (n_violations == 0)
     321       11236 :                 n_supporting_rows += group_size;
     322             : 
     323             :             /* Reset counters for the new group */
     324       43788 :             n_violations = 0;
     325       43788 :             group_size = 1;
     326       43788 :             continue;
     327             :         }
     328             :         /* first columns match, but the last one does not (so contradicting) */
     329      949620 :         else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
     330      154006 :             n_violations++;
     331             : 
     332      949620 :         group_size++;
     333             :     }
     334             : 
     335         266 :     if (items)
     336         266 :         pfree(items);
     337             : 
     338         266 :     pfree(mss);
     339         266 :     pfree(attnums);
     340         266 :     pfree(attnums_dep);
     341             : 
     342             :     /* Compute the 'degree of validity' as (supporting/total). */
     343         266 :     return (n_supporting_rows * 1.0 / numrows);
     344             : }
     345             : 
     346             : /*
     347             :  * detects functional dependencies between groups of columns
     348             :  *
     349             :  * Generates all possible subsets of columns (variations) and computes
     350             :  * the degree of validity for each one. For example when creating statistics
     351             :  * on three columns (a,b,c) there are 9 possible dependencies
     352             :  *
     353             :  *     two columns            three columns
     354             :  *     -----------            -------------
     355             :  *     (a) -> b                (a,b) -> c
     356             :  *     (a) -> c                (a,c) -> b
     357             :  *     (b) -> a                (b,c) -> a
     358             :  *     (b) -> c
     359             :  *     (c) -> a
     360             :  *     (c) -> b
     361             :  */
     362             : MVDependencies *
     363          42 : statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
     364             :                            VacAttrStats **stats)
     365             : {
     366             :     int         i,
     367             :                 k;
     368             :     int         numattrs;
     369             :     AttrNumber *attnums;
     370             : 
     371             :     /* result */
     372          42 :     MVDependencies *dependencies = NULL;
     373             : 
     374             :     /*
     375             :      * Transform the bms into an array, to make accessing i-th member easier.
     376             :      */
     377          42 :     attnums = build_attnums_array(attrs, &numattrs);
     378             : 
     379             :     Assert(numattrs >= 2);
     380             : 
     381             :     /*
     382             :      * We'll try build functional dependencies starting from the smallest ones
     383             :      * covering just 2 columns, to the largest ones, covering all columns
     384             :      * included in the statistics object.  We start from the smallest ones
     385             :      * because we want to be able to skip already implied ones.
     386             :      */
     387         110 :     for (k = 2; k <= numattrs; k++)
     388             :     {
     389             :         AttrNumber *dependency; /* array with k elements */
     390             : 
     391             :         /* prepare a DependencyGenerator of variation */
     392          68 :         DependencyGenerator DependencyGenerator = DependencyGenerator_init(numattrs, k);
     393             : 
     394             :         /* generate all possible variations of k values (out of n) */
     395         334 :         while ((dependency = DependencyGenerator_next(DependencyGenerator)))
     396             :         {
     397             :             double      degree;
     398             :             MVDependency *d;
     399             : 
     400             :             /* compute how valid the dependency seems */
     401         266 :             degree = dependency_degree(numrows, rows, k, dependency, stats, attrs);
     402             : 
     403             :             /*
     404             :              * if the dependency seems entirely invalid, don't store it
     405             :              */
     406         266 :             if (degree == 0.0)
     407         124 :                 continue;
     408             : 
     409         142 :             d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
     410             :                                          + k * sizeof(AttrNumber));
     411             : 
     412             :             /* copy the dependency (and keep the indexes into stxkeys) */
     413         142 :             d->degree = degree;
     414         142 :             d->nattributes = k;
     415         472 :             for (i = 0; i < k; i++)
     416         330 :                 d->attributes[i] = attnums[dependency[i]];
     417             : 
     418             :             /* initialize the list of dependencies */
     419         142 :             if (dependencies == NULL)
     420             :             {
     421             :                 dependencies
     422          38 :                     = (MVDependencies *) palloc0(sizeof(MVDependencies));
     423             : 
     424          38 :                 dependencies->magic = STATS_DEPS_MAGIC;
     425          38 :                 dependencies->type = STATS_DEPS_TYPE_BASIC;
     426          38 :                 dependencies->ndeps = 0;
     427             :             }
     428             : 
     429         142 :             dependencies->ndeps++;
     430         142 :             dependencies = (MVDependencies *) repalloc(dependencies,
     431             :                                                        offsetof(MVDependencies, deps)
     432         142 :                                                        + dependencies->ndeps * sizeof(MVDependency *));
     433             : 
     434         142 :             dependencies->deps[dependencies->ndeps - 1] = d;
     435             :         }
     436             : 
     437             :         /*
     438             :          * we're done with variations of k elements, so free the
     439             :          * DependencyGenerator
     440             :          */
     441          68 :         DependencyGenerator_free(DependencyGenerator);
     442             :     }
     443             : 
     444          42 :     return dependencies;
     445             : }
     446             : 
     447             : 
     448             : /*
     449             :  * Serialize list of dependencies into a bytea value.
     450             :  */
     451             : bytea *
     452          38 : statext_dependencies_serialize(MVDependencies *dependencies)
     453             : {
     454             :     int         i;
     455             :     bytea      *output;
     456             :     char       *tmp;
     457             :     Size        len;
     458             : 
     459             :     /* we need to store ndeps, with a number of attributes for each one */
     460          38 :     len = VARHDRSZ + SizeOfHeader;
     461             : 
     462             :     /* and also include space for the actual attribute numbers and degrees */
     463         180 :     for (i = 0; i < dependencies->ndeps; i++)
     464         142 :         len += SizeOfItem(dependencies->deps[i]->nattributes);
     465             : 
     466          38 :     output = (bytea *) palloc0(len);
     467          38 :     SET_VARSIZE(output, len);
     468             : 
     469          38 :     tmp = VARDATA(output);
     470             : 
     471             :     /* Store the base struct values (magic, type, ndeps) */
     472          38 :     memcpy(tmp, &dependencies->magic, sizeof(uint32));
     473          38 :     tmp += sizeof(uint32);
     474          38 :     memcpy(tmp, &dependencies->type, sizeof(uint32));
     475          38 :     tmp += sizeof(uint32);
     476          38 :     memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
     477          38 :     tmp += sizeof(uint32);
     478             : 
     479             :     /* store number of attributes and attribute numbers for each dependency */
     480         180 :     for (i = 0; i < dependencies->ndeps; i++)
     481             :     {
     482         142 :         MVDependency *d = dependencies->deps[i];
     483             : 
     484         142 :         memcpy(tmp, &d->degree, sizeof(double));
     485         142 :         tmp += sizeof(double);
     486             : 
     487         142 :         memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
     488         142 :         tmp += sizeof(AttrNumber);
     489             : 
     490         142 :         memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
     491         142 :         tmp += sizeof(AttrNumber) * d->nattributes;
     492             : 
     493             :         /* protect against overflow */
     494             :         Assert(tmp <= ((char *) output + len));
     495             :     }
     496             : 
     497             :     /* make sure we've produced exactly the right amount of data */
     498             :     Assert(tmp == ((char *) output + len));
     499             : 
     500          38 :     return output;
     501             : }
     502             : 
     503             : /*
     504             :  * Reads serialized dependencies into MVDependencies structure.
     505             :  */
     506             : MVDependencies *
     507         728 : statext_dependencies_deserialize(bytea *data)
     508             : {
     509             :     int         i;
     510             :     Size        min_expected_size;
     511             :     MVDependencies *dependencies;
     512             :     char       *tmp;
     513             : 
     514         728 :     if (data == NULL)
     515           0 :         return NULL;
     516             : 
     517         728 :     if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
     518           0 :         elog(ERROR, "invalid MVDependencies size %zd (expected at least %zd)",
     519             :              VARSIZE_ANY_EXHDR(data), SizeOfHeader);
     520             : 
     521             :     /* read the MVDependencies header */
     522         728 :     dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
     523             : 
     524             :     /* initialize pointer to the data part (skip the varlena header) */
     525         728 :     tmp = VARDATA_ANY(data);
     526             : 
     527             :     /* read the header fields and perform basic sanity checks */
     528         728 :     memcpy(&dependencies->magic, tmp, sizeof(uint32));
     529         728 :     tmp += sizeof(uint32);
     530         728 :     memcpy(&dependencies->type, tmp, sizeof(uint32));
     531         728 :     tmp += sizeof(uint32);
     532         728 :     memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
     533         728 :     tmp += sizeof(uint32);
     534             : 
     535         728 :     if (dependencies->magic != STATS_DEPS_MAGIC)
     536           0 :         elog(ERROR, "invalid dependency magic %d (expected %d)",
     537             :              dependencies->magic, STATS_DEPS_MAGIC);
     538             : 
     539         728 :     if (dependencies->type != STATS_DEPS_TYPE_BASIC)
     540           0 :         elog(ERROR, "invalid dependency type %d (expected %d)",
     541             :              dependencies->type, STATS_DEPS_TYPE_BASIC);
     542             : 
     543         728 :     if (dependencies->ndeps == 0)
     544           0 :         elog(ERROR, "invalid zero-length item array in MVDependencies");
     545             : 
     546             :     /* what minimum bytea size do we expect for those parameters */
     547         728 :     min_expected_size = SizeOfItem(dependencies->ndeps);
     548             : 
     549         728 :     if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
     550           0 :         elog(ERROR, "invalid dependencies size %zd (expected at least %zd)",
     551             :              VARSIZE_ANY_EXHDR(data), min_expected_size);
     552             : 
     553             :     /* allocate space for the MCV items */
     554         728 :     dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
     555         728 :                             + (dependencies->ndeps * sizeof(MVDependency *)));
     556             : 
     557        4284 :     for (i = 0; i < dependencies->ndeps; i++)
     558             :     {
     559             :         double      degree;
     560             :         AttrNumber  k;
     561             :         MVDependency *d;
     562             : 
     563             :         /* degree of validity */
     564        3556 :         memcpy(&degree, tmp, sizeof(double));
     565        3556 :         tmp += sizeof(double);
     566             : 
     567             :         /* number of attributes */
     568        3556 :         memcpy(&k, tmp, sizeof(AttrNumber));
     569        3556 :         tmp += sizeof(AttrNumber);
     570             : 
     571             :         /* is the number of attributes valid? */
     572             :         Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
     573             : 
     574             :         /* now that we know the number of attributes, allocate the dependency */
     575        3556 :         d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
     576             :                                      + (k * sizeof(AttrNumber)));
     577             : 
     578        3556 :         d->degree = degree;
     579        3556 :         d->nattributes = k;
     580             : 
     581             :         /* copy attribute numbers */
     582        3556 :         memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
     583        3556 :         tmp += sizeof(AttrNumber) * d->nattributes;
     584             : 
     585        3556 :         dependencies->deps[i] = d;
     586             : 
     587             :         /* still within the bytea */
     588             :         Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
     589             :     }
     590             : 
     591             :     /* we should have consumed the whole bytea exactly */
     592             :     Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
     593             : 
     594         728 :     return dependencies;
     595             : }
     596             : 
     597             : /*
     598             :  * dependency_is_fully_matched
     599             :  *      checks that a functional dependency is fully matched given clauses on
     600             :  *      attributes (assuming the clauses are suitable equality clauses)
     601             :  */
     602             : static bool
     603        3108 : dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
     604             : {
     605             :     int         j;
     606             : 
     607             :     /*
     608             :      * Check that the dependency actually is fully covered by clauses. We have
     609             :      * to translate all attribute numbers, as those are referenced
     610             :      */
     611        8016 :     for (j = 0; j < dependency->nattributes; j++)
     612             :     {
     613        6348 :         int         attnum = dependency->attributes[j];
     614             : 
     615        6348 :         if (!bms_is_member(attnum, attnums))
     616        1440 :             return false;
     617             :     }
     618             : 
     619        1668 :     return true;
     620             : }
     621             : 
     622             : /*
     623             :  * statext_dependencies_load
     624             :  *      Load the functional dependencies for the indicated pg_statistic_ext tuple
     625             :  */
     626             : MVDependencies *
     627         724 : statext_dependencies_load(Oid mvoid)
     628             : {
     629             :     MVDependencies *result;
     630             :     bool        isnull;
     631             :     Datum       deps;
     632             :     HeapTuple   htup;
     633             : 
     634         724 :     htup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(mvoid));
     635         724 :     if (!HeapTupleIsValid(htup))
     636           0 :         elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
     637             : 
     638         724 :     deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
     639             :                            Anum_pg_statistic_ext_data_stxddependencies, &isnull);
     640         724 :     if (isnull)
     641           0 :         elog(ERROR,
     642             :              "requested statistic kind \"%c\" is not yet built for statistics object %u",
     643             :              STATS_EXT_DEPENDENCIES, mvoid);
     644             : 
     645         724 :     result = statext_dependencies_deserialize(DatumGetByteaPP(deps));
     646             : 
     647         724 :     ReleaseSysCache(htup);
     648             : 
     649         724 :     return result;
     650             : }
     651             : 
     652             : /*
     653             :  * pg_dependencies_in       - input routine for type pg_dependencies.
     654             :  *
     655             :  * pg_dependencies is real enough to be a table column, but it has no operations
     656             :  * of its own, and disallows input too
     657             :  */
     658             : Datum
     659           0 : pg_dependencies_in(PG_FUNCTION_ARGS)
     660             : {
     661             :     /*
     662             :      * pg_node_list stores the data in binary form and parsing text input is
     663             :      * not needed, so disallow this.
     664             :      */
     665           0 :     ereport(ERROR,
     666             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     667             :              errmsg("cannot accept a value of type %s", "pg_dependencies")));
     668             : 
     669             :     PG_RETURN_VOID();           /* keep compiler quiet */
     670             : }
     671             : 
     672             : /*
     673             :  * pg_dependencies      - output routine for type pg_dependencies.
     674             :  */
     675             : Datum
     676           4 : pg_dependencies_out(PG_FUNCTION_ARGS)
     677             : {
     678           4 :     bytea      *data = PG_GETARG_BYTEA_PP(0);
     679           4 :     MVDependencies *dependencies = statext_dependencies_deserialize(data);
     680             :     int         i,
     681             :                 j;
     682             :     StringInfoData str;
     683             : 
     684           4 :     initStringInfo(&str);
     685           4 :     appendStringInfoChar(&str, '{');
     686             : 
     687          24 :     for (i = 0; i < dependencies->ndeps; i++)
     688             :     {
     689          20 :         MVDependency *dependency = dependencies->deps[i];
     690             : 
     691          20 :         if (i > 0)
     692          16 :             appendStringInfoString(&str, ", ");
     693             : 
     694          20 :         appendStringInfoChar(&str, '"');
     695          68 :         for (j = 0; j < dependency->nattributes; j++)
     696             :         {
     697          48 :             if (j == dependency->nattributes - 1)
     698          20 :                 appendStringInfoString(&str, " => ");
     699          28 :             else if (j > 0)
     700           8 :                 appendStringInfoString(&str, ", ");
     701             : 
     702          48 :             appendStringInfo(&str, "%d", dependency->attributes[j]);
     703             :         }
     704          20 :         appendStringInfo(&str, "\": %f", dependency->degree);
     705             :     }
     706             : 
     707           4 :     appendStringInfoChar(&str, '}');
     708             : 
     709           4 :     PG_RETURN_CSTRING(str.data);
     710             : }
     711             : 
     712             : /*
     713             :  * pg_dependencies_recv     - binary input routine for type pg_dependencies.
     714             :  */
     715             : Datum
     716           0 : pg_dependencies_recv(PG_FUNCTION_ARGS)
     717             : {
     718           0 :     ereport(ERROR,
     719             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     720             :              errmsg("cannot accept a value of type %s", "pg_dependencies")));
     721             : 
     722             :     PG_RETURN_VOID();           /* keep compiler quiet */
     723             : }
     724             : 
     725             : /*
     726             :  * pg_dependencies_send     - binary output routine for type pg_dependencies.
     727             :  *
     728             :  * Functional dependencies are serialized in a bytea value (although the type
     729             :  * is named differently), so let's just send that.
     730             :  */
     731             : Datum
     732           0 : pg_dependencies_send(PG_FUNCTION_ARGS)
     733             : {
     734           0 :     return byteasend(fcinfo);
     735             : }
     736             : 
     737             : /*
     738             :  * dependency_is_compatible_clause
     739             :  *      Determines if the clause is compatible with functional dependencies
     740             :  *
     741             :  * Only clauses that have the form of equality to a pseudoconstant, or can be
     742             :  * interpreted that way, are currently accepted.  Furthermore the variable
     743             :  * part of the clause must be a simple Var belonging to the specified
     744             :  * relation, whose attribute number we return in *attnum on success.
     745             :  */
     746             : static bool
     747        2516 : dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
     748             : {
     749             :     Var        *var;
     750             : 
     751        2516 :     if (IsA(clause, RestrictInfo))
     752             :     {
     753        2460 :         RestrictInfo *rinfo = (RestrictInfo *) clause;
     754             : 
     755             :         /* Pseudoconstants are not interesting (they couldn't contain a Var) */
     756        2460 :         if (rinfo->pseudoconstant)
     757           0 :             return false;
     758             : 
     759             :         /* Clauses referencing multiple, or no, varnos are incompatible */
     760        2460 :         if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
     761           0 :             return false;
     762             : 
     763        2460 :         clause = (Node *) rinfo->clause;
     764             :     }
     765             : 
     766        2516 :     if (is_opclause(clause))
     767             :     {
     768             :         /* If it's an opclause, check for Var = Const or Const = Var. */
     769         952 :         OpExpr     *expr = (OpExpr *) clause;
     770             : 
     771             :         /* Only expressions with two arguments are candidates. */
     772         952 :         if (list_length(expr->args) != 2)
     773           0 :             return false;
     774             : 
     775             :         /* Make sure non-selected argument is a pseudoconstant. */
     776         952 :         if (is_pseudo_constant_clause(lsecond(expr->args)))
     777         952 :             var = linitial(expr->args);
     778           0 :         else if (is_pseudo_constant_clause(linitial(expr->args)))
     779           0 :             var = lsecond(expr->args);
     780             :         else
     781           0 :             return false;
     782             : 
     783             :         /*
     784             :          * If it's not an "=" operator, just ignore the clause, as it's not
     785             :          * compatible with functional dependencies.
     786             :          *
     787             :          * This uses the function for estimating selectivity, not the operator
     788             :          * directly (a bit awkward, but well ...).
     789             :          *
     790             :          * XXX this is pretty dubious; probably it'd be better to check btree
     791             :          * or hash opclass membership, so as not to be fooled by custom
     792             :          * selectivity functions, and to be more consistent with decisions
     793             :          * elsewhere in the planner.
     794             :          */
     795         952 :         if (get_oprrest(expr->opno) != F_EQSEL)
     796          20 :             return false;
     797             : 
     798             :         /* OK to proceed with checking "var" */
     799             :     }
     800        1564 :     else if (IsA(clause, ScalarArrayOpExpr))
     801             :     {
     802             :         /* If it's an scalar array operator, check for Var IN Const. */
     803        1540 :         ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
     804             : 
     805             :         /*
     806             :          * Reject ALL() variant, we only care about ANY/IN.
     807             :          *
     808             :          * FIXME Maybe we should check if all the values are the same, and
     809             :          * allow ALL in that case? Doesn't seem very practical, though.
     810             :          */
     811        1540 :         if (!expr->useOr)
     812          12 :             return false;
     813             : 
     814             :         /* Only expressions with two arguments are candidates. */
     815        1528 :         if (list_length(expr->args) != 2)
     816           0 :             return false;
     817             : 
     818             :         /*
     819             :          * We know it's always (Var IN Const), so we assume the var is the
     820             :          * first argument, and pseudoconstant is the second one.
     821             :          */
     822        1528 :         if (!is_pseudo_constant_clause(lsecond(expr->args)))
     823           0 :             return false;
     824             : 
     825        1528 :         var = linitial(expr->args);
     826             : 
     827             :         /*
     828             :          * If it's not an "=" operator, just ignore the clause, as it's not
     829             :          * compatible with functional dependencies. The operator is identified
     830             :          * simply by looking at which function it uses to estimate
     831             :          * selectivity. That's a bit strange, but it's what other similar
     832             :          * places do.
     833             :          */
     834        1528 :         if (get_oprrest(expr->opno) != F_EQSEL)
     835         212 :             return false;
     836             : 
     837             :         /* OK to proceed with checking "var" */
     838             :     }
     839          24 :     else if (is_orclause(clause))
     840             :     {
     841          24 :         BoolExpr   *expr = (BoolExpr *) clause;
     842             :         ListCell   *lc;
     843             : 
     844             :         /* start with no attribute number */
     845          24 :         *attnum = InvalidAttrNumber;
     846             : 
     847          76 :         foreach(lc, expr->args)
     848             :         {
     849             :             AttrNumber  clause_attnum;
     850             : 
     851             :             /*
     852             :              * Had we found incompatible clause in the arguments, treat the
     853             :              * whole clause as incompatible.
     854             :              */
     855          56 :             if (!dependency_is_compatible_clause((Node *) lfirst(lc),
     856             :                                                  relid, &clause_attnum))
     857           4 :                 return false;
     858             : 
     859          56 :             if (*attnum == InvalidAttrNumber)
     860          24 :                 *attnum = clause_attnum;
     861             : 
     862          56 :             if (*attnum != clause_attnum)
     863           4 :                 return false;
     864             :         }
     865             : 
     866             :         /* the Var is already checked by the recursive call */
     867          20 :         return true;
     868             :     }
     869           0 :     else if (is_notclause(clause))
     870             :     {
     871             :         /*
     872             :          * "NOT x" can be interpreted as "x = false", so get the argument and
     873             :          * proceed with seeing if it's a suitable Var.
     874             :          */
     875           0 :         var = (Var *) get_notclausearg(clause);
     876             :     }
     877             :     else
     878             :     {
     879             :         /*
     880             :          * A boolean expression "x" can be interpreted as "x = true", so
     881             :          * proceed with seeing if it's a suitable Var.
     882             :          */
     883           0 :         var = (Var *) clause;
     884             :     }
     885             : 
     886             :     /*
     887             :      * We may ignore any RelabelType node above the operand.  (There won't be
     888             :      * more than one, since eval_const_expressions has been applied already.)
     889             :      */
     890        2248 :     if (IsA(var, RelabelType))
     891           0 :         var = (Var *) ((RelabelType *) var)->arg;
     892             : 
     893             :     /* We only support plain Vars for now */
     894        2248 :     if (!IsA(var, Var))
     895           0 :         return false;
     896             : 
     897             :     /* Ensure Var is from the correct relation */
     898        2248 :     if (var->varno != relid)
     899           0 :         return false;
     900             : 
     901             :     /* We also better ensure the Var is from the current level */
     902        2248 :     if (var->varlevelsup != 0)
     903           0 :         return false;
     904             : 
     905             :     /* Also ignore system attributes (we don't allow stats on those) */
     906        2248 :     if (!AttrNumberIsForUserDefinedAttr(var->varattno))
     907           0 :         return false;
     908             : 
     909        2248 :     *attnum = var->varattno;
     910        2248 :     return true;
     911             : }
     912             : 
     913             : /*
     914             :  * find_strongest_dependency
     915             :  *      find the strongest dependency on the attributes
     916             :  *
     917             :  * When applying functional dependencies, we start with the strongest
     918             :  * dependencies. That is, we select the dependency that:
     919             :  *
     920             :  * (a) has all attributes covered by equality clauses
     921             :  *
     922             :  * (b) has the most attributes
     923             :  *
     924             :  * (c) has the highest degree of validity
     925             :  *
     926             :  * This guarantees that we eliminate the most redundant conditions first
     927             :  * (see the comment in dependencies_clauselist_selectivity).
     928             :  */
     929             : static MVDependency *
     930        1620 : find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
     931             :                           Bitmapset *attnums)
     932             : {
     933             :     int         i,
     934             :                 j;
     935        1620 :     MVDependency *strongest = NULL;
     936             : 
     937             :     /* number of attnums in clauses */
     938        1620 :     int         nattnums = bms_num_members(attnums);
     939             : 
     940             :     /*
     941             :      * Iterate over the MVDependency items and find the strongest one from the
     942             :      * fully-matched dependencies. We do the cheap checks first, before
     943             :      * matching it against the attnums.
     944             :      */
     945        3264 :     for (i = 0; i < ndependencies; i++)
     946             :     {
     947        9648 :         for (j = 0; j < dependencies[i]->ndeps; j++)
     948             :         {
     949        8004 :             MVDependency *dependency = dependencies[i]->deps[j];
     950             : 
     951             :             /*
     952             :              * Skip dependencies referencing more attributes than available
     953             :              * clauses, as those can't be fully matched.
     954             :              */
     955        8004 :             if (dependency->nattributes > nattnums)
     956        4896 :                 continue;
     957             : 
     958        3108 :             if (strongest)
     959             :             {
     960             :                 /* skip dependencies on fewer attributes than the strongest. */
     961        1968 :                 if (dependency->nattributes < strongest->nattributes)
     962           0 :                     continue;
     963             : 
     964             :                 /* also skip weaker dependencies when attribute count matches */
     965        1968 :                 if (strongest->nattributes == dependency->nattributes &&
     966        1788 :                     strongest->degree > dependency->degree)
     967           0 :                     continue;
     968             :             }
     969             : 
     970             :             /*
     971             :              * this dependency is stronger, but we must still check that it's
     972             :              * fully matched to these attnums. We perform this check last as
     973             :              * it's slightly more expensive than the previous checks.
     974             :              */
     975        3108 :             if (dependency_is_fully_matched(dependency, attnums))
     976        1668 :                 strongest = dependency; /* save new best match */
     977             :         }
     978             :     }
     979             : 
     980        1620 :     return strongest;
     981             : }
     982             : 
     983             : /*
     984             :  * clauselist_apply_dependencies
     985             :  *      Apply the specified functional dependencies to a list of clauses and
     986             :  *      return the estimated selecvitity of the clauses that are compatible
     987             :  *      with any of the given dependencies.
     988             :  *
     989             :  * This will estimate all not-already-estimated clauses that are compatible
     990             :  * with functional dependencies, and which have an attribute mentioned by any
     991             :  * of the given dependencies (either as an implying or implied attribute).
     992             :  *
     993             :  * Given (lists of) clauses on attributes (a,b) and a functional dependency
     994             :  * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
     995             :  * using the formula
     996             :  *
     997             :  *      P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
     998             :  *
     999             :  * where 'f' is the degree of dependency.  This reflects the fact that we
    1000             :  * expect a fraction f of all rows to be consistent with the dependency
    1001             :  * (a=>b), and so have a selectivity of P(a), while the remaining rows are
    1002             :  * treated as independent.
    1003             :  *
    1004             :  * In practice, we use a slightly modified version of this formula, which uses
    1005             :  * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
    1006             :  * should obviously not exceed either column's individual selectivity.  I.e.,
    1007             :  * we actually combine selectivities using the formula
    1008             :  *
    1009             :  *      P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
    1010             :  *
    1011             :  * This can make quite a difference if the specific values matching the
    1012             :  * clauses are not consistent with the functional dependency.
    1013             :  */
    1014             : static Selectivity
    1015         716 : clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
    1016             :                               int varRelid, JoinType jointype,
    1017             :                               SpecialJoinInfo *sjinfo,
    1018             :                               MVDependency **dependencies, int ndependencies,
    1019             :                               AttrNumber *list_attnums,
    1020             :                               Bitmapset **estimatedclauses)
    1021             : {
    1022             :     Bitmapset  *attnums;
    1023             :     int         i;
    1024             :     int         j;
    1025             :     int         nattrs;
    1026             :     Selectivity *attr_sel;
    1027             :     int         attidx;
    1028             :     int         listidx;
    1029             :     ListCell   *l;
    1030             :     Selectivity s1;
    1031             : 
    1032             :     /*
    1033             :      * Extract the attnums of all implying and implied attributes from all the
    1034             :      * given dependencies.  Each of these attributes is expected to have at
    1035             :      * least 1 not-already-estimated compatible clause that we will estimate
    1036             :      * here.
    1037             :      */
    1038         716 :     attnums = NULL;
    1039        1620 :     for (i = 0; i < ndependencies; i++)
    1040             :     {
    1041        2892 :         for (j = 0; j < dependencies[i]->nattributes; j++)
    1042             :         {
    1043        1988 :             AttrNumber  attnum = dependencies[i]->attributes[j];
    1044             : 
    1045        1988 :             attnums = bms_add_member(attnums, attnum);
    1046             :         }
    1047             :     }
    1048             : 
    1049             :     /*
    1050             :      * Compute per-column selectivity estimates for each of these attributes,
    1051             :      * and mark all the corresponding clauses as estimated.
    1052             :      */
    1053         716 :     nattrs = bms_num_members(attnums);
    1054         716 :     attr_sel = (Selectivity *) palloc(sizeof(Selectivity) * nattrs);
    1055             : 
    1056         716 :     attidx = 0;
    1057         716 :     i = -1;
    1058        2344 :     while ((i = bms_next_member(attnums, i)) >= 0)
    1059             :     {
    1060        1628 :         List       *attr_clauses = NIL;
    1061             :         Selectivity simple_sel;
    1062             : 
    1063        1628 :         listidx = -1;
    1064        5488 :         foreach(l, clauses)
    1065             :         {
    1066        3860 :             Node       *clause = (Node *) lfirst(l);
    1067             : 
    1068        3860 :             listidx++;
    1069        3860 :             if (list_attnums[listidx] == i)
    1070             :             {
    1071        1628 :                 attr_clauses = lappend(attr_clauses, clause);
    1072        1628 :                 *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
    1073             :             }
    1074             :         }
    1075             : 
    1076        1628 :         simple_sel = clauselist_selectivity_simple(root, attr_clauses, varRelid,
    1077             :                                                    jointype, sjinfo, NULL);
    1078        1628 :         attr_sel[attidx++] = simple_sel;
    1079             :     }
    1080             : 
    1081             :     /*
    1082             :      * Now combine these selectivities using the dependency information.  For
    1083             :      * chains of dependencies such as a -> b -> c, the b -> c dependency will
    1084             :      * come before the a -> b dependency in the array, so we traverse the
    1085             :      * array backwards to ensure such chains are computed in the right order.
    1086             :      *
    1087             :      * As explained above, pairs of selectivities are combined using the
    1088             :      * formula
    1089             :      *
    1090             :      * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
    1091             :      *
    1092             :      * to ensure that the combined selectivity is never greater than either
    1093             :      * individual selectivity.
    1094             :      *
    1095             :      * Where multiple dependencies apply (e.g., a -> b -> c), we use
    1096             :      * conditional probabilities to compute the overall result as follows:
    1097             :      *
    1098             :      * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
    1099             :      *
    1100             :      * so we replace the selectivities of all implied attributes with
    1101             :      * conditional probabilities, that are conditional on all their implying
    1102             :      * attributes.  The selectivities of all other non-implied attributes are
    1103             :      * left as they are.
    1104             :      */
    1105        1620 :     for (i = ndependencies - 1; i >= 0; i--)
    1106             :     {
    1107         904 :         MVDependency *dependency = dependencies[i];
    1108             :         AttrNumber  attnum;
    1109             :         Selectivity s2;
    1110             :         double      f;
    1111             : 
    1112             :         /* Selectivity of all the implying attributes */
    1113         904 :         s1 = 1.0;
    1114        1988 :         for (j = 0; j < dependency->nattributes - 1; j++)
    1115             :         {
    1116        1084 :             attnum = dependency->attributes[j];
    1117        1084 :             attidx = bms_member_index(attnums, attnum);
    1118        1084 :             s1 *= attr_sel[attidx];
    1119             :         }
    1120             : 
    1121             :         /* Original selectivity of the implied attribute */
    1122         904 :         attnum = dependency->attributes[j];
    1123         904 :         attidx = bms_member_index(attnums, attnum);
    1124         904 :         s2 = attr_sel[attidx];
    1125             : 
    1126             :         /*
    1127             :          * Replace s2 with the conditional probability s2 given s1, computed
    1128             :          * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
    1129             :          *
    1130             :          * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
    1131             :          *
    1132             :          * where P(a) = s1, the selectivity of the implying attributes, and
    1133             :          * P(b) = s2, the selectivity of the implied attribute.
    1134             :          */
    1135         904 :         f = dependency->degree;
    1136             : 
    1137         904 :         if (s1 <= s2)
    1138         848 :             attr_sel[attidx] = f + (1 - f) * s2;
    1139             :         else
    1140          56 :             attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
    1141             :     }
    1142             : 
    1143             :     /*
    1144             :      * The overall selectivity of all the clauses on all these attributes is
    1145             :      * then the product of all the original (non-implied) probabilities and
    1146             :      * the new conditional (implied) probabilities.
    1147             :      */
    1148         716 :     s1 = 1.0;
    1149        2344 :     for (i = 0; i < nattrs; i++)
    1150        1628 :         s1 *= attr_sel[i];
    1151             : 
    1152         716 :     CLAMP_PROBABILITY(s1);
    1153             : 
    1154         716 :     pfree(attr_sel);
    1155         716 :     bms_free(attnums);
    1156             : 
    1157         716 :     return s1;
    1158             : }
    1159             : 
    1160             : /*
    1161             :  * dependencies_clauselist_selectivity
    1162             :  *      Return the estimated selectivity of (a subset of) the given clauses
    1163             :  *      using functional dependency statistics, or 1.0 if no useful functional
    1164             :  *      dependency statistic exists.
    1165             :  *
    1166             :  * 'estimatedclauses' is an input/output argument that gets a bit set
    1167             :  * corresponding to the (zero-based) list index of each clause that is included
    1168             :  * in the estimated selectivity.
    1169             :  *
    1170             :  * Given equality clauses on attributes (a,b) we find the strongest dependency
    1171             :  * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
    1172             :  * dependency, we then combine the per-clause selectivities using the formula
    1173             :  *
    1174             :  *     P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
    1175             :  *
    1176             :  * where 'f' is the degree of the dependency.  (Actually we use a slightly
    1177             :  * modified version of this formula -- see clauselist_apply_dependencies()).
    1178             :  *
    1179             :  * With clauses on more than two attributes, the dependencies are applied
    1180             :  * recursively, starting with the widest/strongest dependencies. For example
    1181             :  * P(a,b,c) is first split like this:
    1182             :  *
    1183             :  *     P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
    1184             :  *
    1185             :  * assuming (a,b=>c) is the strongest dependency.
    1186             :  */
    1187             : Selectivity
    1188        1656 : dependencies_clauselist_selectivity(PlannerInfo *root,
    1189             :                                     List *clauses,
    1190             :                                     int varRelid,
    1191             :                                     JoinType jointype,
    1192             :                                     SpecialJoinInfo *sjinfo,
    1193             :                                     RelOptInfo *rel,
    1194             :                                     Bitmapset **estimatedclauses)
    1195             : {
    1196        1656 :     Selectivity s1 = 1.0;
    1197             :     ListCell   *l;
    1198        1656 :     Bitmapset  *clauses_attnums = NULL;
    1199             :     AttrNumber *list_attnums;
    1200             :     int         listidx;
    1201             :     MVDependencies **func_dependencies;
    1202             :     int         nfunc_dependencies;
    1203             :     int         total_ndeps;
    1204             :     MVDependency **dependencies;
    1205             :     int         ndependencies;
    1206             :     int         i;
    1207             : 
    1208             :     /* check if there's any stats that might be useful for us. */
    1209        1656 :     if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
    1210         192 :         return 1.0;
    1211             : 
    1212        1464 :     list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
    1213        1464 :                                          list_length(clauses));
    1214             : 
    1215             :     /*
    1216             :      * Pre-process the clauses list to extract the attnums seen in each item.
    1217             :      * We need to determine if there's any clauses which will be useful for
    1218             :      * dependency selectivity estimations. Along the way we'll record all of
    1219             :      * the attnums for each clause in a list which we'll reference later so we
    1220             :      * don't need to repeat the same work again. We'll also keep track of all
    1221             :      * attnums seen.
    1222             :      *
    1223             :      * We also skip clauses that we already estimated using different types of
    1224             :      * statistics (we treat them as incompatible).
    1225             :      */
    1226        1464 :     listidx = 0;
    1227        3924 :     foreach(l, clauses)
    1228             :     {
    1229        2460 :         Node       *clause = (Node *) lfirst(l);
    1230             :         AttrNumber  attnum;
    1231             : 
    1232        4920 :         if (!bms_is_member(listidx, *estimatedclauses) &&
    1233        2460 :             dependency_is_compatible_clause(clause, rel->relid, &attnum))
    1234             :         {
    1235        2212 :             list_attnums[listidx] = attnum;
    1236        2212 :             clauses_attnums = bms_add_member(clauses_attnums, attnum);
    1237             :         }
    1238             :         else
    1239         248 :             list_attnums[listidx] = InvalidAttrNumber;
    1240             : 
    1241        2460 :         listidx++;
    1242             :     }
    1243             : 
    1244             :     /*
    1245             :      * If there's not at least two distinct attnums then reject the whole list
    1246             :      * of clauses. We must return 1.0 so the calling function's selectivity is
    1247             :      * unaffected.
    1248             :      */
    1249        1464 :     if (bms_num_members(clauses_attnums) < 2)
    1250             :     {
    1251         748 :         bms_free(clauses_attnums);
    1252         748 :         pfree(list_attnums);
    1253         748 :         return 1.0;
    1254             :     }
    1255             : 
    1256             :     /*
    1257             :      * Load all functional dependencies matching at least two parameters. We
    1258             :      * can simply consider all dependencies at once, without having to search
    1259             :      * for the best statistics object.
    1260             :      *
    1261             :      * To not waste cycles and memory, we deserialize dependencies only for
    1262             :      * statistics that match at least two attributes. The array is allocated
    1263             :      * with the assumption that all objects match - we could grow the array to
    1264             :      * make it just the right size, but it's likely wasteful anyway thanks to
    1265             :      * moving the freed chunks to freelists etc.
    1266             :      */
    1267         716 :     func_dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
    1268         716 :                                                    list_length(rel->statlist));
    1269         716 :     nfunc_dependencies = 0;
    1270         716 :     total_ndeps = 0;
    1271             : 
    1272        1452 :     foreach(l, rel->statlist)
    1273             :     {
    1274         736 :         StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
    1275             :         Bitmapset  *matched;
    1276             :         int         num_matched;
    1277             : 
    1278             :         /* skip statistics that are not of the correct type */
    1279         736 :         if (stat->kind != STATS_EXT_DEPENDENCIES)
    1280           0 :             continue;
    1281             : 
    1282         736 :         matched = bms_intersect(clauses_attnums, stat->keys);
    1283         736 :         num_matched = bms_num_members(matched);
    1284         736 :         bms_free(matched);
    1285             : 
    1286             :         /* skip objects matching fewer than two attributes from clauses */
    1287         736 :         if (num_matched < 2)
    1288          12 :             continue;
    1289             : 
    1290         724 :         func_dependencies[nfunc_dependencies]
    1291         724 :             = statext_dependencies_load(stat->statOid);
    1292             : 
    1293         724 :         total_ndeps += func_dependencies[nfunc_dependencies]->ndeps;
    1294         724 :         nfunc_dependencies++;
    1295             :     }
    1296             : 
    1297             :     /* if no matching stats could be found then we've nothing to do */
    1298         716 :     if (nfunc_dependencies == 0)
    1299             :     {
    1300           0 :         pfree(func_dependencies);
    1301           0 :         bms_free(clauses_attnums);
    1302           0 :         pfree(list_attnums);
    1303           0 :         return 1.0;
    1304             :     }
    1305             : 
    1306             :     /*
    1307             :      * Work out which dependencies we can apply, starting with the
    1308             :      * widest/stongest ones, and proceeding to smaller/weaker ones.
    1309             :      */
    1310         716 :     dependencies = (MVDependency **) palloc(sizeof(MVDependency *) *
    1311             :                                             total_ndeps);
    1312         716 :     ndependencies = 0;
    1313             : 
    1314             :     while (true)
    1315         904 :     {
    1316             :         MVDependency *dependency;
    1317             :         AttrNumber  attnum;
    1318             : 
    1319             :         /* the widest/strongest dependency, fully matched by clauses */
    1320        1620 :         dependency = find_strongest_dependency(func_dependencies,
    1321             :                                                nfunc_dependencies,
    1322             :                                                clauses_attnums);
    1323        1620 :         if (!dependency)
    1324         716 :             break;
    1325             : 
    1326         904 :         dependencies[ndependencies++] = dependency;
    1327             : 
    1328             :         /* Ignore dependencies using this implied attribute in later loops */
    1329         904 :         attnum = dependency->attributes[dependency->nattributes - 1];
    1330         904 :         clauses_attnums = bms_del_member(clauses_attnums, attnum);
    1331             :     }
    1332             : 
    1333             :     /*
    1334             :      * If we found applicable dependencies, use them to estimate all
    1335             :      * compatible clauses on attributes that they refer to.
    1336             :      */
    1337         716 :     if (ndependencies != 0)
    1338         716 :         s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
    1339             :                                            sjinfo, dependencies, ndependencies,
    1340             :                                            list_attnums, estimatedclauses);
    1341             : 
    1342             :     /* free deserialized functional dependencies (and then the array) */
    1343        1440 :     for (i = 0; i < nfunc_dependencies; i++)
    1344         724 :         pfree(func_dependencies[i]);
    1345             : 
    1346         716 :     pfree(dependencies);
    1347         716 :     pfree(func_dependencies);
    1348         716 :     bms_free(clauses_attnums);
    1349         716 :     pfree(list_attnums);
    1350             : 
    1351         716 :     return s1;
    1352             : }

Generated by: LCOV version 1.13