LCOV - code coverage report
Current view: top level - src/backend/statistics - dependencies.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 240 298 80.5 %
Date: 2019-11-13 22:07:24 Functions: 15 19 78.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * dependencies.c
       4             :  *    POSTGRES functional dependencies
       5             :  *
       6             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/statistics/dependencies.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #include "postgres.h"
      15             : 
      16             : #include "access/htup_details.h"
      17             : #include "access/sysattr.h"
      18             : #include "catalog/pg_operator.h"
      19             : #include "catalog/pg_statistic_ext.h"
      20             : #include "catalog/pg_statistic_ext_data.h"
      21             : #include "lib/stringinfo.h"
      22             : #include "nodes/nodeFuncs.h"
      23             : #include "nodes/nodes.h"
      24             : #include "nodes/pathnodes.h"
      25             : #include "optimizer/clauses.h"
      26             : #include "optimizer/optimizer.h"
      27             : #include "statistics/extended_stats_internal.h"
      28             : #include "statistics/statistics.h"
      29             : #include "utils/bytea.h"
      30             : #include "utils/fmgroids.h"
      31             : #include "utils/fmgrprotos.h"
      32             : #include "utils/lsyscache.h"
      33             : #include "utils/syscache.h"
      34             : #include "utils/typcache.h"
      35             : 
      36             : /* size of the struct header fields (magic, type, ndeps) */
      37             : #define SizeOfHeader        (3 * sizeof(uint32))
      38             : 
      39             : /* size of a serialized dependency (degree, natts, atts) */
      40             : #define SizeOfItem(natts) \
      41             :     (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
      42             : 
      43             : /* minimal size of a dependency (with two attributes) */
      44             : #define MinSizeOfItem   SizeOfItem(2)
      45             : 
      46             : /* minimal size of dependencies, when all deps are minimal */
      47             : #define MinSizeOfItems(ndeps) \
      48             :     (SizeOfHeader + (ndeps) * MinSizeOfItem)
      49             : 
      50             : /*
      51             :  * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
      52             :  * k-permutations of n elements, except that the order does not matter for the
      53             :  * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
      54             :  */
      55             : typedef struct DependencyGeneratorData
      56             : {
      57             :     int         k;              /* size of the dependency */
      58             :     int         n;              /* number of possible attributes */
      59             :     int         current;        /* next dependency to return (index) */
      60             :     AttrNumber  ndependencies;  /* number of dependencies generated */
      61             :     AttrNumber *dependencies;   /* array of pre-generated dependencies  */
      62             : } DependencyGeneratorData;
      63             : 
      64             : typedef DependencyGeneratorData *DependencyGenerator;
      65             : 
      66             : static void generate_dependencies_recurse(DependencyGenerator state,
      67             :                                           int index, AttrNumber start, AttrNumber *current);
      68             : static void generate_dependencies(DependencyGenerator state);
      69             : static DependencyGenerator DependencyGenerator_init(int n, int k);
      70             : static void DependencyGenerator_free(DependencyGenerator state);
      71             : static AttrNumber *DependencyGenerator_next(DependencyGenerator state);
      72             : static double dependency_degree(int numrows, HeapTuple *rows, int k,
      73             :                                 AttrNumber *dependency, VacAttrStats **stats, Bitmapset *attrs);
      74             : static bool dependency_is_fully_matched(MVDependency *dependency,
      75             :                                         Bitmapset *attnums);
      76             : static bool dependency_implies_attribute(MVDependency *dependency,
      77             :                                          AttrNumber attnum);
      78             : static bool dependency_is_compatible_clause(Node *clause, Index relid,
      79             :                                             AttrNumber *attnum);
      80             : static MVDependency *find_strongest_dependency(StatisticExtInfo *stats,
      81             :                                                MVDependencies *dependencies,
      82             :                                                Bitmapset *attnums);
      83             : 
      84             : static void
      85         332 : generate_dependencies_recurse(DependencyGenerator state, int index,
      86             :                               AttrNumber start, AttrNumber *current)
      87             : {
      88             :     /*
      89             :      * The generator handles the first (k-1) elements differently from the
      90             :      * last element.
      91             :      */
      92         332 :     if (index < (state->k - 1))
      93             :     {
      94             :         AttrNumber  i;
      95             : 
      96             :         /*
      97             :          * The first (k-1) values have to be in ascending order, which we
      98             :          * generate recursively.
      99             :          */
     100             : 
     101         416 :         for (i = start; i < state->n; i++)
     102             :         {
     103         268 :             current[index] = i;
     104         268 :             generate_dependencies_recurse(state, (index + 1), (i + 1), current);
     105             :         }
     106             :     }
     107             :     else
     108             :     {
     109             :         int         i;
     110             : 
     111             :         /*
     112             :          * the last element is the implied value, which does not respect the
     113             :          * ascending order. We just need to check that the value is not in the
     114             :          * first (k-1) elements.
     115             :          */
     116             : 
     117         720 :         for (i = 0; i < state->n; i++)
     118             :         {
     119             :             int         j;
     120         536 :             bool        match = false;
     121             : 
     122         536 :             current[index] = i;
     123             : 
     124         972 :             for (j = 0; j < index; j++)
     125             :             {
     126         704 :                 if (current[j] == i)
     127             :                 {
     128         268 :                     match = true;
     129         268 :                     break;
     130             :                 }
     131             :             }
     132             : 
     133             :             /*
     134             :              * If the value is not found in the first part of the dependency,
     135             :              * we're done.
     136             :              */
     137         536 :             if (!match)
     138             :             {
     139         268 :                 state->dependencies = (AttrNumber *) repalloc(state->dependencies,
     140         268 :                                                               state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
     141         268 :                 memcpy(&state->dependencies[(state->k * state->ndependencies)],
     142         268 :                        current, state->k * sizeof(AttrNumber));
     143         268 :                 state->ndependencies++;
     144             :             }
     145             :         }
     146             :     }
     147         332 : }
     148             : 
     149             : /* generate all dependencies (k-permutations of n elements) */
     150             : static void
     151          64 : generate_dependencies(DependencyGenerator state)
     152             : {
     153          64 :     AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k);
     154             : 
     155          64 :     generate_dependencies_recurse(state, 0, 0, current);
     156             : 
     157          64 :     pfree(current);
     158          64 : }
     159             : 
     160             : /*
     161             :  * initialize the DependencyGenerator of variations, and prebuild the variations
     162             :  *
     163             :  * This pre-builds all the variations. We could also generate them in
     164             :  * DependencyGenerator_next(), but this seems simpler.
     165             :  */
     166             : static DependencyGenerator
     167          64 : DependencyGenerator_init(int n, int k)
     168             : {
     169             :     DependencyGenerator state;
     170             : 
     171             :     Assert((n >= k) && (k > 0));
     172             : 
     173             :     /* allocate the DependencyGenerator state */
     174          64 :     state = (DependencyGenerator) palloc0(sizeof(DependencyGeneratorData));
     175          64 :     state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber));
     176             : 
     177          64 :     state->ndependencies = 0;
     178          64 :     state->current = 0;
     179          64 :     state->k = k;
     180          64 :     state->n = n;
     181             : 
     182             :     /* now actually pre-generate all the variations */
     183          64 :     generate_dependencies(state);
     184             : 
     185          64 :     return state;
     186             : }
     187             : 
     188             : /* free the DependencyGenerator state */
     189             : static void
     190          64 : DependencyGenerator_free(DependencyGenerator state)
     191             : {
     192          64 :     pfree(state->dependencies);
     193          64 :     pfree(state);
     194             : 
     195          64 : }
     196             : 
     197             : /* generate next combination */
     198             : static AttrNumber *
     199         332 : DependencyGenerator_next(DependencyGenerator state)
     200             : {
     201         332 :     if (state->current == state->ndependencies)
     202          64 :         return NULL;
     203             : 
     204         268 :     return &state->dependencies[state->k * state->current++];
     205             : }
     206             : 
     207             : 
     208             : /*
     209             :  * validates functional dependency on the data
     210             :  *
     211             :  * An actual work horse of detecting functional dependencies. Given a variation
     212             :  * of k attributes, it checks that the first (k-1) are sufficient to determine
     213             :  * the last one.
     214             :  */
     215             : static double
     216         268 : dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
     217             :                   VacAttrStats **stats, Bitmapset *attrs)
     218             : {
     219             :     int         i,
     220             :                 nitems;
     221             :     MultiSortSupport mss;
     222             :     SortItem   *items;
     223             :     AttrNumber *attnums;
     224             :     AttrNumber *attnums_dep;
     225             :     int         numattrs;
     226             : 
     227             :     /* counters valid within a group */
     228         268 :     int         group_size = 0;
     229         268 :     int         n_violations = 0;
     230             : 
     231             :     /* total number of rows supporting (consistent with) the dependency */
     232         268 :     int         n_supporting_rows = 0;
     233             : 
     234             :     /* Make sure we have at least two input attributes. */
     235             :     Assert(k >= 2);
     236             : 
     237             :     /* sort info for all attributes columns */
     238         268 :     mss = multi_sort_init(k);
     239             : 
     240             :     /*
     241             :      * Transform the attrs from bitmap to an array to make accessing the i-th
     242             :      * member easier, and then construct a filtered version with only attnums
     243             :      * referenced by the dependency we validate.
     244             :      */
     245         268 :     attnums = build_attnums_array(attrs, &numattrs);
     246             : 
     247         268 :     attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber));
     248         888 :     for (i = 0; i < k; i++)
     249         620 :         attnums_dep[i] = attnums[dependency[i]];
     250             : 
     251             :     /*
     252             :      * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
     253             :      *
     254             :      * (a) sort the data lexicographically
     255             :      *
     256             :      * (b) split the data into groups by first (k-1) columns
     257             :      *
     258             :      * (c) for each group count different values in the last column
     259             :      *
     260             :      * We use the column data types' default sort operators and collations;
     261             :      * perhaps at some point it'd be worth using column-specific collations?
     262             :      */
     263             : 
     264             :     /* prepare the sort function for the dimensions */
     265         888 :     for (i = 0; i < k; i++)
     266             :     {
     267         620 :         VacAttrStats *colstat = stats[dependency[i]];
     268             :         TypeCacheEntry *type;
     269             : 
     270         620 :         type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
     271         620 :         if (type->lt_opr == InvalidOid) /* shouldn't happen */
     272           0 :             elog(ERROR, "cache lookup failed for ordering operator for type %u",
     273             :                  colstat->attrtypid);
     274             : 
     275             :         /* prepare the sort function for this dimension */
     276         620 :         multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
     277             :     }
     278             : 
     279             :     /*
     280             :      * build an array of SortItem(s) sorted using the multi-sort support
     281             :      *
     282             :      * XXX This relies on all stats entries pointing to the same tuple
     283             :      * descriptor.  For now that assumption holds, but it might change in the
     284             :      * future for example if we support statistics on multiple tables.
     285             :      */
     286         268 :     items = build_sorted_items(numrows, &nitems, rows, stats[0]->tupDesc,
     287             :                                mss, k, attnums_dep);
     288             : 
     289             :     /*
     290             :      * Walk through the sorted array, split it into rows according to the
     291             :      * first (k-1) columns. If there's a single value in the last column, we
     292             :      * count the group as 'supporting' the functional dependency. Otherwise we
     293             :      * count it as contradicting.
     294             :      */
     295             : 
     296             :     /* start with the first row forming a group */
     297         268 :     group_size = 1;
     298             : 
     299             :     /* loop 1 beyond the end of the array so that we count the final group */
     300     1003676 :     for (i = 1; i <= nitems; i++)
     301             :     {
     302             :         /*
     303             :          * Check if the group ended, which may be either because we processed
     304             :          * all the items (i==nitems), or because the i-th item is not equal to
     305             :          * the preceding one.
     306             :          */
     307     2006548 :         if (i == nitems ||
     308     1003140 :             multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
     309             :         {
     310             :             /*
     311             :              * If no violations were found in the group then track the rows of
     312             :              * the group as supporting the functional dependency.
     313             :              */
     314       44820 :             if (n_violations == 0)
     315       11980 :                 n_supporting_rows += group_size;
     316             : 
     317             :             /* Reset counters for the new group */
     318       44820 :             n_violations = 0;
     319       44820 :             group_size = 1;
     320       44820 :             continue;
     321             :         }
     322             :         /* first columns match, but the last one does not (so contradicting) */
     323      958588 :         else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
     324      154388 :             n_violations++;
     325             : 
     326      958588 :         group_size++;
     327             :     }
     328             : 
     329         268 :     if (items)
     330         268 :         pfree(items);
     331             : 
     332         268 :     pfree(mss);
     333         268 :     pfree(attnums);
     334         268 :     pfree(attnums_dep);
     335             : 
     336             :     /* Compute the 'degree of validity' as (supporting/total). */
     337         268 :     return (n_supporting_rows * 1.0 / numrows);
     338             : }
     339             : 
     340             : /*
     341             :  * detects functional dependencies between groups of columns
     342             :  *
     343             :  * Generates all possible subsets of columns (variations) and computes
     344             :  * the degree of validity for each one. For example when creating statistics
     345             :  * on three columns (a,b,c) there are 9 possible dependencies
     346             :  *
     347             :  *     two columns            three columns
     348             :  *     -----------            -------------
     349             :  *     (a) -> b                (a,b) -> c
     350             :  *     (a) -> c                (a,c) -> b
     351             :  *     (b) -> a                (b,c) -> a
     352             :  *     (b) -> c
     353             :  *     (c) -> a
     354             :  *     (c) -> b
     355             :  */
     356             : MVDependencies *
     357          36 : statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
     358             :                            VacAttrStats **stats)
     359             : {
     360             :     int         i,
     361             :                 k;
     362             :     int         numattrs;
     363             :     AttrNumber *attnums;
     364             : 
     365             :     /* result */
     366          36 :     MVDependencies *dependencies = NULL;
     367             : 
     368             :     /*
     369             :      * Transform the bms into an array, to make accessing i-th member easier.
     370             :      */
     371          36 :     attnums = build_attnums_array(attrs, &numattrs);
     372             : 
     373             :     Assert(numattrs >= 2);
     374             : 
     375             :     /*
     376             :      * We'll try build functional dependencies starting from the smallest ones
     377             :      * covering just 2 columns, to the largest ones, covering all columns
     378             :      * included in the statistics object.  We start from the smallest ones
     379             :      * because we want to be able to skip already implied ones.
     380             :      */
     381         100 :     for (k = 2; k <= numattrs; k++)
     382             :     {
     383             :         AttrNumber *dependency; /* array with k elements */
     384             : 
     385             :         /* prepare a DependencyGenerator of variation */
     386          64 :         DependencyGenerator DependencyGenerator = DependencyGenerator_init(numattrs, k);
     387             : 
     388             :         /* generate all possible variations of k values (out of n) */
     389         396 :         while ((dependency = DependencyGenerator_next(DependencyGenerator)))
     390             :         {
     391             :             double      degree;
     392             :             MVDependency *d;
     393             : 
     394             :             /* compute how valid the dependency seems */
     395         268 :             degree = dependency_degree(numrows, rows, k, dependency, stats, attrs);
     396             : 
     397             :             /*
     398             :              * if the dependency seems entirely invalid, don't store it
     399             :              */
     400         268 :             if (degree == 0.0)
     401         132 :                 continue;
     402             : 
     403         136 :             d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
     404             :                                          + k * sizeof(AttrNumber));
     405             : 
     406             :             /* copy the dependency (and keep the indexes into stxkeys) */
     407         136 :             d->degree = degree;
     408         136 :             d->nattributes = k;
     409         458 :             for (i = 0; i < k; i++)
     410         322 :                 d->attributes[i] = attnums[dependency[i]];
     411             : 
     412             :             /* initialize the list of dependencies */
     413         136 :             if (dependencies == NULL)
     414             :             {
     415             :                 dependencies
     416          32 :                     = (MVDependencies *) palloc0(sizeof(MVDependencies));
     417             : 
     418          32 :                 dependencies->magic = STATS_DEPS_MAGIC;
     419          32 :                 dependencies->type = STATS_DEPS_TYPE_BASIC;
     420          32 :                 dependencies->ndeps = 0;
     421             :             }
     422             : 
     423         136 :             dependencies->ndeps++;
     424         136 :             dependencies = (MVDependencies *) repalloc(dependencies,
     425             :                                                        offsetof(MVDependencies, deps)
     426         136 :                                                        + dependencies->ndeps * sizeof(MVDependency *));
     427             : 
     428         136 :             dependencies->deps[dependencies->ndeps - 1] = d;
     429             :         }
     430             : 
     431             :         /*
     432             :          * we're done with variations of k elements, so free the
     433             :          * DependencyGenerator
     434             :          */
     435          64 :         DependencyGenerator_free(DependencyGenerator);
     436             :     }
     437             : 
     438          36 :     return dependencies;
     439             : }
     440             : 
     441             : 
     442             : /*
     443             :  * Serialize list of dependencies into a bytea value.
     444             :  */
     445             : bytea *
     446          32 : statext_dependencies_serialize(MVDependencies *dependencies)
     447             : {
     448             :     int         i;
     449             :     bytea      *output;
     450             :     char       *tmp;
     451             :     Size        len;
     452             : 
     453             :     /* we need to store ndeps, with a number of attributes for each one */
     454          32 :     len = VARHDRSZ + SizeOfHeader;
     455             : 
     456             :     /* and also include space for the actual attribute numbers and degrees */
     457         168 :     for (i = 0; i < dependencies->ndeps; i++)
     458         136 :         len += SizeOfItem(dependencies->deps[i]->nattributes);
     459             : 
     460          32 :     output = (bytea *) palloc0(len);
     461          32 :     SET_VARSIZE(output, len);
     462             : 
     463          32 :     tmp = VARDATA(output);
     464             : 
     465             :     /* Store the base struct values (magic, type, ndeps) */
     466          32 :     memcpy(tmp, &dependencies->magic, sizeof(uint32));
     467          32 :     tmp += sizeof(uint32);
     468          32 :     memcpy(tmp, &dependencies->type, sizeof(uint32));
     469          32 :     tmp += sizeof(uint32);
     470          32 :     memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
     471          32 :     tmp += sizeof(uint32);
     472             : 
     473             :     /* store number of attributes and attribute numbers for each dependency */
     474         168 :     for (i = 0; i < dependencies->ndeps; i++)
     475             :     {
     476         136 :         MVDependency *d = dependencies->deps[i];
     477             : 
     478         136 :         memcpy(tmp, &d->degree, sizeof(double));
     479         136 :         tmp += sizeof(double);
     480             : 
     481         136 :         memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
     482         136 :         tmp += sizeof(AttrNumber);
     483             : 
     484         136 :         memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
     485         136 :         tmp += sizeof(AttrNumber) * d->nattributes;
     486             : 
     487             :         /* protect against overflow */
     488             :         Assert(tmp <= ((char *) output + len));
     489             :     }
     490             : 
     491             :     /* make sure we've produced exactly the right amount of data */
     492             :     Assert(tmp == ((char *) output + len));
     493             : 
     494          32 :     return output;
     495             : }
     496             : 
     497             : /*
     498             :  * Reads serialized dependencies into MVDependencies structure.
     499             :  */
     500             : MVDependencies *
     501         144 : statext_dependencies_deserialize(bytea *data)
     502             : {
     503             :     int         i;
     504             :     Size        min_expected_size;
     505             :     MVDependencies *dependencies;
     506             :     char       *tmp;
     507             : 
     508         144 :     if (data == NULL)
     509           0 :         return NULL;
     510             : 
     511         144 :     if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
     512           0 :         elog(ERROR, "invalid MVDependencies size %zd (expected at least %zd)",
     513             :              VARSIZE_ANY_EXHDR(data), SizeOfHeader);
     514             : 
     515             :     /* read the MVDependencies header */
     516         144 :     dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
     517             : 
     518             :     /* initialize pointer to the data part (skip the varlena header) */
     519         144 :     tmp = VARDATA_ANY(data);
     520             : 
     521             :     /* read the header fields and perform basic sanity checks */
     522         144 :     memcpy(&dependencies->magic, tmp, sizeof(uint32));
     523         144 :     tmp += sizeof(uint32);
     524         144 :     memcpy(&dependencies->type, tmp, sizeof(uint32));
     525         144 :     tmp += sizeof(uint32);
     526         144 :     memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
     527         144 :     tmp += sizeof(uint32);
     528             : 
     529         144 :     if (dependencies->magic != STATS_DEPS_MAGIC)
     530           0 :         elog(ERROR, "invalid dependency magic %d (expected %d)",
     531             :              dependencies->magic, STATS_DEPS_MAGIC);
     532             : 
     533         144 :     if (dependencies->type != STATS_DEPS_TYPE_BASIC)
     534           0 :         elog(ERROR, "invalid dependency type %d (expected %d)",
     535             :              dependencies->type, STATS_DEPS_TYPE_BASIC);
     536             : 
     537         144 :     if (dependencies->ndeps == 0)
     538           0 :         elog(ERROR, "invalid zero-length item array in MVDependencies");
     539             : 
     540             :     /* what minimum bytea size do we expect for those parameters */
     541         144 :     min_expected_size = SizeOfItem(dependencies->ndeps);
     542             : 
     543         144 :     if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
     544           0 :         elog(ERROR, "invalid dependencies size %zd (expected at least %zd)",
     545             :              VARSIZE_ANY_EXHDR(data), min_expected_size);
     546             : 
     547             :     /* allocate space for the MCV items */
     548         144 :     dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
     549         144 :                             + (dependencies->ndeps * sizeof(MVDependency *)));
     550             : 
     551         864 :     for (i = 0; i < dependencies->ndeps; i++)
     552             :     {
     553             :         double      degree;
     554             :         AttrNumber  k;
     555             :         MVDependency *d;
     556             : 
     557             :         /* degree of validity */
     558         720 :         memcpy(&degree, tmp, sizeof(double));
     559         720 :         tmp += sizeof(double);
     560             : 
     561             :         /* number of attributes */
     562         720 :         memcpy(&k, tmp, sizeof(AttrNumber));
     563         720 :         tmp += sizeof(AttrNumber);
     564             : 
     565             :         /* is the number of attributes valid? */
     566             :         Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
     567             : 
     568             :         /* now that we know the number of attributes, allocate the dependency */
     569         720 :         d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
     570             :                                      + (k * sizeof(AttrNumber)));
     571             : 
     572         720 :         d->degree = degree;
     573         720 :         d->nattributes = k;
     574             : 
     575             :         /* copy attribute numbers */
     576         720 :         memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
     577         720 :         tmp += sizeof(AttrNumber) * d->nattributes;
     578             : 
     579         720 :         dependencies->deps[i] = d;
     580             : 
     581             :         /* still within the bytea */
     582             :         Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
     583             :     }
     584             : 
     585             :     /* we should have consumed the whole bytea exactly */
     586             :     Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
     587             : 
     588         144 :     return dependencies;
     589             : }
     590             : 
     591             : /*
     592             :  * dependency_is_fully_matched
     593             :  *      checks that a functional dependency is fully matched given clauses on
     594             :  *      attributes (assuming the clauses are suitable equality clauses)
     595             :  */
     596             : static bool
     597         732 : dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
     598             : {
     599             :     int         j;
     600             : 
     601             :     /*
     602             :      * Check that the dependency actually is fully covered by clauses. We have
     603             :      * to translate all attribute numbers, as those are referenced
     604             :      */
     605        1968 :     for (j = 0; j < dependency->nattributes; j++)
     606             :     {
     607        1524 :         int         attnum = dependency->attributes[j];
     608             : 
     609        1524 :         if (!bms_is_member(attnum, attnums))
     610         288 :             return false;
     611             :     }
     612             : 
     613         444 :     return true;
     614             : }
     615             : 
     616             : /*
     617             :  * dependency_implies_attribute
     618             :  *      check that the attnum matches is implied by the functional dependency
     619             :  */
     620             : static bool
     621         528 : dependency_implies_attribute(MVDependency *dependency, AttrNumber attnum)
     622             : {
     623         528 :     if (attnum == dependency->attributes[dependency->nattributes - 1])
     624         204 :         return true;
     625             : 
     626         324 :     return false;
     627             : }
     628             : 
     629             : /*
     630             :  * statext_dependencies_load
     631             :  *      Load the functional dependencies for the indicated pg_statistic_ext tuple
     632             :  */
     633             : MVDependencies *
     634         144 : statext_dependencies_load(Oid mvoid)
     635             : {
     636             :     MVDependencies *result;
     637             :     bool        isnull;
     638             :     Datum       deps;
     639             :     HeapTuple   htup;
     640             : 
     641         144 :     htup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(mvoid));
     642         144 :     if (!HeapTupleIsValid(htup))
     643           0 :         elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
     644             : 
     645         144 :     deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
     646             :                            Anum_pg_statistic_ext_data_stxddependencies, &isnull);
     647         144 :     if (isnull)
     648           0 :         elog(ERROR,
     649             :              "requested statistic kind \"%c\" is not yet built for statistics object %u",
     650             :              STATS_EXT_DEPENDENCIES, mvoid);
     651             : 
     652         144 :     result = statext_dependencies_deserialize(DatumGetByteaPP(deps));
     653             : 
     654         144 :     ReleaseSysCache(htup);
     655             : 
     656         144 :     return result;
     657             : }
     658             : 
     659             : /*
     660             :  * pg_dependencies_in       - input routine for type pg_dependencies.
     661             :  *
     662             :  * pg_dependencies is real enough to be a table column, but it has no operations
     663             :  * of its own, and disallows input too
     664             :  */
     665             : Datum
     666           0 : pg_dependencies_in(PG_FUNCTION_ARGS)
     667             : {
     668             :     /*
     669             :      * pg_node_list stores the data in binary form and parsing text input is
     670             :      * not needed, so disallow this.
     671             :      */
     672           0 :     ereport(ERROR,
     673             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     674             :              errmsg("cannot accept a value of type %s", "pg_dependencies")));
     675             : 
     676             :     PG_RETURN_VOID();           /* keep compiler quiet */
     677             : }
     678             : 
     679             : /*
     680             :  * pg_dependencies      - output routine for type pg_dependencies.
     681             :  */
     682             : Datum
     683           0 : pg_dependencies_out(PG_FUNCTION_ARGS)
     684             : {
     685           0 :     bytea      *data = PG_GETARG_BYTEA_PP(0);
     686           0 :     MVDependencies *dependencies = statext_dependencies_deserialize(data);
     687             :     int         i,
     688             :                 j;
     689             :     StringInfoData str;
     690             : 
     691           0 :     initStringInfo(&str);
     692           0 :     appendStringInfoChar(&str, '{');
     693             : 
     694           0 :     for (i = 0; i < dependencies->ndeps; i++)
     695             :     {
     696           0 :         MVDependency *dependency = dependencies->deps[i];
     697             : 
     698           0 :         if (i > 0)
     699           0 :             appendStringInfoString(&str, ", ");
     700             : 
     701           0 :         appendStringInfoChar(&str, '"');
     702           0 :         for (j = 0; j < dependency->nattributes; j++)
     703             :         {
     704           0 :             if (j == dependency->nattributes - 1)
     705           0 :                 appendStringInfoString(&str, " => ");
     706           0 :             else if (j > 0)
     707           0 :                 appendStringInfoString(&str, ", ");
     708             : 
     709           0 :             appendStringInfo(&str, "%d", dependency->attributes[j]);
     710             :         }
     711           0 :         appendStringInfo(&str, "\": %f", dependency->degree);
     712             :     }
     713             : 
     714           0 :     appendStringInfoChar(&str, '}');
     715             : 
     716           0 :     PG_RETURN_CSTRING(str.data);
     717             : }
     718             : 
     719             : /*
     720             :  * pg_dependencies_recv     - binary input routine for type pg_dependencies.
     721             :  */
     722             : Datum
     723           0 : pg_dependencies_recv(PG_FUNCTION_ARGS)
     724             : {
     725           0 :     ereport(ERROR,
     726             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     727             :              errmsg("cannot accept a value of type %s", "pg_dependencies")));
     728             : 
     729             :     PG_RETURN_VOID();           /* keep compiler quiet */
     730             : }
     731             : 
     732             : /*
     733             :  * pg_dependencies_send     - binary output routine for type pg_dependencies.
     734             :  *
     735             :  * Functional dependencies are serialized in a bytea value (although the type
     736             :  * is named differently), so let's just send that.
     737             :  */
     738             : Datum
     739           0 : pg_dependencies_send(PG_FUNCTION_ARGS)
     740             : {
     741           0 :     return byteasend(fcinfo);
     742             : }
     743             : 
     744             : /*
     745             :  * dependency_is_compatible_clause
     746             :  *      Determines if the clause is compatible with functional dependencies
     747             :  *
     748             :  * Only clauses that have the form of equality to a pseudoconstant, or can be
     749             :  * interpreted that way, are currently accepted.  Furthermore the variable
     750             :  * part of the clause must be a simple Var belonging to the specified
     751             :  * relation, whose attribute number we return in *attnum on success.
     752             :  */
     753             : static bool
     754         348 : dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
     755             : {
     756         348 :     RestrictInfo *rinfo = (RestrictInfo *) clause;
     757             :     Var        *var;
     758             : 
     759         348 :     if (!IsA(rinfo, RestrictInfo))
     760           0 :         return false;
     761             : 
     762             :     /* Pseudoconstants are not interesting (they couldn't contain a Var) */
     763         348 :     if (rinfo->pseudoconstant)
     764           0 :         return false;
     765             : 
     766             :     /* Clauses referencing multiple, or no, varnos are incompatible */
     767         348 :     if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
     768           0 :         return false;
     769             : 
     770         348 :     if (is_opclause(rinfo->clause))
     771             :     {
     772             :         /* If it's an opclause, check for Var = Const or Const = Var. */
     773         348 :         OpExpr     *expr = (OpExpr *) rinfo->clause;
     774             : 
     775             :         /* Only expressions with two arguments are candidates. */
     776         348 :         if (list_length(expr->args) != 2)
     777           0 :             return false;
     778             : 
     779             :         /* Make sure non-selected argument is a pseudoconstant. */
     780         348 :         if (is_pseudo_constant_clause(lsecond(expr->args)))
     781         348 :             var = linitial(expr->args);
     782           0 :         else if (is_pseudo_constant_clause(linitial(expr->args)))
     783           0 :             var = lsecond(expr->args);
     784             :         else
     785           0 :             return false;
     786             : 
     787             :         /*
     788             :          * If it's not an "=" operator, just ignore the clause, as it's not
     789             :          * compatible with functional dependencies.
     790             :          *
     791             :          * This uses the function for estimating selectivity, not the operator
     792             :          * directly (a bit awkward, but well ...).
     793             :          *
     794             :          * XXX this is pretty dubious; probably it'd be better to check btree
     795             :          * or hash opclass membership, so as not to be fooled by custom
     796             :          * selectivity functions, and to be more consistent with decisions
     797             :          * elsewhere in the planner.
     798             :          */
     799         348 :         if (get_oprrest(expr->opno) != F_EQSEL)
     800           0 :             return false;
     801             : 
     802             :         /* OK to proceed with checking "var" */
     803             :     }
     804           0 :     else if (is_notclause(rinfo->clause))
     805             :     {
     806             :         /*
     807             :          * "NOT x" can be interpreted as "x = false", so get the argument and
     808             :          * proceed with seeing if it's a suitable Var.
     809             :          */
     810           0 :         var = (Var *) get_notclausearg(rinfo->clause);
     811             :     }
     812             :     else
     813             :     {
     814             :         /*
     815             :          * A boolean expression "x" can be interpreted as "x = true", so
     816             :          * proceed with seeing if it's a suitable Var.
     817             :          */
     818           0 :         var = (Var *) rinfo->clause;
     819             :     }
     820             : 
     821             :     /*
     822             :      * We may ignore any RelabelType node above the operand.  (There won't be
     823             :      * more than one, since eval_const_expressions has been applied already.)
     824             :      */
     825         348 :     if (IsA(var, RelabelType))
     826           0 :         var = (Var *) ((RelabelType *) var)->arg;
     827             : 
     828             :     /* We only support plain Vars for now */
     829         348 :     if (!IsA(var, Var))
     830           0 :         return false;
     831             : 
     832             :     /* Ensure Var is from the correct relation */
     833         348 :     if (var->varno != relid)
     834           0 :         return false;
     835             : 
     836             :     /* We also better ensure the Var is from the current level */
     837         348 :     if (var->varlevelsup != 0)
     838           0 :         return false;
     839             : 
     840             :     /* Also ignore system attributes (we don't allow stats on those) */
     841         348 :     if (!AttrNumberIsForUserDefinedAttr(var->varattno))
     842           0 :         return false;
     843             : 
     844         348 :     *attnum = var->varattno;
     845         348 :     return true;
     846             : }
     847             : 
     848             : /*
     849             :  * find_strongest_dependency
     850             :  *      find the strongest dependency on the attributes
     851             :  *
     852             :  * When applying functional dependencies, we start with the strongest
     853             :  * dependencies. That is, we select the dependency that:
     854             :  *
     855             :  * (a) has all attributes covered by equality clauses
     856             :  *
     857             :  * (b) has the most attributes
     858             :  *
     859             :  * (c) has the highest degree of validity
     860             :  *
     861             :  * This guarantees that we eliminate the most redundant conditions first
     862             :  * (see the comment in dependencies_clauselist_selectivity).
     863             :  */
     864             : static MVDependency *
     865         348 : find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies,
     866             :                           Bitmapset *attnums)
     867             : {
     868             :     int         i;
     869         348 :     MVDependency *strongest = NULL;
     870             : 
     871             :     /* number of attnums in clauses */
     872         348 :     int         nattnums = bms_num_members(attnums);
     873             : 
     874             :     /*
     875             :      * Iterate over the MVDependency items and find the strongest one from the
     876             :      * fully-matched dependencies. We do the cheap checks first, before
     877             :      * matching it against the attnums.
     878             :      */
     879        2088 :     for (i = 0; i < dependencies->ndeps; i++)
     880             :     {
     881        1740 :         MVDependency *dependency = dependencies->deps[i];
     882             : 
     883             :         /*
     884             :          * Skip dependencies referencing more attributes than available
     885             :          * clauses, as those can't be fully matched.
     886             :          */
     887        1740 :         if (dependency->nattributes > nattnums)
     888        1008 :             continue;
     889             : 
     890         732 :         if (strongest)
     891             :         {
     892             :             /* skip dependencies on fewer attributes than the strongest. */
     893         468 :             if (dependency->nattributes < strongest->nattributes)
     894           0 :                 continue;
     895             : 
     896             :             /* also skip weaker dependencies when attribute count matches */
     897         876 :             if (strongest->nattributes == dependency->nattributes &&
     898         408 :                 strongest->degree > dependency->degree)
     899           0 :                 continue;
     900             :         }
     901             : 
     902             :         /*
     903             :          * this dependency is stronger, but we must still check that it's
     904             :          * fully matched to these attnums. We perform this check last as it's
     905             :          * slightly more expensive than the previous checks.
     906             :          */
     907         732 :         if (dependency_is_fully_matched(dependency, attnums))
     908         444 :             strongest = dependency; /* save new best match */
     909             :     }
     910             : 
     911         348 :     return strongest;
     912             : }
     913             : 
     914             : /*
     915             :  * dependencies_clauselist_selectivity
     916             :  *      Return the estimated selectivity of (a subset of) the given clauses
     917             :  *      using functional dependency statistics, or 1.0 if no useful functional
     918             :  *      dependency statistic exists.
     919             :  *
     920             :  * 'estimatedclauses' is an input/output argument that gets a bit set
     921             :  * corresponding to the (zero-based) list index of each clause that is included
     922             :  * in the estimated selectivity.
     923             :  *
     924             :  * Given equality clauses on attributes (a,b) we find the strongest dependency
     925             :  * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
     926             :  * dependency, we then combine the per-clause selectivities using the formula
     927             :  *
     928             :  *     P(a,b) = P(a) * [f + (1-f)*P(b)]
     929             :  *
     930             :  * where 'f' is the degree of the dependency.
     931             :  *
     932             :  * With clauses on more than two attributes, the dependencies are applied
     933             :  * recursively, starting with the widest/strongest dependencies. For example
     934             :  * P(a,b,c) is first split like this:
     935             :  *
     936             :  *     P(a,b,c) = P(a,b) * [f + (1-f)*P(c)]
     937             :  *
     938             :  * assuming (a,b=>c) is the strongest dependency.
     939             :  */
     940             : Selectivity
     941         228 : dependencies_clauselist_selectivity(PlannerInfo *root,
     942             :                                     List *clauses,
     943             :                                     int varRelid,
     944             :                                     JoinType jointype,
     945             :                                     SpecialJoinInfo *sjinfo,
     946             :                                     RelOptInfo *rel,
     947             :                                     Bitmapset **estimatedclauses)
     948             : {
     949         228 :     Selectivity s1 = 1.0;
     950             :     ListCell   *l;
     951         228 :     Bitmapset  *clauses_attnums = NULL;
     952             :     StatisticExtInfo *stat;
     953             :     MVDependencies *dependencies;
     954             :     AttrNumber *list_attnums;
     955             :     int         listidx;
     956             : 
     957             :     /* check if there's any stats that might be useful for us. */
     958         228 :     if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
     959          84 :         return 1.0;
     960             : 
     961         144 :     list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
     962         144 :                                          list_length(clauses));
     963             : 
     964             :     /*
     965             :      * Pre-process the clauses list to extract the attnums seen in each item.
     966             :      * We need to determine if there's any clauses which will be useful for
     967             :      * dependency selectivity estimations. Along the way we'll record all of
     968             :      * the attnums for each clause in a list which we'll reference later so we
     969             :      * don't need to repeat the same work again. We'll also keep track of all
     970             :      * attnums seen.
     971             :      *
     972             :      * We also skip clauses that we already estimated using different types of
     973             :      * statistics (we treat them as incompatible).
     974             :      */
     975         144 :     listidx = 0;
     976         492 :     foreach(l, clauses)
     977             :     {
     978         348 :         Node       *clause = (Node *) lfirst(l);
     979             :         AttrNumber  attnum;
     980             : 
     981         696 :         if (!bms_is_member(listidx, *estimatedclauses) &&
     982         348 :             dependency_is_compatible_clause(clause, rel->relid, &attnum))
     983             :         {
     984         348 :             list_attnums[listidx] = attnum;
     985         348 :             clauses_attnums = bms_add_member(clauses_attnums, attnum);
     986             :         }
     987             :         else
     988           0 :             list_attnums[listidx] = InvalidAttrNumber;
     989             : 
     990         348 :         listidx++;
     991             :     }
     992             : 
     993             :     /*
     994             :      * If there's not at least two distinct attnums then reject the whole list
     995             :      * of clauses. We must return 1.0 so the calling function's selectivity is
     996             :      * unaffected.
     997             :      */
     998         144 :     if (bms_num_members(clauses_attnums) < 2)
     999             :     {
    1000           0 :         pfree(list_attnums);
    1001           0 :         return 1.0;
    1002             :     }
    1003             : 
    1004             :     /* find the best suited statistics object for these attnums */
    1005         144 :     stat = choose_best_statistics(rel->statlist, clauses_attnums,
    1006             :                                   STATS_EXT_DEPENDENCIES);
    1007             : 
    1008             :     /* if no matching stats could be found then we've nothing to do */
    1009         144 :     if (!stat)
    1010             :     {
    1011           0 :         pfree(list_attnums);
    1012           0 :         return 1.0;
    1013             :     }
    1014             : 
    1015             :     /* load the dependency items stored in the statistics object */
    1016         144 :     dependencies = statext_dependencies_load(stat->statOid);
    1017             : 
    1018             :     /*
    1019             :      * Apply the dependencies recursively, starting with the widest/strongest
    1020             :      * ones, and proceeding to the smaller/weaker ones. At the end of each
    1021             :      * round we factor in the selectivity of clauses on the implied attribute,
    1022             :      * and remove the clauses from the list.
    1023             :      */
    1024             :     while (true)
    1025         204 :     {
    1026         348 :         Selectivity s2 = 1.0;
    1027             :         MVDependency *dependency;
    1028             : 
    1029             :         /* the widest/strongest dependency, fully matched by clauses */
    1030         348 :         dependency = find_strongest_dependency(stat, dependencies,
    1031             :                                                clauses_attnums);
    1032             : 
    1033             :         /* if no suitable dependency was found, we're done */
    1034         348 :         if (!dependency)
    1035         144 :             break;
    1036             : 
    1037             :         /*
    1038             :          * We found an applicable dependency, so find all the clauses on the
    1039             :          * implied attribute - with dependency (a,b => c) we look for clauses
    1040             :          * on 'c'.
    1041             :          */
    1042         204 :         listidx = -1;
    1043         732 :         foreach(l, clauses)
    1044             :         {
    1045             :             Node       *clause;
    1046             : 
    1047         528 :             listidx++;
    1048             : 
    1049             :             /*
    1050             :              * Skip incompatible clauses, and ones we've already estimated on.
    1051             :              */
    1052         528 :             if (list_attnums[listidx] == InvalidAttrNumber)
    1053           0 :                 continue;
    1054             : 
    1055             :             /*
    1056             :              * Technically we could find more than one clause for a given
    1057             :              * attnum. Since these clauses must be equality clauses, we choose
    1058             :              * to only take the selectivity estimate from the final clause in
    1059             :              * the list for this attnum. If the attnum happens to be compared
    1060             :              * to a different Const in another clause then no rows will match
    1061             :              * anyway. If it happens to be compared to the same Const, then
    1062             :              * ignoring the additional clause is just the thing to do.
    1063             :              */
    1064         528 :             if (dependency_implies_attribute(dependency,
    1065         528 :                                              list_attnums[listidx]))
    1066             :             {
    1067         204 :                 clause = (Node *) lfirst(l);
    1068             : 
    1069         204 :                 s2 = clause_selectivity(root, clause, varRelid, jointype,
    1070             :                                         sjinfo);
    1071             : 
    1072             :                 /* mark this one as done, so we don't touch it again. */
    1073         204 :                 *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
    1074             : 
    1075             :                 /*
    1076             :                  * Mark that we've got and used the dependency on this clause.
    1077             :                  * We'll want to ignore this when looking for the next
    1078             :                  * strongest dependency above.
    1079             :                  */
    1080         204 :                 clauses_attnums = bms_del_member(clauses_attnums,
    1081         204 :                                                  list_attnums[listidx]);
    1082             :             }
    1083             :         }
    1084             : 
    1085             :         /*
    1086             :          * Now factor in the selectivity for all the "implied" clauses into
    1087             :          * the final one, using this formula:
    1088             :          *
    1089             :          * P(a,b) = P(a) * (f + (1-f) * P(b))
    1090             :          *
    1091             :          * where 'f' is the degree of validity of the dependency.
    1092             :          */
    1093         204 :         s1 *= (dependency->degree + (1 - dependency->degree) * s2);
    1094             :     }
    1095             : 
    1096         144 :     pfree(dependencies);
    1097         144 :     pfree(list_attnums);
    1098             : 
    1099         144 :     return s1;
    1100             : }

Generated by: LCOV version 1.13