LCOV - code coverage report
Current view: top level - src/backend/statistics - dependencies.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 442 500 88.4 %
Date: 2024-11-21 08:14:44 Functions: 17 20 85.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * dependencies.c
       4             :  *    POSTGRES functional dependencies
       5             :  *
       6             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/statistics/dependencies.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #include "postgres.h"
      15             : 
      16             : #include "access/htup_details.h"
      17             : #include "catalog/pg_statistic_ext.h"
      18             : #include "catalog/pg_statistic_ext_data.h"
      19             : #include "lib/stringinfo.h"
      20             : #include "nodes/nodeFuncs.h"
      21             : #include "nodes/nodes.h"
      22             : #include "nodes/pathnodes.h"
      23             : #include "optimizer/clauses.h"
      24             : #include "optimizer/optimizer.h"
      25             : #include "parser/parsetree.h"
      26             : #include "statistics/extended_stats_internal.h"
      27             : #include "statistics/statistics.h"
      28             : #include "utils/fmgroids.h"
      29             : #include "utils/fmgrprotos.h"
      30             : #include "utils/lsyscache.h"
      31             : #include "utils/memutils.h"
      32             : #include "utils/selfuncs.h"
      33             : #include "utils/syscache.h"
      34             : #include "utils/typcache.h"
      35             : #include "varatt.h"
      36             : 
      37             : /* size of the struct header fields (magic, type, ndeps) */
      38             : #define SizeOfHeader        (3 * sizeof(uint32))
      39             : 
      40             : /* size of a serialized dependency (degree, natts, atts) */
      41             : #define SizeOfItem(natts) \
      42             :     (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
      43             : 
      44             : /* minimal size of a dependency (with two attributes) */
      45             : #define MinSizeOfItem   SizeOfItem(2)
      46             : 
      47             : /* minimal size of dependencies, when all deps are minimal */
      48             : #define MinSizeOfItems(ndeps) \
      49             :     (SizeOfHeader + (ndeps) * MinSizeOfItem)
      50             : 
      51             : /*
      52             :  * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
      53             :  * k-permutations of n elements, except that the order does not matter for the
      54             :  * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
      55             :  */
      56             : typedef struct DependencyGeneratorData
      57             : {
      58             :     int         k;              /* size of the dependency */
      59             :     int         n;              /* number of possible attributes */
      60             :     int         current;        /* next dependency to return (index) */
      61             :     AttrNumber  ndependencies;  /* number of dependencies generated */
      62             :     AttrNumber *dependencies;   /* array of pre-generated dependencies  */
      63             : } DependencyGeneratorData;
      64             : 
      65             : typedef DependencyGeneratorData *DependencyGenerator;
      66             : 
      67             : static void generate_dependencies_recurse(DependencyGenerator state,
      68             :                                           int index, AttrNumber start, AttrNumber *current);
      69             : static void generate_dependencies(DependencyGenerator state);
      70             : static DependencyGenerator DependencyGenerator_init(int n, int k);
      71             : static void DependencyGenerator_free(DependencyGenerator state);
      72             : static AttrNumber *DependencyGenerator_next(DependencyGenerator state);
      73             : static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency);
      74             : static bool dependency_is_fully_matched(MVDependency *dependency,
      75             :                                         Bitmapset *attnums);
      76             : static bool dependency_is_compatible_clause(Node *clause, Index relid,
      77             :                                             AttrNumber *attnum);
      78             : static bool dependency_is_compatible_expression(Node *clause, Index relid,
      79             :                                                 List *statlist, Node **expr);
      80             : static MVDependency *find_strongest_dependency(MVDependencies **dependencies,
      81             :                                                int ndependencies, Bitmapset *attnums);
      82             : static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
      83             :                                                  int varRelid, JoinType jointype,
      84             :                                                  SpecialJoinInfo *sjinfo,
      85             :                                                  MVDependency **dependencies,
      86             :                                                  int ndependencies,
      87             :                                                  AttrNumber *list_attnums,
      88             :                                                  Bitmapset **estimatedclauses);
      89             : 
      90             : static void
      91         780 : generate_dependencies_recurse(DependencyGenerator state, int index,
      92             :                               AttrNumber start, AttrNumber *current)
      93             : {
      94             :     /*
      95             :      * The generator handles the first (k-1) elements differently from the
      96             :      * last element.
      97             :      */
      98         780 :     if (index < (state->k - 1))
      99             :     {
     100             :         AttrNumber  i;
     101             : 
     102             :         /*
     103             :          * The first (k-1) values have to be in ascending order, which we
     104             :          * generate recursively.
     105             :          */
     106             : 
     107         924 :         for (i = start; i < state->n; i++)
     108             :         {
     109         600 :             current[index] = i;
     110         600 :             generate_dependencies_recurse(state, (index + 1), (i + 1), current);
     111             :         }
     112             :     }
     113             :     else
     114             :     {
     115             :         int         i;
     116             : 
     117             :         /*
     118             :          * the last element is the implied value, which does not respect the
     119             :          * ascending order. We just need to check that the value is not in the
     120             :          * first (k-1) elements.
     121             :          */
     122             : 
     123        1656 :         for (i = 0; i < state->n; i++)
     124             :         {
     125             :             int         j;
     126        1200 :             bool        match = false;
     127             : 
     128        1200 :             current[index] = i;
     129             : 
     130        2088 :             for (j = 0; j < index; j++)
     131             :             {
     132        1488 :                 if (current[j] == i)
     133             :                 {
     134         600 :                     match = true;
     135         600 :                     break;
     136             :                 }
     137             :             }
     138             : 
     139             :             /*
     140             :              * If the value is not found in the first part of the dependency,
     141             :              * we're done.
     142             :              */
     143        1200 :             if (!match)
     144             :             {
     145        1200 :                 state->dependencies = (AttrNumber *) repalloc(state->dependencies,
     146         600 :                                                               state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
     147         600 :                 memcpy(&state->dependencies[(state->k * state->ndependencies)],
     148         600 :                        current, state->k * sizeof(AttrNumber));
     149         600 :                 state->ndependencies++;
     150             :             }
     151             :         }
     152             :     }
     153         780 : }
     154             : 
     155             : /* generate all dependencies (k-permutations of n elements) */
     156             : static void
     157         180 : generate_dependencies(DependencyGenerator state)
     158             : {
     159         180 :     AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k);
     160             : 
     161         180 :     generate_dependencies_recurse(state, 0, 0, current);
     162             : 
     163         180 :     pfree(current);
     164         180 : }
     165             : 
     166             : /*
     167             :  * initialize the DependencyGenerator of variations, and prebuild the variations
     168             :  *
     169             :  * This pre-builds all the variations. We could also generate them in
     170             :  * DependencyGenerator_next(), but this seems simpler.
     171             :  */
     172             : static DependencyGenerator
     173         180 : DependencyGenerator_init(int n, int k)
     174             : {
     175             :     DependencyGenerator state;
     176             : 
     177             :     Assert((n >= k) && (k > 0));
     178             : 
     179             :     /* allocate the DependencyGenerator state */
     180         180 :     state = (DependencyGenerator) palloc0(sizeof(DependencyGeneratorData));
     181         180 :     state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber));
     182             : 
     183         180 :     state->ndependencies = 0;
     184         180 :     state->current = 0;
     185         180 :     state->k = k;
     186         180 :     state->n = n;
     187             : 
     188             :     /* now actually pre-generate all the variations */
     189         180 :     generate_dependencies(state);
     190             : 
     191         180 :     return state;
     192             : }
     193             : 
     194             : /* free the DependencyGenerator state */
     195             : static void
     196         180 : DependencyGenerator_free(DependencyGenerator state)
     197             : {
     198         180 :     pfree(state->dependencies);
     199         180 :     pfree(state);
     200         180 : }
     201             : 
     202             : /* generate next combination */
     203             : static AttrNumber *
     204         780 : DependencyGenerator_next(DependencyGenerator state)
     205             : {
     206         780 :     if (state->current == state->ndependencies)
     207         180 :         return NULL;
     208             : 
     209         600 :     return &state->dependencies[state->k * state->current++];
     210             : }
     211             : 
     212             : 
     213             : /*
     214             :  * validates functional dependency on the data
     215             :  *
     216             :  * An actual work horse of detecting functional dependencies. Given a variation
     217             :  * of k attributes, it checks that the first (k-1) are sufficient to determine
     218             :  * the last one.
     219             :  */
     220             : static double
     221         600 : dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
     222             : {
     223             :     int         i,
     224             :                 nitems;
     225             :     MultiSortSupport mss;
     226             :     SortItem   *items;
     227             :     AttrNumber *attnums_dep;
     228             : 
     229             :     /* counters valid within a group */
     230         600 :     int         group_size = 0;
     231         600 :     int         n_violations = 0;
     232             : 
     233             :     /* total number of rows supporting (consistent with) the dependency */
     234         600 :     int         n_supporting_rows = 0;
     235             : 
     236             :     /* Make sure we have at least two input attributes. */
     237             :     Assert(k >= 2);
     238             : 
     239             :     /* sort info for all attributes columns */
     240         600 :     mss = multi_sort_init(k);
     241             : 
     242             :     /*
     243             :      * Translate the array of indexes to regular attnums for the dependency
     244             :      * (we will need this to identify the columns in StatsBuildData).
     245             :      */
     246         600 :     attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber));
     247        1944 :     for (i = 0; i < k; i++)
     248        1344 :         attnums_dep[i] = data->attnums[dependency[i]];
     249             : 
     250             :     /*
     251             :      * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
     252             :      *
     253             :      * (a) sort the data lexicographically
     254             :      *
     255             :      * (b) split the data into groups by first (k-1) columns
     256             :      *
     257             :      * (c) for each group count different values in the last column
     258             :      *
     259             :      * We use the column data types' default sort operators and collations;
     260             :      * perhaps at some point it'd be worth using column-specific collations?
     261             :      */
     262             : 
     263             :     /* prepare the sort function for the dimensions */
     264        1944 :     for (i = 0; i < k; i++)
     265             :     {
     266        1344 :         VacAttrStats *colstat = data->stats[dependency[i]];
     267             :         TypeCacheEntry *type;
     268             : 
     269        1344 :         type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
     270        1344 :         if (type->lt_opr == InvalidOid) /* shouldn't happen */
     271           0 :             elog(ERROR, "cache lookup failed for ordering operator for type %u",
     272             :                  colstat->attrtypid);
     273             : 
     274             :         /* prepare the sort function for this dimension */
     275        1344 :         multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
     276             :     }
     277             : 
     278             :     /*
     279             :      * build an array of SortItem(s) sorted using the multi-sort support
     280             :      *
     281             :      * XXX This relies on all stats entries pointing to the same tuple
     282             :      * descriptor.  For now that assumption holds, but it might change in the
     283             :      * future for example if we support statistics on multiple tables.
     284             :      */
     285         600 :     items = build_sorted_items(data, &nitems, mss, k, attnums_dep);
     286             : 
     287             :     /*
     288             :      * Walk through the sorted array, split it into rows according to the
     289             :      * first (k-1) columns. If there's a single value in the last column, we
     290             :      * count the group as 'supporting' the functional dependency. Otherwise we
     291             :      * count it as contradicting.
     292             :      */
     293             : 
     294             :     /* start with the first row forming a group */
     295         600 :     group_size = 1;
     296             : 
     297             :     /* loop 1 beyond the end of the array so that we count the final group */
     298     1505466 :     for (i = 1; i <= nitems; i++)
     299             :     {
     300             :         /*
     301             :          * Check if the group ended, which may be either because we processed
     302             :          * all the items (i==nitems), or because the i-th item is not equal to
     303             :          * the preceding one.
     304             :          */
     305     3009132 :         if (i == nitems ||
     306     1504266 :             multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
     307             :         {
     308             :             /*
     309             :              * If no violations were found in the group then track the rows of
     310             :              * the group as supporting the functional dependency.
     311             :              */
     312       32268 :             if (n_violations == 0)
     313       19578 :                 n_supporting_rows += group_size;
     314             : 
     315             :             /* Reset counters for the new group */
     316       32268 :             n_violations = 0;
     317       32268 :             group_size = 1;
     318       32268 :             continue;
     319             :         }
     320             :         /* first columns match, but the last one does not (so contradicting) */
     321     1472598 :         else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
     322       60102 :             n_violations++;
     323             : 
     324     1472598 :         group_size++;
     325             :     }
     326             : 
     327             :     /* Compute the 'degree of validity' as (supporting/total). */
     328         600 :     return (n_supporting_rows * 1.0 / data->numrows);
     329             : }
     330             : 
     331             : /*
     332             :  * detects functional dependencies between groups of columns
     333             :  *
     334             :  * Generates all possible subsets of columns (variations) and computes
     335             :  * the degree of validity for each one. For example when creating statistics
     336             :  * on three columns (a,b,c) there are 9 possible dependencies
     337             :  *
     338             :  *     two columns            three columns
     339             :  *     -----------            -------------
     340             :  *     (a) -> b                (a,b) -> c
     341             :  *     (a) -> c                (a,c) -> b
     342             :  *     (b) -> a                (b,c) -> a
     343             :  *     (b) -> c
     344             :  *     (c) -> a
     345             :  *     (c) -> b
     346             :  */
     347             : MVDependencies *
     348         132 : statext_dependencies_build(StatsBuildData *data)
     349             : {
     350             :     int         i,
     351             :                 k;
     352             : 
     353             :     /* result */
     354         132 :     MVDependencies *dependencies = NULL;
     355             :     MemoryContext cxt;
     356             : 
     357             :     Assert(data->nattnums >= 2);
     358             : 
     359             :     /* tracks memory allocated by dependency_degree calls */
     360         132 :     cxt = AllocSetContextCreate(CurrentMemoryContext,
     361             :                                 "dependency_degree cxt",
     362             :                                 ALLOCSET_DEFAULT_SIZES);
     363             : 
     364             :     /*
     365             :      * We'll try build functional dependencies starting from the smallest ones
     366             :      * covering just 2 columns, to the largest ones, covering all columns
     367             :      * included in the statistics object.  We start from the smallest ones
     368             :      * because we want to be able to skip already implied ones.
     369             :      */
     370         312 :     for (k = 2; k <= data->nattnums; k++)
     371             :     {
     372             :         AttrNumber *dependency; /* array with k elements */
     373             : 
     374             :         /* prepare a DependencyGenerator of variation */
     375         180 :         DependencyGenerator DependencyGenerator = DependencyGenerator_init(data->nattnums, k);
     376             : 
     377             :         /* generate all possible variations of k values (out of n) */
     378         780 :         while ((dependency = DependencyGenerator_next(DependencyGenerator)))
     379             :         {
     380             :             double      degree;
     381             :             MVDependency *d;
     382             :             MemoryContext oldcxt;
     383             : 
     384             :             /* release memory used by dependency degree calculation */
     385         600 :             oldcxt = MemoryContextSwitchTo(cxt);
     386             : 
     387             :             /* compute how valid the dependency seems */
     388         600 :             degree = dependency_degree(data, k, dependency);
     389             : 
     390         600 :             MemoryContextSwitchTo(oldcxt);
     391         600 :             MemoryContextReset(cxt);
     392             : 
     393             :             /*
     394             :              * if the dependency seems entirely invalid, don't store it
     395             :              */
     396         600 :             if (degree == 0.0)
     397         252 :                 continue;
     398             : 
     399         348 :             d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
     400         348 :                                          + k * sizeof(AttrNumber));
     401             : 
     402             :             /* copy the dependency (and keep the indexes into stxkeys) */
     403         348 :             d->degree = degree;
     404         348 :             d->nattributes = k;
     405        1116 :             for (i = 0; i < k; i++)
     406         768 :                 d->attributes[i] = data->attnums[dependency[i]];
     407             : 
     408             :             /* initialize the list of dependencies */
     409         348 :             if (dependencies == NULL)
     410             :             {
     411             :                 dependencies
     412         114 :                     = (MVDependencies *) palloc0(sizeof(MVDependencies));
     413             : 
     414         114 :                 dependencies->magic = STATS_DEPS_MAGIC;
     415         114 :                 dependencies->type = STATS_DEPS_TYPE_BASIC;
     416         114 :                 dependencies->ndeps = 0;
     417             :             }
     418             : 
     419         348 :             dependencies->ndeps++;
     420         348 :             dependencies = (MVDependencies *) repalloc(dependencies,
     421             :                                                        offsetof(MVDependencies, deps)
     422         348 :                                                        + dependencies->ndeps * sizeof(MVDependency *));
     423             : 
     424         348 :             dependencies->deps[dependencies->ndeps - 1] = d;
     425             :         }
     426             : 
     427             :         /*
     428             :          * we're done with variations of k elements, so free the
     429             :          * DependencyGenerator
     430             :          */
     431         180 :         DependencyGenerator_free(DependencyGenerator);
     432             :     }
     433             : 
     434         132 :     MemoryContextDelete(cxt);
     435             : 
     436         132 :     return dependencies;
     437             : }
     438             : 
     439             : 
     440             : /*
     441             :  * Serialize list of dependencies into a bytea value.
     442             :  */
     443             : bytea *
     444         114 : statext_dependencies_serialize(MVDependencies *dependencies)
     445             : {
     446             :     int         i;
     447             :     bytea      *output;
     448             :     char       *tmp;
     449             :     Size        len;
     450             : 
     451             :     /* we need to store ndeps, with a number of attributes for each one */
     452         114 :     len = VARHDRSZ + SizeOfHeader;
     453             : 
     454             :     /* and also include space for the actual attribute numbers and degrees */
     455         462 :     for (i = 0; i < dependencies->ndeps; i++)
     456         348 :         len += SizeOfItem(dependencies->deps[i]->nattributes);
     457             : 
     458         114 :     output = (bytea *) palloc0(len);
     459         114 :     SET_VARSIZE(output, len);
     460             : 
     461         114 :     tmp = VARDATA(output);
     462             : 
     463             :     /* Store the base struct values (magic, type, ndeps) */
     464         114 :     memcpy(tmp, &dependencies->magic, sizeof(uint32));
     465         114 :     tmp += sizeof(uint32);
     466         114 :     memcpy(tmp, &dependencies->type, sizeof(uint32));
     467         114 :     tmp += sizeof(uint32);
     468         114 :     memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
     469         114 :     tmp += sizeof(uint32);
     470             : 
     471             :     /* store number of attributes and attribute numbers for each dependency */
     472         462 :     for (i = 0; i < dependencies->ndeps; i++)
     473             :     {
     474         348 :         MVDependency *d = dependencies->deps[i];
     475             : 
     476         348 :         memcpy(tmp, &d->degree, sizeof(double));
     477         348 :         tmp += sizeof(double);
     478             : 
     479         348 :         memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
     480         348 :         tmp += sizeof(AttrNumber);
     481             : 
     482         348 :         memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
     483         348 :         tmp += sizeof(AttrNumber) * d->nattributes;
     484             : 
     485             :         /* protect against overflow */
     486             :         Assert(tmp <= ((char *) output + len));
     487             :     }
     488             : 
     489             :     /* make sure we've produced exactly the right amount of data */
     490             :     Assert(tmp == ((char *) output + len));
     491             : 
     492         114 :     return output;
     493             : }
     494             : 
     495             : /*
     496             :  * Reads serialized dependencies into MVDependencies structure.
     497             :  */
     498             : MVDependencies *
     499        1120 : statext_dependencies_deserialize(bytea *data)
     500             : {
     501             :     int         i;
     502             :     Size        min_expected_size;
     503             :     MVDependencies *dependencies;
     504             :     char       *tmp;
     505             : 
     506        1120 :     if (data == NULL)
     507           0 :         return NULL;
     508             : 
     509        1120 :     if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
     510           0 :         elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
     511             :              VARSIZE_ANY_EXHDR(data), SizeOfHeader);
     512             : 
     513             :     /* read the MVDependencies header */
     514        1120 :     dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
     515             : 
     516             :     /* initialize pointer to the data part (skip the varlena header) */
     517        1120 :     tmp = VARDATA_ANY(data);
     518             : 
     519             :     /* read the header fields and perform basic sanity checks */
     520        1120 :     memcpy(&dependencies->magic, tmp, sizeof(uint32));
     521        1120 :     tmp += sizeof(uint32);
     522        1120 :     memcpy(&dependencies->type, tmp, sizeof(uint32));
     523        1120 :     tmp += sizeof(uint32);
     524        1120 :     memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
     525        1120 :     tmp += sizeof(uint32);
     526             : 
     527        1120 :     if (dependencies->magic != STATS_DEPS_MAGIC)
     528           0 :         elog(ERROR, "invalid dependency magic %d (expected %d)",
     529             :              dependencies->magic, STATS_DEPS_MAGIC);
     530             : 
     531        1120 :     if (dependencies->type != STATS_DEPS_TYPE_BASIC)
     532           0 :         elog(ERROR, "invalid dependency type %d (expected %d)",
     533             :              dependencies->type, STATS_DEPS_TYPE_BASIC);
     534             : 
     535        1120 :     if (dependencies->ndeps == 0)
     536           0 :         elog(ERROR, "invalid zero-length item array in MVDependencies");
     537             : 
     538             :     /* what minimum bytea size do we expect for those parameters */
     539        1120 :     min_expected_size = SizeOfItem(dependencies->ndeps);
     540             : 
     541        1120 :     if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
     542           0 :         elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
     543             :              VARSIZE_ANY_EXHDR(data), min_expected_size);
     544             : 
     545             :     /* allocate space for the MCV items */
     546        1120 :     dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
     547        1120 :                             + (dependencies->ndeps * sizeof(MVDependency *)));
     548             : 
     549        6594 :     for (i = 0; i < dependencies->ndeps; i++)
     550             :     {
     551             :         double      degree;
     552             :         AttrNumber  k;
     553             :         MVDependency *d;
     554             : 
     555             :         /* degree of validity */
     556        5474 :         memcpy(&degree, tmp, sizeof(double));
     557        5474 :         tmp += sizeof(double);
     558             : 
     559             :         /* number of attributes */
     560        5474 :         memcpy(&k, tmp, sizeof(AttrNumber));
     561        5474 :         tmp += sizeof(AttrNumber);
     562             : 
     563             :         /* is the number of attributes valid? */
     564             :         Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
     565             : 
     566             :         /* now that we know the number of attributes, allocate the dependency */
     567        5474 :         d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
     568        5474 :                                      + (k * sizeof(AttrNumber)));
     569             : 
     570        5474 :         d->degree = degree;
     571        5474 :         d->nattributes = k;
     572             : 
     573             :         /* copy attribute numbers */
     574        5474 :         memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
     575        5474 :         tmp += sizeof(AttrNumber) * d->nattributes;
     576             : 
     577        5474 :         dependencies->deps[i] = d;
     578             : 
     579             :         /* still within the bytea */
     580             :         Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
     581             :     }
     582             : 
     583             :     /* we should have consumed the whole bytea exactly */
     584             :     Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
     585             : 
     586        1120 :     return dependencies;
     587             : }
     588             : 
     589             : /*
     590             :  * dependency_is_fully_matched
     591             :  *      checks that a functional dependency is fully matched given clauses on
     592             :  *      attributes (assuming the clauses are suitable equality clauses)
     593             :  */
     594             : static bool
     595        4644 : dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
     596             : {
     597             :     int         j;
     598             : 
     599             :     /*
     600             :      * Check that the dependency actually is fully covered by clauses. We have
     601             :      * to translate all attribute numbers, as those are referenced
     602             :      */
     603       12114 :     for (j = 0; j < dependency->nattributes; j++)
     604             :     {
     605        9534 :         int         attnum = dependency->attributes[j];
     606             : 
     607        9534 :         if (!bms_is_member(attnum, attnums))
     608        2064 :             return false;
     609             :     }
     610             : 
     611        2580 :     return true;
     612             : }
     613             : 
     614             : /*
     615             :  * statext_dependencies_load
     616             :  *      Load the functional dependencies for the indicated pg_statistic_ext tuple
     617             :  */
     618             : MVDependencies *
     619        1104 : statext_dependencies_load(Oid mvoid, bool inh)
     620             : {
     621             :     MVDependencies *result;
     622             :     bool        isnull;
     623             :     Datum       deps;
     624             :     HeapTuple   htup;
     625             : 
     626        1104 :     htup = SearchSysCache2(STATEXTDATASTXOID,
     627             :                            ObjectIdGetDatum(mvoid),
     628             :                            BoolGetDatum(inh));
     629        1104 :     if (!HeapTupleIsValid(htup))
     630           0 :         elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
     631             : 
     632        1104 :     deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
     633             :                            Anum_pg_statistic_ext_data_stxddependencies, &isnull);
     634        1104 :     if (isnull)
     635           0 :         elog(ERROR,
     636             :              "requested statistics kind \"%c\" is not yet built for statistics object %u",
     637             :              STATS_EXT_DEPENDENCIES, mvoid);
     638             : 
     639        1104 :     result = statext_dependencies_deserialize(DatumGetByteaPP(deps));
     640             : 
     641        1104 :     ReleaseSysCache(htup);
     642             : 
     643        1104 :     return result;
     644             : }
     645             : 
     646             : /*
     647             :  * pg_dependencies_in       - input routine for type pg_dependencies.
     648             :  *
     649             :  * pg_dependencies is real enough to be a table column, but it has no operations
     650             :  * of its own, and disallows input too
     651             :  */
     652             : Datum
     653           0 : pg_dependencies_in(PG_FUNCTION_ARGS)
     654             : {
     655             :     /*
     656             :      * pg_node_list stores the data in binary form and parsing text input is
     657             :      * not needed, so disallow this.
     658             :      */
     659           0 :     ereport(ERROR,
     660             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     661             :              errmsg("cannot accept a value of type %s", "pg_dependencies")));
     662             : 
     663             :     PG_RETURN_VOID();           /* keep compiler quiet */
     664             : }
     665             : 
     666             : /*
     667             :  * pg_dependencies      - output routine for type pg_dependencies.
     668             :  */
     669             : Datum
     670          16 : pg_dependencies_out(PG_FUNCTION_ARGS)
     671             : {
     672          16 :     bytea      *data = PG_GETARG_BYTEA_PP(0);
     673          16 :     MVDependencies *dependencies = statext_dependencies_deserialize(data);
     674             :     int         i,
     675             :                 j;
     676             :     StringInfoData str;
     677             : 
     678          16 :     initStringInfo(&str);
     679          16 :     appendStringInfoChar(&str, '{');
     680             : 
     681          96 :     for (i = 0; i < dependencies->ndeps; i++)
     682             :     {
     683          80 :         MVDependency *dependency = dependencies->deps[i];
     684             : 
     685          80 :         if (i > 0)
     686          64 :             appendStringInfoString(&str, ", ");
     687             : 
     688          80 :         appendStringInfoChar(&str, '"');
     689         272 :         for (j = 0; j < dependency->nattributes; j++)
     690             :         {
     691         192 :             if (j == dependency->nattributes - 1)
     692          80 :                 appendStringInfoString(&str, " => ");
     693         112 :             else if (j > 0)
     694          32 :                 appendStringInfoString(&str, ", ");
     695             : 
     696         192 :             appendStringInfo(&str, "%d", dependency->attributes[j]);
     697             :         }
     698          80 :         appendStringInfo(&str, "\": %f", dependency->degree);
     699             :     }
     700             : 
     701          16 :     appendStringInfoChar(&str, '}');
     702             : 
     703          16 :     PG_RETURN_CSTRING(str.data);
     704             : }
     705             : 
     706             : /*
     707             :  * pg_dependencies_recv     - binary input routine for type pg_dependencies.
     708             :  */
     709             : Datum
     710           0 : pg_dependencies_recv(PG_FUNCTION_ARGS)
     711             : {
     712           0 :     ereport(ERROR,
     713             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     714             :              errmsg("cannot accept a value of type %s", "pg_dependencies")));
     715             : 
     716             :     PG_RETURN_VOID();           /* keep compiler quiet */
     717             : }
     718             : 
     719             : /*
     720             :  * pg_dependencies_send     - binary output routine for type pg_dependencies.
     721             :  *
     722             :  * Functional dependencies are serialized in a bytea value (although the type
     723             :  * is named differently), so let's just send that.
     724             :  */
     725             : Datum
     726           0 : pg_dependencies_send(PG_FUNCTION_ARGS)
     727             : {
     728           0 :     return byteasend(fcinfo);
     729             : }
     730             : 
     731             : /*
     732             :  * dependency_is_compatible_clause
     733             :  *      Determines if the clause is compatible with functional dependencies
     734             :  *
     735             :  * Only clauses that have the form of equality to a pseudoconstant, or can be
     736             :  * interpreted that way, are currently accepted.  Furthermore the variable
     737             :  * part of the clause must be a simple Var belonging to the specified
     738             :  * relation, whose attribute number we return in *attnum on success.
     739             :  */
     740             : static bool
     741        2952 : dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
     742             : {
     743             :     Var        *var;
     744             :     Node       *clause_expr;
     745             : 
     746        2952 :     if (IsA(clause, RestrictInfo))
     747             :     {
     748        2832 :         RestrictInfo *rinfo = (RestrictInfo *) clause;
     749             : 
     750             :         /* Pseudoconstants are not interesting (they couldn't contain a Var) */
     751        2832 :         if (rinfo->pseudoconstant)
     752           6 :             return false;
     753             : 
     754             :         /* Clauses referencing multiple, or no, varnos are incompatible */
     755        2826 :         if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
     756           0 :             return false;
     757             : 
     758        2826 :         clause = (Node *) rinfo->clause;
     759             :     }
     760             : 
     761        2946 :     if (is_opclause(clause))
     762             :     {
     763             :         /* If it's an opclause, check for Var = Const or Const = Var. */
     764        1134 :         OpExpr     *expr = (OpExpr *) clause;
     765             : 
     766             :         /* Only expressions with two arguments are candidates. */
     767        1134 :         if (list_length(expr->args) != 2)
     768           0 :             return false;
     769             : 
     770             :         /* Make sure non-selected argument is a pseudoconstant. */
     771        1134 :         if (is_pseudo_constant_clause(lsecond(expr->args)))
     772        1134 :             clause_expr = linitial(expr->args);
     773           0 :         else if (is_pseudo_constant_clause(linitial(expr->args)))
     774           0 :             clause_expr = lsecond(expr->args);
     775             :         else
     776           0 :             return false;
     777             : 
     778             :         /*
     779             :          * If it's not an "=" operator, just ignore the clause, as it's not
     780             :          * compatible with functional dependencies.
     781             :          *
     782             :          * This uses the function for estimating selectivity, not the operator
     783             :          * directly (a bit awkward, but well ...).
     784             :          *
     785             :          * XXX this is pretty dubious; probably it'd be better to check btree
     786             :          * or hash opclass membership, so as not to be fooled by custom
     787             :          * selectivity functions, and to be more consistent with decisions
     788             :          * elsewhere in the planner.
     789             :          */
     790        1134 :         if (get_oprrest(expr->opno) != F_EQSEL)
     791          36 :             return false;
     792             : 
     793             :         /* OK to proceed with checking "var" */
     794             :     }
     795        1812 :     else if (IsA(clause, ScalarArrayOpExpr))
     796             :     {
     797             :         /* If it's a scalar array operator, check for Var IN Const. */
     798        1740 :         ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
     799             : 
     800             :         /*
     801             :          * Reject ALL() variant, we only care about ANY/IN.
     802             :          *
     803             :          * XXX Maybe we should check if all the values are the same, and allow
     804             :          * ALL in that case? Doesn't seem very practical, though.
     805             :          */
     806        1740 :         if (!expr->useOr)
     807          36 :             return false;
     808             : 
     809             :         /* Only expressions with two arguments are candidates. */
     810        1704 :         if (list_length(expr->args) != 2)
     811           0 :             return false;
     812             : 
     813             :         /*
     814             :          * We know it's always (Var IN Const), so we assume the var is the
     815             :          * first argument, and pseudoconstant is the second one.
     816             :          */
     817        1704 :         if (!is_pseudo_constant_clause(lsecond(expr->args)))
     818           0 :             return false;
     819             : 
     820        1704 :         clause_expr = linitial(expr->args);
     821             : 
     822             :         /*
     823             :          * If it's not an "=" operator, just ignore the clause, as it's not
     824             :          * compatible with functional dependencies. The operator is identified
     825             :          * simply by looking at which function it uses to estimate
     826             :          * selectivity. That's a bit strange, but it's what other similar
     827             :          * places do.
     828             :          */
     829        1704 :         if (get_oprrest(expr->opno) != F_EQSEL)
     830         180 :             return false;
     831             : 
     832             :         /* OK to proceed with checking "var" */
     833             :     }
     834          72 :     else if (is_orclause(clause))
     835             :     {
     836          72 :         BoolExpr   *bool_expr = (BoolExpr *) clause;
     837             :         ListCell   *lc;
     838             : 
     839             :         /* start with no attribute number */
     840          72 :         *attnum = InvalidAttrNumber;
     841             : 
     842         150 :         foreach(lc, bool_expr->args)
     843             :         {
     844             :             AttrNumber  clause_attnum;
     845             : 
     846             :             /*
     847             :              * Had we found incompatible clause in the arguments, treat the
     848             :              * whole clause as incompatible.
     849             :              */
     850         120 :             if (!dependency_is_compatible_clause((Node *) lfirst(lc),
     851             :                                                  relid, &clause_attnum))
     852          42 :                 return false;
     853             : 
     854          84 :             if (*attnum == InvalidAttrNumber)
     855          36 :                 *attnum = clause_attnum;
     856             : 
     857             :             /* ensure all the variables are the same (same attnum) */
     858          84 :             if (*attnum != clause_attnum)
     859           6 :                 return false;
     860             :         }
     861             : 
     862             :         /* the Var is already checked by the recursive call */
     863          30 :         return true;
     864             :     }
     865           0 :     else if (is_notclause(clause))
     866             :     {
     867             :         /*
     868             :          * "NOT x" can be interpreted as "x = false", so get the argument and
     869             :          * proceed with seeing if it's a suitable Var.
     870             :          */
     871           0 :         clause_expr = (Node *) get_notclausearg(clause);
     872             :     }
     873             :     else
     874             :     {
     875             :         /*
     876             :          * A boolean expression "x" can be interpreted as "x = true", so
     877             :          * proceed with seeing if it's a suitable Var.
     878             :          */
     879           0 :         clause_expr = (Node *) clause;
     880             :     }
     881             : 
     882             :     /*
     883             :      * We may ignore any RelabelType node above the operand.  (There won't be
     884             :      * more than one, since eval_const_expressions has been applied already.)
     885             :      */
     886        2622 :     if (IsA(clause_expr, RelabelType))
     887           0 :         clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
     888             : 
     889             :     /* We only support plain Vars for now */
     890        2622 :     if (!IsA(clause_expr, Var))
     891         288 :         return false;
     892             : 
     893             :     /* OK, we know we have a Var */
     894        2334 :     var = (Var *) clause_expr;
     895             : 
     896             :     /* Ensure Var is from the correct relation */
     897        2334 :     if (var->varno != relid)
     898           0 :         return false;
     899             : 
     900             :     /* We also better ensure the Var is from the current level */
     901        2334 :     if (var->varlevelsup != 0)
     902           0 :         return false;
     903             : 
     904             :     /* Also ignore system attributes (we don't allow stats on those) */
     905        2334 :     if (!AttrNumberIsForUserDefinedAttr(var->varattno))
     906           0 :         return false;
     907             : 
     908        2334 :     *attnum = var->varattno;
     909        2334 :     return true;
     910             : }
     911             : 
     912             : /*
     913             :  * find_strongest_dependency
     914             :  *      find the strongest dependency on the attributes
     915             :  *
     916             :  * When applying functional dependencies, we start with the strongest
     917             :  * dependencies. That is, we select the dependency that:
     918             :  *
     919             :  * (a) has all attributes covered by equality clauses
     920             :  *
     921             :  * (b) has the most attributes
     922             :  *
     923             :  * (c) has the highest degree of validity
     924             :  *
     925             :  * This guarantees that we eliminate the most redundant conditions first
     926             :  * (see the comment in dependencies_clauselist_selectivity).
     927             :  */
     928             : static MVDependency *
     929        2478 : find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
     930             :                           Bitmapset *attnums)
     931             : {
     932             :     int         i,
     933             :                 j;
     934        2478 :     MVDependency *strongest = NULL;
     935             : 
     936             :     /* number of attnums in clauses */
     937        2478 :     int         nattnums = bms_num_members(attnums);
     938             : 
     939             :     /*
     940             :      * Iterate over the MVDependency items and find the strongest one from the
     941             :      * fully-matched dependencies. We do the cheap checks first, before
     942             :      * matching it against the attnums.
     943             :      */
     944        4992 :     for (i = 0; i < ndependencies; i++)
     945             :     {
     946       14232 :         for (j = 0; j < dependencies[i]->ndeps; j++)
     947             :         {
     948       11718 :             MVDependency *dependency = dependencies[i]->deps[j];
     949             : 
     950             :             /*
     951             :              * Skip dependencies referencing more attributes than available
     952             :              * clauses, as those can't be fully matched.
     953             :              */
     954       11718 :             if (dependency->nattributes > nattnums)
     955        7074 :                 continue;
     956             : 
     957        4644 :             if (strongest)
     958             :             {
     959             :                 /* skip dependencies on fewer attributes than the strongest. */
     960        2928 :                 if (dependency->nattributes < strongest->nattributes)
     961           0 :                     continue;
     962             : 
     963             :                 /* also skip weaker dependencies when attribute count matches */
     964        2928 :                 if (strongest->nattributes == dependency->nattributes &&
     965        2646 :                     strongest->degree > dependency->degree)
     966           0 :                     continue;
     967             :             }
     968             : 
     969             :             /*
     970             :              * this dependency is stronger, but we must still check that it's
     971             :              * fully matched to these attnums. We perform this check last as
     972             :              * it's slightly more expensive than the previous checks.
     973             :              */
     974        4644 :             if (dependency_is_fully_matched(dependency, attnums))
     975        2580 :                 strongest = dependency; /* save new best match */
     976             :         }
     977             :     }
     978             : 
     979        2478 :     return strongest;
     980             : }
     981             : 
     982             : /*
     983             :  * clauselist_apply_dependencies
     984             :  *      Apply the specified functional dependencies to a list of clauses and
     985             :  *      return the estimated selectivity of the clauses that are compatible
     986             :  *      with any of the given dependencies.
     987             :  *
     988             :  * This will estimate all not-already-estimated clauses that are compatible
     989             :  * with functional dependencies, and which have an attribute mentioned by any
     990             :  * of the given dependencies (either as an implying or implied attribute).
     991             :  *
     992             :  * Given (lists of) clauses on attributes (a,b) and a functional dependency
     993             :  * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
     994             :  * using the formula
     995             :  *
     996             :  *      P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
     997             :  *
     998             :  * where 'f' is the degree of dependency.  This reflects the fact that we
     999             :  * expect a fraction f of all rows to be consistent with the dependency
    1000             :  * (a=>b), and so have a selectivity of P(a), while the remaining rows are
    1001             :  * treated as independent.
    1002             :  *
    1003             :  * In practice, we use a slightly modified version of this formula, which uses
    1004             :  * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
    1005             :  * should obviously not exceed either column's individual selectivity.  I.e.,
    1006             :  * we actually combine selectivities using the formula
    1007             :  *
    1008             :  *      P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
    1009             :  *
    1010             :  * This can make quite a difference if the specific values matching the
    1011             :  * clauses are not consistent with the functional dependency.
    1012             :  */
    1013             : static Selectivity
    1014        1092 : clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
    1015             :                               int varRelid, JoinType jointype,
    1016             :                               SpecialJoinInfo *sjinfo,
    1017             :                               MVDependency **dependencies, int ndependencies,
    1018             :                               AttrNumber *list_attnums,
    1019             :                               Bitmapset **estimatedclauses)
    1020             : {
    1021             :     Bitmapset  *attnums;
    1022             :     int         i;
    1023             :     int         j;
    1024             :     int         nattrs;
    1025             :     Selectivity *attr_sel;
    1026             :     int         attidx;
    1027             :     int         listidx;
    1028             :     ListCell   *l;
    1029             :     Selectivity s1;
    1030             : 
    1031             :     /*
    1032             :      * Extract the attnums of all implying and implied attributes from all the
    1033             :      * given dependencies.  Each of these attributes is expected to have at
    1034             :      * least 1 not-already-estimated compatible clause that we will estimate
    1035             :      * here.
    1036             :      */
    1037        1092 :     attnums = NULL;
    1038        2478 :     for (i = 0; i < ndependencies; i++)
    1039             :     {
    1040        4440 :         for (j = 0; j < dependencies[i]->nattributes; j++)
    1041             :         {
    1042        3054 :             AttrNumber  attnum = dependencies[i]->attributes[j];
    1043             : 
    1044        3054 :             attnums = bms_add_member(attnums, attnum);
    1045             :         }
    1046             :     }
    1047             : 
    1048             :     /*
    1049             :      * Compute per-column selectivity estimates for each of these attributes,
    1050             :      * and mark all the corresponding clauses as estimated.
    1051             :      */
    1052        1092 :     nattrs = bms_num_members(attnums);
    1053        1092 :     attr_sel = (Selectivity *) palloc(sizeof(Selectivity) * nattrs);
    1054             : 
    1055        1092 :     attidx = 0;
    1056        1092 :     i = -1;
    1057        3582 :     while ((i = bms_next_member(attnums, i)) >= 0)
    1058             :     {
    1059        2490 :         List       *attr_clauses = NIL;
    1060             :         Selectivity simple_sel;
    1061             : 
    1062        2490 :         listidx = -1;
    1063        8412 :         foreach(l, clauses)
    1064             :         {
    1065        5922 :             Node       *clause = (Node *) lfirst(l);
    1066             : 
    1067        5922 :             listidx++;
    1068        5922 :             if (list_attnums[listidx] == i)
    1069             :             {
    1070        2490 :                 attr_clauses = lappend(attr_clauses, clause);
    1071        2490 :                 *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
    1072             :             }
    1073             :         }
    1074             : 
    1075        2490 :         simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid,
    1076             :                                                 jointype, sjinfo, false);
    1077        2490 :         attr_sel[attidx++] = simple_sel;
    1078             :     }
    1079             : 
    1080             :     /*
    1081             :      * Now combine these selectivities using the dependency information.  For
    1082             :      * chains of dependencies such as a -> b -> c, the b -> c dependency will
    1083             :      * come before the a -> b dependency in the array, so we traverse the
    1084             :      * array backwards to ensure such chains are computed in the right order.
    1085             :      *
    1086             :      * As explained above, pairs of selectivities are combined using the
    1087             :      * formula
    1088             :      *
    1089             :      * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
    1090             :      *
    1091             :      * to ensure that the combined selectivity is never greater than either
    1092             :      * individual selectivity.
    1093             :      *
    1094             :      * Where multiple dependencies apply (e.g., a -> b -> c), we use
    1095             :      * conditional probabilities to compute the overall result as follows:
    1096             :      *
    1097             :      * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
    1098             :      *
    1099             :      * so we replace the selectivities of all implied attributes with
    1100             :      * conditional probabilities, that are conditional on all their implying
    1101             :      * attributes.  The selectivities of all other non-implied attributes are
    1102             :      * left as they are.
    1103             :      */
    1104        2478 :     for (i = ndependencies - 1; i >= 0; i--)
    1105             :     {
    1106        1386 :         MVDependency *dependency = dependencies[i];
    1107             :         AttrNumber  attnum;
    1108             :         Selectivity s2;
    1109             :         double      f;
    1110             : 
    1111             :         /* Selectivity of all the implying attributes */
    1112        1386 :         s1 = 1.0;
    1113        3054 :         for (j = 0; j < dependency->nattributes - 1; j++)
    1114             :         {
    1115        1668 :             attnum = dependency->attributes[j];
    1116        1668 :             attidx = bms_member_index(attnums, attnum);
    1117        1668 :             s1 *= attr_sel[attidx];
    1118             :         }
    1119             : 
    1120             :         /* Original selectivity of the implied attribute */
    1121        1386 :         attnum = dependency->attributes[j];
    1122        1386 :         attidx = bms_member_index(attnums, attnum);
    1123        1386 :         s2 = attr_sel[attidx];
    1124             : 
    1125             :         /*
    1126             :          * Replace s2 with the conditional probability s2 given s1, computed
    1127             :          * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
    1128             :          *
    1129             :          * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
    1130             :          *
    1131             :          * where P(a) = s1, the selectivity of the implying attributes, and
    1132             :          * P(b) = s2, the selectivity of the implied attribute.
    1133             :          */
    1134        1386 :         f = dependency->degree;
    1135             : 
    1136        1386 :         if (s1 <= s2)
    1137        1326 :             attr_sel[attidx] = f + (1 - f) * s2;
    1138             :         else
    1139          60 :             attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
    1140             :     }
    1141             : 
    1142             :     /*
    1143             :      * The overall selectivity of all the clauses on all these attributes is
    1144             :      * then the product of all the original (non-implied) probabilities and
    1145             :      * the new conditional (implied) probabilities.
    1146             :      */
    1147        1092 :     s1 = 1.0;
    1148        3582 :     for (i = 0; i < nattrs; i++)
    1149        2490 :         s1 *= attr_sel[i];
    1150             : 
    1151        1092 :     CLAMP_PROBABILITY(s1);
    1152             : 
    1153        1092 :     pfree(attr_sel);
    1154        1092 :     bms_free(attnums);
    1155             : 
    1156        1092 :     return s1;
    1157             : }
    1158             : 
    1159             : /*
    1160             :  * dependency_is_compatible_expression
    1161             :  *      Determines if the expression is compatible with functional dependencies
    1162             :  *
    1163             :  * Similar to dependency_is_compatible_clause, but doesn't enforce that the
    1164             :  * expression is a simple Var.  On success, return the matching statistics
    1165             :  * expression into *expr.
    1166             :  */
    1167             : static bool
    1168         642 : dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
    1169             : {
    1170             :     ListCell   *lc,
    1171             :                *lc2;
    1172             :     Node       *clause_expr;
    1173             : 
    1174         642 :     if (IsA(clause, RestrictInfo))
    1175             :     {
    1176         552 :         RestrictInfo *rinfo = (RestrictInfo *) clause;
    1177             : 
    1178             :         /* Pseudoconstants are not interesting (they couldn't contain a Var) */
    1179         552 :         if (rinfo->pseudoconstant)
    1180           6 :             return false;
    1181             : 
    1182             :         /* Clauses referencing multiple, or no, varnos are incompatible */
    1183         546 :         if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
    1184           0 :             return false;
    1185             : 
    1186         546 :         clause = (Node *) rinfo->clause;
    1187             :     }
    1188             : 
    1189         636 :     if (is_opclause(clause))
    1190             :     {
    1191             :         /* If it's an opclause, check for Var = Const or Const = Var. */
    1192         204 :         OpExpr     *expr = (OpExpr *) clause;
    1193             : 
    1194             :         /* Only expressions with two arguments are candidates. */
    1195         204 :         if (list_length(expr->args) != 2)
    1196           0 :             return false;
    1197             : 
    1198             :         /* Make sure non-selected argument is a pseudoconstant. */
    1199         204 :         if (is_pseudo_constant_clause(lsecond(expr->args)))
    1200         204 :             clause_expr = linitial(expr->args);
    1201           0 :         else if (is_pseudo_constant_clause(linitial(expr->args)))
    1202           0 :             clause_expr = lsecond(expr->args);
    1203             :         else
    1204           0 :             return false;
    1205             : 
    1206             :         /*
    1207             :          * If it's not an "=" operator, just ignore the clause, as it's not
    1208             :          * compatible with functional dependencies.
    1209             :          *
    1210             :          * This uses the function for estimating selectivity, not the operator
    1211             :          * directly (a bit awkward, but well ...).
    1212             :          *
    1213             :          * XXX this is pretty dubious; probably it'd be better to check btree
    1214             :          * or hash opclass membership, so as not to be fooled by custom
    1215             :          * selectivity functions, and to be more consistent with decisions
    1216             :          * elsewhere in the planner.
    1217             :          */
    1218         204 :         if (get_oprrest(expr->opno) != F_EQSEL)
    1219          36 :             return false;
    1220             : 
    1221             :         /* OK to proceed with checking "var" */
    1222             :     }
    1223         432 :     else if (IsA(clause, ScalarArrayOpExpr))
    1224             :     {
    1225             :         /* If it's a scalar array operator, check for Var IN Const. */
    1226         390 :         ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
    1227             : 
    1228             :         /*
    1229             :          * Reject ALL() variant, we only care about ANY/IN.
    1230             :          *
    1231             :          * FIXME Maybe we should check if all the values are the same, and
    1232             :          * allow ALL in that case? Doesn't seem very practical, though.
    1233             :          */
    1234         390 :         if (!expr->useOr)
    1235          36 :             return false;
    1236             : 
    1237             :         /* Only expressions with two arguments are candidates. */
    1238         354 :         if (list_length(expr->args) != 2)
    1239           0 :             return false;
    1240             : 
    1241             :         /*
    1242             :          * We know it's always (Var IN Const), so we assume the var is the
    1243             :          * first argument, and pseudoconstant is the second one.
    1244             :          */
    1245         354 :         if (!is_pseudo_constant_clause(lsecond(expr->args)))
    1246           0 :             return false;
    1247             : 
    1248         354 :         clause_expr = linitial(expr->args);
    1249             : 
    1250             :         /*
    1251             :          * If it's not an "=" operator, just ignore the clause, as it's not
    1252             :          * compatible with functional dependencies. The operator is identified
    1253             :          * simply by looking at which function it uses to estimate
    1254             :          * selectivity. That's a bit strange, but it's what other similar
    1255             :          * places do.
    1256             :          */
    1257         354 :         if (get_oprrest(expr->opno) != F_EQSEL)
    1258         180 :             return false;
    1259             : 
    1260             :         /* OK to proceed with checking "var" */
    1261             :     }
    1262          42 :     else if (is_orclause(clause))
    1263             :     {
    1264          42 :         BoolExpr   *bool_expr = (BoolExpr *) clause;
    1265             : 
    1266             :         /* start with no expression (we'll use the first match) */
    1267          42 :         *expr = NULL;
    1268             : 
    1269         120 :         foreach(lc, bool_expr->args)
    1270             :         {
    1271          90 :             Node       *or_expr = NULL;
    1272             : 
    1273             :             /*
    1274             :              * Had we found incompatible expression in the arguments, treat
    1275             :              * the whole expression as incompatible.
    1276             :              */
    1277          90 :             if (!dependency_is_compatible_expression((Node *) lfirst(lc), relid,
    1278             :                                                      statlist, &or_expr))
    1279          12 :                 return false;
    1280             : 
    1281          84 :             if (*expr == NULL)
    1282          36 :                 *expr = or_expr;
    1283             : 
    1284             :             /* ensure all the expressions are the same */
    1285          84 :             if (!equal(or_expr, *expr))
    1286           6 :                 return false;
    1287             :         }
    1288             : 
    1289             :         /* the expression is already checked by the recursive call */
    1290          30 :         return true;
    1291             :     }
    1292           0 :     else if (is_notclause(clause))
    1293             :     {
    1294             :         /*
    1295             :          * "NOT x" can be interpreted as "x = false", so get the argument and
    1296             :          * proceed with seeing if it's a suitable Var.
    1297             :          */
    1298           0 :         clause_expr = (Node *) get_notclausearg(clause);
    1299             :     }
    1300             :     else
    1301             :     {
    1302             :         /*
    1303             :          * A boolean expression "x" can be interpreted as "x = true", so
    1304             :          * proceed with seeing if it's a suitable Var.
    1305             :          */
    1306           0 :         clause_expr = (Node *) clause;
    1307             :     }
    1308             : 
    1309             :     /*
    1310             :      * We may ignore any RelabelType node above the operand.  (There won't be
    1311             :      * more than one, since eval_const_expressions has been applied already.)
    1312             :      */
    1313         342 :     if (IsA(clause_expr, RelabelType))
    1314           0 :         clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
    1315             : 
    1316             :     /*
    1317             :      * Search for a matching statistics expression.
    1318             :      */
    1319         348 :     foreach(lc, statlist)
    1320             :     {
    1321         342 :         StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
    1322             : 
    1323             :         /* ignore stats without dependencies */
    1324         342 :         if (info->kind != STATS_EXT_DEPENDENCIES)
    1325           0 :             continue;
    1326             : 
    1327         558 :         foreach(lc2, info->exprs)
    1328             :         {
    1329         552 :             Node       *stat_expr = (Node *) lfirst(lc2);
    1330             : 
    1331         552 :             if (equal(clause_expr, stat_expr))
    1332             :             {
    1333         336 :                 *expr = stat_expr;
    1334         336 :                 return true;
    1335             :             }
    1336             :         }
    1337             :     }
    1338             : 
    1339           6 :     return false;
    1340             : }
    1341             : 
    1342             : /*
    1343             :  * dependencies_clauselist_selectivity
    1344             :  *      Return the estimated selectivity of (a subset of) the given clauses
    1345             :  *      using functional dependency statistics, or 1.0 if no useful functional
    1346             :  *      dependency statistic exists.
    1347             :  *
    1348             :  * 'estimatedclauses' is an input/output argument that gets a bit set
    1349             :  * corresponding to the (zero-based) list index of each clause that is included
    1350             :  * in the estimated selectivity.
    1351             :  *
    1352             :  * Given equality clauses on attributes (a,b) we find the strongest dependency
    1353             :  * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
    1354             :  * dependency, we then combine the per-clause selectivities using the formula
    1355             :  *
    1356             :  *     P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
    1357             :  *
    1358             :  * where 'f' is the degree of the dependency.  (Actually we use a slightly
    1359             :  * modified version of this formula -- see clauselist_apply_dependencies()).
    1360             :  *
    1361             :  * With clauses on more than two attributes, the dependencies are applied
    1362             :  * recursively, starting with the widest/strongest dependencies. For example
    1363             :  * P(a,b,c) is first split like this:
    1364             :  *
    1365             :  *     P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
    1366             :  *
    1367             :  * assuming (a,b=>c) is the strongest dependency.
    1368             :  */
    1369             : Selectivity
    1370        1734 : dependencies_clauselist_selectivity(PlannerInfo *root,
    1371             :                                     List *clauses,
    1372             :                                     int varRelid,
    1373             :                                     JoinType jointype,
    1374             :                                     SpecialJoinInfo *sjinfo,
    1375             :                                     RelOptInfo *rel,
    1376             :                                     Bitmapset **estimatedclauses)
    1377             : {
    1378        1734 :     Selectivity s1 = 1.0;
    1379             :     ListCell   *l;
    1380        1734 :     Bitmapset  *clauses_attnums = NULL;
    1381             :     AttrNumber *list_attnums;
    1382             :     int         listidx;
    1383             :     MVDependencies **func_dependencies;
    1384             :     int         nfunc_dependencies;
    1385             :     int         total_ndeps;
    1386             :     MVDependency **dependencies;
    1387             :     int         ndependencies;
    1388             :     int         i;
    1389             :     AttrNumber  attnum_offset;
    1390        1734 :     RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
    1391             : 
    1392             :     /* unique expressions */
    1393             :     Node      **unique_exprs;
    1394             :     int         unique_exprs_cnt;
    1395             : 
    1396             :     /* check if there's any stats that might be useful for us. */
    1397        1734 :     if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
    1398         450 :         return 1.0;
    1399             : 
    1400        1284 :     list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
    1401        1284 :                                          list_length(clauses));
    1402             : 
    1403             :     /*
    1404             :      * We allocate space as if every clause was a unique expression, although
    1405             :      * that's probably overkill. Some will be simple column references that
    1406             :      * we'll translate to attnums, and there might be duplicates. But it's
    1407             :      * easier and cheaper to just do one allocation than repalloc later.
    1408             :      */
    1409        1284 :     unique_exprs = (Node **) palloc(sizeof(Node *) * list_length(clauses));
    1410        1284 :     unique_exprs_cnt = 0;
    1411             : 
    1412             :     /*
    1413             :      * Pre-process the clauses list to extract the attnums seen in each item.
    1414             :      * We need to determine if there's any clauses which will be useful for
    1415             :      * dependency selectivity estimations. Along the way we'll record all of
    1416             :      * the attnums for each clause in a list which we'll reference later so we
    1417             :      * don't need to repeat the same work again. We'll also keep track of all
    1418             :      * attnums seen.
    1419             :      *
    1420             :      * We also skip clauses that we already estimated using different types of
    1421             :      * statistics (we treat them as incompatible).
    1422             :      *
    1423             :      * To handle expressions, we assign them negative attnums, as if it was a
    1424             :      * system attribute (this is fine, as we only allow extended stats on user
    1425             :      * attributes). And then we offset everything by the number of
    1426             :      * expressions, so that we can store the values in a bitmapset.
    1427             :      */
    1428        1284 :     listidx = 0;
    1429        4158 :     foreach(l, clauses)
    1430             :     {
    1431        2874 :         Node       *clause = (Node *) lfirst(l);
    1432             :         AttrNumber  attnum;
    1433        2874 :         Node       *expr = NULL;
    1434             : 
    1435             :         /* ignore clause by default */
    1436        2874 :         list_attnums[listidx] = InvalidAttrNumber;
    1437             : 
    1438        2874 :         if (!bms_is_member(listidx, *estimatedclauses))
    1439             :         {
    1440             :             /*
    1441             :              * If it's a simple column reference, just extract the attnum. If
    1442             :              * it's an expression, assign a negative attnum as if it was a
    1443             :              * system attribute.
    1444             :              */
    1445        2832 :             if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
    1446             :             {
    1447        2280 :                 list_attnums[listidx] = attnum;
    1448             :             }
    1449         552 :             else if (dependency_is_compatible_expression(clause, rel->relid,
    1450             :                                                          rel->statlist,
    1451             :                                                          &expr))
    1452             :             {
    1453             :                 /* special attnum assigned to this expression */
    1454         282 :                 attnum = InvalidAttrNumber;
    1455             : 
    1456             :                 Assert(expr != NULL);
    1457             : 
    1458             :                 /* If the expression is duplicate, use the same attnum. */
    1459         474 :                 for (i = 0; i < unique_exprs_cnt; i++)
    1460             :                 {
    1461         192 :                     if (equal(unique_exprs[i], expr))
    1462             :                     {
    1463             :                         /* negative attribute number to expression */
    1464           0 :                         attnum = -(i + 1);
    1465           0 :                         break;
    1466             :                     }
    1467             :                 }
    1468             : 
    1469             :                 /* not found in the list, so add it */
    1470         282 :                 if (attnum == InvalidAttrNumber)
    1471             :                 {
    1472         282 :                     unique_exprs[unique_exprs_cnt++] = expr;
    1473             : 
    1474             :                     /* after incrementing the value, to get -1, -2, ... */
    1475         282 :                     attnum = (-unique_exprs_cnt);
    1476             :                 }
    1477             : 
    1478             :                 /* remember which attnum was assigned to this clause */
    1479         282 :                 list_attnums[listidx] = attnum;
    1480             :             }
    1481             :         }
    1482             : 
    1483        2874 :         listidx++;
    1484             :     }
    1485             : 
    1486             :     Assert(listidx == list_length(clauses));
    1487             : 
    1488             :     /*
    1489             :      * How much we need to offset the attnums? If there are no expressions,
    1490             :      * then no offset is needed. Otherwise we need to offset enough for the
    1491             :      * lowest value (-unique_exprs_cnt) to become 1.
    1492             :      */
    1493        1284 :     if (unique_exprs_cnt > 0)
    1494         132 :         attnum_offset = (unique_exprs_cnt + 1);
    1495             :     else
    1496        1152 :         attnum_offset = 0;
    1497             : 
    1498             :     /*
    1499             :      * Now that we know how many expressions there are, we can offset the
    1500             :      * values just enough to build the bitmapset.
    1501             :      */
    1502        4158 :     for (i = 0; i < list_length(clauses); i++)
    1503             :     {
    1504             :         AttrNumber  attnum;
    1505             : 
    1506             :         /* ignore incompatible or already estimated clauses */
    1507        2874 :         if (list_attnums[i] == InvalidAttrNumber)
    1508         312 :             continue;
    1509             : 
    1510             :         /* make sure the attnum is in the expected range */
    1511             :         Assert(list_attnums[i] >= (-unique_exprs_cnt));
    1512             :         Assert(list_attnums[i] <= MaxHeapAttributeNumber);
    1513             : 
    1514             :         /* make sure the attnum is positive (valid AttrNumber) */
    1515        2562 :         attnum = list_attnums[i] + attnum_offset;
    1516             : 
    1517             :         /*
    1518             :          * Either it's a regular attribute, or it's an expression, in which
    1519             :          * case we must not have seen it before (expressions are unique).
    1520             :          *
    1521             :          * XXX Check whether it's a regular attribute has to be done using the
    1522             :          * original attnum, while the second check has to use the value with
    1523             :          * an offset.
    1524             :          */
    1525             :         Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
    1526             :                !bms_is_member(attnum, clauses_attnums));
    1527             : 
    1528             :         /*
    1529             :          * Remember the offset attnum, both for attributes and expressions.
    1530             :          * We'll pass list_attnums to clauselist_apply_dependencies, which
    1531             :          * uses it to identify clauses in a bitmap. We could also pass the
    1532             :          * offset, but this is more convenient.
    1533             :          */
    1534        2562 :         list_attnums[i] = attnum;
    1535             : 
    1536        2562 :         clauses_attnums = bms_add_member(clauses_attnums, attnum);
    1537             :     }
    1538             : 
    1539             :     /*
    1540             :      * If there's not at least two distinct attnums and expressions, then
    1541             :      * reject the whole list of clauses. We must return 1.0 so the calling
    1542             :      * function's selectivity is unaffected.
    1543             :      */
    1544        1284 :     if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
    1545             :     {
    1546         192 :         bms_free(clauses_attnums);
    1547         192 :         pfree(list_attnums);
    1548         192 :         return 1.0;
    1549             :     }
    1550             : 
    1551             :     /*
    1552             :      * Load all functional dependencies matching at least two parameters. We
    1553             :      * can simply consider all dependencies at once, without having to search
    1554             :      * for the best statistics object.
    1555             :      *
    1556             :      * To not waste cycles and memory, we deserialize dependencies only for
    1557             :      * statistics that match at least two attributes. The array is allocated
    1558             :      * with the assumption that all objects match - we could grow the array to
    1559             :      * make it just the right size, but it's likely wasteful anyway thanks to
    1560             :      * moving the freed chunks to freelists etc.
    1561             :      */
    1562        1092 :     func_dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
    1563        1092 :                                                    list_length(rel->statlist));
    1564        1092 :     nfunc_dependencies = 0;
    1565        1092 :     total_ndeps = 0;
    1566             : 
    1567        2322 :     foreach(l, rel->statlist)
    1568             :     {
    1569        1230 :         StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
    1570             :         int         nmatched;
    1571             :         int         nexprs;
    1572             :         int         k;
    1573             :         MVDependencies *deps;
    1574             : 
    1575             :         /* skip statistics that are not of the correct type */
    1576        1230 :         if (stat->kind != STATS_EXT_DEPENDENCIES)
    1577         108 :             continue;
    1578             : 
    1579             :         /* skip statistics with mismatching stxdinherit value */
    1580        1122 :         if (stat->inherit != rte->inh)
    1581           0 :             continue;
    1582             : 
    1583             :         /*
    1584             :          * Count matching attributes - we have to undo the attnum offsets. The
    1585             :          * input attribute numbers are not offset (expressions are not
    1586             :          * included in stat->keys, so it's not necessary). But we need to
    1587             :          * offset it before checking against clauses_attnums.
    1588             :          */
    1589        1122 :         nmatched = 0;
    1590        1122 :         k = -1;
    1591        4104 :         while ((k = bms_next_member(stat->keys, k)) >= 0)
    1592             :         {
    1593        2982 :             AttrNumber  attnum = (AttrNumber) k;
    1594             : 
    1595             :             /* skip expressions */
    1596        2982 :             if (!AttrNumberIsForUserDefinedAttr(attnum))
    1597           0 :                 continue;
    1598             : 
    1599             :             /* apply the same offset as above */
    1600        2982 :             attnum += attnum_offset;
    1601             : 
    1602        2982 :             if (bms_is_member(attnum, clauses_attnums))
    1603        2232 :                 nmatched++;
    1604             :         }
    1605             : 
    1606             :         /* count matching expressions */
    1607        1122 :         nexprs = 0;
    1608        1380 :         for (i = 0; i < unique_exprs_cnt; i++)
    1609             :         {
    1610             :             ListCell   *lc;
    1611             : 
    1612        1032 :             foreach(lc, stat->exprs)
    1613             :             {
    1614         774 :                 Node       *stat_expr = (Node *) lfirst(lc);
    1615             : 
    1616             :                 /* try to match it */
    1617         774 :                 if (equal(stat_expr, unique_exprs[i]))
    1618         258 :                     nexprs++;
    1619             :             }
    1620             :         }
    1621             : 
    1622             :         /*
    1623             :          * Skip objects matching fewer than two attributes/expressions from
    1624             :          * clauses.
    1625             :          */
    1626        1122 :         if (nmatched + nexprs < 2)
    1627          18 :             continue;
    1628             : 
    1629        1104 :         deps = statext_dependencies_load(stat->statOid, rte->inh);
    1630             : 
    1631             :         /*
    1632             :          * The expressions may be represented by different attnums in the
    1633             :          * stats, we need to remap them to be consistent with the clauses.
    1634             :          * That will make the later steps (e.g. picking the strongest item and
    1635             :          * so on) much simpler and cheaper, because it won't need to care
    1636             :          * about the offset at all.
    1637             :          *
    1638             :          * When we're at it, we can ignore dependencies that are not fully
    1639             :          * matched by clauses (i.e. referencing attributes or expressions that
    1640             :          * are not in the clauses).
    1641             :          *
    1642             :          * We have to do this for all statistics, as long as there are any
    1643             :          * expressions - we need to shift the attnums in all dependencies.
    1644             :          *
    1645             :          * XXX Maybe we should do this always, because it also eliminates some
    1646             :          * of the dependencies early. It might be cheaper than having to walk
    1647             :          * the longer list in find_strongest_dependency later, especially as
    1648             :          * we need to do that repeatedly?
    1649             :          *
    1650             :          * XXX We have to do this even when there are no expressions in
    1651             :          * clauses, otherwise find_strongest_dependency may fail for stats
    1652             :          * with expressions (due to lookup of negative value in bitmap). So we
    1653             :          * need to at least filter out those dependencies. Maybe we could do
    1654             :          * it in a cheaper way (if there are no expr clauses, we can just
    1655             :          * discard all negative attnums without any lookups).
    1656             :          */
    1657        1104 :         if (unique_exprs_cnt > 0 || stat->exprs != NIL)
    1658             :         {
    1659         108 :             int         ndeps = 0;
    1660             : 
    1661         648 :             for (i = 0; i < deps->ndeps; i++)
    1662             :             {
    1663         540 :                 bool        skip = false;
    1664         540 :                 MVDependency *dep = deps->deps[i];
    1665             :                 int         j;
    1666             : 
    1667        1506 :                 for (j = 0; j < dep->nattributes; j++)
    1668             :                 {
    1669             :                     int         idx;
    1670             :                     Node       *expr;
    1671        1230 :                     AttrNumber  unique_attnum = InvalidAttrNumber;
    1672             :                     AttrNumber  attnum;
    1673             : 
    1674             :                     /* undo the per-statistics offset */
    1675        1230 :                     attnum = dep->attributes[j];
    1676             : 
    1677             :                     /*
    1678             :                      * For regular attributes we can simply check if it
    1679             :                      * matches any clause. If there's no matching clause, we
    1680             :                      * can just ignore it. We need to offset the attnum
    1681             :                      * though.
    1682             :                      */
    1683        1230 :                     if (AttrNumberIsForUserDefinedAttr(attnum))
    1684             :                     {
    1685           0 :                         dep->attributes[j] = attnum + attnum_offset;
    1686             : 
    1687           0 :                         if (!bms_is_member(dep->attributes[j], clauses_attnums))
    1688             :                         {
    1689           0 :                             skip = true;
    1690           0 :                             break;
    1691             :                         }
    1692             : 
    1693           0 :                         continue;
    1694             :                     }
    1695             : 
    1696             :                     /*
    1697             :                      * the attnum should be a valid system attnum (-1, -2,
    1698             :                      * ...)
    1699             :                      */
    1700             :                     Assert(AttributeNumberIsValid(attnum));
    1701             : 
    1702             :                     /*
    1703             :                      * For expressions, we need to do two translations. First
    1704             :                      * we have to translate the negative attnum to index in
    1705             :                      * the list of expressions (in the statistics object).
    1706             :                      * Then we need to see if there's a matching clause. The
    1707             :                      * index of the unique expression determines the attnum
    1708             :                      * (and we offset it).
    1709             :                      */
    1710        1230 :                     idx = -(1 + attnum);
    1711             : 
    1712             :                     /* Is the expression index is valid? */
    1713             :                     Assert((idx >= 0) && (idx < list_length(stat->exprs)));
    1714             : 
    1715        1230 :                     expr = (Node *) list_nth(stat->exprs, idx);
    1716             : 
    1717             :                     /* try to find the expression in the unique list */
    1718        2460 :                     for (int m = 0; m < unique_exprs_cnt; m++)
    1719             :                     {
    1720             :                         /*
    1721             :                          * found a matching unique expression, use the attnum
    1722             :                          * (derived from index of the unique expression)
    1723             :                          */
    1724        2196 :                         if (equal(unique_exprs[m], expr))
    1725             :                         {
    1726         966 :                             unique_attnum = -(m + 1) + attnum_offset;
    1727         966 :                             break;
    1728             :                         }
    1729             :                     }
    1730             : 
    1731             :                     /*
    1732             :                      * Found no matching expression, so we can simply skip
    1733             :                      * this dependency, because there's no chance it will be
    1734             :                      * fully covered.
    1735             :                      */
    1736        1230 :                     if (unique_attnum == InvalidAttrNumber)
    1737             :                     {
    1738         264 :                         skip = true;
    1739         264 :                         break;
    1740             :                     }
    1741             : 
    1742             :                     /* otherwise remap it to the new attnum */
    1743         966 :                     dep->attributes[j] = unique_attnum;
    1744             :                 }
    1745             : 
    1746             :                 /* if found a matching dependency, keep it */
    1747         540 :                 if (!skip)
    1748             :                 {
    1749             :                     /* maybe we've skipped something earlier, so move it */
    1750         276 :                     if (ndeps != i)
    1751           0 :                         deps->deps[ndeps] = deps->deps[i];
    1752             : 
    1753         276 :                     ndeps++;
    1754             :                 }
    1755             :             }
    1756             : 
    1757         108 :             deps->ndeps = ndeps;
    1758             :         }
    1759             : 
    1760             :         /*
    1761             :          * It's possible we've removed all dependencies, in which case we
    1762             :          * don't bother adding it to the list.
    1763             :          */
    1764        1104 :         if (deps->ndeps > 0)
    1765             :         {
    1766        1104 :             func_dependencies[nfunc_dependencies] = deps;
    1767        1104 :             total_ndeps += deps->ndeps;
    1768        1104 :             nfunc_dependencies++;
    1769             :         }
    1770             :     }
    1771             : 
    1772             :     /* if no matching stats could be found then we've nothing to do */
    1773        1092 :     if (nfunc_dependencies == 0)
    1774             :     {
    1775           0 :         pfree(func_dependencies);
    1776           0 :         bms_free(clauses_attnums);
    1777           0 :         pfree(list_attnums);
    1778           0 :         pfree(unique_exprs);
    1779           0 :         return 1.0;
    1780             :     }
    1781             : 
    1782             :     /*
    1783             :      * Work out which dependencies we can apply, starting with the
    1784             :      * widest/strongest ones, and proceeding to smaller/weaker ones.
    1785             :      */
    1786        1092 :     dependencies = (MVDependency **) palloc(sizeof(MVDependency *) *
    1787             :                                             total_ndeps);
    1788        1092 :     ndependencies = 0;
    1789             : 
    1790             :     while (true)
    1791        1386 :     {
    1792             :         MVDependency *dependency;
    1793             :         AttrNumber  attnum;
    1794             : 
    1795             :         /* the widest/strongest dependency, fully matched by clauses */
    1796        2478 :         dependency = find_strongest_dependency(func_dependencies,
    1797             :                                                nfunc_dependencies,
    1798             :                                                clauses_attnums);
    1799        2478 :         if (!dependency)
    1800        1092 :             break;
    1801             : 
    1802        1386 :         dependencies[ndependencies++] = dependency;
    1803             : 
    1804             :         /* Ignore dependencies using this implied attribute in later loops */
    1805        1386 :         attnum = dependency->attributes[dependency->nattributes - 1];
    1806        1386 :         clauses_attnums = bms_del_member(clauses_attnums, attnum);
    1807             :     }
    1808             : 
    1809             :     /*
    1810             :      * If we found applicable dependencies, use them to estimate all
    1811             :      * compatible clauses on attributes that they refer to.
    1812             :      */
    1813        1092 :     if (ndependencies != 0)
    1814        1092 :         s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
    1815             :                                            sjinfo, dependencies, ndependencies,
    1816             :                                            list_attnums, estimatedclauses);
    1817             : 
    1818             :     /* free deserialized functional dependencies (and then the array) */
    1819        2196 :     for (i = 0; i < nfunc_dependencies; i++)
    1820        1104 :         pfree(func_dependencies[i]);
    1821             : 
    1822        1092 :     pfree(dependencies);
    1823        1092 :     pfree(func_dependencies);
    1824        1092 :     bms_free(clauses_attnums);
    1825        1092 :     pfree(list_attnums);
    1826        1092 :     pfree(unique_exprs);
    1827             : 
    1828        1092 :     return s1;
    1829             : }

Generated by: LCOV version 1.14