LCOV - code coverage report
Current view: top level - src/backend/statistics - extended_stats.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 358 390 91.8 %
Date: 2019-11-13 23:06:49 Functions: 24 24 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * extended_stats.c
       4             :  *    POSTGRES extended statistics
       5             :  *
       6             :  * Generic code supporting statistics objects created via CREATE STATISTICS.
       7             :  *
       8             :  *
       9             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
      10             :  * Portions Copyright (c) 1994, Regents of the University of California
      11             :  *
      12             :  * IDENTIFICATION
      13             :  *    src/backend/statistics/extended_stats.c
      14             :  *
      15             :  *-------------------------------------------------------------------------
      16             :  */
      17             : #include "postgres.h"
      18             : 
      19             : #include "access/detoast.h"
      20             : #include "access/genam.h"
      21             : #include "access/htup_details.h"
      22             : #include "access/table.h"
      23             : #include "catalog/indexing.h"
      24             : #include "catalog/pg_collation.h"
      25             : #include "catalog/pg_statistic_ext.h"
      26             : #include "catalog/pg_statistic_ext_data.h"
      27             : #include "miscadmin.h"
      28             : #include "nodes/nodeFuncs.h"
      29             : #include "optimizer/clauses.h"
      30             : #include "optimizer/optimizer.h"
      31             : #include "postmaster/autovacuum.h"
      32             : #include "statistics/extended_stats_internal.h"
      33             : #include "statistics/statistics.h"
      34             : #include "utils/array.h"
      35             : #include "utils/builtins.h"
      36             : #include "utils/fmgroids.h"
      37             : #include "utils/lsyscache.h"
      38             : #include "utils/memutils.h"
      39             : #include "utils/rel.h"
      40             : #include "utils/selfuncs.h"
      41             : #include "utils/syscache.h"
      42             : 
      43             : /*
      44             :  * To avoid consuming too much memory during analysis and/or too much space
      45             :  * in the resulting pg_statistic rows, we ignore varlena datums that are wider
      46             :  * than WIDTH_THRESHOLD (after detoasting!).  This is legitimate for MCV
      47             :  * and distinct-value calculations since a wide value is unlikely to be
      48             :  * duplicated at all, much less be a most-common value.  For the same reason,
      49             :  * ignoring wide values will not affect our estimates of histogram bin
      50             :  * boundaries very much.
      51             :  */
      52             : #define WIDTH_THRESHOLD  1024
      53             : 
      54             : /*
      55             :  * Used internally to refer to an individual statistics object, i.e.,
      56             :  * a pg_statistic_ext entry.
      57             :  */
      58             : typedef struct StatExtEntry
      59             : {
      60             :     Oid         statOid;        /* OID of pg_statistic_ext entry */
      61             :     char       *schema;         /* statistics object's schema */
      62             :     char       *name;           /* statistics object's name */
      63             :     Bitmapset  *columns;        /* attribute numbers covered by the object */
      64             :     List       *types;          /* 'char' list of enabled statistic kinds */
      65             :     int         stattarget;     /* statistics target (-1 for default) */
      66             : } StatExtEntry;
      67             : 
      68             : 
      69             : static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
      70             : static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
      71             :                                             int nvacatts, VacAttrStats **vacatts);
      72             : static void statext_store(Oid relid,
      73             :                           MVNDistinct *ndistinct, MVDependencies *dependencies,
      74             :                           MCVList *mcv, VacAttrStats **stats);
      75             : static int statext_compute_stattarget(int stattarget,
      76             :                                       int natts, VacAttrStats **stats);
      77             : 
      78             : /*
      79             :  * Compute requested extended stats, using the rows sampled for the plain
      80             :  * (single-column) stats.
      81             :  *
      82             :  * This fetches a list of stats types from pg_statistic_ext, computes the
      83             :  * requested stats, and serializes them back into the catalog.
      84             :  */
      85             : void
      86       19526 : BuildRelationExtStatistics(Relation onerel, double totalrows,
      87             :                            int numrows, HeapTuple *rows,
      88             :                            int natts, VacAttrStats **vacattrstats)
      89             : {
      90             :     Relation    pg_stext;
      91             :     ListCell   *lc;
      92             :     List       *stats;
      93             :     MemoryContext cxt;
      94             :     MemoryContext oldcxt;
      95             : 
      96       19526 :     cxt = AllocSetContextCreate(CurrentMemoryContext,
      97             :                                 "BuildRelationExtStatistics",
      98             :                                 ALLOCSET_DEFAULT_SIZES);
      99       19526 :     oldcxt = MemoryContextSwitchTo(cxt);
     100             : 
     101       19526 :     pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
     102       19526 :     stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
     103             : 
     104       19632 :     foreach(lc, stats)
     105             :     {
     106         106 :         StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
     107         106 :         MVNDistinct *ndistinct = NULL;
     108         106 :         MVDependencies *dependencies = NULL;
     109         106 :         MCVList    *mcv = NULL;
     110             :         VacAttrStats **stats;
     111             :         ListCell   *lc2;
     112             :         int         stattarget;
     113             : 
     114             :         /*
     115             :          * Check if we can build these stats based on the column analyzed. If
     116             :          * not, report this fact (except in autovacuum) and move on.
     117             :          */
     118         106 :         stats = lookup_var_attr_stats(onerel, stat->columns,
     119             :                                       natts, vacattrstats);
     120         106 :         if (!stats)
     121             :         {
     122           8 :             if (!IsAutoVacuumWorkerProcess())
     123           8 :                 ereport(WARNING,
     124             :                         (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
     125             :                          errmsg("statistics object \"%s.%s\" could not be computed for relation \"%s.%s\"",
     126             :                                 stat->schema, stat->name,
     127             :                                 get_namespace_name(onerel->rd_rel->relnamespace),
     128             :                                 RelationGetRelationName(onerel)),
     129             :                          errtable(onerel)));
     130           8 :             continue;
     131             :         }
     132             : 
     133             :         /* check allowed number of dimensions */
     134             :         Assert(bms_num_members(stat->columns) >= 2 &&
     135             :                bms_num_members(stat->columns) <= STATS_MAX_DIMENSIONS);
     136             : 
     137             :         /* compute statistics target for this statistics */
     138          98 :         stattarget = statext_compute_stattarget(stat->stattarget,
     139          98 :                                                 bms_num_members(stat->columns),
     140             :                                                 stats);
     141             : 
     142             :         /*
     143             :          * Don't rebuild statistics objects with statistics target set to 0 (we
     144             :          * just leave the existing values around, just like we do for regular
     145             :          * per-column statistics).
     146             :          */
     147          98 :         if (stattarget == 0)
     148           4 :             continue;
     149             : 
     150             :         /* compute statistic of each requested type */
     151         220 :         foreach(lc2, stat->types)
     152             :         {
     153         126 :             char        t = (char) lfirst_int(lc2);
     154             : 
     155         126 :             if (t == STATS_EXT_NDISTINCT)
     156          16 :                 ndistinct = statext_ndistinct_build(totalrows, numrows, rows,
     157             :                                                     stat->columns, stats);
     158         110 :             else if (t == STATS_EXT_DEPENDENCIES)
     159          34 :                 dependencies = statext_dependencies_build(numrows, rows,
     160             :                                                           stat->columns, stats);
     161          76 :             else if (t == STATS_EXT_MCV)
     162          76 :                 mcv = statext_mcv_build(numrows, rows, stat->columns, stats,
     163             :                                         totalrows, stattarget);
     164             :         }
     165             : 
     166             :         /* store the statistics in the catalog */
     167          94 :         statext_store(stat->statOid, ndistinct, dependencies, mcv, stats);
     168             :     }
     169             : 
     170       19526 :     table_close(pg_stext, RowExclusiveLock);
     171             : 
     172       19526 :     MemoryContextSwitchTo(oldcxt);
     173       19526 :     MemoryContextDelete(cxt);
     174       19526 : }
     175             : 
     176             : /*
     177             :  * ComputeExtStatisticsRows
     178             :  *      Compute number of rows required by extended statistics on a table.
     179             :  *
     180             :  * Computes number of rows we need to sample to build extended statistics on a
     181             :  * table. This only looks at statistics we can actually build - for example
     182             :  * when analyzing only some of the columns, this will skip statistics objects
     183             :  * that would require additional columns.
     184             :  *
     185             :  * See statext_compute_stattarget for details about how we compute statistics
     186             :  * target for a statistics objects (from the object target, attribute targets
     187             :  * and default statistics target).
     188             :  */
     189             : int
     190       31098 : ComputeExtStatisticsRows(Relation onerel,
     191             :                          int natts, VacAttrStats **vacattrstats)
     192             : {
     193             :     Relation    pg_stext;
     194             :     ListCell   *lc;
     195             :     List       *lstats;
     196             :     MemoryContext cxt;
     197             :     MemoryContext oldcxt;
     198       31098 :     int         result = 0;
     199             : 
     200       31098 :     cxt = AllocSetContextCreate(CurrentMemoryContext,
     201             :                                 "ComputeExtStatisticsRows",
     202             :                                 ALLOCSET_DEFAULT_SIZES);
     203       31098 :     oldcxt = MemoryContextSwitchTo(cxt);
     204             : 
     205       31098 :     pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
     206       31098 :     lstats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
     207             : 
     208       31208 :     foreach(lc, lstats)
     209             :     {
     210         110 :         StatExtEntry   *stat = (StatExtEntry *) lfirst(lc);
     211         110 :         int             stattarget = stat->stattarget;
     212             :         VacAttrStats  **stats;
     213         110 :         int             nattrs = bms_num_members(stat->columns);
     214             : 
     215             :         /*
     216             :          * Check if we can build this statistics object based on the columns
     217             :          * analyzed. If not, ignore it (don't report anything, we'll do that
     218             :          * during the actual build BuildRelationExtStatistics).
     219             :          */
     220         110 :         stats = lookup_var_attr_stats(onerel, stat->columns,
     221             :                                       natts, vacattrstats);
     222             : 
     223         110 :         if (!stats)
     224           8 :             continue;
     225             : 
     226             :         /*
     227             :          * Compute statistics target, based on what's set for the statistic
     228             :          * object itself, and for its attributes.
     229             :          */
     230         102 :         stattarget = statext_compute_stattarget(stat->stattarget,
     231             :                                                 nattrs, stats);
     232             : 
     233             :         /* Use the largest value for all statistics objects. */
     234         102 :         if (stattarget > result)
     235          98 :             result = stattarget;
     236             :     }
     237             : 
     238       31098 :     table_close(pg_stext, RowExclusiveLock);
     239             : 
     240       31098 :     MemoryContextSwitchTo(oldcxt);
     241       31098 :     MemoryContextDelete(cxt);
     242             : 
     243             :     /* compute sample size based on the statistics target */
     244       31098 :     return (300 * result);
     245             : }
     246             : 
     247             : /*
     248             :  * statext_compute_stattarget
     249             :  *      compute statistics target for an extended statistic
     250             :  *
     251             :  * When computing target for extended statistics objects, we consider three
     252             :  * places where the target may be set - the statistics object itself,
     253             :  * attributes the statistics is defined on, and then the default statistics
     254             :  * target.
     255             :  *
     256             :  * First we look at what's set for the statistics object itself, using the
     257             :  * ALTER STATISTICS ... SET STATISTICS command. If we find a valid value
     258             :  * there (i.e. not -1) we're done. Otherwise we look at targets set for any
     259             :  * of the attributes the statistic is defined on, and if there are columns
     260             :  * with defined target, we use the maximum value. We do this mostly for
     261             :  * backwards compatibility, because this is what we did before having
     262             :  * statistics target for extended statistics.
     263             :  *
     264             :  * And finally, if we still don't have a statistics target, we use the value
     265             :  * set in default_statistics_target.
     266             :  */
     267             : static int
     268         200 : statext_compute_stattarget(int stattarget, int nattrs, VacAttrStats **stats)
     269             : {
     270             :     int i;
     271             : 
     272             :     /*
     273             :      * If there's statistics target set for the statistics object, use it.
     274             :      * It may be set to 0 which disables building of that statistic.
     275             :      */
     276         200 :     if (stattarget >= 0)
     277           8 :         return stattarget;
     278             : 
     279             :     /*
     280             :      * The target for the statistics object is set to -1, in which case we
     281             :      * look at the maximum target set for any of the attributes the object
     282             :      * is defined on.
     283             :      */
     284         716 :     for (i = 0; i < nattrs; i++)
     285             :     {
     286             :         /* keep the maximmum statistics target */
     287         524 :         if (stats[i]->attr->attstattarget > stattarget)
     288         192 :             stattarget = stats[i]->attr->attstattarget;
     289             :     }
     290             : 
     291             :     /*
     292             :      * If the value is still negative (so neither the statistics object nor
     293             :      * any of the columns have custom statistics target set), use the global
     294             :      * default target.
     295             :      */
     296         192 :     if (stattarget < 0)
     297           0 :         stattarget = default_statistics_target;
     298             : 
     299             :     /* As this point we should have a valid statistics target. */
     300             :     Assert((stattarget >= 0) && (stattarget <= 10000));
     301             : 
     302         192 :     return stattarget;
     303             : }
     304             : 
     305             : /*
     306             :  * statext_is_kind_built
     307             :  *      Is this stat kind built in the given pg_statistic_ext_data tuple?
     308             :  */
     309             : bool
     310         548 : statext_is_kind_built(HeapTuple htup, char type)
     311             : {
     312             :     AttrNumber  attnum;
     313             : 
     314         548 :     switch (type)
     315             :     {
     316             :         case STATS_EXT_NDISTINCT:
     317         180 :             attnum = Anum_pg_statistic_ext_data_stxdndistinct;
     318         180 :             break;
     319             : 
     320             :         case STATS_EXT_DEPENDENCIES:
     321         180 :             attnum = Anum_pg_statistic_ext_data_stxddependencies;
     322         180 :             break;
     323             : 
     324             :         case STATS_EXT_MCV:
     325         188 :             attnum = Anum_pg_statistic_ext_data_stxdmcv;
     326         188 :             break;
     327             : 
     328             :         default:
     329           0 :             elog(ERROR, "unexpected statistics type requested: %d", type);
     330             :     }
     331             : 
     332         548 :     return !heap_attisnull(htup, attnum, NULL);
     333             : }
     334             : 
     335             : /*
     336             :  * Return a list (of StatExtEntry) of statistics objects for the given relation.
     337             :  */
     338             : static List *
     339       50624 : fetch_statentries_for_relation(Relation pg_statext, Oid relid)
     340             : {
     341             :     SysScanDesc scan;
     342             :     ScanKeyData skey;
     343             :     HeapTuple   htup;
     344       50624 :     List       *result = NIL;
     345             : 
     346             :     /*
     347             :      * Prepare to scan pg_statistic_ext for entries having stxrelid = this
     348             :      * rel.
     349             :      */
     350       50624 :     ScanKeyInit(&skey,
     351             :                 Anum_pg_statistic_ext_stxrelid,
     352             :                 BTEqualStrategyNumber, F_OIDEQ,
     353             :                 ObjectIdGetDatum(relid));
     354             : 
     355       50624 :     scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true,
     356             :                               NULL, 1, &skey);
     357             : 
     358      101464 :     while (HeapTupleIsValid(htup = systable_getnext(scan)))
     359             :     {
     360             :         StatExtEntry *entry;
     361             :         Datum       datum;
     362             :         bool        isnull;
     363             :         int         i;
     364             :         ArrayType  *arr;
     365             :         char       *enabled;
     366             :         Form_pg_statistic_ext staForm;
     367             : 
     368         216 :         entry = palloc0(sizeof(StatExtEntry));
     369         216 :         staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
     370         216 :         entry->statOid = staForm->oid;
     371         216 :         entry->schema = get_namespace_name(staForm->stxnamespace);
     372         216 :         entry->name = pstrdup(NameStr(staForm->stxname));
     373         216 :         entry->stattarget = staForm->stxstattarget;
     374         788 :         for (i = 0; i < staForm->stxkeys.dim1; i++)
     375             :         {
     376         572 :             entry->columns = bms_add_member(entry->columns,
     377         572 :                                             staForm->stxkeys.values[i]);
     378             :         }
     379             : 
     380             :         /* decode the stxkind char array into a list of chars */
     381         216 :         datum = SysCacheGetAttr(STATEXTOID, htup,
     382             :                                 Anum_pg_statistic_ext_stxkind, &isnull);
     383             :         Assert(!isnull);
     384         216 :         arr = DatumGetArrayTypeP(datum);
     385         432 :         if (ARR_NDIM(arr) != 1 ||
     386         432 :             ARR_HASNULL(arr) ||
     387         216 :             ARR_ELEMTYPE(arr) != CHAROID)
     388           0 :             elog(ERROR, "stxkind is not a 1-D char array");
     389         216 :         enabled = (char *) ARR_DATA_PTR(arr);
     390         552 :         for (i = 0; i < ARR_DIMS(arr)[0]; i++)
     391             :         {
     392             :             Assert((enabled[i] == STATS_EXT_NDISTINCT) ||
     393             :                    (enabled[i] == STATS_EXT_DEPENDENCIES) ||
     394             :                    (enabled[i] == STATS_EXT_MCV));
     395         336 :             entry->types = lappend_int(entry->types, (int) enabled[i]);
     396             :         }
     397             : 
     398         216 :         result = lappend(result, entry);
     399             :     }
     400             : 
     401       50624 :     systable_endscan(scan);
     402             : 
     403       50624 :     return result;
     404             : }
     405             : 
     406             : /*
     407             :  * Using 'vacatts' of size 'nvacatts' as input data, return a newly built
     408             :  * VacAttrStats array which includes only the items corresponding to
     409             :  * attributes indicated by 'stxkeys'. If we don't have all of the per column
     410             :  * stats available to compute the extended stats, then we return NULL to indicate
     411             :  * to the caller that the stats should not be built.
     412             :  */
     413             : static VacAttrStats **
     414         216 : lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
     415             :                       int nvacatts, VacAttrStats **vacatts)
     416             : {
     417         216 :     int         i = 0;
     418         216 :     int         x = -1;
     419             :     VacAttrStats **stats;
     420             : 
     421         216 :     stats = (VacAttrStats **)
     422         216 :         palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));
     423             : 
     424             :     /* lookup VacAttrStats info for the requested columns (same attnum) */
     425         980 :     while ((x = bms_next_member(attrs, x)) >= 0)
     426             :     {
     427             :         int         j;
     428             : 
     429         564 :         stats[i] = NULL;
     430        1896 :         for (j = 0; j < nvacatts; j++)
     431             :         {
     432        1880 :             if (x == vacatts[j]->tupattnum)
     433             :             {
     434         548 :                 stats[i] = vacatts[j];
     435         548 :                 break;
     436             :             }
     437             :         }
     438             : 
     439         564 :         if (!stats[i])
     440             :         {
     441             :             /*
     442             :              * Looks like stats were not gathered for one of the columns
     443             :              * required. We'll be unable to build the extended stats without
     444             :              * this column.
     445             :              */
     446          16 :             pfree(stats);
     447          16 :             return NULL;
     448             :         }
     449             : 
     450             :         /*
     451             :          * Sanity check that the column is not dropped - stats should have
     452             :          * been removed in this case.
     453             :          */
     454             :         Assert(!stats[i]->attr->attisdropped);
     455             : 
     456         548 :         i++;
     457             :     }
     458             : 
     459         200 :     return stats;
     460             : }
     461             : 
     462             : /*
     463             :  * statext_store
     464             :  *  Serializes the statistics and stores them into the pg_statistic_ext_data
     465             :  *  tuple.
     466             :  */
     467             : static void
     468          94 : statext_store(Oid statOid,
     469             :               MVNDistinct *ndistinct, MVDependencies *dependencies,
     470             :               MCVList *mcv, VacAttrStats **stats)
     471             : {
     472             :     HeapTuple   stup,
     473             :                 oldtup;
     474             :     Datum       values[Natts_pg_statistic_ext_data];
     475             :     bool        nulls[Natts_pg_statistic_ext_data];
     476             :     bool        replaces[Natts_pg_statistic_ext_data];
     477             :     Relation    pg_stextdata;
     478             : 
     479          94 :     memset(nulls, true, sizeof(nulls));
     480          94 :     memset(replaces, false, sizeof(replaces));
     481          94 :     memset(values, 0, sizeof(values));
     482             : 
     483             :     /*
     484             :      * Construct a new pg_statistic_ext_data tuple, replacing the calculated
     485             :      * stats.
     486             :      */
     487          94 :     if (ndistinct != NULL)
     488             :     {
     489          16 :         bytea      *data = statext_ndistinct_serialize(ndistinct);
     490             : 
     491          16 :         nulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = (data == NULL);
     492          16 :         values[Anum_pg_statistic_ext_data_stxdndistinct - 1] = PointerGetDatum(data);
     493             :     }
     494             : 
     495          94 :     if (dependencies != NULL)
     496             :     {
     497          30 :         bytea      *data = statext_dependencies_serialize(dependencies);
     498             : 
     499          30 :         nulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = (data == NULL);
     500          30 :         values[Anum_pg_statistic_ext_data_stxddependencies - 1] = PointerGetDatum(data);
     501             :     }
     502          94 :     if (mcv != NULL)
     503             :     {
     504          74 :         bytea      *data = statext_mcv_serialize(mcv, stats);
     505             : 
     506          74 :         nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = (data == NULL);
     507          74 :         values[Anum_pg_statistic_ext_data_stxdmcv - 1] = PointerGetDatum(data);
     508             :     }
     509             : 
     510             :     /* always replace the value (either by bytea or NULL) */
     511          94 :     replaces[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
     512          94 :     replaces[Anum_pg_statistic_ext_data_stxddependencies - 1] = true;
     513          94 :     replaces[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
     514             : 
     515             :     /* there should already be a pg_statistic_ext_data tuple */
     516          94 :     oldtup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(statOid));
     517          94 :     if (!HeapTupleIsValid(oldtup))
     518           0 :         elog(ERROR, "cache lookup failed for statistics object %u", statOid);
     519             : 
     520             :     /* replace it */
     521          94 :     pg_stextdata = table_open(StatisticExtDataRelationId, RowExclusiveLock);
     522             : 
     523          94 :     stup = heap_modify_tuple(oldtup,
     524             :                              RelationGetDescr(pg_stextdata),
     525             :                              values,
     526             :                              nulls,
     527             :                              replaces);
     528          94 :     ReleaseSysCache(oldtup);
     529          94 :     CatalogTupleUpdate(pg_stextdata, &stup->t_self, stup);
     530             : 
     531          94 :     heap_freetuple(stup);
     532             : 
     533          94 :     table_close(pg_stextdata, RowExclusiveLock);
     534          94 : }
     535             : 
     536             : /* initialize multi-dimensional sort */
     537             : MultiSortSupport
     538         366 : multi_sort_init(int ndims)
     539             : {
     540             :     MultiSortSupport mss;
     541             : 
     542             :     Assert(ndims >= 2);
     543             : 
     544         366 :     mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
     545         366 :                                      + sizeof(SortSupportData) * ndims);
     546             : 
     547         366 :     mss->ndims = ndims;
     548             : 
     549         366 :     return mss;
     550             : }
     551             : 
     552             : /*
     553             :  * Prepare sort support info using the given sort operator and collation
     554             :  * at the position 'sortdim'
     555             :  */
     556             : void
     557         870 : multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
     558             :                          Oid oper, Oid collation)
     559             : {
     560         870 :     SortSupport ssup = &mss->ssup[sortdim];
     561             : 
     562         870 :     ssup->ssup_cxt = CurrentMemoryContext;
     563         870 :     ssup->ssup_collation = collation;
     564         870 :     ssup->ssup_nulls_first = false;
     565             : 
     566         870 :     PrepareSortSupportFromOrderingOp(oper, ssup);
     567         870 : }
     568             : 
     569             : /* compare all the dimensions in the selected order */
     570             : int
     571    10702518 : multi_sort_compare(const void *a, const void *b, void *arg)
     572             : {
     573    10702518 :     MultiSortSupport mss = (MultiSortSupport) arg;
     574    10702518 :     SortItem   *ia = (SortItem *) a;
     575    10702518 :     SortItem   *ib = (SortItem *) b;
     576             :     int         i;
     577             : 
     578    18193500 :     for (i = 0; i < mss->ndims; i++)
     579             :     {
     580             :         int         compare;
     581             : 
     582    33064312 :         compare = ApplySortComparator(ia->values[i], ia->isnull[i],
     583    33064312 :                                       ib->values[i], ib->isnull[i],
     584    16532156 :                                       &mss->ssup[i]);
     585             : 
     586    16532156 :         if (compare != 0)
     587     9041174 :             return compare;
     588             :     }
     589             : 
     590             :     /* equal by default */
     591     1661344 :     return 0;
     592             : }
     593             : 
     594             : /* compare selected dimension */
     595             : int
     596      869788 : multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b,
     597             :                        MultiSortSupport mss)
     598             : {
     599     1739576 :     return ApplySortComparator(a->values[dim], a->isnull[dim],
     600     1739576 :                                b->values[dim], b->isnull[dim],
     601      869788 :                                &mss->ssup[dim]);
     602             : }
     603             : 
     604             : int
     605      913158 : multi_sort_compare_dims(int start, int end,
     606             :                         const SortItem *a, const SortItem *b,
     607             :                         MultiSortSupport mss)
     608             : {
     609             :     int         dim;
     610             : 
     611     2080234 :     for (dim = start; dim <= end; dim++)
     612             :     {
     613     2420892 :         int         r = ApplySortComparator(a->values[dim], a->isnull[dim],
     614     2420892 :                                             b->values[dim], b->isnull[dim],
     615     1210446 :                                             &mss->ssup[dim]);
     616             : 
     617     1210446 :         if (r != 0)
     618       43370 :             return r;
     619             :     }
     620             : 
     621      869788 :     return 0;
     622             : }
     623             : 
     624             : int
     625       65922 : compare_scalars_simple(const void *a, const void *b, void *arg)
     626             : {
     627       65922 :     return compare_datums_simple(*(Datum *) a,
     628             :                                  *(Datum *) b,
     629             :                                  (SortSupport) arg);
     630             : }
     631             : 
     632             : int
     633       74162 : compare_datums_simple(Datum a, Datum b, SortSupport ssup)
     634             : {
     635       74162 :     return ApplySortComparator(a, false, b, false, ssup);
     636             : }
     637             : 
     638             : /* simple counterpart to qsort_arg */
     639             : void *
     640       16920 : bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size,
     641             :             int (*compar) (const void *, const void *, void *),
     642             :             void *arg)
     643             : {
     644             :     size_t      l,
     645             :                 u,
     646             :                 idx;
     647             :     const void *p;
     648             :     int         comparison;
     649             : 
     650       16920 :     l = 0;
     651       16920 :     u = nmemb;
     652       94568 :     while (l < u)
     653             :     {
     654       77648 :         idx = (l + u) / 2;
     655       77648 :         p = (void *) (((const char *) base) + (idx * size));
     656       77648 :         comparison = (*compar) (key, p, arg);
     657             : 
     658       77648 :         if (comparison < 0)
     659       36200 :             u = idx;
     660       41448 :         else if (comparison > 0)
     661       24528 :             l = idx + 1;
     662             :         else
     663       16920 :             return (void *) p;
     664             :     }
     665             : 
     666           0 :     return NULL;
     667             : }
     668             : 
     669             : /*
     670             :  * build_attnums_array
     671             :  *      Transforms a bitmap into an array of AttrNumber values.
     672             :  *
     673             :  * This is used for extended statistics only, so all the attribute must be
     674             :  * user-defined. That means offsetting by FirstLowInvalidHeapAttributeNumber
     675             :  * is not necessary here (and when querying the bitmap).
     676             :  */
     677             : AttrNumber *
     678         360 : build_attnums_array(Bitmapset *attrs, int *numattrs)
     679             : {
     680             :     int         i,
     681             :                 j;
     682             :     AttrNumber *attnums;
     683         360 :     int         num = bms_num_members(attrs);
     684             : 
     685         360 :     if (numattrs)
     686         360 :         *numattrs = num;
     687             : 
     688             :     /* build attnums from the bitmapset */
     689         360 :     attnums = (AttrNumber *) palloc(sizeof(AttrNumber) * num);
     690         360 :     i = 0;
     691         360 :     j = -1;
     692        1752 :     while ((j = bms_next_member(attrs, j)) >= 0)
     693             :     {
     694             :         /*
     695             :          * Make sure the bitmap contains only user-defined attributes. As
     696             :          * bitmaps can't contain negative values, this can be violated in two
     697             :          * ways. Firstly, the bitmap might contain 0 as a member, and secondly
     698             :          * the integer value might be larger than MaxAttrNumber.
     699             :          */
     700             :         Assert(AttrNumberIsForUserDefinedAttr(j));
     701             :         Assert(j <= MaxAttrNumber);
     702             : 
     703        1032 :         attnums[i++] = (AttrNumber) j;
     704             : 
     705             :         /* protect against overflows */
     706             :         Assert(i <= num);
     707             :     }
     708             : 
     709         360 :     return attnums;
     710             : }
     711             : 
     712             : /*
     713             :  * build_sorted_items
     714             :  *      build a sorted array of SortItem with values from rows
     715             :  *
     716             :  * Note: All the memory is allocated in a single chunk, so that the caller
     717             :  * can simply pfree the return value to release all of it.
     718             :  */
     719             : SortItem *
     720         326 : build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc,
     721             :                    MultiSortSupport mss, int numattrs, AttrNumber *attnums)
     722             : {
     723             :     int         i,
     724             :                 j,
     725             :                 len,
     726             :                 idx;
     727         326 :     int         nvalues = numrows * numattrs;
     728             : 
     729             :     SortItem   *items;
     730             :     Datum      *values;
     731             :     bool       *isnull;
     732             :     char       *ptr;
     733             : 
     734             :     /* Compute the total amount of memory we need (both items and values). */
     735         326 :     len = numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool));
     736             : 
     737             :     /* Allocate the memory and split it into the pieces. */
     738         326 :     ptr = palloc0(len);
     739             : 
     740             :     /* items to sort */
     741         326 :     items = (SortItem *) ptr;
     742         326 :     ptr += numrows * sizeof(SortItem);
     743             : 
     744             :     /* values and null flags */
     745         326 :     values = (Datum *) ptr;
     746         326 :     ptr += nvalues * sizeof(Datum);
     747             : 
     748         326 :     isnull = (bool *) ptr;
     749         326 :     ptr += nvalues * sizeof(bool);
     750             : 
     751             :     /* make sure we consumed the whole buffer exactly */
     752             :     Assert((ptr - (char *) items) == len);
     753             : 
     754             :     /* fix the pointers to Datum and bool arrays */
     755         326 :     idx = 0;
     756     1205938 :     for (i = 0; i < numrows; i++)
     757             :     {
     758     1205612 :         bool        toowide = false;
     759             : 
     760     1205612 :         items[idx].values = &values[idx * numattrs];
     761     1205612 :         items[idx].isnull = &isnull[idx * numattrs];
     762             : 
     763             :         /* load the values/null flags from sample rows */
     764     4159836 :         for (j = 0; j < numattrs; j++)
     765             :         {
     766             :             Datum       value;
     767             :             bool        isnull;
     768             : 
     769     2954224 :             value = heap_getattr(rows[i], attnums[j], tdesc, &isnull);
     770             : 
     771             :             /*
     772             :              * If this is a varlena value, check if it's too wide and if yes
     773             :              * then skip the whole item. Otherwise detoast the value.
     774             :              *
     775             :              * XXX It may happen that we've already detoasted some preceding
     776             :              * values for the current item. We don't bother to cleanup those
     777             :              * on the assumption that those are small (below WIDTH_THRESHOLD)
     778             :              * and will be discarded at the end of analyze.
     779             :              */
     780     5860356 :             if ((!isnull) &&
     781     2906132 :                 (TupleDescAttr(tdesc, attnums[j] - 1)->attlen == -1))
     782             :             {
     783     1105508 :                 if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
     784             :                 {
     785           0 :                     toowide = true;
     786           0 :                     break;
     787             :                 }
     788             : 
     789     1105508 :                 value = PointerGetDatum(PG_DETOAST_DATUM(value));
     790             :             }
     791             : 
     792     2954224 :             items[idx].values[j] = value;
     793     2954224 :             items[idx].isnull[j] = isnull;
     794             :         }
     795             : 
     796     1205612 :         if (toowide)
     797           0 :             continue;
     798             : 
     799     1205612 :         idx++;
     800             :     }
     801             : 
     802             :     /* store the actual number of items (ignoring the too-wide ones) */
     803         326 :     *nitems = idx;
     804             : 
     805             :     /* all items were too wide */
     806         326 :     if (idx == 0)
     807             :     {
     808             :         /* everything is allocated as a single chunk */
     809           0 :         pfree(items);
     810           0 :         return NULL;
     811             :     }
     812             : 
     813             :     /* do the sort, using the multi-sort */
     814         326 :     qsort_arg((void *) items, idx, sizeof(SortItem),
     815             :               multi_sort_compare, mss);
     816             : 
     817         326 :     return items;
     818             : }
     819             : 
     820             : /*
     821             :  * has_stats_of_kind
     822             :  *      Check whether the list contains statistic of a given kind
     823             :  */
     824             : bool
     825         456 : has_stats_of_kind(List *stats, char requiredkind)
     826             : {
     827             :     ListCell   *l;
     828             : 
     829         684 :     foreach(l, stats)
     830             :     {
     831         456 :         StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
     832             : 
     833         456 :         if (stat->kind == requiredkind)
     834         228 :             return true;
     835             :     }
     836             : 
     837         228 :     return false;
     838             : }
     839             : 
     840             : /*
     841             :  * choose_best_statistics
     842             :  *      Look for and return statistics with the specified 'requiredkind' which
     843             :  *      have keys that match at least two of the given attnums.  Return NULL if
     844             :  *      there's no match.
     845             :  *
     846             :  * The current selection criteria is very simple - we choose the statistics
     847             :  * object referencing the most of the requested attributes, breaking ties
     848             :  * in favor of objects with fewer keys overall.
     849             :  *
     850             :  * XXX If multiple statistics objects tie on both criteria, then which object
     851             :  * is chosen depends on the order that they appear in the stats list. Perhaps
     852             :  * further tiebreakers are needed.
     853             :  */
     854             : StatisticExtInfo *
     855         208 : choose_best_statistics(List *stats, Bitmapset *attnums, char requiredkind)
     856             : {
     857             :     ListCell   *lc;
     858         208 :     StatisticExtInfo *best_match = NULL;
     859         208 :     int         best_num_matched = 2;   /* goal #1: maximize */
     860         208 :     int         best_match_keys = (STATS_MAX_DIMENSIONS + 1);   /* goal #2: minimize */
     861             : 
     862         416 :     foreach(lc, stats)
     863             :     {
     864         208 :         StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
     865             :         int         num_matched;
     866             :         int         numkeys;
     867             :         Bitmapset  *matched;
     868             : 
     869             :         /* skip statistics that are not of the correct type */
     870         208 :         if (info->kind != requiredkind)
     871           0 :             continue;
     872             : 
     873             :         /* determine how many attributes of these stats can be matched to */
     874         208 :         matched = bms_intersect(attnums, info->keys);
     875         208 :         num_matched = bms_num_members(matched);
     876         208 :         bms_free(matched);
     877             : 
     878             :         /*
     879             :          * save the actual number of keys in the stats so that we can choose
     880             :          * the narrowest stats with the most matching keys.
     881             :          */
     882         208 :         numkeys = bms_num_members(info->keys);
     883             : 
     884             :         /*
     885             :          * Use this object when it increases the number of matched clauses or
     886             :          * when it matches the same number of attributes but these stats have
     887             :          * fewer keys than any previous match.
     888             :          */
     889         208 :         if (num_matched > best_num_matched ||
     890         112 :             (num_matched == best_num_matched && numkeys < best_match_keys))
     891             :         {
     892         208 :             best_match = info;
     893         208 :             best_num_matched = num_matched;
     894         208 :             best_match_keys = numkeys;
     895             :         }
     896             :     }
     897             : 
     898         208 :     return best_match;
     899             : }
     900             : 
     901             : /*
     902             :  * statext_is_compatible_clause_internal
     903             :  *      Determines if the clause is compatible with MCV lists.
     904             :  *
     905             :  * Does the heavy lifting of actually inspecting the clauses for
     906             :  * statext_is_compatible_clause. It needs to be split like this because
     907             :  * of recursion.  The attnums bitmap is an input/output parameter collecting
     908             :  * attribute numbers from all compatible clauses (recursively).
     909             :  */
     910             : static bool
     911         360 : statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
     912             :                                       Index relid, Bitmapset **attnums)
     913             : {
     914             :     /* Look inside any binary-compatible relabeling (as in examine_variable) */
     915         360 :     if (IsA(clause, RelabelType))
     916           0 :         clause = (Node *) ((RelabelType *) clause)->arg;
     917             : 
     918             :     /* plain Var references (boolean Vars or recursive checks) */
     919         360 :     if (IsA(clause, Var))
     920             :     {
     921         180 :         Var        *var = (Var *) clause;
     922             : 
     923             :         /* Ensure var is from the correct relation */
     924         180 :         if (var->varno != relid)
     925           0 :             return false;
     926             : 
     927             :         /* we also better ensure the Var is from the current level */
     928         180 :         if (var->varlevelsup > 0)
     929           0 :             return false;
     930             : 
     931             :         /* Also skip system attributes (we don't allow stats on those). */
     932         180 :         if (!AttrNumberIsForUserDefinedAttr(var->varattno))
     933           0 :             return false;
     934             : 
     935         180 :         *attnums = bms_add_member(*attnums, var->varattno);
     936             : 
     937         180 :         return true;
     938             :     }
     939             : 
     940             :     /* (Var op Const) or (Const op Var) */
     941         180 :     if (is_opclause(clause))
     942             :     {
     943         136 :         RangeTblEntry *rte = root->simple_rte_array[relid];
     944         136 :         OpExpr     *expr = (OpExpr *) clause;
     945             :         Var        *var;
     946             : 
     947             :         /* Only expressions with two arguments are considered compatible. */
     948         136 :         if (list_length(expr->args) != 2)
     949           0 :             return false;
     950             : 
     951             :         /* Check if the expression the right shape (one Var, one Const) */
     952         136 :         if (!examine_opclause_expression(expr, &var, NULL, NULL))
     953           0 :             return false;
     954             : 
     955             :         /*
     956             :          * If it's not one of the supported operators ("=", "<", ">", etc.),
     957             :          * just ignore the clause, as it's not compatible with MCV lists.
     958             :          *
     959             :          * This uses the function for estimating selectivity, not the operator
     960             :          * directly (a bit awkward, but well ...).
     961             :          */
     962         136 :         switch (get_oprrest(expr->opno))
     963             :         {
     964             :             case F_EQSEL:
     965             :             case F_NEQSEL:
     966             :             case F_SCALARLTSEL:
     967             :             case F_SCALARLESEL:
     968             :             case F_SCALARGTSEL:
     969             :             case F_SCALARGESEL:
     970             :                 /* supported, will continue with inspection of the Var */
     971         136 :                 break;
     972             : 
     973             :             default:
     974             :                 /* other estimators are considered unknown/unsupported */
     975           0 :                 return false;
     976             :         }
     977             : 
     978             :         /*
     979             :          * If there are any securityQuals on the RTE from security barrier
     980             :          * views or RLS policies, then the user may not have access to all the
     981             :          * table's data, and we must check that the operator is leak-proof.
     982             :          *
     983             :          * If the operator is leaky, then we must ignore this clause for the
     984             :          * purposes of estimating with MCV lists, otherwise the operator might
     985             :          * reveal values from the MCV list that the user doesn't have
     986             :          * permission to see.
     987             :          */
     988         160 :         if (rte->securityQuals != NIL &&
     989          24 :             !get_func_leakproof(get_opcode(expr->opno)))
     990          24 :             return false;
     991             : 
     992         112 :         return statext_is_compatible_clause_internal(root, (Node *) var,
     993             :                                                      relid, attnums);
     994             :     }
     995             : 
     996             :     /* AND/OR/NOT clause */
     997          88 :     if (is_andclause(clause) ||
     998          84 :         is_orclause(clause) ||
     999          40 :         is_notclause(clause))
    1000             :     {
    1001             :         /*
    1002             :          * AND/OR/NOT-clauses are supported if all sub-clauses are supported
    1003             :          *
    1004             :          * Perhaps we could improve this by handling mixed cases, when some of
    1005             :          * the clauses are supported and some are not. Selectivity for the
    1006             :          * supported subclauses would be computed using extended statistics,
    1007             :          * and the remaining clauses would be estimated using the traditional
    1008             :          * algorithm (product of selectivities).
    1009             :          *
    1010             :          * It however seems overly complex, and in a way we already do that
    1011             :          * because if we reject the whole clause as unsupported here, it will
    1012             :          * be eventually passed to clauselist_selectivity() which does exactly
    1013             :          * this (split into supported/unsupported clauses etc).
    1014             :          */
    1015          24 :         BoolExpr   *expr = (BoolExpr *) clause;
    1016             :         ListCell   *lc;
    1017             : 
    1018          52 :         foreach(lc, expr->args)
    1019             :         {
    1020             :             /*
    1021             :              * Had we found incompatible clause in the arguments, treat the
    1022             :              * whole clause as incompatible.
    1023             :              */
    1024          28 :             if (!statext_is_compatible_clause_internal(root,
    1025          28 :                                                        (Node *) lfirst(lc),
    1026             :                                                        relid, attnums))
    1027           0 :                 return false;
    1028             :         }
    1029             : 
    1030          24 :         return true;
    1031             :     }
    1032             : 
    1033             :     /* Var IS NULL */
    1034          20 :     if (IsA(clause, NullTest))
    1035             :     {
    1036          20 :         NullTest   *nt = (NullTest *) clause;
    1037             : 
    1038             :         /*
    1039             :          * Only simple (Var IS NULL) expressions supported for now. Maybe we
    1040             :          * could use examine_variable to fix this?
    1041             :          */
    1042          20 :         if (!IsA(nt->arg, Var))
    1043           0 :             return false;
    1044             : 
    1045          20 :         return statext_is_compatible_clause_internal(root, (Node *) (nt->arg),
    1046             :                                                      relid, attnums);
    1047             :     }
    1048             : 
    1049           0 :     return false;
    1050             : }
    1051             : 
    1052             : /*
    1053             :  * statext_is_compatible_clause
    1054             :  *      Determines if the clause is compatible with MCV lists.
    1055             :  *
    1056             :  * Currently, we only support three types of clauses:
    1057             :  *
    1058             :  * (a) OpExprs of the form (Var op Const), or (Const op Var), where the op
    1059             :  * is one of ("=", "<", ">", ">=", "<=")
    1060             :  *
    1061             :  * (b) (Var IS [NOT] NULL)
    1062             :  *
    1063             :  * (c) combinations using AND/OR/NOT
    1064             :  *
    1065             :  * In the future, the range of supported clauses may be expanded to more
    1066             :  * complex cases, for example (Var op Var).
    1067             :  */
    1068             : static bool
    1069         216 : statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
    1070             :                              Bitmapset **attnums)
    1071             : {
    1072         216 :     RangeTblEntry *rte = root->simple_rte_array[relid];
    1073         216 :     RestrictInfo *rinfo = (RestrictInfo *) clause;
    1074             :     Oid         userid;
    1075             : 
    1076         216 :     if (!IsA(rinfo, RestrictInfo))
    1077           0 :         return false;
    1078             : 
    1079             :     /* Pseudoconstants are not really interesting here. */
    1080         216 :     if (rinfo->pseudoconstant)
    1081           0 :         return false;
    1082             : 
    1083             :     /* clauses referencing multiple varnos are incompatible */
    1084         216 :     if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
    1085          16 :         return false;
    1086             : 
    1087             :     /* Check the clause and determine what attributes it references. */
    1088         200 :     if (!statext_is_compatible_clause_internal(root, (Node *) rinfo->clause,
    1089             :                                                relid, attnums))
    1090          24 :         return false;
    1091             : 
    1092             :     /*
    1093             :      * Check that the user has permission to read all these attributes.  Use
    1094             :      * checkAsUser if it's set, in case we're accessing the table via a view.
    1095             :      */
    1096         176 :     userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
    1097             : 
    1098         176 :     if (pg_class_aclcheck(rte->relid, userid, ACL_SELECT) != ACLCHECK_OK)
    1099             :     {
    1100             :         /* Don't have table privilege, must check individual columns */
    1101          16 :         if (bms_is_member(InvalidAttrNumber, *attnums))
    1102             :         {
    1103             :             /* Have a whole-row reference, must have access to all columns */
    1104           0 :             if (pg_attribute_aclcheck_all(rte->relid, userid, ACL_SELECT,
    1105             :                                           ACLMASK_ALL) != ACLCHECK_OK)
    1106           0 :                 return false;
    1107             :         }
    1108             :         else
    1109             :         {
    1110             :             /* Check the columns referenced by the clause */
    1111          16 :             int         attnum = -1;
    1112             : 
    1113          32 :             while ((attnum = bms_next_member(*attnums, attnum)) >= 0)
    1114             :             {
    1115          16 :                 if (pg_attribute_aclcheck(rte->relid, attnum, userid,
    1116             :                                           ACL_SELECT) != ACLCHECK_OK)
    1117          16 :                     return false;
    1118             :             }
    1119             :         }
    1120             :     }
    1121             : 
    1122             :     /* If we reach here, the clause is OK */
    1123         160 :     return true;
    1124             : }
    1125             : 
    1126             : /*
    1127             :  * statext_mcv_clauselist_selectivity
    1128             :  *      Estimate clauses using the best multi-column statistics.
    1129             :  *
    1130             :  * Selects the best extended (multi-column) statistic on a table (measured by
    1131             :  * the number of attributes extracted from the clauses and covered by it), and
    1132             :  * computes the selectivity for the supplied clauses.
    1133             :  *
    1134             :  * One of the main challenges with using MCV lists is how to extrapolate the
    1135             :  * estimate to the data not covered by the MCV list. To do that, we compute
    1136             :  * not only the "MCV selectivity" (selectivities for MCV items matching the
    1137             :  * supplied clauses), but also a couple of derived selectivities:
    1138             :  *
    1139             :  * - simple selectivity:  Computed without extended statistic, i.e. as if the
    1140             :  * columns/clauses were independent
    1141             :  *
    1142             :  * - base selectivity:  Similar to simple selectivity, but is computed using
    1143             :  * the extended statistic by adding up the base frequencies (that we compute
    1144             :  * and store for each MCV item) of matching MCV items.
    1145             :  *
    1146             :  * - total selectivity: Selectivity covered by the whole MCV list.
    1147             :  *
    1148             :  * - other selectivity: A selectivity estimate for data not covered by the MCV
    1149             :  * list (i.e. satisfying the clauses, but not common enough to make it into
    1150             :  * the MCV list)
    1151             :  *
    1152             :  * Note: While simple and base selectivities are defined in a quite similar
    1153             :  * way, the values are computed differently and are not therefore equal. The
    1154             :  * simple selectivity is computed as a product of per-clause estimates, while
    1155             :  * the base selectivity is computed by adding up base frequencies of matching
    1156             :  * items of the multi-column MCV list. So the values may differ for two main
    1157             :  * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
    1158             :  * the MCV items did not match the estimated clauses.
    1159             :  *
    1160             :  * As both (a) and (b) reduce the base selectivity value, it generally holds
    1161             :  * that (simple_selectivity >= base_selectivity). If the MCV list covers all
    1162             :  * the data, the values may be equal.
    1163             :  *
    1164             :  * So, (simple_selectivity - base_selectivity) is an estimate for the part
    1165             :  * not covered by the MCV list, and (mcv_selectivity - base_selectivity) may
    1166             :  * be seen as a correction for the part covered by the MCV list. Those two
    1167             :  * statements are actually equivalent.
    1168             :  *
    1169             :  * Note: Due to rounding errors and minor differences in how the estimates
    1170             :  * are computed, the inequality may not always hold. Which is why we clamp
    1171             :  * the selectivities to prevent strange estimate (negative etc.).
    1172             :  *
    1173             :  * 'estimatedclauses' is an input/output parameter.  We set bits for the
    1174             :  * 0-based 'clauses' indexes we estimate for and also skip clause items that
    1175             :  * already have a bit set.
    1176             :  *
    1177             :  * XXX If we were to use multiple statistics, this is where it would happen.
    1178             :  * We would simply repeat this on a loop on the "remaining" clauses, possibly
    1179             :  * using the already estimated clauses as conditions (and combining the values
    1180             :  * using conditional probability formula).
    1181             :  */
    1182             : static Selectivity
    1183         228 : statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
    1184             :                                    JoinType jointype, SpecialJoinInfo *sjinfo,
    1185             :                                    RelOptInfo *rel, Bitmapset **estimatedclauses)
    1186             : {
    1187             :     ListCell   *l;
    1188         228 :     Bitmapset  *clauses_attnums = NULL;
    1189             :     Bitmapset **list_attnums;
    1190             :     int         listidx;
    1191             :     StatisticExtInfo *stat;
    1192             :     List       *stat_clauses;
    1193             :     Selectivity simple_sel,
    1194             :                 mcv_sel,
    1195             :                 mcv_basesel,
    1196             :                 mcv_totalsel,
    1197             :                 other_sel,
    1198             :                 sel;
    1199             : 
    1200             :     /* check if there's any stats that might be useful for us. */
    1201         228 :     if (!has_stats_of_kind(rel->statlist, STATS_EXT_MCV))
    1202         144 :         return 1.0;
    1203             : 
    1204          84 :     list_attnums = (Bitmapset **) palloc(sizeof(Bitmapset *) *
    1205          84 :                                          list_length(clauses));
    1206             : 
    1207             :     /*
    1208             :      * Pre-process the clauses list to extract the attnums seen in each item.
    1209             :      * We need to determine if there's any clauses which will be useful for
    1210             :      * selectivity estimations with extended stats. Along the way we'll record
    1211             :      * all of the attnums for each clause in a list which we'll reference
    1212             :      * later so we don't need to repeat the same work again. We'll also keep
    1213             :      * track of all attnums seen.
    1214             :      *
    1215             :      * We also skip clauses that we already estimated using different types of
    1216             :      * statistics (we treat them as incompatible).
    1217             :      */
    1218          84 :     listidx = 0;
    1219         300 :     foreach(l, clauses)
    1220             :     {
    1221         216 :         Node       *clause = (Node *) lfirst(l);
    1222         216 :         Bitmapset  *attnums = NULL;
    1223             : 
    1224         432 :         if (!bms_is_member(listidx, *estimatedclauses) &&
    1225         216 :             statext_is_compatible_clause(root, clause, rel->relid, &attnums))
    1226             :         {
    1227         160 :             list_attnums[listidx] = attnums;
    1228         160 :             clauses_attnums = bms_add_members(clauses_attnums, attnums);
    1229             :         }
    1230             :         else
    1231          56 :             list_attnums[listidx] = NULL;
    1232             : 
    1233         216 :         listidx++;
    1234             :     }
    1235             : 
    1236             :     /* We need at least two attributes for multivariate statistics. */
    1237          84 :     if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
    1238          20 :         return 1.0;
    1239             : 
    1240             :     /* find the best suited statistics object for these attnums */
    1241          64 :     stat = choose_best_statistics(rel->statlist, clauses_attnums, STATS_EXT_MCV);
    1242             : 
    1243             :     /* if no matching stats could be found then we've nothing to do */
    1244          64 :     if (!stat)
    1245           0 :         return 1.0;
    1246             : 
    1247             :     /* Ensure choose_best_statistics produced an expected stats type. */
    1248             :     Assert(stat->kind == STATS_EXT_MCV);
    1249             : 
    1250             :     /* now filter the clauses to be estimated using the selected MCV */
    1251          64 :     stat_clauses = NIL;
    1252             : 
    1253          64 :     listidx = 0;
    1254         224 :     foreach(l, clauses)
    1255             :     {
    1256             :         /*
    1257             :          * If the clause is compatible with the selected statistics, mark it
    1258             :          * as estimated and add it to the list to estimate.
    1259             :          */
    1260         320 :         if (list_attnums[listidx] != NULL &&
    1261         160 :             bms_is_subset(list_attnums[listidx], stat->keys))
    1262             :         {
    1263         160 :             stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
    1264         160 :             *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
    1265             :         }
    1266             : 
    1267         160 :         listidx++;
    1268             :     }
    1269             : 
    1270             :     /*
    1271             :      * First compute "simple" selectivity, i.e. without the extended
    1272             :      * statistics, and essentially assuming independence of the
    1273             :      * columns/clauses. We'll then use the various selectivities computed from
    1274             :      * MCV list to improve it.
    1275             :      */
    1276          64 :     simple_sel = clauselist_selectivity_simple(root, stat_clauses, varRelid,
    1277             :                                                jointype, sjinfo, NULL);
    1278             : 
    1279             :     /*
    1280             :      * Now compute the multi-column estimate from the MCV list, along with the
    1281             :      * other selectivities (base & total selectivity).
    1282             :      */
    1283          64 :     mcv_sel = mcv_clauselist_selectivity(root, stat, stat_clauses, varRelid,
    1284             :                                          jointype, sjinfo, rel,
    1285             :                                          &mcv_basesel, &mcv_totalsel);
    1286             : 
    1287             :     /* Estimated selectivity of values not covered by MCV matches */
    1288          64 :     other_sel = simple_sel - mcv_basesel;
    1289          64 :     CLAMP_PROBABILITY(other_sel);
    1290             : 
    1291             :     /* The non-MCV selectivity can't exceed the 1 - mcv_totalsel. */
    1292          64 :     if (other_sel > 1.0 - mcv_totalsel)
    1293          52 :         other_sel = 1.0 - mcv_totalsel;
    1294             : 
    1295             :     /* Overall selectivity is the combination of MCV and non-MCV estimates. */
    1296          64 :     sel = mcv_sel + other_sel;
    1297          64 :     CLAMP_PROBABILITY(sel);
    1298             : 
    1299          64 :     return sel;
    1300             : }
    1301             : 
    1302             : /*
    1303             :  * statext_clauselist_selectivity
    1304             :  *      Estimate clauses using the best multi-column statistics.
    1305             :  */
    1306             : Selectivity
    1307         228 : statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
    1308             :                                JoinType jointype, SpecialJoinInfo *sjinfo,
    1309             :                                RelOptInfo *rel, Bitmapset **estimatedclauses)
    1310             : {
    1311             :     Selectivity sel;
    1312             : 
    1313             :     /* First, try estimating clauses using a multivariate MCV list. */
    1314         228 :     sel = statext_mcv_clauselist_selectivity(root, clauses, varRelid, jointype,
    1315             :                                              sjinfo, rel, estimatedclauses);
    1316             : 
    1317             :     /*
    1318             :      * Then, apply functional dependencies on the remaining clauses by calling
    1319             :      * dependencies_clauselist_selectivity.  Pass 'estimatedclauses' so the
    1320             :      * function can properly skip clauses already estimated above.
    1321             :      *
    1322             :      * The reasoning for applying dependencies last is that the more complex
    1323             :      * stats can track more complex correlations between the attributes, and
    1324             :      * so may be considered more reliable.
    1325             :      *
    1326             :      * For example, MCV list can give us an exact selectivity for values in
    1327             :      * two columns, while functional dependencies can only provide information
    1328             :      * about the overall strength of the dependency.
    1329             :      */
    1330         228 :     sel *= dependencies_clauselist_selectivity(root, clauses, varRelid,
    1331             :                                                jointype, sjinfo, rel,
    1332             :                                                estimatedclauses);
    1333             : 
    1334         228 :     return sel;
    1335             : }
    1336             : 
    1337             : /*
    1338             :  * examine_opclause_expression
    1339             :  *      Split expression into Var and Const parts.
    1340             :  *
    1341             :  * Attempts to match the arguments to either (Var op Const) or (Const op Var),
    1342             :  * possibly with a RelabelType on top. When the expression matches this form,
    1343             :  * returns true, otherwise returns false.
    1344             :  *
    1345             :  * Optionally returns pointers to the extracted Var/Const nodes, when passed
    1346             :  * non-null pointers (varp, cstp and varonleftp). The varonleftp flag specifies
    1347             :  * on which side of the operator we found the Var node.
    1348             :  */
    1349             : bool
    1350         232 : examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp, bool *varonleftp)
    1351             : {
    1352             :     Var    *var;
    1353             :     Const  *cst;
    1354             :     bool    varonleft;
    1355             :     Node   *leftop,
    1356             :            *rightop;
    1357             : 
    1358             :     /* enforced by statext_is_compatible_clause_internal */
    1359             :     Assert(list_length(expr->args) == 2);
    1360             : 
    1361         232 :     leftop = linitial(expr->args);
    1362         232 :     rightop = lsecond(expr->args);
    1363             : 
    1364             :     /* strip RelabelType from either side of the expression */
    1365         232 :     if (IsA(leftop, RelabelType))
    1366          88 :         leftop = (Node *) ((RelabelType *) leftop)->arg;
    1367             : 
    1368         232 :     if (IsA(rightop, RelabelType))
    1369           0 :         rightop = (Node *) ((RelabelType *) rightop)->arg;
    1370             : 
    1371         232 :     if (IsA(leftop, Var) && IsA(rightop, Const))
    1372             :     {
    1373         232 :         var = (Var *) leftop;
    1374         232 :         cst = (Const *) rightop;
    1375         232 :         varonleft = true;
    1376             :     }
    1377           0 :     else if (IsA(leftop, Const) && IsA(rightop, Var))
    1378             :     {
    1379           0 :         var = (Var *) rightop;
    1380           0 :         cst = (Const *) leftop;
    1381           0 :         varonleft = false;
    1382             :     }
    1383             :     else
    1384           0 :         return false;
    1385             : 
    1386             :     /* return pointers to the extracted parts if requested */
    1387         232 :     if (varp)
    1388         232 :         *varp = var;
    1389             : 
    1390         232 :     if (cstp)
    1391          96 :         *cstp = cst;
    1392             : 
    1393         232 :     if (varonleftp)
    1394          96 :         *varonleftp = varonleft;
    1395             : 
    1396         232 :     return true;
    1397             : }

Generated by: LCOV version 1.13