LCOV - code coverage report
Current view: top level - src/backend/statistics - extended_stats.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13beta1 Lines: 391 425 92.0 %
Date: 2020-05-29 01:06:25 Functions: 24 24 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * extended_stats.c
       4             :  *    POSTGRES extended statistics
       5             :  *
       6             :  * Generic code supporting statistics objects created via CREATE STATISTICS.
       7             :  *
       8             :  *
       9             :  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
      10             :  * Portions Copyright (c) 1994, Regents of the University of California
      11             :  *
      12             :  * IDENTIFICATION
      13             :  *    src/backend/statistics/extended_stats.c
      14             :  *
      15             :  *-------------------------------------------------------------------------
      16             :  */
      17             : #include "postgres.h"
      18             : 
      19             : #include "access/detoast.h"
      20             : #include "access/genam.h"
      21             : #include "access/htup_details.h"
      22             : #include "access/table.h"
      23             : #include "catalog/indexing.h"
      24             : #include "catalog/pg_collation.h"
      25             : #include "catalog/pg_statistic_ext.h"
      26             : #include "catalog/pg_statistic_ext_data.h"
      27             : #include "commands/progress.h"
      28             : #include "miscadmin.h"
      29             : #include "nodes/nodeFuncs.h"
      30             : #include "optimizer/clauses.h"
      31             : #include "optimizer/optimizer.h"
      32             : #include "pgstat.h"
      33             : #include "postmaster/autovacuum.h"
      34             : #include "statistics/extended_stats_internal.h"
      35             : #include "statistics/statistics.h"
      36             : #include "utils/acl.h"
      37             : #include "utils/array.h"
      38             : #include "utils/builtins.h"
      39             : #include "utils/fmgroids.h"
      40             : #include "utils/lsyscache.h"
      41             : #include "utils/memutils.h"
      42             : #include "utils/rel.h"
      43             : #include "utils/selfuncs.h"
      44             : #include "utils/syscache.h"
      45             : 
      46             : /*
      47             :  * To avoid consuming too much memory during analysis and/or too much space
      48             :  * in the resulting pg_statistic rows, we ignore varlena datums that are wider
      49             :  * than WIDTH_THRESHOLD (after detoasting!).  This is legitimate for MCV
      50             :  * and distinct-value calculations since a wide value is unlikely to be
      51             :  * duplicated at all, much less be a most-common value.  For the same reason,
      52             :  * ignoring wide values will not affect our estimates of histogram bin
      53             :  * boundaries very much.
      54             :  */
      55             : #define WIDTH_THRESHOLD  1024
      56             : 
      57             : /*
      58             :  * Used internally to refer to an individual statistics object, i.e.,
      59             :  * a pg_statistic_ext entry.
      60             :  */
      61             : typedef struct StatExtEntry
      62             : {
      63             :     Oid         statOid;        /* OID of pg_statistic_ext entry */
      64             :     char       *schema;         /* statistics object's schema */
      65             :     char       *name;           /* statistics object's name */
      66             :     Bitmapset  *columns;        /* attribute numbers covered by the object */
      67             :     List       *types;          /* 'char' list of enabled statistic kinds */
      68             :     int         stattarget;     /* statistics target (-1 for default) */
      69             : } StatExtEntry;
      70             : 
      71             : 
      72             : static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
      73             : static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
      74             :                                             int nvacatts, VacAttrStats **vacatts);
      75             : static void statext_store(Oid relid,
      76             :                           MVNDistinct *ndistinct, MVDependencies *dependencies,
      77             :                           MCVList *mcv, VacAttrStats **stats);
      78             : static int  statext_compute_stattarget(int stattarget,
      79             :                                        int natts, VacAttrStats **stats);
      80             : 
      81             : /*
      82             :  * Compute requested extended stats, using the rows sampled for the plain
      83             :  * (single-column) stats.
      84             :  *
      85             :  * This fetches a list of stats types from pg_statistic_ext, computes the
      86             :  * requested stats, and serializes them back into the catalog.
      87             :  */
      88             : void
      89       21168 : BuildRelationExtStatistics(Relation onerel, double totalrows,
      90             :                            int numrows, HeapTuple *rows,
      91             :                            int natts, VacAttrStats **vacattrstats)
      92             : {
      93             :     Relation    pg_stext;
      94             :     ListCell   *lc;
      95             :     List       *stats;
      96             :     MemoryContext cxt;
      97             :     MemoryContext oldcxt;
      98             :     int64       ext_cnt;
      99             : 
     100       21168 :     cxt = AllocSetContextCreate(CurrentMemoryContext,
     101             :                                 "BuildRelationExtStatistics",
     102             :                                 ALLOCSET_DEFAULT_SIZES);
     103       21168 :     oldcxt = MemoryContextSwitchTo(cxt);
     104             : 
     105       21168 :     pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
     106       21168 :     stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
     107             : 
     108             :     /* report this phase */
     109       21168 :     if (stats != NIL)
     110             :     {
     111         112 :         const int   index[] = {
     112             :             PROGRESS_ANALYZE_PHASE,
     113             :             PROGRESS_ANALYZE_EXT_STATS_TOTAL
     114             :         };
     115         224 :         const int64 val[] = {
     116             :             PROGRESS_ANALYZE_PHASE_COMPUTE_EXT_STATS,
     117         112 :             list_length(stats)
     118             :         };
     119             : 
     120         112 :         pgstat_progress_update_multi_param(2, index, val);
     121             :     }
     122             : 
     123       21168 :     ext_cnt = 0;
     124       21288 :     foreach(lc, stats)
     125             :     {
     126         120 :         StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
     127         120 :         MVNDistinct *ndistinct = NULL;
     128         120 :         MVDependencies *dependencies = NULL;
     129         120 :         MCVList    *mcv = NULL;
     130             :         VacAttrStats **stats;
     131             :         ListCell   *lc2;
     132             :         int         stattarget;
     133             : 
     134             :         /*
     135             :          * Check if we can build these stats based on the column analyzed. If
     136             :          * not, report this fact (except in autovacuum) and move on.
     137             :          */
     138         120 :         stats = lookup_var_attr_stats(onerel, stat->columns,
     139             :                                       natts, vacattrstats);
     140         120 :         if (!stats)
     141             :         {
     142           8 :             if (!IsAutoVacuumWorkerProcess())
     143           8 :                 ereport(WARNING,
     144             :                         (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
     145             :                          errmsg("statistics object \"%s.%s\" could not be computed for relation \"%s.%s\"",
     146             :                                 stat->schema, stat->name,
     147             :                                 get_namespace_name(onerel->rd_rel->relnamespace),
     148             :                                 RelationGetRelationName(onerel)),
     149             :                          errtable(onerel)));
     150           8 :             continue;
     151             :         }
     152             : 
     153             :         /* check allowed number of dimensions */
     154             :         Assert(bms_num_members(stat->columns) >= 2 &&
     155             :                bms_num_members(stat->columns) <= STATS_MAX_DIMENSIONS);
     156             : 
     157             :         /* compute statistics target for this statistics */
     158         112 :         stattarget = statext_compute_stattarget(stat->stattarget,
     159         112 :                                                 bms_num_members(stat->columns),
     160             :                                                 stats);
     161             : 
     162             :         /*
     163             :          * Don't rebuild statistics objects with statistics target set to 0
     164             :          * (we just leave the existing values around, just like we do for
     165             :          * regular per-column statistics).
     166             :          */
     167         112 :         if (stattarget == 0)
     168           4 :             continue;
     169             : 
     170             :         /* compute statistic of each requested type */
     171         248 :         foreach(lc2, stat->types)
     172             :         {
     173         140 :             char        t = (char) lfirst_int(lc2);
     174             : 
     175         140 :             if (t == STATS_EXT_NDISTINCT)
     176          16 :                 ndistinct = statext_ndistinct_build(totalrows, numrows, rows,
     177             :                                                     stat->columns, stats);
     178         124 :             else if (t == STATS_EXT_DEPENDENCIES)
     179          42 :                 dependencies = statext_dependencies_build(numrows, rows,
     180             :                                                           stat->columns, stats);
     181          82 :             else if (t == STATS_EXT_MCV)
     182          82 :                 mcv = statext_mcv_build(numrows, rows, stat->columns, stats,
     183             :                                         totalrows, stattarget);
     184             :         }
     185             : 
     186             :         /* store the statistics in the catalog */
     187         108 :         statext_store(stat->statOid, ndistinct, dependencies, mcv, stats);
     188             : 
     189             :         /* for reporting progress */
     190         108 :         pgstat_progress_update_param(PROGRESS_ANALYZE_EXT_STATS_COMPUTED,
     191             :                                      ++ext_cnt);
     192             :     }
     193             : 
     194       21168 :     table_close(pg_stext, RowExclusiveLock);
     195             : 
     196       21168 :     MemoryContextSwitchTo(oldcxt);
     197       21168 :     MemoryContextDelete(cxt);
     198       21168 : }
     199             : 
     200             : /*
     201             :  * ComputeExtStatisticsRows
     202             :  *      Compute number of rows required by extended statistics on a table.
     203             :  *
     204             :  * Computes number of rows we need to sample to build extended statistics on a
     205             :  * table. This only looks at statistics we can actually build - for example
     206             :  * when analyzing only some of the columns, this will skip statistics objects
     207             :  * that would require additional columns.
     208             :  *
     209             :  * See statext_compute_stattarget for details about how we compute statistics
     210             :  * target for a statistics objects (from the object target, attribute targets
     211             :  * and default statistics target).
     212             :  */
     213             : int
     214       33922 : ComputeExtStatisticsRows(Relation onerel,
     215             :                          int natts, VacAttrStats **vacattrstats)
     216             : {
     217             :     Relation    pg_stext;
     218             :     ListCell   *lc;
     219             :     List       *lstats;
     220             :     MemoryContext cxt;
     221             :     MemoryContext oldcxt;
     222       33922 :     int         result = 0;
     223             : 
     224       33922 :     cxt = AllocSetContextCreate(CurrentMemoryContext,
     225             :                                 "ComputeExtStatisticsRows",
     226             :                                 ALLOCSET_DEFAULT_SIZES);
     227       33922 :     oldcxt = MemoryContextSwitchTo(cxt);
     228             : 
     229       33922 :     pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
     230       33922 :     lstats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
     231             : 
     232       34046 :     foreach(lc, lstats)
     233             :     {
     234         124 :         StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
     235         124 :         int         stattarget = stat->stattarget;
     236             :         VacAttrStats **stats;
     237         124 :         int         nattrs = bms_num_members(stat->columns);
     238             : 
     239             :         /*
     240             :          * Check if we can build this statistics object based on the columns
     241             :          * analyzed. If not, ignore it (don't report anything, we'll do that
     242             :          * during the actual build BuildRelationExtStatistics).
     243             :          */
     244         124 :         stats = lookup_var_attr_stats(onerel, stat->columns,
     245             :                                       natts, vacattrstats);
     246             : 
     247         124 :         if (!stats)
     248           8 :             continue;
     249             : 
     250             :         /*
     251             :          * Compute statistics target, based on what's set for the statistic
     252             :          * object itself, and for its attributes.
     253             :          */
     254         116 :         stattarget = statext_compute_stattarget(stat->stattarget,
     255             :                                                 nattrs, stats);
     256             : 
     257             :         /* Use the largest value for all statistics objects. */
     258         116 :         if (stattarget > result)
     259         104 :             result = stattarget;
     260             :     }
     261             : 
     262       33922 :     table_close(pg_stext, RowExclusiveLock);
     263             : 
     264       33922 :     MemoryContextSwitchTo(oldcxt);
     265       33922 :     MemoryContextDelete(cxt);
     266             : 
     267             :     /* compute sample size based on the statistics target */
     268       33922 :     return (300 * result);
     269             : }
     270             : 
     271             : /*
     272             :  * statext_compute_stattarget
     273             :  *      compute statistics target for an extended statistic
     274             :  *
     275             :  * When computing target for extended statistics objects, we consider three
     276             :  * places where the target may be set - the statistics object itself,
     277             :  * attributes the statistics is defined on, and then the default statistics
     278             :  * target.
     279             :  *
     280             :  * First we look at what's set for the statistics object itself, using the
     281             :  * ALTER STATISTICS ... SET STATISTICS command. If we find a valid value
     282             :  * there (i.e. not -1) we're done. Otherwise we look at targets set for any
     283             :  * of the attributes the statistic is defined on, and if there are columns
     284             :  * with defined target, we use the maximum value. We do this mostly for
     285             :  * backwards compatibility, because this is what we did before having
     286             :  * statistics target for extended statistics.
     287             :  *
     288             :  * And finally, if we still don't have a statistics target, we use the value
     289             :  * set in default_statistics_target.
     290             :  */
     291             : static int
     292         228 : statext_compute_stattarget(int stattarget, int nattrs, VacAttrStats **stats)
     293             : {
     294             :     int         i;
     295             : 
     296             :     /*
     297             :      * If there's statistics target set for the statistics object, use it. It
     298             :      * may be set to 0 which disables building of that statistic.
     299             :      */
     300         228 :     if (stattarget >= 0)
     301           8 :         return stattarget;
     302             : 
     303             :     /*
     304             :      * The target for the statistics object is set to -1, in which case we
     305             :      * look at the maximum target set for any of the attributes the object is
     306             :      * defined on.
     307             :      */
     308         820 :     for (i = 0; i < nattrs; i++)
     309             :     {
     310             :         /* keep the maximmum statistics target */
     311         600 :         if (stats[i]->attr->attstattarget > stattarget)
     312         220 :             stattarget = stats[i]->attr->attstattarget;
     313             :     }
     314             : 
     315             :     /*
     316             :      * If the value is still negative (so neither the statistics object nor
     317             :      * any of the columns have custom statistics target set), use the global
     318             :      * default target.
     319             :      */
     320         220 :     if (stattarget < 0)
     321           0 :         stattarget = default_statistics_target;
     322             : 
     323             :     /* As this point we should have a valid statistics target. */
     324             :     Assert((stattarget >= 0) && (stattarget <= 10000));
     325             : 
     326         220 :     return stattarget;
     327             : }
     328             : 
     329             : /*
     330             :  * statext_is_kind_built
     331             :  *      Is this stat kind built in the given pg_statistic_ext_data tuple?
     332             :  */
     333             : bool
     334        1316 : statext_is_kind_built(HeapTuple htup, char type)
     335             : {
     336             :     AttrNumber  attnum;
     337             : 
     338        1316 :     switch (type)
     339             :     {
     340         436 :         case STATS_EXT_NDISTINCT:
     341         436 :             attnum = Anum_pg_statistic_ext_data_stxdndistinct;
     342         436 :             break;
     343             : 
     344         436 :         case STATS_EXT_DEPENDENCIES:
     345         436 :             attnum = Anum_pg_statistic_ext_data_stxddependencies;
     346         436 :             break;
     347             : 
     348         444 :         case STATS_EXT_MCV:
     349         444 :             attnum = Anum_pg_statistic_ext_data_stxdmcv;
     350         444 :             break;
     351             : 
     352           0 :         default:
     353           0 :             elog(ERROR, "unexpected statistics type requested: %d", type);
     354             :     }
     355             : 
     356        1316 :     return !heap_attisnull(htup, attnum, NULL);
     357             : }
     358             : 
     359             : /*
     360             :  * Return a list (of StatExtEntry) of statistics objects for the given relation.
     361             :  */
     362             : static List *
     363       55090 : fetch_statentries_for_relation(Relation pg_statext, Oid relid)
     364             : {
     365             :     SysScanDesc scan;
     366             :     ScanKeyData skey;
     367             :     HeapTuple   htup;
     368       55090 :     List       *result = NIL;
     369             : 
     370             :     /*
     371             :      * Prepare to scan pg_statistic_ext for entries having stxrelid = this
     372             :      * rel.
     373             :      */
     374       55090 :     ScanKeyInit(&skey,
     375             :                 Anum_pg_statistic_ext_stxrelid,
     376             :                 BTEqualStrategyNumber, F_OIDEQ,
     377             :                 ObjectIdGetDatum(relid));
     378             : 
     379       55090 :     scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true,
     380             :                               NULL, 1, &skey);
     381             : 
     382       55334 :     while (HeapTupleIsValid(htup = systable_getnext(scan)))
     383             :     {
     384             :         StatExtEntry *entry;
     385             :         Datum       datum;
     386             :         bool        isnull;
     387             :         int         i;
     388             :         ArrayType  *arr;
     389             :         char       *enabled;
     390             :         Form_pg_statistic_ext staForm;
     391             : 
     392         244 :         entry = palloc0(sizeof(StatExtEntry));
     393         244 :         staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
     394         244 :         entry->statOid = staForm->oid;
     395         244 :         entry->schema = get_namespace_name(staForm->stxnamespace);
     396         244 :         entry->name = pstrdup(NameStr(staForm->stxname));
     397         244 :         entry->stattarget = staForm->stxstattarget;
     398         892 :         for (i = 0; i < staForm->stxkeys.dim1; i++)
     399             :         {
     400         648 :             entry->columns = bms_add_member(entry->columns,
     401         648 :                                             staForm->stxkeys.values[i]);
     402             :         }
     403             : 
     404             :         /* decode the stxkind char array into a list of chars */
     405         244 :         datum = SysCacheGetAttr(STATEXTOID, htup,
     406             :                                 Anum_pg_statistic_ext_stxkind, &isnull);
     407             :         Assert(!isnull);
     408         244 :         arr = DatumGetArrayTypeP(datum);
     409         244 :         if (ARR_NDIM(arr) != 1 ||
     410         244 :             ARR_HASNULL(arr) ||
     411         244 :             ARR_ELEMTYPE(arr) != CHAROID)
     412           0 :             elog(ERROR, "stxkind is not a 1-D char array");
     413         244 :         enabled = (char *) ARR_DATA_PTR(arr);
     414         608 :         for (i = 0; i < ARR_DIMS(arr)[0]; i++)
     415             :         {
     416             :             Assert((enabled[i] == STATS_EXT_NDISTINCT) ||
     417             :                    (enabled[i] == STATS_EXT_DEPENDENCIES) ||
     418             :                    (enabled[i] == STATS_EXT_MCV));
     419         364 :             entry->types = lappend_int(entry->types, (int) enabled[i]);
     420             :         }
     421             : 
     422         244 :         result = lappend(result, entry);
     423             :     }
     424             : 
     425       55090 :     systable_endscan(scan);
     426             : 
     427       55090 :     return result;
     428             : }
     429             : 
     430             : /*
     431             :  * Using 'vacatts' of size 'nvacatts' as input data, return a newly built
     432             :  * VacAttrStats array which includes only the items corresponding to
     433             :  * attributes indicated by 'stxkeys'. If we don't have all of the per column
     434             :  * stats available to compute the extended stats, then we return NULL to indicate
     435             :  * to the caller that the stats should not be built.
     436             :  */
     437             : static VacAttrStats **
     438         244 : lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
     439             :                       int nvacatts, VacAttrStats **vacatts)
     440             : {
     441         244 :     int         i = 0;
     442         244 :     int         x = -1;
     443             :     VacAttrStats **stats;
     444             : 
     445             :     stats = (VacAttrStats **)
     446         244 :         palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));
     447             : 
     448             :     /* lookup VacAttrStats info for the requested columns (same attnum) */
     449         868 :     while ((x = bms_next_member(attrs, x)) >= 0)
     450             :     {
     451             :         int         j;
     452             : 
     453         640 :         stats[i] = NULL;
     454        2072 :         for (j = 0; j < nvacatts; j++)
     455             :         {
     456        2056 :             if (x == vacatts[j]->tupattnum)
     457             :             {
     458         624 :                 stats[i] = vacatts[j];
     459         624 :                 break;
     460             :             }
     461             :         }
     462             : 
     463         640 :         if (!stats[i])
     464             :         {
     465             :             /*
     466             :              * Looks like stats were not gathered for one of the columns
     467             :              * required. We'll be unable to build the extended stats without
     468             :              * this column.
     469             :              */
     470          16 :             pfree(stats);
     471          16 :             return NULL;
     472             :         }
     473             : 
     474             :         /*
     475             :          * Sanity check that the column is not dropped - stats should have
     476             :          * been removed in this case.
     477             :          */
     478             :         Assert(!stats[i]->attr->attisdropped);
     479             : 
     480         624 :         i++;
     481             :     }
     482             : 
     483         228 :     return stats;
     484             : }
     485             : 
     486             : /*
     487             :  * statext_store
     488             :  *  Serializes the statistics and stores them into the pg_statistic_ext_data
     489             :  *  tuple.
     490             :  */
     491             : static void
     492         108 : statext_store(Oid statOid,
     493             :               MVNDistinct *ndistinct, MVDependencies *dependencies,
     494             :               MCVList *mcv, VacAttrStats **stats)
     495             : {
     496             :     Relation    pg_stextdata;
     497             :     HeapTuple   stup,
     498             :                 oldtup;
     499             :     Datum       values[Natts_pg_statistic_ext_data];
     500             :     bool        nulls[Natts_pg_statistic_ext_data];
     501             :     bool        replaces[Natts_pg_statistic_ext_data];
     502             : 
     503         108 :     pg_stextdata = table_open(StatisticExtDataRelationId, RowExclusiveLock);
     504             : 
     505         108 :     memset(nulls, true, sizeof(nulls));
     506         108 :     memset(replaces, false, sizeof(replaces));
     507         108 :     memset(values, 0, sizeof(values));
     508             : 
     509             :     /*
     510             :      * Construct a new pg_statistic_ext_data tuple, replacing the calculated
     511             :      * stats.
     512             :      */
     513         108 :     if (ndistinct != NULL)
     514             :     {
     515          16 :         bytea      *data = statext_ndistinct_serialize(ndistinct);
     516             : 
     517          16 :         nulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = (data == NULL);
     518          16 :         values[Anum_pg_statistic_ext_data_stxdndistinct - 1] = PointerGetDatum(data);
     519             :     }
     520             : 
     521         108 :     if (dependencies != NULL)
     522             :     {
     523          38 :         bytea      *data = statext_dependencies_serialize(dependencies);
     524             : 
     525          38 :         nulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = (data == NULL);
     526          38 :         values[Anum_pg_statistic_ext_data_stxddependencies - 1] = PointerGetDatum(data);
     527             :     }
     528         108 :     if (mcv != NULL)
     529             :     {
     530          80 :         bytea      *data = statext_mcv_serialize(mcv, stats);
     531             : 
     532          80 :         nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = (data == NULL);
     533          80 :         values[Anum_pg_statistic_ext_data_stxdmcv - 1] = PointerGetDatum(data);
     534             :     }
     535             : 
     536             :     /* always replace the value (either by bytea or NULL) */
     537         108 :     replaces[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
     538         108 :     replaces[Anum_pg_statistic_ext_data_stxddependencies - 1] = true;
     539         108 :     replaces[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
     540             : 
     541             :     /* there should already be a pg_statistic_ext_data tuple */
     542         108 :     oldtup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(statOid));
     543         108 :     if (!HeapTupleIsValid(oldtup))
     544           0 :         elog(ERROR, "cache lookup failed for statistics object %u", statOid);
     545             : 
     546             :     /* replace it */
     547         108 :     stup = heap_modify_tuple(oldtup,
     548             :                              RelationGetDescr(pg_stextdata),
     549             :                              values,
     550             :                              nulls,
     551             :                              replaces);
     552         108 :     ReleaseSysCache(oldtup);
     553         108 :     CatalogTupleUpdate(pg_stextdata, &stup->t_self, stup);
     554             : 
     555         108 :     heap_freetuple(stup);
     556             : 
     557         108 :     table_close(pg_stextdata, RowExclusiveLock);
     558         108 : }
     559             : 
     560             : /* initialize multi-dimensional sort */
     561             : MultiSortSupport
     562         388 : multi_sort_init(int ndims)
     563             : {
     564             :     MultiSortSupport mss;
     565             : 
     566             :     Assert(ndims >= 2);
     567             : 
     568         388 :     mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
     569         388 :                                      + sizeof(SortSupportData) * ndims);
     570             : 
     571         388 :     mss->ndims = ndims;
     572             : 
     573         388 :     return mss;
     574             : }
     575             : 
     576             : /*
     577             :  * Prepare sort support info using the given sort operator and collation
     578             :  * at the position 'sortdim'
     579             :  */
     580             : void
     581         924 : multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
     582             :                          Oid oper, Oid collation)
     583             : {
     584         924 :     SortSupport ssup = &mss->ssup[sortdim];
     585             : 
     586         924 :     ssup->ssup_cxt = CurrentMemoryContext;
     587         924 :     ssup->ssup_collation = collation;
     588         924 :     ssup->ssup_nulls_first = false;
     589             : 
     590         924 :     PrepareSortSupportFromOrderingOp(oper, ssup);
     591         924 : }
     592             : 
     593             : /* compare all the dimensions in the selected order */
     594             : int
     595    11123702 : multi_sort_compare(const void *a, const void *b, void *arg)
     596             : {
     597    11123702 :     MultiSortSupport mss = (MultiSortSupport) arg;
     598    11123702 :     SortItem   *ia = (SortItem *) a;
     599    11123702 :     SortItem   *ib = (SortItem *) b;
     600             :     int         i;
     601             : 
     602    18967390 :     for (i = 0; i < mss->ndims; i++)
     603             :     {
     604             :         int         compare;
     605             : 
     606    34340772 :         compare = ApplySortComparator(ia->values[i], ia->isnull[i],
     607    34340772 :                                       ib->values[i], ib->isnull[i],
     608    17170386 :                                       &mss->ssup[i]);
     609             : 
     610    17170386 :         if (compare != 0)
     611     9326698 :             return compare;
     612             :     }
     613             : 
     614             :     /* equal by default */
     615     1797004 :     return 0;
     616             : }
     617             : 
     618             : /* compare selected dimension */
     619             : int
     620      949620 : multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b,
     621             :                        MultiSortSupport mss)
     622             : {
     623     2848860 :     return ApplySortComparator(a->values[dim], a->isnull[dim],
     624     1899240 :                                b->values[dim], b->isnull[dim],
     625      949620 :                                &mss->ssup[dim]);
     626             : }
     627             : 
     628             : int
     629      993142 : multi_sort_compare_dims(int start, int end,
     630             :                         const SortItem *a, const SortItem *b,
     631             :                         MultiSortSupport mss)
     632             : {
     633             :     int         dim;
     634             : 
     635     2240038 :     for (dim = start; dim <= end; dim++)
     636             :     {
     637     2580836 :         int         r = ApplySortComparator(a->values[dim], a->isnull[dim],
     638     2580836 :                                             b->values[dim], b->isnull[dim],
     639     1290418 :                                             &mss->ssup[dim]);
     640             : 
     641     1290418 :         if (r != 0)
     642       43522 :             return r;
     643             :     }
     644             : 
     645      949620 :     return 0;
     646             : }
     647             : 
     648             : int
     649       73968 : compare_scalars_simple(const void *a, const void *b, void *arg)
     650             : {
     651       73968 :     return compare_datums_simple(*(Datum *) a,
     652             :                                  *(Datum *) b,
     653             :                                  (SortSupport) arg);
     654             : }
     655             : 
     656             : int
     657       83158 : compare_datums_simple(Datum a, Datum b, SortSupport ssup)
     658             : {
     659       83158 :     return ApplySortComparator(a, false, b, false, ssup);
     660             : }
     661             : 
     662             : /* simple counterpart to qsort_arg */
     663             : void *
     664       18858 : bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size,
     665             :             int (*compar) (const void *, const void *, void *),
     666             :             void *arg)
     667             : {
     668             :     size_t      l,
     669             :                 u,
     670             :                 idx;
     671             :     const void *p;
     672             :     int         comparison;
     673             : 
     674       18858 :     l = 0;
     675       18858 :     u = nmemb;
     676       86752 :     while (l < u)
     677             :     {
     678       86752 :         idx = (l + u) / 2;
     679       86752 :         p = (void *) (((const char *) base) + (idx * size));
     680       86752 :         comparison = (*compar) (key, p, arg);
     681             : 
     682       86752 :         if (comparison < 0)
     683       40192 :             u = idx;
     684       46560 :         else if (comparison > 0)
     685       27702 :             l = idx + 1;
     686             :         else
     687       18858 :             return (void *) p;
     688             :     }
     689             : 
     690           0 :     return NULL;
     691             : }
     692             : 
     693             : /*
     694             :  * build_attnums_array
     695             :  *      Transforms a bitmap into an array of AttrNumber values.
     696             :  *
     697             :  * This is used for extended statistics only, so all the attribute must be
     698             :  * user-defined. That means offsetting by FirstLowInvalidHeapAttributeNumber
     699             :  * is not necessary here (and when querying the bitmap).
     700             :  */
     701             : AttrNumber *
     702         390 : build_attnums_array(Bitmapset *attrs, int *numattrs)
     703             : {
     704             :     int         i,
     705             :                 j;
     706             :     AttrNumber *attnums;
     707         390 :     int         num = bms_num_members(attrs);
     708             : 
     709         390 :     if (numattrs)
     710         390 :         *numattrs = num;
     711             : 
     712             :     /* build attnums from the bitmapset */
     713         390 :     attnums = (AttrNumber *) palloc(sizeof(AttrNumber) * num);
     714         390 :     i = 0;
     715         390 :     j = -1;
     716        1492 :     while ((j = bms_next_member(attrs, j)) >= 0)
     717             :     {
     718             :         /*
     719             :          * Make sure the bitmap contains only user-defined attributes. As
     720             :          * bitmaps can't contain negative values, this can be violated in two
     721             :          * ways. Firstly, the bitmap might contain 0 as a member, and secondly
     722             :          * the integer value might be larger than MaxAttrNumber.
     723             :          */
     724             :         Assert(AttrNumberIsForUserDefinedAttr(j));
     725             :         Assert(j <= MaxAttrNumber);
     726             : 
     727        1102 :         attnums[i++] = (AttrNumber) j;
     728             : 
     729             :         /* protect against overflows */
     730             :         Assert(i <= num);
     731             :     }
     732             : 
     733         390 :     return attnums;
     734             : }
     735             : 
     736             : /*
     737             :  * build_sorted_items
     738             :  *      build a sorted array of SortItem with values from rows
     739             :  *
     740             :  * Note: All the memory is allocated in a single chunk, so that the caller
     741             :  * can simply pfree the return value to release all of it.
     742             :  */
     743             : SortItem *
     744         348 : build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc,
     745             :                    MultiSortSupport mss, int numattrs, AttrNumber *attnums)
     746             : {
     747             :     int         i,
     748             :                 j,
     749             :                 len,
     750             :                 idx;
     751         348 :     int         nvalues = numrows * numattrs;
     752             : 
     753             :     SortItem   *items;
     754             :     Datum      *values;
     755             :     bool       *isnull;
     756             :     char       *ptr;
     757             : 
     758             :     /* Compute the total amount of memory we need (both items and values). */
     759         348 :     len = numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool));
     760             : 
     761             :     /* Allocate the memory and split it into the pieces. */
     762         348 :     ptr = palloc0(len);
     763             : 
     764             :     /* items to sort */
     765         348 :     items = (SortItem *) ptr;
     766         348 :     ptr += numrows * sizeof(SortItem);
     767             : 
     768             :     /* values and null flags */
     769         348 :     values = (Datum *) ptr;
     770         348 :     ptr += nvalues * sizeof(Datum);
     771             : 
     772         348 :     isnull = (bool *) ptr;
     773         348 :     ptr += nvalues * sizeof(bool);
     774             : 
     775             :     /* make sure we consumed the whole buffer exactly */
     776             :     Assert((ptr - (char *) items) == len);
     777             : 
     778             :     /* fix the pointers to Datum and bool arrays */
     779         348 :     idx = 0;
     780     1305960 :     for (i = 0; i < numrows; i++)
     781             :     {
     782     1305612 :         bool        toowide = false;
     783             : 
     784     1305612 :         items[idx].values = &values[idx * numattrs];
     785     1305612 :         items[idx].isnull = &isnull[idx * numattrs];
     786             : 
     787             :         /* load the values/null flags from sample rows */
     788     4486436 :         for (j = 0; j < numattrs; j++)
     789             :         {
     790             :             Datum       value;
     791             :             bool        isnull;
     792             : 
     793     3180824 :             value = heap_getattr(rows[i], attnums[j], tdesc, &isnull);
     794             : 
     795             :             /*
     796             :              * If this is a varlena value, check if it's too wide and if yes
     797             :              * then skip the whole item. Otherwise detoast the value.
     798             :              *
     799             :              * XXX It may happen that we've already detoasted some preceding
     800             :              * values for the current item. We don't bother to cleanup those
     801             :              * on the assumption that those are small (below WIDTH_THRESHOLD)
     802             :              * and will be discarded at the end of analyze.
     803             :              */
     804     3180824 :             if ((!isnull) &&
     805     3106000 :                 (TupleDescAttr(tdesc, attnums[j] - 1)->attlen == -1))
     806             :             {
     807     1065376 :                 if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
     808             :                 {
     809           0 :                     toowide = true;
     810           0 :                     break;
     811             :                 }
     812             : 
     813     1065376 :                 value = PointerGetDatum(PG_DETOAST_DATUM(value));
     814             :             }
     815             : 
     816     3180824 :             items[idx].values[j] = value;
     817     3180824 :             items[idx].isnull[j] = isnull;
     818             :         }
     819             : 
     820     1305612 :         if (toowide)
     821           0 :             continue;
     822             : 
     823     1305612 :         idx++;
     824             :     }
     825             : 
     826             :     /* store the actual number of items (ignoring the too-wide ones) */
     827         348 :     *nitems = idx;
     828             : 
     829             :     /* all items were too wide */
     830         348 :     if (idx == 0)
     831             :     {
     832             :         /* everything is allocated as a single chunk */
     833           0 :         pfree(items);
     834           0 :         return NULL;
     835             :     }
     836             : 
     837             :     /* do the sort, using the multi-sort */
     838         348 :     qsort_arg((void *) items, idx, sizeof(SortItem),
     839             :               multi_sort_compare, mss);
     840             : 
     841         348 :     return items;
     842             : }
     843             : 
     844             : /*
     845             :  * has_stats_of_kind
     846             :  *      Check whether the list contains statistic of a given kind
     847             :  */
     848             : bool
     849        3312 : has_stats_of_kind(List *stats, char requiredkind)
     850             : {
     851             :     ListCell   *l;
     852             : 
     853        5000 :     foreach(l, stats)
     854             :     {
     855        3344 :         StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
     856             : 
     857        3344 :         if (stat->kind == requiredkind)
     858        1656 :             return true;
     859             :     }
     860             : 
     861        1656 :     return false;
     862             : }
     863             : 
     864             : /*
     865             :  * choose_best_statistics
     866             :  *      Look for and return statistics with the specified 'requiredkind' which
     867             :  *      have keys that match at least two of the given attnums.  Return NULL if
     868             :  *      there's no match.
     869             :  *
     870             :  * The current selection criteria is very simple - we choose the statistics
     871             :  * object referencing the most attributes in covered (and still unestimated
     872             :  * clauses), breaking ties in favor of objects with fewer keys overall.
     873             :  *
     874             :  * The clause_attnums is an array of bitmaps, storing attnums for individual
     875             :  * clauses. A NULL element means the clause is either incompatible or already
     876             :  * estimated.
     877             :  *
     878             :  * XXX If multiple statistics objects tie on both criteria, then which object
     879             :  * is chosen depends on the order that they appear in the stats list. Perhaps
     880             :  * further tiebreakers are needed.
     881             :  */
     882             : StatisticExtInfo *
     883         364 : choose_best_statistics(List *stats, char requiredkind,
     884             :                        Bitmapset **clause_attnums, int nclauses)
     885             : {
     886             :     ListCell   *lc;
     887         364 :     StatisticExtInfo *best_match = NULL;
     888         364 :     int         best_num_matched = 2;   /* goal #1: maximize */
     889         364 :     int         best_match_keys = (STATS_MAX_DIMENSIONS + 1);   /* goal #2: minimize */
     890             : 
     891         756 :     foreach(lc, stats)
     892             :     {
     893             :         int         i;
     894         392 :         StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
     895         392 :         Bitmapset  *matched = NULL;
     896             :         int         num_matched;
     897             :         int         numkeys;
     898             : 
     899             :         /* skip statistics that are not of the correct type */
     900         392 :         if (info->kind != requiredkind)
     901           0 :             continue;
     902             : 
     903             :         /*
     904             :          * Collect attributes in remaining (unestimated) clauses fully covered
     905             :          * by this statistic object.
     906             :          */
     907        1332 :         for (i = 0; i < nclauses; i++)
     908             :         {
     909             :             /* ignore incompatible/estimated clauses */
     910         940 :             if (!clause_attnums[i])
     911         496 :                 continue;
     912             : 
     913             :             /* ignore clauses that are not covered by this object */
     914         444 :             if (!bms_is_subset(clause_attnums[i], info->keys))
     915          44 :                 continue;
     916             : 
     917         400 :             matched = bms_add_members(matched, clause_attnums[i]);
     918             :         }
     919             : 
     920         392 :         num_matched = bms_num_members(matched);
     921         392 :         bms_free(matched);
     922             : 
     923             :         /*
     924             :          * save the actual number of keys in the stats so that we can choose
     925             :          * the narrowest stats with the most matching keys.
     926             :          */
     927         392 :         numkeys = bms_num_members(info->keys);
     928             : 
     929             :         /*
     930             :          * Use this object when it increases the number of matched clauses or
     931             :          * when it matches the same number of attributes but these stats have
     932             :          * fewer keys than any previous match.
     933             :          */
     934         392 :         if (num_matched > best_num_matched ||
     935         104 :             (num_matched == best_num_matched && numkeys < best_match_keys))
     936             :         {
     937         172 :             best_match = info;
     938         172 :             best_num_matched = num_matched;
     939         172 :             best_match_keys = numkeys;
     940             :         }
     941             :     }
     942             : 
     943         364 :     return best_match;
     944             : }
     945             : 
     946             : /*
     947             :  * statext_is_compatible_clause_internal
     948             :  *      Determines if the clause is compatible with MCV lists.
     949             :  *
     950             :  * Does the heavy lifting of actually inspecting the clauses for
     951             :  * statext_is_compatible_clause. It needs to be split like this because
     952             :  * of recursion.  The attnums bitmap is an input/output parameter collecting
     953             :  * attribute numbers from all compatible clauses (recursively).
     954             :  */
     955             : static bool
     956         912 : statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
     957             :                                       Index relid, Bitmapset **attnums)
     958             : {
     959             :     /* Look inside any binary-compatible relabeling (as in examine_variable) */
     960         912 :     if (IsA(clause, RelabelType))
     961           0 :         clause = (Node *) ((RelabelType *) clause)->arg;
     962             : 
     963             :     /* plain Var references (boolean Vars or recursive checks) */
     964         912 :     if (IsA(clause, Var))
     965             :     {
     966         448 :         Var        *var = (Var *) clause;
     967             : 
     968             :         /* Ensure var is from the correct relation */
     969         448 :         if (var->varno != relid)
     970           0 :             return false;
     971             : 
     972             :         /* we also better ensure the Var is from the current level */
     973         448 :         if (var->varlevelsup > 0)
     974           0 :             return false;
     975             : 
     976             :         /* Also skip system attributes (we don't allow stats on those). */
     977         448 :         if (!AttrNumberIsForUserDefinedAttr(var->varattno))
     978           0 :             return false;
     979             : 
     980         448 :         *attnums = bms_add_member(*attnums, var->varattno);
     981             : 
     982         448 :         return true;
     983             :     }
     984             : 
     985             :     /* (Var op Const) or (Const op Var) */
     986         464 :     if (is_opclause(clause))
     987             :     {
     988         280 :         RangeTblEntry *rte = root->simple_rte_array[relid];
     989         280 :         OpExpr     *expr = (OpExpr *) clause;
     990             :         Var        *var;
     991             : 
     992             :         /* Only expressions with two arguments are considered compatible. */
     993         280 :         if (list_length(expr->args) != 2)
     994           0 :             return false;
     995             : 
     996             :         /* Check if the expression has the right shape (one Var, one Const) */
     997         280 :         if (!examine_clause_args(expr->args, &var, NULL, NULL))
     998           0 :             return false;
     999             : 
    1000             :         /*
    1001             :          * If it's not one of the supported operators ("=", "<", ">", etc.),
    1002             :          * just ignore the clause, as it's not compatible with MCV lists.
    1003             :          *
    1004             :          * This uses the function for estimating selectivity, not the operator
    1005             :          * directly (a bit awkward, but well ...).
    1006             :          */
    1007         280 :         switch (get_oprrest(expr->opno))
    1008             :         {
    1009         280 :             case F_EQSEL:
    1010             :             case F_NEQSEL:
    1011             :             case F_SCALARLTSEL:
    1012             :             case F_SCALARLESEL:
    1013             :             case F_SCALARGTSEL:
    1014             :             case F_SCALARGESEL:
    1015             :                 /* supported, will continue with inspection of the Var */
    1016         280 :                 break;
    1017             : 
    1018           0 :             default:
    1019             :                 /* other estimators are considered unknown/unsupported */
    1020           0 :                 return false;
    1021             :         }
    1022             : 
    1023             :         /*
    1024             :          * If there are any securityQuals on the RTE from security barrier
    1025             :          * views or RLS policies, then the user may not have access to all the
    1026             :          * table's data, and we must check that the operator is leak-proof.
    1027             :          *
    1028             :          * If the operator is leaky, then we must ignore this clause for the
    1029             :          * purposes of estimating with MCV lists, otherwise the operator might
    1030             :          * reveal values from the MCV list that the user doesn't have
    1031             :          * permission to see.
    1032             :          */
    1033         280 :         if (rte->securityQuals != NIL &&
    1034          24 :             !get_func_leakproof(get_opcode(expr->opno)))
    1035          24 :             return false;
    1036             : 
    1037         256 :         return statext_is_compatible_clause_internal(root, (Node *) var,
    1038             :                                                      relid, attnums);
    1039             :     }
    1040             : 
    1041             :     /* Var IN Array */
    1042         184 :     if (IsA(clause, ScalarArrayOpExpr))
    1043             :     {
    1044          96 :         RangeTblEntry *rte = root->simple_rte_array[relid];
    1045          96 :         ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
    1046             :         Var        *var;
    1047             : 
    1048             :         /* Only expressions with two arguments are considered compatible. */
    1049          96 :         if (list_length(expr->args) != 2)
    1050           0 :             return false;
    1051             : 
    1052             :         /* Check if the expression has the right shape (one Var, one Const) */
    1053          96 :         if (!examine_clause_args(expr->args, &var, NULL, NULL))
    1054           0 :             return false;
    1055             : 
    1056             :         /*
    1057             :          * If it's not one of the supported operators ("=", "<", ">", etc.),
    1058             :          * just ignore the clause, as it's not compatible with MCV lists.
    1059             :          *
    1060             :          * This uses the function for estimating selectivity, not the operator
    1061             :          * directly (a bit awkward, but well ...).
    1062             :          */
    1063          96 :         switch (get_oprrest(expr->opno))
    1064             :         {
    1065          96 :             case F_EQSEL:
    1066             :             case F_NEQSEL:
    1067             :             case F_SCALARLTSEL:
    1068             :             case F_SCALARLESEL:
    1069             :             case F_SCALARGTSEL:
    1070             :             case F_SCALARGESEL:
    1071             :                 /* supported, will continue with inspection of the Var */
    1072          96 :                 break;
    1073             : 
    1074           0 :             default:
    1075             :                 /* other estimators are considered unknown/unsupported */
    1076           0 :                 return false;
    1077             :         }
    1078             : 
    1079             :         /*
    1080             :          * If there are any securityQuals on the RTE from security barrier
    1081             :          * views or RLS policies, then the user may not have access to all the
    1082             :          * table's data, and we must check that the operator is leak-proof.
    1083             :          *
    1084             :          * If the operator is leaky, then we must ignore this clause for the
    1085             :          * purposes of estimating with MCV lists, otherwise the operator might
    1086             :          * reveal values from the MCV list that the user doesn't have
    1087             :          * permission to see.
    1088             :          */
    1089          96 :         if (rte->securityQuals != NIL &&
    1090           0 :             !get_func_leakproof(get_opcode(expr->opno)))
    1091           0 :             return false;
    1092             : 
    1093          96 :         return statext_is_compatible_clause_internal(root, (Node *) var,
    1094             :                                                      relid, attnums);
    1095             :     }
    1096             : 
    1097             :     /* AND/OR/NOT clause */
    1098         176 :     if (is_andclause(clause) ||
    1099         156 :         is_orclause(clause) ||
    1100          68 :         is_notclause(clause))
    1101             :     {
    1102             :         /*
    1103             :          * AND/OR/NOT-clauses are supported if all sub-clauses are supported
    1104             :          *
    1105             :          * Perhaps we could improve this by handling mixed cases, when some of
    1106             :          * the clauses are supported and some are not. Selectivity for the
    1107             :          * supported subclauses would be computed using extended statistics,
    1108             :          * and the remaining clauses would be estimated using the traditional
    1109             :          * algorithm (product of selectivities).
    1110             :          *
    1111             :          * It however seems overly complex, and in a way we already do that
    1112             :          * because if we reject the whole clause as unsupported here, it will
    1113             :          * be eventually passed to clauselist_selectivity() which does exactly
    1114             :          * this (split into supported/unsupported clauses etc).
    1115             :          */
    1116          40 :         BoolExpr   *expr = (BoolExpr *) clause;
    1117             :         ListCell   *lc;
    1118             : 
    1119         116 :         foreach(lc, expr->args)
    1120             :         {
    1121             :             /*
    1122             :              * Had we found incompatible clause in the arguments, treat the
    1123             :              * whole clause as incompatible.
    1124             :              */
    1125          76 :             if (!statext_is_compatible_clause_internal(root,
    1126          76 :                                                        (Node *) lfirst(lc),
    1127             :                                                        relid, attnums))
    1128           0 :                 return false;
    1129             :         }
    1130             : 
    1131          40 :         return true;
    1132             :     }
    1133             : 
    1134             :     /* Var IS NULL */
    1135          48 :     if (IsA(clause, NullTest))
    1136             :     {
    1137          48 :         NullTest   *nt = (NullTest *) clause;
    1138             : 
    1139             :         /*
    1140             :          * Only simple (Var IS NULL) expressions supported for now. Maybe we
    1141             :          * could use examine_variable to fix this?
    1142             :          */
    1143          48 :         if (!IsA(nt->arg, Var))
    1144           0 :             return false;
    1145             : 
    1146          48 :         return statext_is_compatible_clause_internal(root, (Node *) (nt->arg),
    1147             :                                                      relid, attnums);
    1148             :     }
    1149             : 
    1150           0 :     return false;
    1151             : }
    1152             : 
    1153             : /*
    1154             :  * statext_is_compatible_clause
    1155             :  *      Determines if the clause is compatible with MCV lists.
    1156             :  *
    1157             :  * Currently, we only support three types of clauses:
    1158             :  *
    1159             :  * (a) OpExprs of the form (Var op Const), or (Const op Var), where the op
    1160             :  * is one of ("=", "<", ">", ">=", "<=")
    1161             :  *
    1162             :  * (b) (Var IS [NOT] NULL)
    1163             :  *
    1164             :  * (c) combinations using AND/OR/NOT
    1165             :  *
    1166             :  * In the future, the range of supported clauses may be expanded to more
    1167             :  * complex cases, for example (Var op Var).
    1168             :  */
    1169             : static bool
    1170         452 : statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
    1171             :                              Bitmapset **attnums)
    1172             : {
    1173         452 :     RangeTblEntry *rte = root->simple_rte_array[relid];
    1174         452 :     RestrictInfo *rinfo = (RestrictInfo *) clause;
    1175             :     Oid         userid;
    1176             : 
    1177         452 :     if (!IsA(rinfo, RestrictInfo))
    1178           0 :         return false;
    1179             : 
    1180             :     /* Pseudoconstants are not really interesting here. */
    1181         452 :     if (rinfo->pseudoconstant)
    1182           0 :         return false;
    1183             : 
    1184             :     /* clauses referencing multiple varnos are incompatible */
    1185         452 :     if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
    1186          16 :         return false;
    1187             : 
    1188             :     /* Check the clause and determine what attributes it references. */
    1189         436 :     if (!statext_is_compatible_clause_internal(root, (Node *) rinfo->clause,
    1190             :                                                relid, attnums))
    1191          24 :         return false;
    1192             : 
    1193             :     /*
    1194             :      * Check that the user has permission to read all these attributes.  Use
    1195             :      * checkAsUser if it's set, in case we're accessing the table via a view.
    1196             :      */
    1197         412 :     userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
    1198             : 
    1199         412 :     if (pg_class_aclcheck(rte->relid, userid, ACL_SELECT) != ACLCHECK_OK)
    1200             :     {
    1201             :         /* Don't have table privilege, must check individual columns */
    1202          16 :         if (bms_is_member(InvalidAttrNumber, *attnums))
    1203             :         {
    1204             :             /* Have a whole-row reference, must have access to all columns */
    1205           0 :             if (pg_attribute_aclcheck_all(rte->relid, userid, ACL_SELECT,
    1206             :                                           ACLMASK_ALL) != ACLCHECK_OK)
    1207           0 :                 return false;
    1208             :         }
    1209             :         else
    1210             :         {
    1211             :             /* Check the columns referenced by the clause */
    1212          16 :             int         attnum = -1;
    1213             : 
    1214          16 :             while ((attnum = bms_next_member(*attnums, attnum)) >= 0)
    1215             :             {
    1216          16 :                 if (pg_attribute_aclcheck(rte->relid, attnum, userid,
    1217             :                                           ACL_SELECT) != ACLCHECK_OK)
    1218          16 :                     return false;
    1219             :             }
    1220             :         }
    1221             :     }
    1222             : 
    1223             :     /* If we reach here, the clause is OK */
    1224         396 :     return true;
    1225             : }
    1226             : 
    1227             : /*
    1228             :  * statext_mcv_clauselist_selectivity
    1229             :  *      Estimate clauses using the best multi-column statistics.
    1230             :  *
    1231             :  * Applies available extended (multi-column) statistics on a table. There may
    1232             :  * be multiple applicable statistics (with respect to the clauses), in which
    1233             :  * case we use greedy approach. In each round we select the best statistic on
    1234             :  * a table (measured by the number of attributes extracted from the clauses
    1235             :  * and covered by it), and compute the selectivity for the supplied clauses.
    1236             :  * We repeat this process with the remaining clauses (if any), until none of
    1237             :  * the available statistics can be used.
    1238             :  *
    1239             :  * One of the main challenges with using MCV lists is how to extrapolate the
    1240             :  * estimate to the data not covered by the MCV list. To do that, we compute
    1241             :  * not only the "MCV selectivity" (selectivities for MCV items matching the
    1242             :  * supplied clauses), but also a couple of derived selectivities:
    1243             :  *
    1244             :  * - simple selectivity:  Computed without extended statistic, i.e. as if the
    1245             :  * columns/clauses were independent
    1246             :  *
    1247             :  * - base selectivity:  Similar to simple selectivity, but is computed using
    1248             :  * the extended statistic by adding up the base frequencies (that we compute
    1249             :  * and store for each MCV item) of matching MCV items.
    1250             :  *
    1251             :  * - total selectivity: Selectivity covered by the whole MCV list.
    1252             :  *
    1253             :  * - other selectivity: A selectivity estimate for data not covered by the MCV
    1254             :  * list (i.e. satisfying the clauses, but not common enough to make it into
    1255             :  * the MCV list)
    1256             :  *
    1257             :  * Note: While simple and base selectivities are defined in a quite similar
    1258             :  * way, the values are computed differently and are not therefore equal. The
    1259             :  * simple selectivity is computed as a product of per-clause estimates, while
    1260             :  * the base selectivity is computed by adding up base frequencies of matching
    1261             :  * items of the multi-column MCV list. So the values may differ for two main
    1262             :  * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
    1263             :  * the MCV items did not match the estimated clauses.
    1264             :  *
    1265             :  * As both (a) and (b) reduce the base selectivity value, it generally holds
    1266             :  * that (simple_selectivity >= base_selectivity). If the MCV list covers all
    1267             :  * the data, the values may be equal.
    1268             :  *
    1269             :  * So, (simple_selectivity - base_selectivity) is an estimate for the part
    1270             :  * not covered by the MCV list, and (mcv_selectivity - base_selectivity) may
    1271             :  * be seen as a correction for the part covered by the MCV list. Those two
    1272             :  * statements are actually equivalent.
    1273             :  *
    1274             :  * Note: Due to rounding errors and minor differences in how the estimates
    1275             :  * are computed, the inequality may not always hold. Which is why we clamp
    1276             :  * the selectivities to prevent strange estimate (negative etc.).
    1277             :  *
    1278             :  * 'estimatedclauses' is an input/output parameter.  We set bits for the
    1279             :  * 0-based 'clauses' indexes we estimate for and also skip clause items that
    1280             :  * already have a bit set.
    1281             :  */
    1282             : static Selectivity
    1283        1656 : statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
    1284             :                                    JoinType jointype, SpecialJoinInfo *sjinfo,
    1285             :                                    RelOptInfo *rel, Bitmapset **estimatedclauses)
    1286             : {
    1287             :     ListCell   *l;
    1288             :     Bitmapset **list_attnums;
    1289             :     int         listidx;
    1290        1656 :     Selectivity sel = 1.0;
    1291             : 
    1292             :     /* check if there's any stats that might be useful for us. */
    1293        1656 :     if (!has_stats_of_kind(rel->statlist, STATS_EXT_MCV))
    1294        1464 :         return 1.0;
    1295             : 
    1296         192 :     list_attnums = (Bitmapset **) palloc(sizeof(Bitmapset *) *
    1297         192 :                                          list_length(clauses));
    1298             : 
    1299             :     /*
    1300             :      * Pre-process the clauses list to extract the attnums seen in each item.
    1301             :      * We need to determine if there's any clauses which will be useful for
    1302             :      * selectivity estimations with extended stats. Along the way we'll record
    1303             :      * all of the attnums for each clause in a list which we'll reference
    1304             :      * later so we don't need to repeat the same work again. We'll also keep
    1305             :      * track of all attnums seen.
    1306             :      *
    1307             :      * We also skip clauses that we already estimated using different types of
    1308             :      * statistics (we treat them as incompatible).
    1309             :      */
    1310         192 :     listidx = 0;
    1311         644 :     foreach(l, clauses)
    1312             :     {
    1313         452 :         Node       *clause = (Node *) lfirst(l);
    1314         452 :         Bitmapset  *attnums = NULL;
    1315             : 
    1316         904 :         if (!bms_is_member(listidx, *estimatedclauses) &&
    1317         452 :             statext_is_compatible_clause(root, clause, rel->relid, &attnums))
    1318         396 :             list_attnums[listidx] = attnums;
    1319             :         else
    1320          56 :             list_attnums[listidx] = NULL;
    1321             : 
    1322         452 :         listidx++;
    1323             :     }
    1324             : 
    1325             :     /* apply as many extended statistics as possible */
    1326             :     while (true)
    1327         172 :     {
    1328             :         StatisticExtInfo *stat;
    1329             :         List       *stat_clauses;
    1330             :         Selectivity simple_sel,
    1331             :                     mcv_sel,
    1332             :                     mcv_basesel,
    1333             :                     mcv_totalsel,
    1334             :                     other_sel,
    1335             :                     stat_sel;
    1336             : 
    1337             :         /* find the best suited statistics object for these attnums */
    1338         364 :         stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV,
    1339             :                                       list_attnums, list_length(clauses));
    1340             : 
    1341             :         /*
    1342             :          * if no (additional) matching stats could be found then we've nothing
    1343             :          * to do
    1344             :          */
    1345         364 :         if (!stat)
    1346         192 :             break;
    1347             : 
    1348             :         /* Ensure choose_best_statistics produced an expected stats type. */
    1349             :         Assert(stat->kind == STATS_EXT_MCV);
    1350             : 
    1351             :         /* now filter the clauses to be estimated using the selected MCV */
    1352         172 :         stat_clauses = NIL;
    1353             : 
    1354         172 :         listidx = 0;
    1355         580 :         foreach(l, clauses)
    1356             :         {
    1357             :             /*
    1358             :              * If the clause is compatible with the selected statistics, mark
    1359             :              * it as estimated and add it to the list to estimate.
    1360             :              */
    1361         808 :             if (list_attnums[listidx] != NULL &&
    1362         400 :                 bms_is_subset(list_attnums[listidx], stat->keys))
    1363             :             {
    1364         392 :                 stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
    1365         392 :                 *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
    1366             : 
    1367         392 :                 bms_free(list_attnums[listidx]);
    1368         392 :                 list_attnums[listidx] = NULL;
    1369             :             }
    1370             : 
    1371         408 :             listidx++;
    1372             :         }
    1373             : 
    1374             :         /*
    1375             :          * First compute "simple" selectivity, i.e. without the extended
    1376             :          * statistics, and essentially assuming independence of the
    1377             :          * columns/clauses. We'll then use the various selectivities computed
    1378             :          * from MCV list to improve it.
    1379             :          */
    1380         172 :         simple_sel = clauselist_selectivity_simple(root, stat_clauses, varRelid,
    1381             :                                                    jointype, sjinfo, NULL);
    1382             : 
    1383             :         /*
    1384             :          * Now compute the multi-column estimate from the MCV list, along with
    1385             :          * the other selectivities (base & total selectivity).
    1386             :          */
    1387         172 :         mcv_sel = mcv_clauselist_selectivity(root, stat, stat_clauses, varRelid,
    1388             :                                              jointype, sjinfo, rel,
    1389             :                                              &mcv_basesel, &mcv_totalsel);
    1390             : 
    1391             :         /* Estimated selectivity of values not covered by MCV matches */
    1392         172 :         other_sel = simple_sel - mcv_basesel;
    1393         172 :         CLAMP_PROBABILITY(other_sel);
    1394             : 
    1395             :         /* The non-MCV selectivity can't exceed the 1 - mcv_totalsel. */
    1396         172 :         if (other_sel > 1.0 - mcv_totalsel)
    1397         160 :             other_sel = 1.0 - mcv_totalsel;
    1398             : 
    1399             :         /*
    1400             :          * Overall selectivity is the combination of MCV and non-MCV
    1401             :          * estimates.
    1402             :          */
    1403         172 :         stat_sel = mcv_sel + other_sel;
    1404         172 :         CLAMP_PROBABILITY(stat_sel);
    1405             : 
    1406             :         /* Factor the estimate from this MCV to the oveall estimate. */
    1407         172 :         sel *= stat_sel;
    1408             :     }
    1409             : 
    1410         192 :     return sel;
    1411             : }
    1412             : 
    1413             : /*
    1414             :  * statext_clauselist_selectivity
    1415             :  *      Estimate clauses using the best multi-column statistics.
    1416             :  */
    1417             : Selectivity
    1418        1656 : statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
    1419             :                                JoinType jointype, SpecialJoinInfo *sjinfo,
    1420             :                                RelOptInfo *rel, Bitmapset **estimatedclauses)
    1421             : {
    1422             :     Selectivity sel;
    1423             : 
    1424             :     /* First, try estimating clauses using a multivariate MCV list. */
    1425        1656 :     sel = statext_mcv_clauselist_selectivity(root, clauses, varRelid, jointype,
    1426             :                                              sjinfo, rel, estimatedclauses);
    1427             : 
    1428             :     /*
    1429             :      * Then, apply functional dependencies on the remaining clauses by calling
    1430             :      * dependencies_clauselist_selectivity.  Pass 'estimatedclauses' so the
    1431             :      * function can properly skip clauses already estimated above.
    1432             :      *
    1433             :      * The reasoning for applying dependencies last is that the more complex
    1434             :      * stats can track more complex correlations between the attributes, and
    1435             :      * so may be considered more reliable.
    1436             :      *
    1437             :      * For example, MCV list can give us an exact selectivity for values in
    1438             :      * two columns, while functional dependencies can only provide information
    1439             :      * about the overall strength of the dependency.
    1440             :      */
    1441        1656 :     sel *= dependencies_clauselist_selectivity(root, clauses, varRelid,
    1442             :                                                jointype, sjinfo, rel,
    1443             :                                                estimatedclauses);
    1444             : 
    1445        1656 :     return sel;
    1446             : }
    1447             : 
    1448             : /*
    1449             :  * examine_opclause_expression
    1450             :  *      Split expression into Var and Const parts.
    1451             :  *
    1452             :  * Attempts to match the arguments to either (Var op Const) or (Const op Var),
    1453             :  * possibly with a RelabelType on top. When the expression matches this form,
    1454             :  * returns true, otherwise returns false.
    1455             :  *
    1456             :  * Optionally returns pointers to the extracted Var/Const nodes, when passed
    1457             :  * non-null pointers (varp, cstp and varonleftp). The varonleftp flag specifies
    1458             :  * on which side of the operator we found the Var node.
    1459             :  */
    1460             : bool
    1461         700 : examine_clause_args(List *args, Var **varp, Const **cstp, bool *varonleftp)
    1462             : {
    1463             :     Var        *var;
    1464             :     Const      *cst;
    1465             :     bool        varonleft;
    1466             :     Node       *leftop,
    1467             :                *rightop;
    1468             : 
    1469             :     /* enforced by statext_is_compatible_clause_internal */
    1470             :     Assert(list_length(args) == 2);
    1471             : 
    1472         700 :     leftop = linitial(args);
    1473         700 :     rightop = lsecond(args);
    1474             : 
    1475             :     /* strip RelabelType from either side of the expression */
    1476         700 :     if (IsA(leftop, RelabelType))
    1477         204 :         leftop = (Node *) ((RelabelType *) leftop)->arg;
    1478             : 
    1479         700 :     if (IsA(rightop, RelabelType))
    1480          40 :         rightop = (Node *) ((RelabelType *) rightop)->arg;
    1481             : 
    1482         700 :     if (IsA(leftop, Var) && IsA(rightop, Const))
    1483             :     {
    1484         612 :         var = (Var *) leftop;
    1485         612 :         cst = (Const *) rightop;
    1486         612 :         varonleft = true;
    1487             :     }
    1488          88 :     else if (IsA(leftop, Const) && IsA(rightop, Var))
    1489             :     {
    1490          88 :         var = (Var *) rightop;
    1491          88 :         cst = (Const *) leftop;
    1492          88 :         varonleft = false;
    1493             :     }
    1494             :     else
    1495           0 :         return false;
    1496             : 
    1497             :     /* return pointers to the extracted parts if requested */
    1498         700 :     if (varp)
    1499         700 :         *varp = var;
    1500             : 
    1501         700 :     if (cstp)
    1502         324 :         *cstp = cst;
    1503             : 
    1504         700 :     if (varonleftp)
    1505         324 :         *varonleftp = varonleft;
    1506             : 
    1507         700 :     return true;
    1508             : }

Generated by: LCOV version 1.13