LCOV - code coverage report
Current view: top level - src/backend/statistics - extended_stats.c (source / functions) Hit Total Coverage
Test: PostgreSQL 16beta1 Lines: 736 795 92.6 %
Date: 2023-06-01 13:12:25 Functions: 32 33 97.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * extended_stats.c
       4             :  *    POSTGRES extended statistics
       5             :  *
       6             :  * Generic code supporting statistics objects created via CREATE STATISTICS.
       7             :  *
       8             :  *
       9             :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      10             :  * Portions Copyright (c) 1994, Regents of the University of California
      11             :  *
      12             :  * IDENTIFICATION
      13             :  *    src/backend/statistics/extended_stats.c
      14             :  *
      15             :  *-------------------------------------------------------------------------
      16             :  */
      17             : #include "postgres.h"
      18             : 
      19             : #include "access/detoast.h"
      20             : #include "access/genam.h"
      21             : #include "access/htup_details.h"
      22             : #include "access/table.h"
      23             : #include "catalog/indexing.h"
      24             : #include "catalog/pg_collation.h"
      25             : #include "catalog/pg_statistic_ext.h"
      26             : #include "catalog/pg_statistic_ext_data.h"
      27             : #include "executor/executor.h"
      28             : #include "commands/defrem.h"
      29             : #include "commands/progress.h"
      30             : #include "miscadmin.h"
      31             : #include "nodes/nodeFuncs.h"
      32             : #include "optimizer/clauses.h"
      33             : #include "optimizer/optimizer.h"
      34             : #include "parser/parsetree.h"
      35             : #include "pgstat.h"
      36             : #include "postmaster/autovacuum.h"
      37             : #include "statistics/extended_stats_internal.h"
      38             : #include "statistics/statistics.h"
      39             : #include "utils/acl.h"
      40             : #include "utils/array.h"
      41             : #include "utils/attoptcache.h"
      42             : #include "utils/builtins.h"
      43             : #include "utils/datum.h"
      44             : #include "utils/fmgroids.h"
      45             : #include "utils/lsyscache.h"
      46             : #include "utils/memutils.h"
      47             : #include "utils/rel.h"
      48             : #include "utils/selfuncs.h"
      49             : #include "utils/syscache.h"
      50             : #include "utils/typcache.h"
      51             : 
      52             : /*
      53             :  * To avoid consuming too much memory during analysis and/or too much space
      54             :  * in the resulting pg_statistic rows, we ignore varlena datums that are wider
      55             :  * than WIDTH_THRESHOLD (after detoasting!).  This is legitimate for MCV
      56             :  * and distinct-value calculations since a wide value is unlikely to be
      57             :  * duplicated at all, much less be a most-common value.  For the same reason,
      58             :  * ignoring wide values will not affect our estimates of histogram bin
      59             :  * boundaries very much.
      60             :  */
      61             : #define WIDTH_THRESHOLD  1024
      62             : 
      63             : /*
      64             :  * Used internally to refer to an individual statistics object, i.e.,
      65             :  * a pg_statistic_ext entry.
      66             :  */
      67             : typedef struct StatExtEntry
      68             : {
      69             :     Oid         statOid;        /* OID of pg_statistic_ext entry */
      70             :     char       *schema;         /* statistics object's schema */
      71             :     char       *name;           /* statistics object's name */
      72             :     Bitmapset  *columns;        /* attribute numbers covered by the object */
      73             :     List       *types;          /* 'char' list of enabled statistics kinds */
      74             :     int         stattarget;     /* statistics target (-1 for default) */
      75             :     List       *exprs;          /* expressions */
      76             : } StatExtEntry;
      77             : 
      78             : 
      79             : static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
      80             : static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs, List *exprs,
      81             :                                             int nvacatts, VacAttrStats **vacatts);
      82             : static void statext_store(Oid statOid, bool inh,
      83             :                           MVNDistinct *ndistinct, MVDependencies *dependencies,
      84             :                           MCVList *mcv, Datum exprs, VacAttrStats **stats);
      85             : static int  statext_compute_stattarget(int stattarget,
      86             :                                        int nattrs, VacAttrStats **stats);
      87             : 
      88             : /* Information needed to analyze a single simple expression. */
      89             : typedef struct AnlExprData
      90             : {
      91             :     Node       *expr;           /* expression to analyze */
      92             :     VacAttrStats *vacattrstat;  /* statistics attrs to analyze */
      93             : } AnlExprData;
      94             : 
      95             : static void compute_expr_stats(Relation onerel, double totalrows,
      96             :                                AnlExprData *exprdata, int nexprs,
      97             :                                HeapTuple *rows, int numrows);
      98             : static Datum serialize_expr_stats(AnlExprData *exprdata, int nexprs);
      99             : static Datum expr_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
     100             : static AnlExprData *build_expr_data(List *exprs, int stattarget);
     101             : 
     102             : static StatsBuildData *make_build_data(Relation rel, StatExtEntry *stat,
     103             :                                        int numrows, HeapTuple *rows,
     104             :                                        VacAttrStats **stats, int stattarget);
     105             : 
     106             : 
     107             : /*
     108             :  * Compute requested extended stats, using the rows sampled for the plain
     109             :  * (single-column) stats.
     110             :  *
     111             :  * This fetches a list of stats types from pg_statistic_ext, computes the
     112             :  * requested stats, and serializes them back into the catalog.
     113             :  */
     114             : void
     115       28246 : BuildRelationExtStatistics(Relation onerel, bool inh, double totalrows,
     116             :                            int numrows, HeapTuple *rows,
     117             :                            int natts, VacAttrStats **vacattrstats)
     118             : {
     119             :     Relation    pg_stext;
     120             :     ListCell   *lc;
     121             :     List       *statslist;
     122             :     MemoryContext cxt;
     123             :     MemoryContext oldcxt;
     124             :     int64       ext_cnt;
     125             : 
     126             :     /* Do nothing if there are no columns to analyze. */
     127       28246 :     if (!natts)
     128           6 :         return;
     129             : 
      130             :     /* the list of stats has to be allocated outside the per-object memory context (it is reset after each object) */
     131       28240 :     pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
     132       28240 :     statslist = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
     133             : 
     134             :     /* memory context for building each statistics object */
     135       28240 :     cxt = AllocSetContextCreate(CurrentMemoryContext,
     136             :                                 "BuildRelationExtStatistics",
     137             :                                 ALLOCSET_DEFAULT_SIZES);
     138       28240 :     oldcxt = MemoryContextSwitchTo(cxt);
     139             : 
     140             :     /* report this phase */
     141       28240 :     if (statslist != NIL)
     142             :     {
     143         270 :         const int   index[] = {
     144             :             PROGRESS_ANALYZE_PHASE,
     145             :             PROGRESS_ANALYZE_EXT_STATS_TOTAL
     146             :         };
     147         540 :         const int64 val[] = {
     148             :             PROGRESS_ANALYZE_PHASE_COMPUTE_EXT_STATS,
     149         270 :             list_length(statslist)
     150             :         };
     151             : 
     152         270 :         pgstat_progress_update_multi_param(2, index, val);
     153             :     }
     154             : 
     155       28240 :     ext_cnt = 0;
     156       28606 :     foreach(lc, statslist)
     157             :     {
     158         366 :         StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
     159         366 :         MVNDistinct *ndistinct = NULL;
     160         366 :         MVDependencies *dependencies = NULL;
     161         366 :         MCVList    *mcv = NULL;
     162         366 :         Datum       exprstats = (Datum) 0;
     163             :         VacAttrStats **stats;
     164             :         ListCell   *lc2;
     165             :         int         stattarget;
     166             :         StatsBuildData *data;
     167             : 
     168             :         /*
      169             :          * Check if we can build these stats based on the columns analyzed. If
     170             :          * not, report this fact (except in autovacuum) and move on.
     171             :          */
     172         366 :         stats = lookup_var_attr_stats(onerel, stat->columns, stat->exprs,
     173             :                                       natts, vacattrstats);
     174         366 :         if (!stats)
     175             :         {
     176          12 :             if (!IsAutoVacuumWorkerProcess())
     177          12 :                 ereport(WARNING,
     178             :                         (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
     179             :                          errmsg("statistics object \"%s.%s\" could not be computed for relation \"%s.%s\"",
     180             :                                 stat->schema, stat->name,
     181             :                                 get_namespace_name(onerel->rd_rel->relnamespace),
     182             :                                 RelationGetRelationName(onerel)),
     183             :                          errtable(onerel)));
     184          12 :             continue;
     185             :         }
     186             : 
     187             :         /* compute statistics target for this statistics object */
     188         354 :         stattarget = statext_compute_stattarget(stat->stattarget,
     189         354 :                                                 bms_num_members(stat->columns),
     190             :                                                 stats);
     191             : 
     192             :         /*
     193             :          * Don't rebuild statistics objects with statistics target set to 0
     194             :          * (we just leave the existing values around, just like we do for
     195             :          * regular per-column statistics).
     196             :          */
     197         354 :         if (stattarget == 0)
     198           6 :             continue;
     199             : 
     200             :         /* evaluate expressions (if the statistics object has any) */
     201         348 :         data = make_build_data(onerel, stat, numrows, rows, stats, stattarget);
     202             : 
     203             :         /* compute statistic of each requested type */
     204         954 :         foreach(lc2, stat->types)
     205             :         {
     206         606 :             char        t = (char) lfirst_int(lc2);
     207             : 
     208         606 :             if (t == STATS_EXT_NDISTINCT)
     209         156 :                 ndistinct = statext_ndistinct_build(totalrows, data);
     210         450 :             else if (t == STATS_EXT_DEPENDENCIES)
     211         120 :                 dependencies = statext_dependencies_build(data);
     212         330 :             else if (t == STATS_EXT_MCV)
     213         180 :                 mcv = statext_mcv_build(data, totalrows, stattarget);
     214         150 :             else if (t == STATS_EXT_EXPRESSIONS)
     215             :             {
     216             :                 AnlExprData *exprdata;
     217             :                 int         nexprs;
     218             : 
     219             :                 /* should not happen, thanks to checks when defining stats */
     220         150 :                 if (!stat->exprs)
     221           0 :                     elog(ERROR, "requested expression stats, but there are no expressions");
     222             : 
     223         150 :                 exprdata = build_expr_data(stat->exprs, stattarget);
     224         150 :                 nexprs = list_length(stat->exprs);
     225             : 
     226         150 :                 compute_expr_stats(onerel, totalrows,
     227             :                                    exprdata, nexprs,
     228             :                                    rows, numrows);
     229             : 
     230         150 :                 exprstats = serialize_expr_stats(exprdata, nexprs);
     231             :             }
     232             :         }
     233             : 
     234             :         /* store the statistics in the catalog */
     235         348 :         statext_store(stat->statOid, inh,
     236             :                       ndistinct, dependencies, mcv, exprstats, stats);
     237             : 
     238             :         /* for reporting progress */
     239         348 :         pgstat_progress_update_param(PROGRESS_ANALYZE_EXT_STATS_COMPUTED,
     240             :                                      ++ext_cnt);
     241             : 
     242             :         /* free the data used for building this statistics object */
     243         348 :         MemoryContextReset(cxt);
     244             :     }
     245             : 
     246       28240 :     MemoryContextSwitchTo(oldcxt);
     247       28240 :     MemoryContextDelete(cxt);
     248             : 
     249       28240 :     list_free(statslist);
     250             : 
     251       28240 :     table_close(pg_stext, RowExclusiveLock);
     252             : }
     253             : 
     254             : /*
     255             :  * ComputeExtStatisticsRows
     256             :  *      Compute number of rows required by extended statistics on a table.
     257             :  *
     258             :  * Computes number of rows we need to sample to build extended statistics on a
     259             :  * table. This only looks at statistics we can actually build - for example
     260             :  * when analyzing only some of the columns, this will skip statistics objects
     261             :  * that would require additional columns.
     262             :  *
     263             :  * See statext_compute_stattarget for details about how we compute the
     264             :  * statistics target for a statistics object (from the object target,
     265             :  * attribute targets and default statistics target).
     266             :  */
     267             : int
     268       47286 : ComputeExtStatisticsRows(Relation onerel,
     269             :                          int natts, VacAttrStats **vacattrstats)
     270             : {
     271             :     Relation    pg_stext;
     272             :     ListCell   *lc;
     273             :     List       *lstats;
     274             :     MemoryContext cxt;
     275             :     MemoryContext oldcxt;
     276       47286 :     int         result = 0;
     277             : 
     278             :     /* If there are no columns to analyze, just return 0. */
     279       47286 :     if (!natts)
     280           6 :         return 0;
     281             : 
     282       47280 :     cxt = AllocSetContextCreate(CurrentMemoryContext,
     283             :                                 "ComputeExtStatisticsRows",
     284             :                                 ALLOCSET_DEFAULT_SIZES);
     285       47280 :     oldcxt = MemoryContextSwitchTo(cxt);
     286             : 
     287       47280 :     pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
     288       47280 :     lstats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
     289             : 
     290       47646 :     foreach(lc, lstats)
     291             :     {
     292         366 :         StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
     293             :         int         stattarget;
     294             :         VacAttrStats **stats;
     295         366 :         int         nattrs = bms_num_members(stat->columns);
     296             : 
     297             :         /*
     298             :          * Check if we can build this statistics object based on the columns
     299             :          * analyzed. If not, ignore it (don't report anything, we'll do that
      300             :          * during the actual build in BuildRelationExtStatistics).
     301             :          */
     302         366 :         stats = lookup_var_attr_stats(onerel, stat->columns, stat->exprs,
     303             :                                       natts, vacattrstats);
     304             : 
     305         366 :         if (!stats)
     306          12 :             continue;
     307             : 
     308             :         /*
      309             :          * Compute statistics target, based on what's set for the statistics
     310             :          * object itself, and for its attributes.
     311             :          */
     312         354 :         stattarget = statext_compute_stattarget(stat->stattarget,
     313             :                                                 nattrs, stats);
     314             : 
     315             :         /* Use the largest value for all statistics objects. */
     316         354 :         if (stattarget > result)
     317         252 :             result = stattarget;
     318             :     }
     319             : 
     320       47280 :     table_close(pg_stext, RowExclusiveLock);
     321             : 
     322       47280 :     MemoryContextSwitchTo(oldcxt);
     323       47280 :     MemoryContextDelete(cxt);
     324             : 
     325             :     /* compute sample size based on the statistics target */
     326       47280 :     return (300 * result);
     327             : }
     328             : 
     329             : /*
     330             :  * statext_compute_stattarget
     331             :  *      compute statistics target for an extended statistic
     332             :  *
     333             :  * When computing target for extended statistics objects, we consider three
     334             :  * places where the target may be set - the statistics object itself,
     335             :  * attributes the statistics object is defined on, and then the default
     336             :  * statistics target.
     337             :  *
     338             :  * First we look at what's set for the statistics object itself, using the
     339             :  * ALTER STATISTICS ... SET STATISTICS command. If we find a valid value
     340             :  * there (i.e. not -1) we're done. Otherwise we look at targets set for any
     341             :  * of the attributes the statistic is defined on, and if there are columns
     342             :  * with defined target, we use the maximum value. We do this mostly for
     343             :  * backwards compatibility, because this is what we did before having
     344             :  * statistics target for extended statistics.
     345             :  *
     346             :  * And finally, if we still don't have a statistics target, we use the value
     347             :  * set in default_statistics_target.
     348             :  */
     349             : static int
     350         708 : statext_compute_stattarget(int stattarget, int nattrs, VacAttrStats **stats)
     351             : {
     352             :     int         i;
     353             : 
     354             :     /*
      355             :      * If there's a statistics target set for the statistics object, use it. It
     356             :      * may be set to 0 which disables building of that statistic.
     357             :      */
     358         708 :     if (stattarget >= 0)
     359          12 :         return stattarget;
     360             : 
     361             :     /*
     362             :      * The target for the statistics object is set to -1, in which case we
     363             :      * look at the maximum target set for any of the attributes the object is
     364             :      * defined on.
     365             :      */
     366        1920 :     for (i = 0; i < nattrs; i++)
     367             :     {
     368             :         /* keep the maximum statistics target */
     369        1224 :         if (stats[i]->attr->attstattarget > stattarget)
     370         528 :             stattarget = stats[i]->attr->attstattarget;
     371             :     }
     372             : 
     373             :     /*
     374             :      * If the value is still negative (so neither the statistics object nor
      375             :      * any of the columns have a custom statistics target set), use the global
     376             :      * default target.
     377             :      */
     378         696 :     if (stattarget < 0)
     379         168 :         stattarget = default_statistics_target;
     380             : 
      381             :     /* At this point we should have a valid statistics target. */
     382             :     Assert((stattarget >= 0) && (stattarget <= 10000));
     383             : 
     384         696 :     return stattarget;
     385             : }
     386             : 
     387             : /*
     388             :  * statext_is_kind_built
     389             :  *      Is this stat kind built in the given pg_statistic_ext_data tuple?
     390             :  */
     391             : bool
     392        7344 : statext_is_kind_built(HeapTuple htup, char type)
     393             : {
     394             :     AttrNumber  attnum;
     395             : 
     396        7344 :     switch (type)
     397             :     {
     398        1836 :         case STATS_EXT_NDISTINCT:
     399        1836 :             attnum = Anum_pg_statistic_ext_data_stxdndistinct;
     400        1836 :             break;
     401             : 
     402        1836 :         case STATS_EXT_DEPENDENCIES:
     403        1836 :             attnum = Anum_pg_statistic_ext_data_stxddependencies;
     404        1836 :             break;
     405             : 
     406        1836 :         case STATS_EXT_MCV:
     407        1836 :             attnum = Anum_pg_statistic_ext_data_stxdmcv;
     408        1836 :             break;
     409             : 
     410        1836 :         case STATS_EXT_EXPRESSIONS:
     411        1836 :             attnum = Anum_pg_statistic_ext_data_stxdexpr;
     412        1836 :             break;
     413             : 
     414           0 :         default:
     415           0 :             elog(ERROR, "unexpected statistics type requested: %d", type);
     416             :     }
     417             : 
     418        7344 :     return !heap_attisnull(htup, attnum, NULL);
     419             : }
     420             : 
     421             : /*
     422             :  * Return a list (of StatExtEntry) of statistics objects for the given relation.
     423             :  */
     424             : static List *
     425       75520 : fetch_statentries_for_relation(Relation pg_statext, Oid relid)
     426             : {
     427             :     SysScanDesc scan;
     428             :     ScanKeyData skey;
     429             :     HeapTuple   htup;
     430       75520 :     List       *result = NIL;
     431             : 
     432             :     /*
     433             :      * Prepare to scan pg_statistic_ext for entries having stxrelid = this
     434             :      * rel.
     435             :      */
     436       75520 :     ScanKeyInit(&skey,
     437             :                 Anum_pg_statistic_ext_stxrelid,
     438             :                 BTEqualStrategyNumber, F_OIDEQ,
     439             :                 ObjectIdGetDatum(relid));
     440             : 
     441       75520 :     scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true,
     442             :                               NULL, 1, &skey);
     443             : 
     444       76252 :     while (HeapTupleIsValid(htup = systable_getnext(scan)))
     445             :     {
     446             :         StatExtEntry *entry;
     447             :         Datum       datum;
     448             :         bool        isnull;
     449             :         int         i;
     450             :         ArrayType  *arr;
     451             :         char       *enabled;
     452             :         Form_pg_statistic_ext staForm;
     453         732 :         List       *exprs = NIL;
     454             : 
     455         732 :         entry = palloc0(sizeof(StatExtEntry));
     456         732 :         staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
     457         732 :         entry->statOid = staForm->oid;
     458         732 :         entry->schema = get_namespace_name(staForm->stxnamespace);
     459         732 :         entry->name = pstrdup(NameStr(staForm->stxname));
     460         732 :         entry->stattarget = staForm->stxstattarget;
     461        2028 :         for (i = 0; i < staForm->stxkeys.dim1; i++)
     462             :         {
     463        1296 :             entry->columns = bms_add_member(entry->columns,
     464        1296 :                                             staForm->stxkeys.values[i]);
     465             :         }
     466             : 
     467             :         /* decode the stxkind char array into a list of chars */
     468         732 :         datum = SysCacheGetAttrNotNull(STATEXTOID, htup,
     469             :                                        Anum_pg_statistic_ext_stxkind);
     470         732 :         arr = DatumGetArrayTypeP(datum);
     471         732 :         if (ARR_NDIM(arr) != 1 ||
     472         732 :             ARR_HASNULL(arr) ||
     473         732 :             ARR_ELEMTYPE(arr) != CHAROID)
     474           0 :             elog(ERROR, "stxkind is not a 1-D char array");
     475         732 :         enabled = (char *) ARR_DATA_PTR(arr);
     476        2052 :         for (i = 0; i < ARR_DIMS(arr)[0]; i++)
     477             :         {
     478             :             Assert((enabled[i] == STATS_EXT_NDISTINCT) ||
     479             :                    (enabled[i] == STATS_EXT_DEPENDENCIES) ||
     480             :                    (enabled[i] == STATS_EXT_MCV) ||
     481             :                    (enabled[i] == STATS_EXT_EXPRESSIONS));
     482        1320 :             entry->types = lappend_int(entry->types, (int) enabled[i]);
     483             :         }
     484             : 
     485             :         /* decode expression (if any) */
     486         732 :         datum = SysCacheGetAttr(STATEXTOID, htup,
     487             :                                 Anum_pg_statistic_ext_stxexprs, &isnull);
     488             : 
     489         732 :         if (!isnull)
     490             :         {
     491             :             char       *exprsString;
     492             : 
     493         300 :             exprsString = TextDatumGetCString(datum);
     494         300 :             exprs = (List *) stringToNode(exprsString);
     495             : 
     496         300 :             pfree(exprsString);
     497             : 
     498             :             /*
     499             :              * Run the expressions through eval_const_expressions. This is not
     500             :              * just an optimization, but is necessary, because the planner
     501             :              * will be comparing them to similarly-processed qual clauses, and
     502             :              * may fail to detect valid matches without this.  We must not use
     503             :              * canonicalize_qual, however, since these aren't qual
     504             :              * expressions.
     505             :              */
     506         300 :             exprs = (List *) eval_const_expressions(NULL, (Node *) exprs);
     507             : 
     508             :             /* May as well fix opfuncids too */
     509         300 :             fix_opfuncids((Node *) exprs);
     510             :         }
     511             : 
     512         732 :         entry->exprs = exprs;
     513             : 
     514         732 :         result = lappend(result, entry);
     515             :     }
     516             : 
     517       75520 :     systable_endscan(scan);
     518             : 
     519       75520 :     return result;
     520             : }
     521             : 
     522             : /*
     523             :  * examine_attribute -- pre-analysis of a single column
     524             :  *
     525             :  * Determine whether the column is analyzable; if so, create and initialize
     526             :  * a VacAttrStats struct for it.  If not, return NULL.
     527             :  */
     528             : static VacAttrStats *
     529         576 : examine_attribute(Node *expr)
     530             : {
     531             :     HeapTuple   typtuple;
     532             :     VacAttrStats *stats;
     533             :     int         i;
     534             :     bool        ok;
     535             : 
     536             :     /*
     537             :      * Create the VacAttrStats struct.  Note that we only have a copy of the
     538             :      * fixed fields of the pg_attribute tuple.
     539             :      */
     540         576 :     stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
     541             : 
     542             :     /* fake the attribute */
     543         576 :     stats->attr = (Form_pg_attribute) palloc0(ATTRIBUTE_FIXED_PART_SIZE);
     544         576 :     stats->attr->attstattarget = -1;
     545             : 
     546             :     /*
     547             :      * When analyzing an expression, believe the expression tree's type not
     548             :      * the column datatype --- the latter might be the opckeytype storage type
     549             :      * of the opclass, which is not interesting for our purposes.  (Note: if
     550             :      * we did anything with non-expression statistics columns, we'd need to
     551             :      * figure out where to get the correct type info from, but for now that's
     552             :      * not a problem.)  It's not clear whether anyone will care about the
     553             :      * typmod, but we store that too just in case.
     554             :      */
     555         576 :     stats->attrtypid = exprType(expr);
     556         576 :     stats->attrtypmod = exprTypmod(expr);
     557         576 :     stats->attrcollid = exprCollation(expr);
     558             : 
     559         576 :     typtuple = SearchSysCacheCopy1(TYPEOID,
     560             :                                    ObjectIdGetDatum(stats->attrtypid));
     561         576 :     if (!HeapTupleIsValid(typtuple))
     562           0 :         elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
     563         576 :     stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
     564             : 
     565             :     /*
     566             :      * We don't actually analyze individual attributes, so no need to set the
     567             :      * memory context.
     568             :      */
     569         576 :     stats->anl_context = NULL;
     570         576 :     stats->tupattnum = InvalidAttrNumber;
     571             : 
     572             :     /*
     573             :      * The fields describing the stats->stavalues[n] element types default to
     574             :      * the type of the data being analyzed, but the type-specific typanalyze
     575             :      * function can change them if it wants to store something else.
     576             :      */
     577        3456 :     for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
     578             :     {
     579        2880 :         stats->statypid[i] = stats->attrtypid;
     580        2880 :         stats->statyplen[i] = stats->attrtype->typlen;
     581        2880 :         stats->statypbyval[i] = stats->attrtype->typbyval;
     582        2880 :         stats->statypalign[i] = stats->attrtype->typalign;
     583             :     }
     584             : 
     585             :     /*
     586             :      * Call the type-specific typanalyze function.  If none is specified, use
     587             :      * std_typanalyze().
     588             :      */
     589         576 :     if (OidIsValid(stats->attrtype->typanalyze))
     590           0 :         ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
     591             :                                            PointerGetDatum(stats)));
     592             :     else
     593         576 :         ok = std_typanalyze(stats);
     594             : 
     595         576 :     if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
     596             :     {
     597           0 :         heap_freetuple(typtuple);
     598           0 :         pfree(stats->attr);
     599           0 :         pfree(stats);
     600           0 :         return NULL;
     601             :     }
     602             : 
     603         576 :     return stats;
     604             : }
     605             : 
     606             : /*
     607             :  * examine_expression -- pre-analysis of a single expression
     608             :  *
     609             :  * Determine whether the expression is analyzable; if so, create and initialize
     610             :  * a VacAttrStats struct for it.  If not, return NULL.
     611             :  */
     612             : static VacAttrStats *
     613         576 : examine_expression(Node *expr, int stattarget)
     614             : {
     615             :     HeapTuple   typtuple;
     616             :     VacAttrStats *stats;
     617             :     int         i;
     618             :     bool        ok;
     619             : 
     620             :     Assert(expr != NULL);
     621             : 
     622             :     /*
     623             :      * Create the VacAttrStats struct.
     624             :      */
     625         576 :     stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
     626             : 
     627             :     /*
     628             :      * When analyzing an expression, believe the expression tree's type.
     629             :      */
     630         576 :     stats->attrtypid = exprType(expr);
     631         576 :     stats->attrtypmod = exprTypmod(expr);
     632             : 
     633             :     /*
     634             :      * We don't allow collation to be specified in CREATE STATISTICS, so we
     635             :      * have to use the collation specified for the expression. It's possible
     636             :      * to specify the collation in the expression "(col COLLATE "en_US")" in
     637             :      * which case exprCollation() does the right thing.
     638             :      */
     639         576 :     stats->attrcollid = exprCollation(expr);
     640             : 
     641             :     /*
     642             :      * We don't have any pg_attribute for expressions, so let's fake something
     643             :      * reasonable into attstattarget, which is the only thing std_typanalyze
     644             :      * needs.
     645             :      */
     646         576 :     stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
     647             : 
     648             :     /*
     649             :      * We can't have statistics target specified for the expression, so we
     650             :      * could use either the default_statistics_target, or the target computed
     651             :      * for the extended statistics. The second option seems more reasonable.
     652             :      */
     653         576 :     stats->attr->attstattarget = stattarget;
     654             : 
     655             :     /* initialize some basic fields */
     656         576 :     stats->attr->attrelid = InvalidOid;
     657         576 :     stats->attr->attnum = InvalidAttrNumber;
     658         576 :     stats->attr->atttypid = stats->attrtypid;
     659             : 
     660         576 :     typtuple = SearchSysCacheCopy1(TYPEOID,
     661             :                                    ObjectIdGetDatum(stats->attrtypid));
     662         576 :     if (!HeapTupleIsValid(typtuple))
     663           0 :         elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
     664             : 
     665         576 :     stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
     666         576 :     stats->anl_context = CurrentMemoryContext;   /* XXX should be using
     667             :                                                  * something else? */
     668         576 :     stats->tupattnum = InvalidAttrNumber;
     669             : 
     670             :     /*
     671             :      * The fields describing the stats->stavalues[n] element types default to
     672             :      * the type of the data being analyzed, but the type-specific typanalyze
     673             :      * function can change them if it wants to store something else.
     674             :      */
     675        3456 :     for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
     676             :     {
     677        2880 :         stats->statypid[i] = stats->attrtypid;
     678        2880 :         stats->statyplen[i] = stats->attrtype->typlen;
     679        2880 :         stats->statypbyval[i] = stats->attrtype->typbyval;
     680        2880 :         stats->statypalign[i] = stats->attrtype->typalign;
     681             :     }
     682             : 
     683             :     /*
     684             :      * Call the type-specific typanalyze function.  If none is specified, use
     685             :      * std_typanalyze().
     686             :      */
     687         576 :     if (OidIsValid(stats->attrtype->typanalyze))
     688           0 :         ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
     689             :                                            PointerGetDatum(stats)));
     690             :     else
     691         576 :         ok = std_typanalyze(stats);
     692             : 
     693         576 :     if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
     694             :     {
     695           0 :         heap_freetuple(typtuple);
     696           0 :         pfree(stats);
     697           0 :         return NULL;
     698             :     }
     699             : 
     700         576 :     return stats;
     701             : }
     702             : 
     703             : /*
     704             :  * Using 'vacatts' of size 'nvacatts' as input data, return a newly-built
     705             :  * VacAttrStats array which includes only the items corresponding to
     706             :  * attributes indicated by 'attrs'.  If we don't have all of the per-column
     707             :  * stats available to compute the extended stats, then we return NULL to
     708             :  * indicate to the caller that the stats should not be built.
     709             :  */
     710             : static VacAttrStats **
     711         732 : lookup_var_attr_stats(Relation rel, Bitmapset *attrs, List *exprs,
     712             :                       int nvacatts, VacAttrStats **vacatts)
     713             : {
     714         732 :     int         i = 0;
     715         732 :     int         x = -1;
     716             :     int         natts;
     717             :     VacAttrStats **stats;
     718             :     ListCell   *lc;
     719             : 
     720         732 :     natts = bms_num_members(attrs) + list_length(exprs);
     721             : 
     722         732 :     stats = (VacAttrStats **) palloc(natts * sizeof(VacAttrStats *));
     723             : 
     724             :     /* lookup VacAttrStats info for the requested columns (same attnum) */
     725        1992 :     while ((x = bms_next_member(attrs, x)) >= 0)
     726             :     {
     727             :         int         j;
     728             : 
     729        1284 :         stats[i] = NULL;
     730        4080 :         for (j = 0; j < nvacatts; j++)
     731             :         {
     732        4056 :             if (x == vacatts[j]->tupattnum)
     733             :             {
     734        1260 :                 stats[i] = vacatts[j];
     735        1260 :                 break;
     736             :             }
     737             :         }
     738             : 
     739        1284 :         if (!stats[i])
     740             :         {
     741             :             /*
     742             :              * Looks like stats were not gathered for one of the columns
     743             :              * required. We'll be unable to build the extended stats without
     744             :              * this column.
     745             :              */
     746          24 :             pfree(stats);
     747          24 :             return NULL;
     748             :         }
     749             : 
     750             :         /*
     751             :          * Sanity check that the column is not dropped - stats should have
     752             :          * been removed in this case.
     753             :          */
     754             :         Assert(!stats[i]->attr->attisdropped);
     755             : 
     756        1260 :         i++;
     757             :     }
     758             : 
     759             :     /* also add info for expressions */
     760        1284 :     foreach(lc, exprs)
     761             :     {
     762         576 :         Node       *expr = (Node *) lfirst(lc);
     763             : 
     764         576 :         stats[i] = examine_attribute(expr);
     765             : 
     766             :         /*
     767             :          * XXX We need tuple descriptor later, and we just grab it from
     768             :          * stats[0]->tupDesc (see e.g. statext_mcv_build). But as coded
     769             :          * examine_attribute does not set that, so just grab it from the first
     770             :          * vacatts element.
     771             :          */
     772         576 :         stats[i]->tupDesc = vacatts[0]->tupDesc;
     773             : 
     774         576 :         i++;
     775             :     }
     776             : 
     777         708 :     return stats;
     778             : }
     779             : 
     780             : /*
     781             :  * statext_store
     782             :  *  Serializes the statistics and stores them into the pg_statistic_ext_data
     783             :  *  tuple.
     784             :  */
     785             : static void
     786         348 : statext_store(Oid statOid, bool inh,
     787             :               MVNDistinct *ndistinct, MVDependencies *dependencies,
     788             :               MCVList *mcv, Datum exprs, VacAttrStats **stats)
     789             : {
     790             :     Relation    pg_stextdata;
     791             :     HeapTuple   stup;
     792             :     Datum       values[Natts_pg_statistic_ext_data];
     793             :     bool        nulls[Natts_pg_statistic_ext_data];
     794             : 
     795         348 :     pg_stextdata = table_open(StatisticExtDataRelationId, RowExclusiveLock);
     796             : 
     797         348 :     memset(nulls, true, sizeof(nulls));
     798         348 :     memset(values, 0, sizeof(values));
     799             : 
     800             :     /* basic info */
     801         348 :     values[Anum_pg_statistic_ext_data_stxoid - 1] = ObjectIdGetDatum(statOid);
     802         348 :     nulls[Anum_pg_statistic_ext_data_stxoid - 1] = false;
     803             : 
     804         348 :     values[Anum_pg_statistic_ext_data_stxdinherit - 1] = BoolGetDatum(inh);
     805         348 :     nulls[Anum_pg_statistic_ext_data_stxdinherit - 1] = false;
     806             : 
     807             :     /*
     808             :      * Construct a new pg_statistic_ext_data tuple, replacing the calculated
     809             :      * stats.
     810             :      */
     811         348 :     if (ndistinct != NULL)
     812             :     {
     813         156 :         bytea      *data = statext_ndistinct_serialize(ndistinct);
     814             : 
     815         156 :         nulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = (data == NULL);
     816         156 :         values[Anum_pg_statistic_ext_data_stxdndistinct - 1] = PointerGetDatum(data);
     817             :     }
     818             : 
     819         348 :     if (dependencies != NULL)
     820             :     {
     821         102 :         bytea      *data = statext_dependencies_serialize(dependencies);
     822             : 
     823         102 :         nulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = (data == NULL);
     824         102 :         values[Anum_pg_statistic_ext_data_stxddependencies - 1] = PointerGetDatum(data);
     825             :     }
     826         348 :     if (mcv != NULL)
     827             :     {
     828         180 :         bytea      *data = statext_mcv_serialize(mcv, stats);
     829             : 
     830         180 :         nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = (data == NULL);
     831         180 :         values[Anum_pg_statistic_ext_data_stxdmcv - 1] = PointerGetDatum(data);
     832             :     }
     833         348 :     if (exprs != (Datum) 0)
     834             :     {
     835         150 :         nulls[Anum_pg_statistic_ext_data_stxdexpr - 1] = false;
     836         150 :         values[Anum_pg_statistic_ext_data_stxdexpr - 1] = exprs;
     837             :     }
     838             : 
     839             :     /*
     840             :      * Delete the old tuple if it exists, and insert a new one. It's easier
     841             :      * than trying to update or insert, based on various conditions.
     842             :      */
     843         348 :     RemoveStatisticsDataById(statOid, inh);
     844             : 
     845             :     /* form and insert a new tuple */
     846         348 :     stup = heap_form_tuple(RelationGetDescr(pg_stextdata), values, nulls);
     847         348 :     CatalogTupleInsert(pg_stextdata, stup);
     848             : 
     849         348 :     heap_freetuple(stup);
     850             : 
     851         348 :     table_close(pg_stextdata, RowExclusiveLock);
     852         348 : }
     853             : 
     854             : /* initialize multi-dimensional sort */
     855             : MultiSortSupport
     856        1260 : multi_sort_init(int ndims)
     857             : {
     858             :     MultiSortSupport mss;
     859             : 
     860             :     Assert(ndims >= 2);
     861             : 
     862        1260 :     mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
     863        1260 :                                      + sizeof(SortSupportData) * ndims);
     864             : 
     865        1260 :     mss->ndims = ndims;
     866             : 
     867        1260 :     return mss;
     868             : }
     869             : 
     870             : /*
     871             :  * Prepare sort support info using the given sort operator and collation
     872             :  * at the position 'sortdim'
     873             :  */
     874             : void
     875        2994 : multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
     876             :                          Oid oper, Oid collation)
     877             : {
     878        2994 :     SortSupport ssup = &mss->ssup[sortdim];
     879             : 
     880        2994 :     ssup->ssup_cxt = CurrentMemoryContext;
     881        2994 :     ssup->ssup_collation = collation;
     882        2994 :     ssup->ssup_nulls_first = false;
     883             : 
     884        2994 :     PrepareSortSupportFromOrderingOp(oper, ssup);
     885        2994 : }
     886             : 
     887             : /* compare all the dimensions in the selected order */
     888             : int
     889    15215334 : multi_sort_compare(const void *a, const void *b, void *arg)
     890             : {
     891    15215334 :     MultiSortSupport mss = (MultiSortSupport) arg;
     892    15215334 :     SortItem   *ia = (SortItem *) a;
     893    15215334 :     SortItem   *ib = (SortItem *) b;
     894             :     int         i;
     895             : 
     896    27545556 :     for (i = 0; i < mss->ndims; i++)
     897             :     {
     898             :         int         compare;
     899             : 
     900    23966712 :         compare = ApplySortComparator(ia->values[i], ia->isnull[i],
     901    23966712 :                                       ib->values[i], ib->isnull[i],
     902    23966712 :                                       &mss->ssup[i]);
     903             : 
     904    23966712 :         if (compare != 0)
     905    11636490 :             return compare;
     906             :     }
     907             : 
     908             :     /* equal by default */
     909     3578844 :     return 0;
     910             : }
     911             : 
     912             : /* compare selected dimension */
     913             : int
     914     1472580 : multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b,
     915             :                        MultiSortSupport mss)
     916             : {
     917     2945160 :     return ApplySortComparator(a->values[dim], a->isnull[dim],
     918     1472580 :                                b->values[dim], b->isnull[dim],
     919     1472580 :                                &mss->ssup[dim]);
     920             : }
     921             : 
     922             : int
     923     1504218 : multi_sort_compare_dims(int start, int end,
     924             :                         const SortItem *a, const SortItem *b,
     925             :                         MultiSortSupport mss)
     926             : {
     927             :     int         dim;
     928             : 
     929     3403332 :     for (dim = start; dim <= end; dim++)
     930             :     {
     931     1930752 :         int         r = ApplySortComparator(a->values[dim], a->isnull[dim],
     932     1930752 :                                             b->values[dim], b->isnull[dim],
     933     1930752 :                                             &mss->ssup[dim]);
     934             : 
     935     1930752 :         if (r != 0)
     936       31638 :             return r;
     937             :     }
     938             : 
     939     1472580 :     return 0;
     940             : }
     941             : 
     942             : int
     943      187380 : compare_scalars_simple(const void *a, const void *b, void *arg)
     944             : {
     945      187380 :     return compare_datums_simple(*(Datum *) a,
     946             :                                  *(Datum *) b,
     947             :                                  (SortSupport) arg);
     948             : }
     949             : 
     950             : int
     951      211068 : compare_datums_simple(Datum a, Datum b, SortSupport ssup)
     952             : {
     953      211068 :     return ApplySortComparator(a, false, b, false, ssup);
     954             : }
     955             : 
     956             : /*
     957             :  * build_attnums_array
     958             :  *      Transforms a bitmap into an array of AttrNumber values.
     959             :  *
     960             :  * This is used for extended statistics only, so all the attributes must be
     961             :  * user-defined. That means offsetting by FirstLowInvalidHeapAttributeNumber
     962             :  * is not necessary here (and when querying the bitmap).
     963             :  */
     964             : AttrNumber *
     965           0 : build_attnums_array(Bitmapset *attrs, int nexprs, int *numattrs)
     966             : {
     967             :     int         i,
     968             :                 j;
     969             :     AttrNumber *attnums;
     970           0 :     int         num = bms_num_members(attrs);
     971             : 
     972           0 :     if (numattrs)
     973           0 :         *numattrs = num;
     974             : 
     975             :     /* build attnums from the bitmapset */
     976           0 :     attnums = (AttrNumber *) palloc(sizeof(AttrNumber) * num);
     977           0 :     i = 0;
     978           0 :     j = -1;
     979           0 :     while ((j = bms_next_member(attrs, j)) >= 0)
     980             :     {
     981           0 :         int         attnum = (j - nexprs);
     982             : 
     983             :         /*
     984             :          * Make sure the bitmap contains only user-defined attributes. As
     985             :          * bitmaps can't contain negative values, this can be violated in two
     986             :          * ways. Firstly, the bitmap might contain 0 as a member, and secondly
     987             :          * the integer value might be larger than MaxAttrNumber.
     988             :          */
     989             :         Assert(AttributeNumberIsValid(attnum));
     990             :         Assert(attnum <= MaxAttrNumber);
     991             :         Assert(attnum >= (-nexprs));
     992             : 
     993           0 :         attnums[i++] = (AttrNumber) attnum;
     994             : 
     995             :         /* protect against overflows */
     996             :         Assert(i <= num);
     997             :     }
     998             : 
     999           0 :     return attnums;
    1000             : }
    1001             : 
    1002             : /*
    1003             :  * build_sorted_items
    1004             :  *      build a sorted array of SortItem with values from rows
    1005             :  *
    1006             :  * Note: All the memory is allocated in a single chunk, so that the caller
    1007             :  * can simply pfree the return value to release all of it.
    1008             :  */
    1009             : SortItem *
    1010         756 : build_sorted_items(StatsBuildData *data, int *nitems,
    1011             :                    MultiSortSupport mss,
    1012             :                    int numattrs, AttrNumber *attnums)
    1013             : {
    1014             :     int         i,
    1015             :                 j,
    1016             :                 len,
    1017             :                 nrows;
    1018         756 :     int         nvalues = data->numrows * numattrs;
    1019             : 
    1020             :     SortItem   *items;
    1021             :     Datum      *values;
    1022             :     bool       *isnull;
    1023             :     char       *ptr;
    1024             :     int        *typlen;
    1025             : 
    1026             :     /* Compute the total amount of memory we need (both items and values). */
    1027         756 :     len = data->numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool));
    1028             : 
    1029             :     /* Allocate the memory and split it into the pieces. */
    1030         756 :     ptr = palloc0(len);
    1031             : 
    1032             :     /* items to sort */
    1033         756 :     items = (SortItem *) ptr;
    1034         756 :     ptr += data->numrows * sizeof(SortItem);
    1035             : 
    1036             :     /* values and null flags */
    1037         756 :     values = (Datum *) ptr;
    1038         756 :     ptr += nvalues * sizeof(Datum);
    1039             : 
    1040         756 :     isnull = (bool *) ptr;
    1041         756 :     ptr += nvalues * sizeof(bool);
    1042             : 
    1043             :     /* make sure we consumed the whole buffer exactly */
    1044             :     Assert((ptr - (char *) items) == len);
    1045             : 
    1046             :     /* fix the pointers to Datum and bool arrays */
    1047         756 :     nrows = 0;
    1048     1964568 :     for (i = 0; i < data->numrows; i++)
    1049             :     {
    1050     1963812 :         items[nrows].values = &values[nrows * numattrs];
    1051     1963812 :         items[nrows].isnull = &isnull[nrows * numattrs];
    1052             : 
    1053     1963812 :         nrows++;
    1054             :     }
    1055             : 
    1056             :     /* build a local cache of typlen for all attributes */
    1057         756 :     typlen = (int *) palloc(sizeof(int) * data->nattnums);
    1058        2850 :     for (i = 0; i < data->nattnums; i++)
    1059        2094 :         typlen[i] = get_typlen(data->stats[i]->attrtypid);
    1060             : 
    1061         756 :     nrows = 0;
    1062     1964568 :     for (i = 0; i < data->numrows; i++)
    1063             :     {
    1064     1963812 :         bool        toowide = false;
    1065             : 
    1066             :         /* load the values/null flags from sample rows */
    1067     6768012 :         for (j = 0; j < numattrs; j++)
    1068             :         {
    1069             :             Datum       value;
    1070             :             bool        isnull;
    1071             :             int         attlen;
    1072     4804200 :             AttrNumber  attnum = attnums[j];
    1073             : 
    1074             :             int         idx;
    1075             : 
    1076             :             /* match attnum to the pre-calculated data */
    1077     9493128 :             for (idx = 0; idx < data->nattnums; idx++)
    1078             :             {
    1079     9493128 :                 if (attnum == data->attnums[idx])
    1080     4804200 :                     break;
    1081             :             }
    1082             : 
    1083             :             Assert(idx < data->nattnums);
    1084             : 
    1085     4804200 :             value = data->values[idx][i];
    1086     4804200 :             isnull = data->nulls[idx][i];
    1087     4804200 :             attlen = typlen[idx];
    1088             : 
    1089             :             /*
    1090             :              * If this is a varlena value, check if it's too wide and if yes
    1091             :              * then skip the whole item. Otherwise detoast the value.
    1092             :              *
    1093             :              * XXX It may happen that we've already detoasted some preceding
    1094             :              * values for the current item. We don't bother to cleanup those
    1095             :              * on the assumption that those are small (below WIDTH_THRESHOLD)
    1096             :              * and will be discarded at the end of analyze.
    1097             :              */
    1098     4804200 :             if ((!isnull) && (attlen == -1))
    1099             :             {
    1100     1480200 :                 if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
    1101             :                 {
    1102           0 :                     toowide = true;
    1103           0 :                     break;
    1104             :                 }
    1105             : 
    1106     1480200 :                 value = PointerGetDatum(PG_DETOAST_DATUM(value));
    1107             :             }
    1108             : 
    1109     4804200 :             items[nrows].values[j] = value;
    1110     4804200 :             items[nrows].isnull[j] = isnull;
    1111             :         }
    1112             : 
    1113     1963812 :         if (toowide)
    1114           0 :             continue;
    1115             : 
    1116     1963812 :         nrows++;
    1117             :     }
    1118             : 
    1119             :     /* store the actual number of items (ignoring the too-wide ones) */
    1120         756 :     *nitems = nrows;
    1121             : 
    1122             :     /* all items were too wide */
    1123         756 :     if (nrows == 0)
    1124             :     {
    1125             :         /* everything is allocated as a single chunk */
    1126           0 :         pfree(items);
    1127           0 :         return NULL;
    1128             :     }
    1129             : 
    1130             :     /* do the sort, using the multi-sort */
    1131         756 :     qsort_interruptible(items, nrows, sizeof(SortItem),
    1132             :                         multi_sort_compare, mss);
    1133             : 
    1134         756 :     return items;
    1135             : }
    1136             : 
    1137             : /*
    1138             :  * has_stats_of_kind
    1139             :  *      Check whether the list contains a statistics object of the given kind
    1140             :  */
    1141             : bool
    1142        3684 : has_stats_of_kind(List *stats, char requiredkind)
    1143             : {
    1144             :     ListCell   *l;
    1145             : 
    1146        6168 :     foreach(l, stats)
    1147             :     {
    1148        4320 :         StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
    1149             : 
    1150        4320 :         if (stat->kind == requiredkind)
    1151        1836 :             return true;
    1152             :     }
    1153             : 
    1154        1848 :     return false;
    1155             : }
    1156             : 
    1157             : /*
    1158             :  * stat_find_expression
    1159             :  *      Search for an expression in a statistics object's list of expressions.
    1160             :  *
    1161             :  * Returns the zero-based position of the first entry that equal() matches,
    1162             :  * or -1 if no entry matches.
    1163             :  */
    1164             : static int
    1165         516 : stat_find_expression(StatisticExtInfo *stat, Node *expr)
    1166             : {
    1167             :     ListCell   *lc;
    1168             :     int         idx;
    1169             : 
    1170         516 :     idx = 0;
    1171         996 :     foreach(lc, stat->exprs)
    1172             :     {
    1173         972 :         Node       *stat_expr = (Node *) lfirst(lc);
    1174             : 
    1175         972 :         if (equal(stat_expr, expr))
    1176         492 :             return idx;
    1177         480 :         idx++;
    1178             :     }
    1179             : 
    1180             :     /* Expression not found */
    1181          24 :     return -1;
    1182             : }
    1183             : 
    1184             : /*
    1185             :  * stat_covers_expressions
    1186             :  *      Test whether a statistics object covers all expressions in a list.
    1187             :  *
    1188             :  * Returns true if all expressions are covered (trivially so for an empty
    1189             :  * list).  If expr_idxs is non-NULL, each index found is added to *expr_idxs.
    1190             :  */
    1191             : static bool
    1192        2388 : stat_covers_expressions(StatisticExtInfo *stat, List *exprs,
    1193             :                         Bitmapset **expr_idxs)
    1194             : {
    1195             :     ListCell   *lc;
    1196             : 
    1197        2880 :     foreach(lc, exprs)
    1198             :     {
    1199         516 :         Node       *expr = (Node *) lfirst(lc);
    1200             :         int         expr_idx;
    1201             : 
    1202         516 :         expr_idx = stat_find_expression(stat, expr);
    1203         516 :         if (expr_idx == -1)
    1204          24 :             return false;
    1205             : 
    1206         492 :         if (expr_idxs != NULL)
    1207         246 :             *expr_idxs = bms_add_member(*expr_idxs, expr_idx);
    1208             :     }
    1209             : 
    1210             :     /* If we reach here, all expressions are covered */
    1211        2364 :     return true;
    1212             : }
    1213             : 
    1214             : /*
    1215             :  * choose_best_statistics
    1216             :  *      Look for and return statistics with the specified 'requiredkind' and
    1217             :  *      'inh' flag that cover at least two of the given attnums and/or
    1218             :  *      expressions.  Return NULL if there's no match.
    1219             :  *
    1220             :  * The current selection criteria are very simple - we choose the statistics
    1221             :  * object matching the most attributes and expressions in covered (and still
    1222             :  * unestimated) clauses, breaking ties in favor of objects with fewer keys.
    1223             :  *
    1224             :  * clause_attnums and clause_exprs are parallel arrays with nclauses entries,
    1225             :  * holding attnums and expressions of individual clauses.  When both entries
    1226             :  * are NULL, the clause is either incompatible or already estimated.
    1227             :  *
    1228             :  * XXX If multiple statistics objects tie on both criteria, then which object
    1229             :  * is chosen depends on the order that they appear in the stats list. Perhaps
    1230             :  * further tiebreakers are needed.
    1231             :  */
    1232             : StatisticExtInfo *
    1233         996 : choose_best_statistics(List *stats, char requiredkind, bool inh,
    1234             :                        Bitmapset **clause_attnums, List **clause_exprs,
    1235             :                        int nclauses)
    1236             : {
    1237             :     ListCell   *lc;
    1238         996 :     StatisticExtInfo *best_match = NULL;
    1239         996 :     int         best_num_matched = 2;   /* goal #1: maximize */
    1240         996 :     int         best_match_keys = (STATS_MAX_DIMENSIONS + 1);   /* goal #2: minimize */
    1241             : 
    1242        2586 :     foreach(lc, stats)
    1243             :     {
    1244             :         int         i;
    1245        1590 :         StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
    1246        1590 :         Bitmapset  *matched_attnums = NULL;
    1247        1590 :         Bitmapset  *matched_exprs = NULL;
    1248             :         int         num_matched;
    1249             :         int         numkeys;
    1250             : 
    1251             :         /* skip statistics that are not of the correct type */
    1252        1590 :         if (info->kind != requiredkind)
    1253         468 :             continue;
    1254             : 
    1255             :         /* skip statistics with mismatching inheritance flag */
    1256        1122 :         if (info->inherit != inh)
    1257          24 :             continue;
    1258             : 
    1259             :         /*
    1260             :          * Collect attributes and expressions in remaining (unestimated)
    1261             :          * clauses fully covered by this statistic object.
    1262             :          *
    1263             :          * We know already estimated clauses have both clause_attnums and
    1264             :          * clause_exprs set to NULL. We leave the pointers NULL if already
    1265             :          * estimated, or we reset them to NULL after estimating the clause.
    1266             :          */
    1267        3942 :         for (i = 0; i < nclauses; i++)
    1268             :         {
    1269        2844 :             Bitmapset  *expr_idxs = NULL;
    1270             : 
    1271             :             /* ignore incompatible/estimated clauses */
    1272        2844 :             if (!clause_attnums[i] && !clause_exprs[i])
    1273        1632 :                 continue;
    1274             : 
    1275             :             /* ignore clauses that are not covered by this object */
    1276        1422 :             if (!bms_is_subset(clause_attnums[i], info->keys) ||
    1277        1230 :                 !stat_covers_expressions(info, clause_exprs[i], &expr_idxs))
    1278         210 :                 continue;
    1279             : 
    1280             :             /* record attnums and indexes of expressions covered */
    1281        1212 :             matched_attnums = bms_add_members(matched_attnums, clause_attnums[i]);
    1282        1212 :             matched_exprs = bms_add_members(matched_exprs, expr_idxs);
    1283             :         }
    1284             : 
    1285        1098 :         num_matched = bms_num_members(matched_attnums) + bms_num_members(matched_exprs);
    1286             : 
    1287        1098 :         bms_free(matched_attnums);
    1288        1098 :         bms_free(matched_exprs);
    1289             : 
    1290             :         /*
    1291             :          * save the actual number of keys in the stats so that we can choose
    1292             :          * the narrowest stats with the most matching keys.
    1293             :          */
    1294        1098 :         numkeys = bms_num_members(info->keys) + list_length(info->exprs);
    1295             : 
    1296             :         /*
    1297             :          * Use this object when it increases the number of matched attributes
    1298             :          * and expressions or when it matches the same number of attributes
    1299             :          * and expressions but these stats have fewer keys than any previous
    1300             :          * match.
    1301             :          */
    1302        1098 :         if (num_matched > best_num_matched ||
    1303         282 :             (num_matched == best_num_matched && numkeys < best_match_keys))
    1304             :         {
    1305         480 :             best_match = info;
    1306         480 :             best_num_matched = num_matched;
    1307         480 :             best_match_keys = numkeys;
    1308             :         }
    1309             :     }
    1310             : 
    1311         996 :     return best_match;
    1312             : }
    1313             : 
    1314             : /*
    1315             :  * statext_is_compatible_clause_internal
    1316             :  *      Determines if the clause is compatible with MCV lists.
    1317             :  *
    1318             :  * To be compatible, the given clause must be a combination of supported
    1319             :  * clauses built from Vars or sub-expressions (where a sub-expression is
    1320             :  * something that exactly matches an expression found in statistics objects).
    1321             :  * This function recursively examines the clause and extracts any
    1322             :  * sub-expressions that will need to be matched against statistics.
    1323             :  *
    1324             :  * Currently, we only support the following types of clauses:
    1325             :  *
    1326             :  * (a) OpExprs of the form (Var/Expr op Const), or (Const op Var/Expr), where
    1327             :  * the op is one of ("=", "<>", "<", ">", ">=", "<=")
    1328             :  *
    1329             :  * (b) (Var/Expr IS [NOT] NULL)
    1330             :  *
    1331             :  * (c) combinations using AND/OR/NOT
    1332             :  *
    1333             :  * (d) ScalarArrayOpExprs of the form (Var/Expr op ANY (Const)) or
    1334             :  * (Var/Expr op ALL (Const))
    1335             :  *
    1336             :  * In the future, the range of supported clauses may be expanded to more
    1337             :  * complex cases, for example (Var op Var).
    1338             :  *
    1339             :  * Arguments:
    1340             :  * clause: (sub)clause to be inspected (bare clause, not a RestrictInfo)
    1341             :  * relid: rel that all Vars in clause must belong to
    1342             :  * *attnums: input/output parameter collecting attribute numbers of all
    1343             :  *      mentioned Vars.  Note that we do not offset the attribute numbers,
    1344             :  *      so we can't cope with system columns.
    1345             :  * *exprs: input/output parameter collecting primitive subclauses within
    1346             :  *      the clause tree
    1347             :  *
    1348             :  * Returns false if there is something we definitively can't handle.
    1349             :  * On true return, we can proceed to match the *exprs against statistics.
    1350             :  */
    1351             : static bool
    1352        2400 : statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
    1353             :                                       Index relid, Bitmapset **attnums,
    1354             :                                       List **exprs)
    1355             : {
    1356             :     /* Look inside any binary-compatible relabeling (as in examine_variable) */
    1357        2400 :     if (IsA(clause, RelabelType))
    1358           0 :         clause = (Node *) ((RelabelType *) clause)->arg;
    1359             : 
    1360             :     /* plain Var references (boolean Vars or recursive checks) */
    1361        2400 :     if (IsA(clause, Var))
    1362             :     {
    1363        1068 :         Var        *var = (Var *) clause;
    1364             : 
    1365             :         /* Ensure var is from the correct relation */
    1366        1068 :         if (var->varno != relid)
    1367           0 :             return false;
    1368             : 
    1369             :         /* we also better ensure the Var is from the current level */
    1370        1068 :         if (var->varlevelsup > 0)
    1371           0 :             return false;
    1372             : 
    1373             :         /*
    1374             :          * Also reject system attributes and whole-row Vars (we don't allow
    1375             :          * stats on those).
    1376             :          */
    1377        1068 :         if (!AttrNumberIsForUserDefinedAttr(var->varattno))
    1378           0 :             return false;
    1379             : 
    1380             :         /* OK, record the attnum for later permissions checks. */
    1381        1068 :         *attnums = bms_add_member(*attnums, var->varattno);
    1382             : 
    1383        1068 :         return true;
    1384             :     }
    1385             : 
    1386             :     /* (Var/Expr op Const) or (Const op Var/Expr) */
    1387        1332 :     if (is_opclause(clause))
    1388             :     {
    1389         972 :         RangeTblEntry *rte = root->simple_rte_array[relid];
    1390         972 :         OpExpr     *expr = (OpExpr *) clause;
    1391             :         Node       *clause_expr;
    1392             : 
    1393             :         /* Only expressions with two arguments are considered compatible. */
    1394         972 :         if (list_length(expr->args) != 2)
    1395           0 :             return false;
    1396             : 
    1397             :         /* Check if the expression has the right shape */
    1398         972 :         if (!examine_opclause_args(expr->args, &clause_expr, NULL, NULL))
    1399           0 :             return false;
    1400             : 
    1401             :         /*
    1402             :          * If it's not one of the supported operators ("=", "<", ">", etc.),
    1403             :          * just ignore the clause, as it's not compatible with MCV lists.
    1404             :          *
    1405             :          * This uses the function for estimating selectivity, not the operator
    1406             :          * directly (a bit awkward, but well ...).
    1407             :          */
    1408         972 :         switch (get_oprrest(expr->opno))
    1409             :         {
    1410         972 :             case F_EQSEL:
    1411             :             case F_NEQSEL:
    1412             :             case F_SCALARLTSEL:
    1413             :             case F_SCALARLESEL:
    1414             :             case F_SCALARGTSEL:
    1415             :             case F_SCALARGESEL:
    1416             :                 /* supported, will continue with inspection of the Var/Expr */
    1417         972 :                 break;
    1418             : 
    1419           0 :             default:
    1420             :                 /* other estimators are considered unknown/unsupported */
    1421           0 :                 return false;
    1422             :         }
    1423             : 
    1424             :         /*
    1425             :          * If there are any securityQuals on the RTE from security barrier
    1426             :          * views or RLS policies, then the user may not have access to all the
    1427             :          * table's data, and we must check that the operator is leak-proof.
    1428             :          *
    1429             :          * If the operator is leaky, then we must ignore this clause for the
    1430             :          * purposes of estimating with MCV lists, otherwise the operator might
    1431             :          * reveal values from the MCV list that the user doesn't have
    1432             :          * permission to see.
    1433             :          */
    1434         972 :         if (rte->securityQuals != NIL &&
    1435          36 :             !get_func_leakproof(get_opcode(expr->opno)))
    1436          36 :             return false;
    1437             : 
    1438             :         /* Check (Var op Const) or (Const op Var) clauses by recursing. */
    1439         936 :         if (IsA(clause_expr, Var))
    1440         744 :             return statext_is_compatible_clause_internal(root, clause_expr,
    1441             :                                                          relid, attnums, exprs);
    1442             : 
    1443             :         /* Otherwise we have (Expr op Const) or (Const op Expr). */
    1444         192 :         *exprs = lappend(*exprs, clause_expr);
    1445         192 :         return true;
    1446             :     }
    1447             : 
    1448             :     /* Var/Expr op ANY/ALL (Array) */
    1449         360 :     if (IsA(clause, ScalarArrayOpExpr))
    1450             :     {
    1451         216 :         RangeTblEntry *rte = root->simple_rte_array[relid];
    1452         216 :         ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
    1453             :         Node       *clause_expr;
    1454             :         bool        expronleft;
    1455             : 
    1456             :         /* Only expressions with two arguments are considered compatible. */
    1457         216 :         if (list_length(expr->args) != 2)
    1458           0 :             return false;
    1459             : 
    1460             :         /* Check if the expression has the right shape (one Var/Expr, one Const) */
    1461         216 :         if (!examine_opclause_args(expr->args, &clause_expr, NULL, &expronleft))
    1462           0 :             return false;
    1463             : 
    1464             :         /* We only support Var/Expr on left, Const on right */
    1465         216 :         if (!expronleft)
    1466           6 :             return false;
    1467             : 
    1468             :         /*
    1469             :          * If it's not one of the supported operators ("=", "<", ">", etc.),
    1470             :          * just ignore the clause, as it's not compatible with MCV lists.
    1471             :          *
    1472             :          * This uses the function for estimating selectivity, not the operator
    1473             :          * directly (a bit awkward, but well ...).
    1474             :          */
    1475         210 :         switch (get_oprrest(expr->opno))
    1476             :         {
    1477         210 :             case F_EQSEL:
    1478             :             case F_NEQSEL:
    1479             :             case F_SCALARLTSEL:
    1480             :             case F_SCALARLESEL:
    1481             :             case F_SCALARGTSEL:
    1482             :             case F_SCALARGESEL:
    1483             :                 /* supported, will continue with inspection of the Var/Expr */
    1484         210 :                 break;
    1485             : 
    1486           0 :             default:
    1487             :                 /* other estimators are considered unknown/unsupported */
    1488           0 :                 return false;
    1489             :         }
    1490             : 
    1491             :         /*
    1492             :          * If there are any securityQuals on the RTE from security barrier
    1493             :          * views or RLS policies, then the user may not have access to all the
    1494             :          * table's data, and we must check that the operator is leak-proof.
    1495             :          *
    1496             :          * If the operator is leaky, then we must ignore this clause for the
    1497             :          * purposes of estimating with MCV lists, otherwise the operator might
    1498             :          * reveal values from the MCV list that the user doesn't have
    1499             :          * permission to see.
    1500             :          */
    1501         210 :         if (rte->securityQuals != NIL &&
    1502           0 :             !get_func_leakproof(get_opcode(expr->opno)))
    1503           0 :             return false;
    1504             : 
    1505             :         /* Check Var IN Array clauses by recursing. */
    1506         210 :         if (IsA(clause_expr, Var))
    1507         156 :             return statext_is_compatible_clause_internal(root, clause_expr,
    1508             :                                                          relid, attnums, exprs);
    1509             : 
    1510             :         /* Otherwise we have Expr IN Array. */
    1511          54 :         *exprs = lappend(*exprs, clause_expr);
    1512          54 :         return true;
    1513             :     }
    1514             : 
    1515             :     /* AND/OR/NOT clause */
    1516         288 :     if (is_andclause(clause) ||
    1517         276 :         is_orclause(clause) ||
    1518         132 :         is_notclause(clause))
    1519             :     {
    1520             :         /*
    1521             :          * AND/OR/NOT-clauses are supported if all sub-clauses are supported
    1522             :          *
    1523             :          * Perhaps we could improve this by handling mixed cases, when some of
    1524             :          * the clauses are supported and some are not. Selectivity for the
    1525             :          * supported subclauses would be computed using extended statistics,
    1526             :          * and the remaining clauses would be estimated using the traditional
    1527             :          * algorithm (product of selectivities).
    1528             :          *
    1529             :          * It however seems overly complex, and in a way we already do that
    1530             :          * because if we reject the whole clause as unsupported here, it will
    1531             :          * be eventually passed to clauselist_selectivity() which does exactly
    1532             :          * this (split into supported/unsupported clauses etc).
    1533             :          */
    1534          42 :         BoolExpr   *expr = (BoolExpr *) clause;
    1535             :         ListCell   *lc;
    1536             : 
    1537          96 :         foreach(lc, expr->args)
    1538             :         {
    1539             :             /*
    1540             :              * If we find an incompatible clause in the arguments, treat the
    1541             :              * whole clause as incompatible.
    1542             :              */
    1543          54 :             if (!statext_is_compatible_clause_internal(root,
    1544          54 :                                                        (Node *) lfirst(lc),
    1545             :                                                        relid, attnums, exprs))
    1546           0 :                 return false;
    1547             :         }
    1548             : 
    1549          42 :         return true;
    1550             :     }
    1551             : 
    1552             :     /* Var/Expr IS [NOT] NULL */
    1553         102 :     if (IsA(clause, NullTest))
    1554             :     {
    1555          96 :         NullTest   *nt = (NullTest *) clause;
    1556             : 
    1557             :         /* Check Var IS [NOT] NULL clauses by recursing. */
    1558          96 :         if (IsA(nt->arg, Var))
    1559          90 :             return statext_is_compatible_clause_internal(root, (Node *) (nt->arg),
    1560             :                                                          relid, attnums, exprs);
    1561             : 
    1562             :         /* Otherwise we have Expr IS [NOT] NULL. */
    1563           6 :         *exprs = lappend(*exprs, nt->arg);
    1564           6 :         return true;
    1565             :     }
    1566             : 
    1567             :     /*
    1568             :      * Treat any other expressions as bare expressions to be matched against
    1569             :      * expressions in statistics objects.
    1570             :      */
    1571           6 :     *exprs = lappend(*exprs, clause);
    1572           6 :     return true;
    1573             : }
    1574             : 
    1575             : /*
    1576             :  * statext_is_compatible_clause
    1577             :  *      Determines if the clause is compatible with MCV lists.
    1578             :  *
    1579             :  * See statext_is_compatible_clause_internal, above, for the basic rules.
    1580             :  * This layer deals with RestrictInfo superstructure and applies permissions
    1581             :  * checks to verify that it's okay to examine all mentioned Vars.
    1582             :  *
    1583             :  * Arguments:
    1584             :  * clause: clause to be inspected (in RestrictInfo form)
    1585             :  * relid: rel that all Vars in clause must belong to
    1586             :  * *attnums: input/output parameter collecting attribute numbers of all
    1587             :  *      mentioned Vars.  Note that we do not offset the attribute numbers,
    1588             :  *      so we can't cope with system columns.
    1589             :  * *exprs: input/output parameter collecting primitive subclauses within
    1590             :  *      the clause tree
    1591             :  *
    1592             :  * Returns false if there is something we definitively can't handle.
    1593             :  * On true return, we can proceed to match the *exprs against statistics.
    1594             :  */
    1595             : static bool
    1596        1434 : statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
    1597             :                              Bitmapset **attnums, List **exprs)
    1598             : {
    1599        1434 :     RangeTblEntry *rte = root->simple_rte_array[relid];
    1600        1434 :     RelOptInfo *rel = root->simple_rel_array[relid];
    1601             :     RestrictInfo *rinfo;
    1602             :     int         clause_relid;
    1603             :     Oid         userid;
    1604             : 
    1605             :     /*
    1606             :      * Special-case handling for bare BoolExpr AND clauses, because the
    1607             :      * restrictinfo machinery doesn't build RestrictInfos on top of AND
    1608             :      * clauses.
    1609             :      */
    1610        1434 :     if (is_andclause(clause))
    1611             :     {
    1612          48 :         BoolExpr   *expr = (BoolExpr *) clause;
    1613             :         ListCell   *lc;
    1614             : 
    1615             :         /*
    1616             :          * Check that each sub-clause is compatible.  We expect these to be
    1617             :          * RestrictInfos.
    1618             :          */
    1619         162 :         foreach(lc, expr->args)
    1620             :         {
    1621         114 :             if (!statext_is_compatible_clause(root, (Node *) lfirst(lc),
    1622             :                                               relid, attnums, exprs))
    1623           0 :                 return false;
    1624             :         }
    1625             : 
    1626          48 :         return true;
    1627             :     }
    1628             : 
    1629             :     /* Otherwise it must be a RestrictInfo. */
    1630        1386 :     if (!IsA(clause, RestrictInfo))
    1631           0 :         return false;
    1632        1386 :     rinfo = (RestrictInfo *) clause;
    1633             : 
    1634             :     /* Pseudoconstants are not really interesting here. */
    1635        1386 :     if (rinfo->pseudoconstant)
    1636           6 :         return false;
    1637             : 
    1638             :     /* The clause must reference exactly one rel, and it must be relid. */
    1639        1380 :     if (!bms_get_singleton_member(rinfo->clause_relids, &clause_relid) ||
    1640        1356 :         clause_relid != relid)
    1641          24 :         return false;
    1642             : 
    1643             :     /* Check the clause and determine what attributes it references. */
    1644        1356 :     if (!statext_is_compatible_clause_internal(root, (Node *) rinfo->clause,
    1645             :                                                relid, attnums, exprs))
    1646          42 :         return false;
    1647             : 
    1648             :     /*
    1649             :      * Check SELECT permission as rel->userid if set, else the current user.
    1650             :      */
    1651        1314 :     userid = OidIsValid(rel->userid) ? rel->userid : GetUserId();
    1652             : 
    1653             :     /* Table-level SELECT privilege is sufficient for all columns */
    1654        1314 :     if (pg_class_aclcheck(rte->relid, userid, ACL_SELECT) != ACLCHECK_OK)
    1655             :     {
    1656          36 :         Bitmapset  *clause_attnums = NULL;
    1657          36 :         int         attnum = -1;
    1658             : 
    1659             :         /*
    1660             :          * We have to check per-column privileges.  *attnums has the attnums
    1661             :          * for individual Vars we saw, but there may also be Vars within
    1662             :          * subexpressions in *exprs.  We can use pull_varattnos() to extract
    1663             :          * those, but there's an impedance mismatch: attnums returned by
    1664             :          * pull_varattnos() are offset by FirstLowInvalidHeapAttributeNumber,
    1665             :          * while attnums within *attnums aren't.  Convert *attnums to the
    1666             :          * offset style so we can combine the results.
    1667             :          */
    1668          66 :         while ((attnum = bms_next_member(*attnums, attnum)) >= 0)
    1669             :         {
    1670          30 :             clause_attnums =
    1671          30 :                 bms_add_member(clause_attnums,
    1672             :                                attnum - FirstLowInvalidHeapAttributeNumber);
    1673             :         }
    1674             : 
    1675             :         /* Now merge attnums from *exprs into clause_attnums */
    1676          36 :         if (*exprs != NIL)
    1677           6 :             pull_varattnos((Node *) *exprs, relid, &clause_attnums);
    1678             : 
    1679          36 :         attnum = -1;
    1680          36 :         while ((attnum = bms_next_member(clause_attnums, attnum)) >= 0)
    1681             :         {
    1682             :             /* Undo the offset */
    1683          36 :             AttrNumber  attno = attnum + FirstLowInvalidHeapAttributeNumber;
    1684             : 
    1685          36 :             if (attno == InvalidAttrNumber)
    1686             :             {
    1687             :                 /* Whole-row reference, so must have access to all columns */
    1688           6 :                 if (pg_attribute_aclcheck_all(rte->relid, userid, ACL_SELECT,
    1689             :                                               ACLMASK_ALL) != ACLCHECK_OK)
    1690          36 :                     return false;
    1691             :             }
    1692             :             else
    1693             :             {
    1694          30 :                 if (pg_attribute_aclcheck(rte->relid, attno, userid,
    1695             :                                           ACL_SELECT) != ACLCHECK_OK)
    1696          30 :                     return false;
    1697             :             }
    1698             :         }
    1699             :     }
    1700             : 
    1701             :     /* If we reach here, the clause is OK */
    1702        1278 :     return true;
    1703             : }
    1704             : 
    1705             : /*
    1706             :  * statext_mcv_clauselist_selectivity
    1707             :  *      Estimate clauses using the best multi-column statistics.
    1708             :  *
    1709             :  * Applies available extended (multi-column) statistics on a table. There may
    1710             :  * be multiple applicable statistics (with respect to the clauses), in which
    1711             :  * case we use a greedy approach. In each round we select the best statistic
    1712             :  * on a table (measured by the number of attributes extracted from the clauses
    1713             :  * and covered by it), and compute the selectivity for the supplied clauses.
    1714             :  * We repeat this process with the remaining clauses (if any), until none of
    1715             :  * the available statistics can be used.
    1716             :  *
    1717             :  * One of the main challenges with using MCV lists is how to extrapolate the
    1718             :  * estimate to the data not covered by the MCV list. To do that, we compute
    1719             :  * not only the "MCV selectivity" (selectivities for MCV items matching the
    1720             :  * supplied clauses), but also the following related selectivities:
    1721             :  *
    1722             :  * - simple selectivity:  Computed without extended statistics, i.e. as if the
    1723             :  * columns/clauses were independent.
    1724             :  *
    1725             :  * - base selectivity:  Similar to simple selectivity, but is computed using
    1726             :  * the extended statistic by adding up the base frequencies (that we compute
    1727             :  * and store for each MCV item) of matching MCV items.
    1728             :  *
    1729             :  * - total selectivity: Selectivity covered by the whole MCV list.
    1730             :  *
    1731             :  * These are passed to mcv_combine_selectivities() which combines them to
    1732             :  * produce a selectivity estimate that makes use of both per-column statistics
    1733             :  * and the multi-column MCV statistics.
    1734             :  *
    1735             :  * 'estimatedclauses' is an input/output parameter.  We set bits for the
    1736             :  * 0-based 'clauses' indexes we estimate for and also skip clause items that
    1737             :  * already have a bit set.
                     :  *
                     :  * 'is_or' selects how results are combined: for an OR list the result starts
                     :  * at 0.0 and each statistics object's estimate s is folded in as
                     :  * sel + s - sel * s; for an implicitly-ANDed list it starts at 1.0 and the
                     :  * estimates are multiplied.  If 'rel' has no MCV statistics at all, that
                     :  * starting value is returned and 'estimatedclauses' is left untouched.
    1738             :  */
    1739             : static Selectivity
    1740        1896 : statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
    1741             :                                    JoinType jointype, SpecialJoinInfo *sjinfo,
    1742             :                                    RelOptInfo *rel, Bitmapset **estimatedclauses,
    1743             :                                    bool is_or)
    1744             : {
    1745             :     ListCell   *l;
    1746             :     Bitmapset **list_attnums;   /* attnums extracted from the clause */
    1747             :     List      **list_exprs;     /* expressions matched to any statistic */
    1748             :     int         listidx;
    1749        1896 :     Selectivity sel = (is_or) ? 0.0 : 1.0;
    1750        1896 :     RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
    1751             : 
    1752             :     /* check if there are any stats that might be useful for us. */
    1753        1896 :     if (!has_stats_of_kind(rel->statlist, STATS_EXT_MCV))
    1754        1380 :         return sel;
    1755             : 
    1756         516 :     list_attnums = (Bitmapset **) palloc(sizeof(Bitmapset *) *
    1757         516 :                                          list_length(clauses));
    1758             : 
    1759             :     /* expressions extracted from complex expressions (one List per clause) */
    1760         516 :     list_exprs = (List **) palloc(sizeof(List *) * list_length(clauses));
    1761             : 
    1762             :     /*
    1763             :      * Pre-process the clauses list to extract the attnums and expressions
    1764             :      * seen in each item.  We need to determine if there are any clauses which
    1765             :      * will be useful for selectivity estimations with extended stats.  Along
    1766             :      * the way we'll record all of the attnums and expressions for each clause
    1767             :      * in lists which we'll reference later so we don't need to repeat the
    1768             :      * same work again.
    1769             :      *
    1770             :      * We also skip clauses that we already estimated using different types of
    1771             :      * statistics (we treat them as incompatible).
    1772             :      */
    1773         516 :     listidx = 0;
    1774        1836 :     foreach(l, clauses)
    1775             :     {
    1776        1320 :         Node       *clause = (Node *) lfirst(l);
    1777        1320 :         Bitmapset  *attnums = NULL;
    1778        1320 :         List       *exprs = NIL;
    1779             : 
    1780        2640 :         if (!bms_is_member(listidx, *estimatedclauses) &&
    1781        1320 :             statext_is_compatible_clause(root, clause, rel->relid, &attnums, &exprs))
    1782             :         {
    1783        1212 :             list_attnums[listidx] = attnums;
    1784        1212 :             list_exprs[listidx] = exprs;
    1785             :         }
    1786             :         else
    1787             :         {
    1788         108 :             list_attnums[listidx] = NULL;
    1789         108 :             list_exprs[listidx] = NIL;
    1790             :         }
    1791             : 
    1792        1320 :         listidx++;
    1793             :     }
    1794             : 
    1795             :     /* apply as many extended statistics as possible */
    1796             :     while (true)
    1797         480 :     {
    1798             :         StatisticExtInfo *stat;
    1799             :         List       *stat_clauses;
    1800             :         Bitmapset  *simple_clauses;
    1801             : 
    1802             :         /* find the best suited statistics object for these attnums */
    1803         996 :         stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV, rte->inh,
    1804             :                                       list_attnums, list_exprs,
    1805             :                                       list_length(clauses));
    1806             : 
    1807             :         /*
    1808             :          * if no (additional) matching stats could be found then we've nothing
    1809             :          * to do
    1810             :          */
    1811         996 :         if (!stat)
    1812         516 :             break;
    1813             : 
    1814             :         /* Ensure choose_best_statistics produced an expected stats type. */
    1815             :         Assert(stat->kind == STATS_EXT_MCV);
    1816             : 
    1817             :         /* now filter the clauses to be estimated using the selected MCV */
    1818         480 :         stat_clauses = NIL;
    1819             : 
    1820             :         /* record which clauses are simple (single column or expression) */
    1821         480 :         simple_clauses = NULL;
    1822             : 
    1823         480 :         listidx = -1;
    1824        1728 :         foreach(l, clauses)
    1825             :         {
    1826             :             /* Increment the index before we decide whether to skip the clause. */
    1827        1248 :             listidx++;
    1828             : 
    1829             :             /*
    1830             :              * Ignore clauses from which we did not extract any attnums or
    1831             :              * expressions (this needs to be consistent with what we do in
    1832             :              * choose_best_statistics).
    1833             :              *
    1834             :              * This also eliminates already estimated clauses - both those
    1835             :              * estimated before and during applying extended statistics.
    1836             :              *
    1837             :              * XXX This check is needed because both bms_is_subset and
    1838             :              * stat_covers_expressions return true for empty attnums and
    1839             :              * expressions.
    1840             :              */
    1841        1248 :             if (!list_attnums[listidx] && !list_exprs[listidx])
    1842          36 :                 continue;
    1843             : 
    1844             :             /*
    1845             :              * The clause was not estimated yet, and we've extracted either
    1846             :              * attnums or expressions from it. Ignore it if it's not fully
    1847             :              * covered by the chosen statistics object.
    1848             :              *
    1849             :              * We need to check both attributes and expressions, and reject if
    1850             :              * either is not covered.
    1851             :              */
    1852        1212 :             if (!bms_is_subset(list_attnums[listidx], stat->keys) ||
    1853        1158 :                 !stat_covers_expressions(stat, list_exprs[listidx], NULL))
    1854          60 :                 continue;
    1855             : 
    1856             :             /*
    1857             :              * Now we know the clause is compatible (we have either attnums or
    1858             :              * expressions extracted from it), and was not estimated yet.
    1859             :              */
    1860             : 
    1861             :             /* record simple clauses (single column or expression) */
    1862        1398 :             if ((list_attnums[listidx] == NULL &&
    1863         246 :                  list_length(list_exprs[listidx]) == 1) ||
    1864        1812 :                 (list_exprs[listidx] == NIL &&
    1865         906 :                  bms_membership(list_attnums[listidx]) == BMS_SINGLETON))
    1866        1092 :                 simple_clauses = bms_add_member(simple_clauses,
    1867             :                                                 list_length(stat_clauses));
    1868             : 
    1869             :             /* add clause to list and mark it as estimated */
    1870        1152 :             stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
    1871        1152 :             *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
    1872             : 
    1873             :             /*
    1874             :              * Reset the pointers, so that choose_best_statistics knows this
    1875             :              * clause was estimated and does not consider it again.
    1876             :              */
    1877        1152 :             bms_free(list_attnums[listidx]);
    1878        1152 :             list_attnums[listidx] = NULL;
    1879             : 
    1880        1152 :             list_free(list_exprs[listidx]);
    1881        1152 :             list_exprs[listidx] = NIL;
    1882             :         }
    1883             : 
    1884         480 :         if (is_or)
    1885             :         {
    1886          96 :             bool       *or_matches = NULL;
    1887          96 :             Selectivity simple_or_sel = 0.0,
    1888          96 :                         stat_sel = 0.0;
    1889             :             MCVList    *mcv_list;
    1890             : 
    1891             :             /* Load the MCV list stored in the statistics object */
    1892          96 :             mcv_list = statext_mcv_load(stat->statOid, rte->inh);
    1893             : 
    1894             :             /*
    1895             :              * Compute the selectivity of the ORed list of clauses covered by
    1896             :              * this statistics object by estimating each in turn and combining
    1897             :              * them using the formula P(A OR B) = P(A) + P(B) - P(A AND B).
    1898             :              * This allows us to use the multivariate MCV stats to better
    1899             :              * estimate the individual terms and their overlap.
    1900             :              *
    1901             :              * Each time we iterate this formula, the clause "A" above is
    1902             :              * equal to all the clauses processed so far, combined with "OR".
    1903             :              */
    1904          96 :             listidx = 0;
    1905         336 :             foreach(l, stat_clauses)
    1906             :             {
    1907         240 :                 Node       *clause = (Node *) lfirst(l);
    1908             :                 Selectivity simple_sel,
    1909             :                             overlap_simple_sel,
    1910             :                             mcv_sel,
    1911             :                             mcv_basesel,
    1912             :                             overlap_mcvsel,
    1913             :                             overlap_basesel,
    1914             :                             mcv_totalsel,
    1915             :                             clause_sel,
    1916             :                             overlap_sel;
    1917             : 
    1918             :                 /*
    1919             :                  * "Simple" selectivity of the next clause and its overlap
    1920             :                  * with any of the previous clauses.  These are our initial
    1921             :                  * estimates of P(B) and P(A AND B), assuming independence of
    1922             :                  * columns/clauses.
    1923             :                  */
    1924         240 :                 simple_sel = clause_selectivity_ext(root, clause, varRelid,
    1925             :                                                     jointype, sjinfo, false);
    1926             : 
    1927         240 :                 overlap_simple_sel = simple_or_sel * simple_sel;
    1928             : 
    1929             :                 /*
    1930             :                  * New "simple" selectivity of all clauses seen so far,
    1931             :                  * assuming independence.
    1932             :                  */
    1933         240 :                 simple_or_sel += simple_sel - overlap_simple_sel;
    1934         240 :                 CLAMP_PROBABILITY(simple_or_sel);
    1935             : 
    1936             :                 /*
    1937             :                  * Multi-column estimate of this clause using MCV statistics,
    1938             :                  * along with base and total selectivities, and corresponding
    1939             :                  * selectivities for the overlap term P(A AND B).
    1940             :                  */
    1941         240 :                 mcv_sel = mcv_clause_selectivity_or(root, stat, mcv_list,
    1942             :                                                     clause, &or_matches,
    1943             :                                                     &mcv_basesel,
    1944             :                                                     &overlap_mcvsel,
    1945             :                                                     &overlap_basesel,
    1946             :                                                     &mcv_totalsel);
    1947             : 
    1948             :                 /*
    1949             :                  * Combine the simple and multi-column estimates.
    1950             :                  *
    1951             :                  * If this clause is a simple single-column clause, then we
    1952             :                  * just use the simple selectivity estimate for it, since the
    1953             :                  * multi-column statistics are unlikely to improve on that
    1954             :                  * (and in fact could make it worse).  For the overlap, we
    1955             :                  * always make use of the multi-column statistics.
    1956             :                  */
    1957         240 :                 if (bms_is_member(listidx, simple_clauses))
    1958         192 :                     clause_sel = simple_sel;
    1959             :                 else
    1960          48 :                     clause_sel = mcv_combine_selectivities(simple_sel,
    1961             :                                                            mcv_sel,
    1962             :                                                            mcv_basesel,
    1963             :                                                            mcv_totalsel);
    1964             : 
    1965         240 :                 overlap_sel = mcv_combine_selectivities(overlap_simple_sel,
    1966             :                                                         overlap_mcvsel,
    1967             :                                                         overlap_basesel,
    1968             :                                                         mcv_totalsel);
    1969             : 
    1970             :                 /* Factor these into the result for this statistics object */
    1971         240 :                 stat_sel += clause_sel - overlap_sel;
    1972         240 :                 CLAMP_PROBABILITY(stat_sel);
    1973             : 
    1974         240 :                 listidx++;
    1975             :             }
    1976             : 
    1977             :             /*
    1978             :              * Factor the result for this statistics object into the overall
    1979             :              * result.  We treat the results from each separate statistics
    1980             :              * object as independent of one another.
    1981             :              */
    1982          96 :             sel = sel + stat_sel - sel * stat_sel;
    1983             :         }
    1984             :         else                    /* Implicitly-ANDed list of clauses */
    1985             :         {
    1986             :             Selectivity simple_sel,
    1987             :                         mcv_sel,
    1988             :                         mcv_basesel,
    1989             :                         mcv_totalsel,
    1990             :                         stat_sel;
    1991             : 
    1992             :             /*
    1993             :              * "Simple" selectivity, i.e. without any extended statistics,
    1994             :              * essentially assuming independence of the columns/clauses.
    1995             :              */
    1996         384 :             simple_sel = clauselist_selectivity_ext(root, stat_clauses,
    1997             :                                                     varRelid, jointype,
    1998             :                                                     sjinfo, false);
    1999             : 
    2000             :             /*
    2001             :              * Multi-column estimate using MCV statistics, along with base and
    2002             :              * total selectivities.
    2003             :              */
    2004         384 :             mcv_sel = mcv_clauselist_selectivity(root, stat, stat_clauses,
    2005             :                                                  varRelid, jointype, sjinfo,
    2006             :                                                  rel, &mcv_basesel,
    2007             :                                                  &mcv_totalsel);
    2008             : 
    2009             :             /* Combine the simple and multi-column estimates. */
    2010         384 :             stat_sel = mcv_combine_selectivities(simple_sel,
    2011             :                                                  mcv_sel,
    2012             :                                                  mcv_basesel,
    2013             :                                                  mcv_totalsel);
    2014             : 
    2015             :             /* Factor this into the overall result */
    2016         384 :             sel *= stat_sel;
    2017             :         }
    2018             :     }
    2019             : 
    2020         516 :     return sel;
    2021             : }
    2022             : 
    2023             : /*
    2024             :  * statext_clauselist_selectivity
    2025             :  *      Estimate clauses using the best multi-column statistics.
                     :  *
                     :  * Multivariate MCV lists are tried first.  For an OR list ('is_or' true) that
                     :  * result is returned as is; otherwise it is multiplied by the estimate that
                     :  * functional dependencies give for the remaining clauses.
                     :  *
                     :  * 'estimatedclauses' is handed to both steps, so that clauses already
                     :  * estimated by the MCV step are skipped by the dependencies step.
    2026             :  */
    2027             : Selectivity
    2028        1896 : statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
    2029             :                                JoinType jointype, SpecialJoinInfo *sjinfo,
    2030             :                                RelOptInfo *rel, Bitmapset **estimatedclauses,
    2031             :                                bool is_or)
    2032             : {
    2033             :     Selectivity sel;
    2034             : 
    2035             :     /* First, try estimating clauses using a multivariate MCV list. */
    2036        1896 :     sel = statext_mcv_clauselist_selectivity(root, clauses, varRelid, jointype,
    2037             :                                              sjinfo, rel, estimatedclauses, is_or);
    2038             : 
    2039             :     /*
    2040             :      * Functional dependencies only work for clauses connected by AND, so for
    2041             :      * OR clauses we're done.
    2042             :      */
    2043        1896 :     if (is_or)
    2044         108 :         return sel;
    2045             : 
    2046             :     /*
    2047             :      * Then, apply functional dependencies on the remaining clauses by calling
    2048             :      * dependencies_clauselist_selectivity.  Pass 'estimatedclauses' so the
    2049             :      * function can properly skip clauses already estimated above.
    2050             :      *
    2051             :      * The reasoning for applying dependencies last is that the more complex
    2052             :      * stats can track more complex correlations between the attributes, and
    2053             :      * so may be considered more reliable.
    2054             :      *
    2055             :      * For example, an MCV list can give us an exact selectivity for values in
    2056             :      * two columns, while functional dependencies can only provide information
    2057             :      * about the overall strength of the dependency.
    2058             :      */
    2059        1788 :     sel *= dependencies_clauselist_selectivity(root, clauses, varRelid,
    2060             :                                                jointype, sjinfo, rel,
    2061             :                                                estimatedclauses);
    2062             : 
    2063        1788 :     return sel;
    2064             : }
    2065             : 
    2066             : /*
    2067             :  * examine_opclause_args
    2068             :  *      Split an operator expression's arguments into Expr and Const parts.
    2069             :  *
    2070             :  * Attempts to match the arguments to either (Expr op Const) or (Const op
    2071             :  * Expr), possibly with a RelabelType on top. When the expression matches this
    2072             :  * form, returns true, otherwise returns false.
    2073             :  *
    2074             :  * Optionally returns pointers to the extracted Expr/Const nodes, when passed
    2075             :  * non-null pointers (exprp, cstp and expronleftp). The expronleftp flag
    2076             :  * specifies on which side of the operator we found the expression node.
                     :  *
                     :  * Only one RelabelType level is stripped from each argument.  The right-hand
                     :  * argument is tested for Const first, so (Const op Const) is reported as
                     :  * expression-on-left.  When false is returned, none of the output pointers
                     :  * is written.
    2077             :  */
    2078             : bool
    2079        2268 : examine_opclause_args(List *args, Node **exprp, Const **cstp,
    2080             :                       bool *expronleftp)
    2081             : {
    2082             :     Node       *expr;
    2083             :     Const      *cst;
    2084             :     bool        expronleft;
    2085             :     Node       *leftop,
    2086             :                *rightop;
    2087             : 
    2088             :     /* enforced by statext_is_compatible_clause_internal */
    2089             :     Assert(list_length(args) == 2);
    2090             : 
    2091        2268 :     leftop = linitial(args);
    2092        2268 :     rightop = lsecond(args);
    2093             : 
    2094             :     /* strip a single RelabelType from either side of the expression */
    2095        2268 :     if (IsA(leftop, RelabelType))
    2096         324 :         leftop = (Node *) ((RelabelType *) leftop)->arg;
    2097             : 
    2098        2268 :     if (IsA(rightop, RelabelType))
    2099          60 :         rightop = (Node *) ((RelabelType *) rightop)->arg;
    2100             : 
    2101        2268 :     if (IsA(rightop, Const))
    2102             :     {
    2103        2106 :         expr = (Node *) leftop;
    2104        2106 :         cst = (Const *) rightop;
    2105        2106 :         expronleft = true;
    2106             :     }
    2107         162 :     else if (IsA(leftop, Const))
    2108             :     {
    2109         162 :         expr = (Node *) rightop;
    2110         162 :         cst = (Const *) leftop;
    2111         162 :         expronleft = false;
    2112             :     }
    2113             :     else
    2114           0 :         return false;
    2115             : 
    2116             :     /* return pointers to the extracted parts if requested */
    2117        2268 :     if (exprp)
    2118        2268 :         *exprp = expr;
    2119             : 
    2120        2268 :     if (cstp)
    2121        1080 :         *cstp = cst;
    2122             : 
    2123        2268 :     if (expronleftp)
    2124        1296 :         *expronleftp = expronleft;
    2125             : 
    2126        2268 :     return true;
    2127             : }
    2128             : 
    2129             : 
    2130             : /*
    2131             :  * Compute statistics about expressions of a relation.
    2132             :  */
    2133             : static void
    2134         150 : compute_expr_stats(Relation onerel, double totalrows,
    2135             :                    AnlExprData *exprdata, int nexprs,
    2136             :                    HeapTuple *rows, int numrows)
    2137             : {
    2138             :     MemoryContext expr_context,
    2139             :                 old_context;
    2140             :     int         ind,
    2141             :                 i;
    2142             : 
    2143         150 :     expr_context = AllocSetContextCreate(CurrentMemoryContext,
    2144             :                                          "Analyze Expression",
    2145             :                                          ALLOCSET_DEFAULT_SIZES);
    2146         150 :     old_context = MemoryContextSwitchTo(expr_context);
    2147             : 
    2148         438 :     for (ind = 0; ind < nexprs; ind++)
    2149             :     {
    2150         288 :         AnlExprData *thisdata = &exprdata[ind];
    2151         288 :         VacAttrStats *stats = thisdata->vacattrstat;
    2152         288 :         Node       *expr = thisdata->expr;
    2153             :         TupleTableSlot *slot;
    2154             :         EState     *estate;
    2155             :         ExprContext *econtext;
    2156             :         Datum      *exprvals;
    2157             :         bool       *exprnulls;
    2158             :         ExprState  *exprstate;
    2159             :         int         tcnt;
    2160             : 
    2161             :         /* Are we still in the main context? */
    2162             :         Assert(CurrentMemoryContext == expr_context);
    2163             : 
    2164             :         /*
    2165             :          * Need an EState for evaluation of expressions.  Create it in the
    2166             :          * per-expression context to be sure it gets cleaned up at the bottom
    2167             :          * of the loop.
    2168             :          */
    2169         288 :         estate = CreateExecutorState();
    2170         288 :         econtext = GetPerTupleExprContext(estate);
    2171             : 
    2172             :         /* Set up expression evaluation state */
    2173         288 :         exprstate = ExecPrepareExpr((Expr *) expr, estate);
    2174             : 
    2175             :         /* Need a slot to hold the current heap tuple, too */
    2176         288 :         slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel),
    2177             :                                         &TTSOpsHeapTuple);
    2178             : 
    2179             :         /* Arrange for econtext's scan tuple to be the tuple under test */
    2180         288 :         econtext->ecxt_scantuple = slot;
    2181             : 
    2182             :         /* Compute and save expression values */
    2183         288 :         exprvals = (Datum *) palloc(numrows * sizeof(Datum));
    2184         288 :         exprnulls = (bool *) palloc(numrows * sizeof(bool));
    2185             : 
    2186         288 :         tcnt = 0;
    2187      399882 :         for (i = 0; i < numrows; i++)
    2188             :         {
    2189             :             Datum       datum;
    2190             :             bool        isnull;
    2191             : 
    2192             :             /*
    2193             :              * Reset the per-tuple context each time, to reclaim any cruft
    2194             :              * left behind by evaluating the statistics expressions.
    2195             :              */
    2196      399594 :             ResetExprContext(econtext);
    2197             : 
    2198             :             /* Set up for expression evaluation */
    2199      399594 :             ExecStoreHeapTuple(rows[i], slot, false);
    2200             : 
    2201             :             /*
    2202             :              * Evaluate the expression. We do this in the per-tuple context so
    2203             :              * as not to leak memory, and then copy the result into the
    2204             :              * context created at the beginning of this function.
    2205             :              */
    2206      399594 :             datum = ExecEvalExprSwitchContext(exprstate,
    2207      399594 :                                               GetPerTupleExprContext(estate),
    2208             :                                               &isnull);
    2209      399594 :             if (isnull)
    2210             :             {
    2211           0 :                 exprvals[tcnt] = (Datum) 0;
    2212           0 :                 exprnulls[tcnt] = true;
    2213             :             }
    2214             :             else
    2215             :             {
    2216             :                 /* Make sure we copy the data into the context. */
    2217             :                 Assert(CurrentMemoryContext == expr_context);
    2218             : 
    2219      799188 :                 exprvals[tcnt] = datumCopy(datum,
    2220      399594 :                                            stats->attrtype->typbyval,
    2221      399594 :                                            stats->attrtype->typlen);
    2222      399594 :                 exprnulls[tcnt] = false;
    2223             :             }
    2224             : 
    2225      399594 :             tcnt++;
    2226             :         }
    2227             : 
    2228             :         /*
    2229             :          * Now we can compute the statistics for the expression columns.
    2230             :          *
    2231             :          * XXX Unlike compute_index_stats we don't need to switch and reset
    2232             :          * memory contexts here, because we're only computing stats for a
    2233             :          * single expression (and not iterating over many indexes), so we just
    2234             :          * do it in expr_context. Note that compute_stats copies the result
    2235             :          * into stats->anl_context, so it does not disappear.
    2236             :          */
    2237         288 :         if (tcnt > 0)
    2238             :         {
    2239             :             AttributeOpts *aopt =
    2240         288 :                 get_attribute_options(stats->attr->attrelid,
    2241         288 :                                       stats->attr->attnum);
    2242             : 
    2243         288 :             stats->exprvals = exprvals;
    2244         288 :             stats->exprnulls = exprnulls;
    2245         288 :             stats->rowstride = 1;
    2246         288 :             stats->compute_stats(stats,
    2247             :                                  expr_fetch_func,
    2248             :                                  tcnt,
    2249             :                                  tcnt);
    2250             : 
    2251             :             /*
    2252             :              * If the n_distinct option is specified, it overrides the above
    2253             :              * computation.
    2254             :              */
    2255         288 :             if (aopt != NULL && aopt->n_distinct != 0.0)
    2256           0 :                 stats->stadistinct = aopt->n_distinct;
    2257             :         }
    2258             : 
    2259             :         /* And clean up */
    2260         288 :         MemoryContextSwitchTo(expr_context);
    2261             : 
    2262         288 :         ExecDropSingleTupleTableSlot(slot);
    2263         288 :         FreeExecutorState(estate);
    2264         288 :         MemoryContextResetAndDeleteChildren(expr_context);
    2265             :     }
    2266             : 
    2267         150 :     MemoryContextSwitchTo(old_context);
    2268         150 :     MemoryContextDelete(expr_context);
    2269         150 : }
    2270             : 
    2271             : 
    2272             : /*
    2273             :  * Fetch function for analyzing statistics object expressions.
    2274             :  *
    2275             :  * We have not bothered to construct tuples from the data, instead the data
    2276             :  * is just in Datum arrays.
                     :  *
                     :  * This is the callback handed to stats->compute_stats().  It returns
                     :  * stats->exprvals[rownum * stats->rowstride] and stores the matching
                     :  * stats->exprnulls[] flag into *isNull.
                     :  *
                     :  * stats:  carries the exprvals/exprnulls arrays and rowstride.
                     :  * rownum: zero-based index of the sample row to fetch; it is not
                     :  *         range-checked here, so the caller must keep it within the
                     :  *         arrays' bounds.
                     :  * isNull: output; set to the null flag for that row.
    2277             :  */
    2278             : static Datum
    2279      399594 : expr_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
    2280             : {
    2281             :     int         i;
    2282             : 
    2283             :     /* exprvals and exprnulls are already offset for proper column */
    2284      399594 :     i = rownum * stats->rowstride;
    2285      399594 :     *isNull = stats->exprnulls[i];
    2286      399594 :     return stats->exprvals[i];
    2287             : }
    2288             : 
    2289             : /*
    2290             :  * Build analyze data for a list of expressions. As this is not tied
    2291             :  * directly to a relation (table or index), we have to fake some of
    2292             :  * the fields in examine_expression().
                     :  *
                     :  * exprs:      list of expression nodes.
                     :  * stattarget: passed through unchanged to examine_expression().
                     :  *
                     :  * Returns a palloc0'd array with list_length(exprs) AnlExprData entries,
                     :  * in list order.  Each entry holds the expression node itself (not a
                     :  * copy) and whatever examine_expression() returned for it.  The array
                     :  * length is not stored in the result, so the caller has to track it.
    2293             :  */
    2294             : static AnlExprData *
    2295         150 : build_expr_data(List *exprs, int stattarget)
    2296             : {
    2297             :     int         idx;
    2298         150 :     int         nexprs = list_length(exprs);
    2299             :     AnlExprData *exprdata;
    2300             :     ListCell   *lc;
    2301             : 
    2302         150 :     exprdata = (AnlExprData *) palloc0(nexprs * sizeof(AnlExprData));
    2303             : 
    2304         150 :     idx = 0;
    2305         438 :     foreach(lc, exprs)
    2306             :     {
    2307         288 :         Node       *expr = (Node *) lfirst(lc);
    2308         288 :         AnlExprData *thisdata = &exprdata[idx];
    2309             : 
    2310         288 :         thisdata->expr = expr;
    2311         288 :         thisdata->vacattrstat = examine_expression(expr, stattarget);
    2312         288 :         idx++;
    2313             :     }
    2314             : 
    2315         150 :     return exprdata;
    2316             : }
    2317             : 
    2318             : /*
                     :  * Form an array of pg_statistic rows (per update_attstats).
                     :  *
                     :  * exprdata: array of per-expression analyze data; only the vacattrstat
                     :  *           member of each entry is read here.
                     :  * nexprs:   number of entries in exprdata.
                     :  *
                     :  * Returns an array Datum whose element type is pg_statistic's composite
                     :  * row type, with one element per expression in exprdata order.  An
                     :  * expression whose stats are not marked stats_valid contributes a NULL
                     :  * element, so positions in the array keep matching expression indexes.
                     :  *
                     :  * pg_statistic is opened only to obtain its tuple descriptor; nothing is
                     :  * inserted into the catalog by this function.  Raises an ERROR if
                     :  * pg_statistic has no composite type.
                     :  */
    2319             : static Datum
    2320         150 : serialize_expr_stats(AnlExprData *exprdata, int nexprs)
    2321             : {
    2322             :     int         exprno;
    2323             :     Oid         typOid;
    2324             :     Relation    sd;
    2325             : 
    2326         150 :     ArrayBuildState *astate = NULL;
    2327             : 
    2328         150 :     sd = table_open(StatisticRelationId, RowExclusiveLock);
    2329             : 
    2330             :     /* lookup OID of composite type for pg_statistic */
    2331         150 :     typOid = get_rel_type_id(StatisticRelationId);
    2332         150 :     if (!OidIsValid(typOid))
    2333           0 :         ereport(ERROR,
    2334             :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
    2335             :                  errmsg("relation \"%s\" does not have a composite type",
    2336             :                         "pg_statistic")));
    2337             : 
    2338         438 :     for (exprno = 0; exprno < nexprs; exprno++)
    2339             :     {
    2340             :         int         i,
    2341             :                     k;
    2342         288 :         VacAttrStats *stats = exprdata[exprno].vacattrstat;
    2343             : 
    2344             :         Datum       values[Natts_pg_statistic];
    2345             :         bool        nulls[Natts_pg_statistic];
    2346             :         HeapTuple   stup;
    2347             : 
    2348         288 :         if (!stats->stats_valid)
    2349             :         {
    2350           0 :             astate = accumArrayResult(astate,
    2351             :                                       (Datum) 0,
    2352             :                                       true,
    2353             :                                       typOid,
    2354             :                                       CurrentMemoryContext);
    2355           0 :             continue;
    2356             :         }
    2357             : 
    2358             :         /*
    2359             :          * Construct a new pg_statistic tuple
                     :          *
                     :          * The identifying columns (starelid, staattnum, stainherit) are
                     :          * filled with InvalidOid / InvalidAttrNumber / false.  The
                     :          * remaining columns are copied from the VacAttrStats, one group
                     :          * of STATISTIC_NUM_SLOTS columns at a time; slots with no
                     :          * numbers or no values are stored as NULL.
    2360             :          */
    2361        9216 :         for (i = 0; i < Natts_pg_statistic; ++i)
    2362             :         {
    2363        8928 :             nulls[i] = false;
    2364             :         }
    2365             : 
    2366         288 :         values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(InvalidOid);
    2367         288 :         values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(InvalidAttrNumber);
    2368         288 :         values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(false);
    2369         288 :         values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
    2370         288 :         values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
    2371         288 :         values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
    2372         288 :         i = Anum_pg_statistic_stakind1 - 1;
    2373        1728 :         for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
    2374             :         {
    2375        1440 :             values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
    2376             :         }
    2377         288 :         i = Anum_pg_statistic_staop1 - 1;
    2378        1728 :         for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
    2379             :         {
    2380        1440 :             values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */
    2381             :         }
    2382         288 :         i = Anum_pg_statistic_stacoll1 - 1;
    2383        1728 :         for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
    2384             :         {
    2385        1440 :             values[i++] = ObjectIdGetDatum(stats->stacoll[k]);   /* stacollN */
    2386             :         }
    2387         288 :         i = Anum_pg_statistic_stanumbers1 - 1;
    2388        1728 :         for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
    2389             :         {
    2390        1440 :             int         nnum = stats->numnumbers[k];
    2391             : 
    2392        1440 :             if (nnum > 0)
    2393             :             {
    2394             :                 int         n;
    2395         564 :                 Datum      *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
    2396             :                 ArrayType  *arry;
    2397             : 
    2398        4938 :                 for (n = 0; n < nnum; n++)
    2399        4374 :                     numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
    2400         564 :                 arry = construct_array_builtin(numdatums, nnum, FLOAT4OID);
    2401         564 :                 values[i++] = PointerGetDatum(arry);    /* stanumbersN */
    2402             :             }
    2403             :             else
    2404             :             {
    2405         876 :                 nulls[i] = true;
    2406         876 :                 values[i++] = (Datum) 0;
    2407             :             }
    2408             :         }
    2409         288 :         i = Anum_pg_statistic_stavalues1 - 1;
    2410        1728 :         for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
    2411             :         {
    2412        1440 :             if (stats->numvalues[k] > 0)
    2413             :             {
    2414             :                 ArrayType  *arry;
    2415             : 
    2416         306 :                 arry = construct_array(stats->stavalues[k],
    2417             :                                        stats->numvalues[k],
    2418             :                                        stats->statypid[k],
    2419         306 :                                        stats->statyplen[k],
    2420         306 :                                        stats->statypbyval[k],
    2421         306 :                                        stats->statypalign[k]);
    2422         306 :                 values[i++] = PointerGetDatum(arry);    /* stavaluesN */
    2423             :             }
    2424             :             else
    2425             :             {
    2426        1134 :                 nulls[i] = true;
    2427        1134 :                 values[i++] = (Datum) 0;
    2428             :             }
    2429             :         }
    2430             : 
    2431         288 :         stup = heap_form_tuple(RelationGetDescr(sd), values, nulls);
    2432             : 
    2433         288 :         astate = accumArrayResult(astate,
    2434             :                                   heap_copy_tuple_as_datum(stup, RelationGetDescr(sd)),
    2435             :                                   false,
    2436             :                                   typOid,
    2437             :                                   CurrentMemoryContext);
    2438             :     }
    2439             : 
    2440         150 :     table_close(sd, RowExclusiveLock);
    2441             : 
    2442         150 :     return makeArrayResult(astate, CurrentMemoryContext);
    2443             : }
    2444             : 
    2445             : /*
    2446             :  * Loads pg_statistic record from expression statistics for expression
    2447             :  * identified by the supplied index.
                     :  *
                     :  * stxoid: OID of the statistics object.
                     :  * inh:    second key of the STATEXTDATASTXOID syscache lookup.
                     :  * idx:    zero-based position of the expression within the stxdexpr
                     :  *         array.
                     :  *
                     :  * Returns a heap_copytuple() copy of the selected array element, so the
                     :  * result stays usable after the syscache entry is released.
                     :  *
                     :  * Raises an ERROR if there is no pg_statistic_ext_data entry for
                     :  * (stxoid, inh), or if its stxdexpr column is NULL, i.e. expression
                     :  * statistics have not been built yet.
                     :  *
                     :  * NOTE(review): idx is used to index the array without a range check,
                     :  * and the selected element is assumed to be non-null; confirm that all
                     :  * callers guarantee both.
    2448             :  */
    2449             : HeapTuple
    2450        1644 : statext_expressions_load(Oid stxoid, bool inh, int idx)
    2451             : {
    2452             :     bool        isnull;
    2453             :     Datum       value;
    2454             :     HeapTuple   htup;
    2455             :     ExpandedArrayHeader *eah;
    2456             :     HeapTupleHeader td;
    2457             :     HeapTupleData tmptup;
    2458             :     HeapTuple   tup;
    2459             : 
    2460        1644 :     htup = SearchSysCache2(STATEXTDATASTXOID,
    2461             :                            ObjectIdGetDatum(stxoid), BoolGetDatum(inh));
    2462        1644 :     if (!HeapTupleIsValid(htup))
    2463           0 :         elog(ERROR, "cache lookup failed for statistics object %u", stxoid);
    2464             : 
    2465        1644 :     value = SysCacheGetAttr(STATEXTDATASTXOID, htup,
    2466             :                             Anum_pg_statistic_ext_data_stxdexpr, &isnull);
    2467        1644 :     if (isnull)
    2468           0 :         elog(ERROR,
    2469             :              "requested statistics kind \"%c\" is not yet built for statistics object %u",
    2470             :              STATS_EXT_EXPRESSIONS, stxoid);
    2471             : 
    2472        1644 :     eah = DatumGetExpandedArray(value);
    2473             : 
    2474        1644 :     deconstruct_expanded_array(eah);
    2475             : 
    2476        1644 :     td = DatumGetHeapTupleHeader(eah->dvalues[idx]);
    2477             : 
    2478             :     /* Build a temporary HeapTuple control structure */
    2479        1644 :     tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
    2480        1644 :     ItemPointerSetInvalid(&(tmptup.t_self));
    2481        1644 :     tmptup.t_tableOid = InvalidOid;
    2482        1644 :     tmptup.t_data = td;
    2483             : 
    2484        1644 :     tup = heap_copytuple(&tmptup);
    2485             : 
    2486        1644 :     ReleaseSysCache(htup);
    2487             : 
    2488        1644 :     return tup;
    2489             : }
    2490             : 
    2491             : /*
    2492             :  * Evaluate the expressions, so that we can use the results to build
    2493             :  * all the requested statistics types. This matters especially for
    2494             :  * expensive expressions, of course.
                     :  *
                     :  * The result describes nkeys "attributes": first the plain columns in
                     :  * stat->columns (in bms_next_member order), then one entry per
                     :  * expression in stat->exprs (in list order).  Columns reuse the caller's
                     :  * stats[] entries, which are therefore expected to be in that same
                     :  * column order; expressions get a fresh VacAttrStats from
                     :  * examine_expression(expr, stattarget).
                     :  *
                     :  * For key number "idx" and sample row "i", result->values[idx][i] and
                     :  * result->nulls[idx][i] hold the datum and null flag.  Column values come
                     :  * from heap_getattr() on rows[i]; expression values are computed by
                     :  * evaluating the expression against rows[i] stored in a slot built from
                     :  * rel's tuple descriptor.
                     :  *
                     :  * The StatsBuildData struct and all of its arrays live in one palloc'd
                     :  * chunk whose start is the returned pointer.
    2495             :  */
    2496             : static StatsBuildData *
    2497         348 : make_build_data(Relation rel, StatExtEntry *stat, int numrows, HeapTuple *rows,
    2498             :                 VacAttrStats **stats, int stattarget)
    2499             : {
    2500             :     /* evaluated expressions */
    2501             :     StatsBuildData *result;
    2502             :     char       *ptr;
    2503             :     Size        len;
    2504             : 
    2505             :     int         i;
    2506             :     int         k;
    2507             :     int         idx;
    2508             :     TupleTableSlot *slot;
    2509             :     EState     *estate;
    2510             :     ExprContext *econtext;
    2511         348 :     List       *exprstates = NIL;
    2512         348 :     int         nkeys = bms_num_members(stat->columns) + list_length(stat->exprs);
    2513             :     ListCell   *lc;
    2514             : 
    2515             :     /* allocate everything as a single chunk, so we can free it easily */
    2516         348 :     len = MAXALIGN(sizeof(StatsBuildData));
    2517         348 :     len += MAXALIGN(sizeof(AttrNumber) * nkeys);    /* attnums */
    2518         348 :     len += MAXALIGN(sizeof(VacAttrStats *) * nkeys);    /* stats */
    2519             : 
    2520             :     /* values */
    2521         348 :     len += MAXALIGN(sizeof(Datum *) * nkeys);
    2522         348 :     len += nkeys * MAXALIGN(sizeof(Datum) * numrows);
    2523             : 
    2524             :     /* nulls */
    2525         348 :     len += MAXALIGN(sizeof(bool *) * nkeys);
    2526         348 :     len += nkeys * MAXALIGN(sizeof(bool) * numrows);
    2527             : 
    2528         348 :     ptr = palloc(len);
    2529             : 
    2530             :     /* set the pointers */
    2531         348 :     result = (StatsBuildData *) ptr;
    2532         348 :     ptr += MAXALIGN(sizeof(StatsBuildData));
    2533             : 
    2534             :     /* attnums */
    2535         348 :     result->attnums = (AttrNumber *) ptr;
    2536         348 :     ptr += MAXALIGN(sizeof(AttrNumber) * nkeys);
    2537             : 
    2538             :     /* stats */
    2539         348 :     result->stats = (VacAttrStats **) ptr;
    2540         348 :     ptr += MAXALIGN(sizeof(VacAttrStats *) * nkeys);
    2541             : 
    2542             :     /* values */
    2543         348 :     result->values = (Datum **) ptr;
    2544         348 :     ptr += MAXALIGN(sizeof(Datum *) * nkeys);
    2545             : 
    2546             :     /* nulls */
    2547         348 :     result->nulls = (bool **) ptr;
    2548         348 :     ptr += MAXALIGN(sizeof(bool *) * nkeys);
    2549             : 
    2550        1248 :     for (i = 0; i < nkeys; i++)
    2551             :     {
    2552         900 :         result->values[i] = (Datum *) ptr;
    2553         900 :         ptr += MAXALIGN(sizeof(Datum) * numrows);
    2554             : 
    2555         900 :         result->nulls[i] = (bool *) ptr;
    2556         900 :         ptr += MAXALIGN(sizeof(bool) * numrows);
    2557             :     }
    2558             : 
    2559             :     Assert((ptr - (char *) result) == len);
    2560             : 
    2561             :     /* we have it allocated, so let's fill the values */
    2562         348 :     result->nattnums = nkeys;
    2563         348 :     result->numrows = numrows;
    2564             : 
    2565             :     /*
                     :      * Fill the attribute info - first attributes, then expressions.
                     :      * Plain columns keep their attribute number from the bitmap, while
                     :      * expressions are numbered -1, -2, ... in list order.
                     :      */
    2566         348 :     idx = 0;
    2567         348 :     k = -1;
    2568         960 :     while ((k = bms_next_member(stat->columns, k)) >= 0)
    2569             :     {
    2570         612 :         result->attnums[idx] = k;
    2571         612 :         result->stats[idx] = stats[idx];
    2572             : 
    2573         612 :         idx++;
    2574             :     }
    2575             : 
    2576         348 :     k = -1;
    2577         636 :     foreach(lc, stat->exprs)
    2578             :     {
    2579         288 :         Node       *expr = (Node *) lfirst(lc);
    2580             : 
    2581         288 :         result->attnums[idx] = k;
    2582         288 :         result->stats[idx] = examine_expression(expr, stattarget);
    2583             : 
    2584         288 :         idx++;
    2585         288 :         k--;
    2586             :     }
    2587             : 
    2588             :     /* first extract values for all the regular attributes */
    2589      738966 :     for (i = 0; i < numrows; i++)
    2590             :     {
    2591      738618 :         idx = 0;
    2592      738618 :         k = -1;
    2593     2432454 :         while ((k = bms_next_member(stat->columns, k)) >= 0)
    2594             :         {
    2595     3387672 :             result->values[idx][i] = heap_getattr(rows[i], k,
    2596     1693836 :                                                   result->stats[idx]->tupDesc,
    2597     1693836 :                                                   &result->nulls[idx][i]);
    2598             : 
    2599     1693836 :             idx++;
    2600             :         }
    2601             :     }
    2602             : 
    2603             :     /* Need an EState for evaluating the expressions. */
    2604         348 :     estate = CreateExecutorState();
    2605         348 :     econtext = GetPerTupleExprContext(estate);
    2606             : 
    2607             :     /* Need a slot to hold the current heap tuple, too */
    2608         348 :     slot = MakeSingleTupleTableSlot(RelationGetDescr(rel),
    2609             :                                     &TTSOpsHeapTuple);
    2610             : 
    2611             :     /* Arrange for econtext's scan tuple to be the tuple under test */
    2612         348 :     econtext->ecxt_scantuple = slot;
    2613             : 
    2614             :     /* Set up expression evaluation state */
    2615         348 :     exprstates = ExecPrepareExprList(stat->exprs, estate);
    2616             : 
    2617      738966 :     for (i = 0; i < numrows; i++)
    2618             :     {
    2619             :         /*
    2620             :          * Reset the per-tuple context each time, to reclaim any cruft left
    2621             :          * behind by evaluating the statistics object expressions.
    2622             :          */
    2623      738618 :         ResetExprContext(econtext);
    2624             : 
    2625             :         /* Set up for expression evaluation */
    2626      738618 :         ExecStoreHeapTuple(rows[i], slot, false);
    2627             : 
    2628      738618 :         idx = bms_num_members(stat->columns);
    2629     1138212 :         foreach(lc, exprstates)
    2630             :         {
    2631             :             Datum       datum;
    2632             :             bool        isnull;
    2633      399594 :             ExprState  *exprstate = (ExprState *) lfirst(lc);
    2634             : 
    2635             :             /*
    2636             :              * XXX This probably leaks memory. Maybe we should use
    2637             :              * ExecEvalExprSwitchContext but then we need to copy the result
    2638             :              * somewhere else.
                     :              *
                     :              * Note that the datum is stored below exactly as returned,
                     :              * without a datumCopy().
    2639             :              */
    2640      399594 :             datum = ExecEvalExpr(exprstate,
    2641      399594 :                                  GetPerTupleExprContext(estate),
    2642             :                                  &isnull);
    2643      399594 :             if (isnull)
    2644             :             {
    2645           0 :                 result->values[idx][i] = (Datum) 0;
    2646           0 :                 result->nulls[idx][i] = true;
    2647             :             }
    2648             :             else
    2649             :             {
    2650      399594 :                 result->values[idx][i] = (Datum) datum;
    2651      399594 :                 result->nulls[idx][i] = false;
    2652             :             }
    2653             : 
    2654      399594 :             idx++;
    2655             :         }
    2656             :     }
    2657             : 
    2658         348 :     ExecDropSingleTupleTableSlot(slot);
    2659         348 :     FreeExecutorState(estate);
    2660             : 
    2661         348 :     return result;
    2662             : }

Generated by: LCOV version 1.14