LCOV - code coverage report
Current view: top level - src/backend/statistics - extended_stats.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 94.2 % 794 748
Test Date: 2026-04-15 19:16:22 Functions: 97.1 % 34 33
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * extended_stats.c
       4              :  *    POSTGRES extended statistics
       5              :  *
       6              :  * Generic code supporting statistics objects created via CREATE STATISTICS.
       7              :  *
       8              :  *
       9              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      10              :  * Portions Copyright (c) 1994, Regents of the University of California
      11              :  *
      12              :  * IDENTIFICATION
      13              :  *    src/backend/statistics/extended_stats.c
      14              :  *
      15              :  *-------------------------------------------------------------------------
      16              :  */
      17              : #include "postgres.h"
      18              : 
      19              : #include "access/detoast.h"
      20              : #include "access/genam.h"
      21              : #include "access/htup_details.h"
      22              : #include "access/table.h"
      23              : #include "catalog/indexing.h"
      24              : #include "catalog/pg_statistic_ext.h"
      25              : #include "catalog/pg_statistic_ext_data.h"
      26              : #include "commands/defrem.h"
      27              : #include "commands/progress.h"
      28              : #include "executor/executor.h"
      29              : #include "miscadmin.h"
      30              : #include "nodes/nodeFuncs.h"
      31              : #include "optimizer/optimizer.h"
      32              : #include "parser/parsetree.h"
      33              : #include "pgstat.h"
      34              : #include "postmaster/autovacuum.h"
      35              : #include "rewrite/rewriteHandler.h"
      36              : #include "statistics/extended_stats_internal.h"
      37              : #include "statistics/statistics.h"
      38              : #include "utils/acl.h"
      39              : #include "utils/array.h"
      40              : #include "utils/attoptcache.h"
      41              : #include "utils/builtins.h"
      42              : #include "utils/datum.h"
      43              : #include "utils/fmgroids.h"
      44              : #include "utils/lsyscache.h"
      45              : #include "utils/memutils.h"
      46              : #include "utils/rel.h"
      47              : #include "utils/selfuncs.h"
      48              : #include "utils/syscache.h"
      49              : 
      50              : /*
      51              :  * To avoid consuming too much memory during analysis and/or too much space
      52              :  * in the resulting pg_statistic rows, we ignore varlena datums that are wider
      53              :  * than WIDTH_THRESHOLD (after detoasting!).  This is legitimate for MCV
      54              :  * and distinct-value calculations since a wide value is unlikely to be
      55              :  * duplicated at all, much less be a most-common value.  For the same reason,
      56              :  * ignoring wide values will not affect our estimates of histogram bin
      57              :  * boundaries very much.
      58              :  */
      59              : #define WIDTH_THRESHOLD  1024
      60              : 
      61              : /*
      62              :  * Used internally to refer to an individual statistics object, i.e.,
      63              :  * a pg_statistic_ext entry.
      64              :  */
      65              : typedef struct StatExtEntry
      66              : {
      67              :     Oid         statOid;        /* OID of pg_statistic_ext entry */
      68              :     char       *schema;         /* name of the statistics object's schema */
      69              :     char       *name;           /* statistics object's name */
      70              :     Bitmapset  *columns;        /* attribute numbers taken from stxkeys */
      71              :     List       *types;          /* integer list of enabled kinds (stxkind chars) */
      72              :     int         stattarget;     /* statistics target (-1 for default) */
      73              :     List       *exprs;          /* expressions from stxexprs (NIL if none) */
      74              : } StatExtEntry;
      75              : 
      76              : 
      77              : static List *fetch_statentries_for_relation(Relation pg_statext, Relation rel);
      78              : static VacAttrStats **lookup_var_attr_stats(Bitmapset *attrs, List *exprs,
      79              :                                             int nvacatts, VacAttrStats **vacatts);
      80              : static void statext_store(Oid statOid, bool inh,
      81              :                           MVNDistinct *ndistinct, MVDependencies *dependencies,
      82              :                           MCVList *mcv, Datum exprs, VacAttrStats **stats);
      83              : static int  statext_compute_stattarget(int stattarget,
      84              :                                        int nattrs, VacAttrStats **stats);
      85              : 
      86              : /* Information needed to analyze a single expression of a statistics object. */
      87              : typedef struct AnlExprData
      88              : {
      89              :     Node       *expr;           /* expression to analyze */
      90              :     VacAttrStats *vacattrstat;  /* VacAttrStats entry used to analyze expr */
      91              : } AnlExprData;
      92              : 
      93              : static void compute_expr_stats(Relation onerel, AnlExprData *exprdata,
      94              :                                int nexprs, HeapTuple *rows, int numrows);
      95              : static Datum serialize_expr_stats(AnlExprData *exprdata, int nexprs);
      96              : static Datum expr_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
      97              : static AnlExprData *build_expr_data(List *exprs, int stattarget);
      98              : 
      99              : static StatsBuildData *make_build_data(Relation rel, StatExtEntry *stat,
     100              :                                        int numrows, HeapTuple *rows,
     101              :                                        VacAttrStats **stats, int stattarget);
     102              : 
     103              : 
     104              : /*
     105              :  * Build the extended statistics objects defined on onerel, using the rows
     106              :  * already sampled for the plain (single-column) stats.
     107              :  *
     108              :  * This fetches the list of statistics objects from pg_statistic_ext, computes
     109              :  * each requested kind, and stores the results through statext_store().
     110              :  */
     111              : void
     112         6687 : BuildRelationExtStatistics(Relation onerel, bool inh, double totalrows,
     113              :                            int numrows, HeapTuple *rows,
     114              :                            int natts, VacAttrStats **vacattrstats)
     115              : {
     116              :     Relation    pg_stext;
     117              :     ListCell   *lc;
     118              :     List       *statslist;
     119              :     MemoryContext cxt;
     120              :     MemoryContext oldcxt;
     121              :     int64       ext_cnt;
     122              : 
     123              :     /* Do nothing if there are no columns to analyze. */
     124         6687 :     if (!natts)
     125           13 :         return;
     126              : 
     127              :     /* fetch the list before switching contexts, so it survives resets of cxt */
     128         6674 :     pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
     129         6674 :     statslist = fetch_statentries_for_relation(pg_stext, onerel);
     130              : 
     131              :     /* scratch memory context, reset after each statistics object is built */
     132         6674 :     cxt = AllocSetContextCreate(CurrentMemoryContext,
     133              :                                 "BuildRelationExtStatistics",
     134              :                                 ALLOCSET_DEFAULT_SIZES);
     135         6674 :     oldcxt = MemoryContextSwitchTo(cxt);
     136              : 
     137              :     /* report this phase, along with the number of statistics objects */
     138         6674 :     if (statslist != NIL)
     139              :     {
     140          248 :         const int   index[] = {
     141              :             PROGRESS_ANALYZE_PHASE,
     142              :             PROGRESS_ANALYZE_EXT_STATS_TOTAL
     143              :         };
     144          496 :         const int64 val[] = {
     145              :             PROGRESS_ANALYZE_PHASE_COMPUTE_EXT_STATS,
     146          248 :             list_length(statslist)
     147              :         };
     148              : 
     149          248 :         pgstat_progress_update_multi_param(2, index, val);
     150              :     }
     151              : 
     152         6674 :     ext_cnt = 0;
     153         7058 :     foreach(lc, statslist)
     154              :     {
     155          384 :         StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
     156          384 :         MVNDistinct *ndistinct = NULL;
     157          384 :         MVDependencies *dependencies = NULL;
     158          384 :         MCVList    *mcv = NULL;
     159          384 :         Datum       exprstats = (Datum) 0;
     160              :         VacAttrStats **stats;
     161              :         ListCell   *lc2;
     162              :         int         stattarget;
     163              :         StatsBuildData *data;
     164              : 
     165              :         /*
     166              :          * Check if we can build these stats based on the columns analyzed.
     167              :          * If not, report this fact (except in autovacuum) and move on.
     168              :          */
     169          384 :         stats = lookup_var_attr_stats(stat->columns, stat->exprs,
     170              :                                       natts, vacattrstats);
     171          384 :         if (!stats)
     172              :         {
     173           10 :             if (!AmAutoVacuumWorkerProcess())
     174           10 :                 ereport(WARNING,
     175              :                         (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
     176              :                          errmsg("statistics object \"%s.%s\" could not be computed for relation \"%s.%s\"",
     177              :                                 stat->schema, stat->name,
     178              :                                 get_namespace_name(onerel->rd_rel->relnamespace),
     179              :                                 RelationGetRelationName(onerel)),
     180              :                          errtable(onerel)));
     181           10 :             continue;
     182              :         }
     183              : 
     184              :         /* compute statistics target for this statistics object */
     185          374 :         stattarget = statext_compute_stattarget(stat->stattarget,
     186          374 :                                                 bms_num_members(stat->columns),
     187              :                                                 stats);
     188              : 
     189              :         /*
     190              :          * Don't rebuild statistics objects with statistics target set to 0
     191              :          * (we just leave the existing values around, just like we do for
     192              :          * regular per-column statistics).
     193              :          */
     194          374 :         if (stattarget == 0)
     195            4 :             continue;
     196              : 
     197              :         /* evaluate expressions (if the statistics object has any) */
     198          370 :         data = make_build_data(onerel, stat, numrows, rows, stats, stattarget);
     199              : 
     200              :         /* compute statistics of each requested kind */
     201         1077 :         foreach(lc2, stat->types)
     202              :         {
     203          707 :             char        t = (char) lfirst_int(lc2);
     204              : 
     205          707 :             if (t == STATS_EXT_NDISTINCT)
     206          181 :                 ndistinct = statext_ndistinct_build(totalrows, data);
     207          526 :             else if (t == STATS_EXT_DEPENDENCIES)
     208          146 :                 dependencies = statext_dependencies_build(data);
     209          380 :             else if (t == STATS_EXT_MCV)
     210          201 :                 mcv = statext_mcv_build(data, totalrows, stattarget);
     211          179 :             else if (t == STATS_EXT_EXPRESSIONS)
     212              :             {
     213              :                 AnlExprData *exprdata;
     214              :                 int         nexprs;
     215              : 
     216              :                 /* should not happen, thanks to checks when defining stats */
     217          179 :                 if (!stat->exprs)
     218            0 :                     elog(ERROR, "requested expression stats, but there are no expressions");
     219              : 
     220          179 :                 exprdata = build_expr_data(stat->exprs, stattarget);
     221          179 :                 nexprs = list_length(stat->exprs);
     222              : 
     223          179 :                 compute_expr_stats(onerel, exprdata, nexprs, rows, numrows);
     224              : 
     225          179 :                 exprstats = serialize_expr_stats(exprdata, nexprs);
     226              :             }
     227              :         }
     228              : 
     229              :         /* store the statistics in the catalog */
     230          370 :         statext_store(stat->statOid, inh,
     231              :                       ndistinct, dependencies, mcv, exprstats, stats);
     232              : 
     233              :         /* report the number of statistics objects built so far */
     234          370 :         pgstat_progress_update_param(PROGRESS_ANALYZE_EXT_STATS_COMPUTED,
     235              :                                      ++ext_cnt);
     236              : 
     237              :         /* free the data used for building this statistics object */
     238          370 :         MemoryContextReset(cxt);
     239              :     }
     240              : 
     241         6674 :     MemoryContextSwitchTo(oldcxt);
     242         6674 :     MemoryContextDelete(cxt);
     243              : 
     244         6674 :     list_free(statslist);
     245              : 
     246         6674 :     table_close(pg_stext, RowExclusiveLock);
     247              : }
     248              : 
     249              : /*
     250              :  * Test if the given relation has at least one extended statistics object.
     251              :  */
     252              : bool
     253           11 : HasRelationExtStatistics(Relation onerel)
     254              : {
     255              :     Relation    pg_statext;
     256              :     SysScanDesc scan;
     257              :     ScanKeyData skey;
     258              :     bool        found;
     259              : 
     260           11 :     pg_statext = table_open(StatisticExtRelationId, RowExclusiveLock);
     261              : 
     262              :     /*
     263              :      * Prepare to scan pg_statistic_ext for entries having stxrelid = this
     264              :      * rel.  Only the first match is fetched; existence is all we report.
     265              :      */
     266           11 :     ScanKeyInit(&skey,
     267              :                 Anum_pg_statistic_ext_stxrelid,
     268              :                 BTEqualStrategyNumber, F_OIDEQ,
     269              :                 ObjectIdGetDatum(RelationGetRelid(onerel)));
     270              : 
     271           11 :     scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true,
     272              :                               NULL, 1, &skey);
     273              : 
     274           11 :     found = HeapTupleIsValid(systable_getnext(scan));
     275              : 
     276           11 :     systable_endscan(scan);
     277              : 
     278           11 :     table_close(pg_statext, RowExclusiveLock);
     279              : 
     280           11 :     return found;
     281              : }
     282              : 
     283              : /*
     284              :  * ComputeExtStatisticsRows
     285              :  *      Compute number of rows required by extended statistics on a table.
     286              :  *
     287              :  * Computes number of rows we need to sample to build extended statistics on a
     288              :  * table: 300 times the largest statistics target of any object we can
     289              :  * actually build (0 if there is none). For example, when analyzing only some
     290              :  * of the columns, this skips statistics objects that need additional columns.
     291              :  *
     292              :  * See statext_compute_stattarget for details about how we compute the
     293              :  * statistics target for a statistics object (from the object target,
     294              :  * attribute targets and default statistics target).
     295              :  */
     296              : int
     297        10139 : ComputeExtStatisticsRows(Relation onerel,
     298              :                          int natts, VacAttrStats **vacattrstats)
     299              : {
     300              :     Relation    pg_stext;
     301              :     ListCell   *lc;
     302              :     List       *lstats;
     303              :     MemoryContext cxt;
     304              :     MemoryContext oldcxt;
     305        10139 :     int         result = 0;
     306              : 
     307              :     /* If there are no columns to analyze, just return 0. */
     308        10139 :     if (!natts)
     309           49 :         return 0;
     310              : 
     311        10090 :     cxt = AllocSetContextCreate(CurrentMemoryContext,
     312              :                                 "ComputeExtStatisticsRows",
     313              :                                 ALLOCSET_DEFAULT_SIZES);
     314        10090 :     oldcxt = MemoryContextSwitchTo(cxt);
     315              : 
     316        10090 :     pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
     317        10090 :     lstats = fetch_statentries_for_relation(pg_stext, onerel);
     318              : 
     319        10474 :     foreach(lc, lstats)
     320              :     {
     321          384 :         StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
     322              :         int         stattarget;
     323              :         VacAttrStats **stats;
     324          384 :         int         nattrs = bms_num_members(stat->columns);
     325              : 
     326              :         /*
     327              :          * Check if we can build this statistics object based on the columns
     328              :          * analyzed. If not, ignore it (don't report anything, we'll do that
     329              :          * during the actual build in BuildRelationExtStatistics).
     330              :          */
     331          384 :         stats = lookup_var_attr_stats(stat->columns, stat->exprs,
     332              :                                       natts, vacattrstats);
     333              : 
     334          384 :         if (!stats)
     335           10 :             continue;
     336              : 
     337              :         /*
     338              :          * Compute statistics target, based on what's set for the statistics
     339              :          * object itself, and for its attributes.
     340              :          */
     341          374 :         stattarget = statext_compute_stattarget(stat->stattarget,
     342              :                                                 nattrs, stats);
     343              : 
     344              :         /* Use the largest value for all statistics objects. */
     345          374 :         if (stattarget > result)
     346          234 :             result = stattarget;
     347              :     }
     348              : 
     349        10090 :     table_close(pg_stext, RowExclusiveLock);
     350              : 
     351        10090 :     MemoryContextSwitchTo(oldcxt);
     352        10090 :     MemoryContextDelete(cxt);
     353              : 
     354              :     /* compute sample size based on the statistics target */
     355        10090 :     return (300 * result);
     356              : }
     357              : 
     358              : /*
     359              :  * statext_compute_stattarget
     360              :  *      compute statistics target for an extended statistics object
     361              :  *
     362              :  * When computing target for extended statistics objects, we consider three
     363              :  * places where the target may be set - the statistics object itself,
     364              :  * attributes the statistics object is defined on, and then the default
     365              :  * statistics target.
     366              :  *
     367              :  * First we look at what's set for the statistics object itself, using the
     368              :  * ALTER STATISTICS ... SET STATISTICS command. If we find a valid value
     369              :  * there (i.e. not -1) we're done. Otherwise we look at targets set for any
     370              :  * of the attributes the statistic is defined on, and if there are columns
     371              :  * with defined target, we use the maximum value. We do this mostly for
     372              :  * backwards compatibility, because this is what we did before having
     373              :  * statistics target for extended statistics.
     374              :  *
     375              :  * And finally, if we still don't have a statistics target, we use the value
     376              :  * set in default_statistics_target.
     377              :  */
     378              : static int
     379          748 : statext_compute_stattarget(int stattarget, int nattrs, VacAttrStats **stats)
     380              : {
     381              :     int         i;
     382              : 
     383              :     /*
     384              :      * If there's a statistics target set for the statistics object, use it.
     385              :      * It may be set to 0, which disables building of that statistic.
     386              :      */
     387          748 :     if (stattarget >= 0)
     388            8 :         return stattarget;
     389              : 
     390              :     /*
     391              :      * The target for the statistics object is set to -1, in which case we
     392              :      * look at the maximum target set for any of the attributes the object is
     393              :      * defined on.
     394              :      */
     395         1894 :     for (i = 0; i < nattrs; i++)
     396              :     {
     397              :         /* keep the maximum statistics target */
     398         1154 :         if (stats[i]->attstattarget > stattarget)
     399          532 :             stattarget = stats[i]->attstattarget;
     400              :     }
     401              : 
     402              :     /*
     403              :      * If the value is still negative (so neither the statistics object nor
     404              :      * any of the columns have custom statistics target set), use the global
     405              :      * default target.
     406              :      */
     407          740 :     if (stattarget < 0)
     408          208 :         stattarget = default_statistics_target;
     409              : 
     410              :     /* At this point we should have a valid statistics target. */
     411              :     Assert((stattarget >= 0) && (stattarget <= MAX_STATISTICS_TARGET));
     412              : 
     413          740 :     return stattarget;
     414              : }
     415              : 
     416              : /*
     417              :  * statext_is_kind_built
     418              :  *      Is this stat kind built, i.e. is its pg_statistic_ext_data column non-null?
     419              :  */
     420              : bool
     421         6836 : statext_is_kind_built(HeapTuple htup, char type)
     422              : {
     423              :     AttrNumber  attnum;
     424              : 
     425         6836 :     switch (type)
     426              :     {
     427         1709 :         case STATS_EXT_NDISTINCT:
     428         1709 :             attnum = Anum_pg_statistic_ext_data_stxdndistinct;
     429         1709 :             break;
     430              : 
     431         1709 :         case STATS_EXT_DEPENDENCIES:
     432         1709 :             attnum = Anum_pg_statistic_ext_data_stxddependencies;
     433         1709 :             break;
     434              : 
     435         1709 :         case STATS_EXT_MCV:
     436         1709 :             attnum = Anum_pg_statistic_ext_data_stxdmcv;
     437         1709 :             break;
     438              : 
     439         1709 :         case STATS_EXT_EXPRESSIONS:
     440         1709 :             attnum = Anum_pg_statistic_ext_data_stxdexpr;
     441         1709 :             break;
     442              : 
     443            0 :         default:
     444            0 :             elog(ERROR, "unexpected statistics type requested: %d", type);
     445              :     }
     446              : 
     447         6836 :     return !heap_attisnull(htup, attnum, NULL);
     448              : }
     449              : 
     450              : /*
     451              :  * Return a list of StatExtEntry, one per pg_statistic_ext row of the relation.
     452              :  */
     453              : static List *
     454        16764 : fetch_statentries_for_relation(Relation pg_statext, Relation rel)
     455              : {
     456              :     SysScanDesc scan;
     457              :     ScanKeyData skey;
     458              :     HeapTuple   htup;
     459        16764 :     List       *result = NIL;
     460        16764 :     Oid         relid = RelationGetRelid(rel);
     461              : 
     462              :     /*
     463              :      * Prepare to scan pg_statistic_ext for entries having stxrelid = this
     464              :      * rel.
     465              :      */
     466        16764 :     ScanKeyInit(&skey,
     467              :                 Anum_pg_statistic_ext_stxrelid,
     468              :                 BTEqualStrategyNumber, F_OIDEQ,
     469              :                 ObjectIdGetDatum(relid));
     470              : 
     471        16764 :     scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true,
     472              :                               NULL, 1, &skey);
     473              : 
     474        17532 :     while (HeapTupleIsValid(htup = systable_getnext(scan)))
     475              :     {
     476              :         StatExtEntry *entry;
     477              :         Datum       datum;
     478              :         bool        isnull;
     479              :         int         i;
     480              :         ArrayType  *arr;
     481              :         char       *enabled;
     482              :         Form_pg_statistic_ext staForm;
     483          768 :         List       *exprs = NIL;
     484              : 
     485          768 :         entry = palloc0_object(StatExtEntry);
     486          768 :         staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
     487          768 :         entry->statOid = staForm->oid;
     488          768 :         entry->schema = get_namespace_name(staForm->stxnamespace);
     489          768 :         entry->name = pstrdup(NameStr(staForm->stxname));
     490         1976 :         for (i = 0; i < staForm->stxkeys.dim1; i++)
     491              :         {
     492         1208 :             entry->columns = bms_add_member(entry->columns,
     493         1208 :                                             staForm->stxkeys.values[i]);
     494              :         }
     495              : 
     496          768 :         datum = SysCacheGetAttr(STATEXTOID, htup, Anum_pg_statistic_ext_stxstattarget, &isnull);
     497          768 :         entry->stattarget = isnull ? -1 : DatumGetInt16(datum);
     498              : 
     499              :         /* decode the stxkind char array into an integer list of kind chars */
     500          768 :         datum = SysCacheGetAttrNotNull(STATEXTOID, htup,
     501              :                                        Anum_pg_statistic_ext_stxkind);
     502          768 :         arr = DatumGetArrayTypeP(datum);
     503          768 :         if (ARR_NDIM(arr) != 1 ||
     504          768 :             ARR_HASNULL(arr) ||
     505          768 :             ARR_ELEMTYPE(arr) != CHAROID)
     506            0 :             elog(ERROR, "stxkind is not a 1-D char array");
     507          768 :         enabled = (char *) ARR_DATA_PTR(arr);
     508         2268 :         for (i = 0; i < ARR_DIMS(arr)[0]; i++)
     509              :         {
     510              :             Assert((enabled[i] == STATS_EXT_NDISTINCT) ||
     511              :                    (enabled[i] == STATS_EXT_DEPENDENCIES) ||
     512              :                    (enabled[i] == STATS_EXT_MCV) ||
     513              :                    (enabled[i] == STATS_EXT_EXPRESSIONS));
     514         1500 :             entry->types = lappend_int(entry->types, (int) enabled[i]);
     515              :         }
     516              : 
     517              :         /* decode the expressions (if any); exprs stays NIL otherwise */
     518          768 :         datum = SysCacheGetAttr(STATEXTOID, htup,
     519              :                                 Anum_pg_statistic_ext_stxexprs, &isnull);
     520              : 
     521          768 :         if (!isnull)
     522              :         {
     523              :             char       *exprsString;
     524              : 
     525          360 :             exprsString = TextDatumGetCString(datum);
     526          360 :             exprs = (List *) stringToNode(exprsString);
     527              : 
     528          360 :             pfree(exprsString);
     529              : 
     530              :             /* Expand virtual generated columns in the expressions */
     531          360 :             exprs = (List *) expand_generated_columns_in_expr((Node *) exprs, rel, 1);
     532              : 
     533              :             /*
     534              :              * Run the expressions through eval_const_expressions. This is not
     535              :              * just an optimization, but is necessary, because the planner
     536              :              * will be comparing them to similarly-processed qual clauses, and
     537              :              * may fail to detect valid matches without this.  We must not use
     538              :              * canonicalize_qual, however, since these aren't qual
     539              :              * expressions.
     540              :              */
     541          360 :             exprs = (List *) eval_const_expressions(NULL, (Node *) exprs);
     542              : 
     543              :             /* May as well fix opfuncids too */
     544          360 :             fix_opfuncids((Node *) exprs);
     545              :         }
     546              : 
     547          768 :         entry->exprs = exprs;
     548              : 
     549          768 :         result = lappend(result, entry);
     550              :     }
     551              : 
     552        16764 :     systable_endscan(scan);
     553              : 
     554        16764 :     return result;
     555              : }
     556              : 
     557              : /*
     558              :  * examine_attribute -- pre-analysis of a single statistics expression
     559              :  *
     560              :  * Determine whether the expression is analyzable; if so, create and
     561              :  * initialize a VacAttrStats struct for it.  If not, return NULL.
                      :  *
                      :  * The type, typmod and collation recorded in the struct are taken from
                      :  * 'expr'.  The statistics target is set to -1 and no analysis memory
                      :  * context is assigned.  The pg_type row is fetched as a private copy
                      :  * because stats->attrtype points into it; that copy is therefore kept when
                      :  * we succeed and is released, together with the struct, only when we
                      :  * return NULL.  A failed pg_type lookup raises an error.
     562              :  */
     563              : static VacAttrStats *
     564          648 : examine_attribute(Node *expr)
     565              : {
     566              :     HeapTuple   typtuple;
     567              :     VacAttrStats *stats;
     568              :     int         i;
     569              :     bool        ok;
     570              : 
     571              :     /*
     572              :      * Create the VacAttrStats struct.
     573              :      */
     574          648 :     stats = palloc0_object(VacAttrStats);
     575          648 :     stats->attstattarget = -1;
     576              : 
     577              :     /*
     578              :      * When analyzing an expression, believe the expression tree's type not
     579              :      * the column datatype --- the latter might be the opckeytype storage type
     580              :      * of the opclass, which is not interesting for our purposes.  (Note: if
     581              :      * we did anything with non-expression statistics columns, we'd need to
     582              :      * figure out where to get the correct type info from, but for now that's
     583              :      * not a problem.)  It's not clear whether anyone will care about the
     584              :      * typmod, but we store that too just in case.
     585              :      */
     586          648 :     stats->attrtypid = exprType(expr);
     587          648 :     stats->attrtypmod = exprTypmod(expr);
     588          648 :     stats->attrcollid = exprCollation(expr);
     589              : 
     590          648 :     typtuple = SearchSysCacheCopy1(TYPEOID,
     591              :                                    ObjectIdGetDatum(stats->attrtypid));
     592          648 :     if (!HeapTupleIsValid(typtuple))
     593            0 :         elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
     594          648 :     stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
     595              : 
     596              :     /*
     597              :      * We don't actually analyze individual attributes, so no need to set the
     598              :      * memory context.
     599              :      */
     600          648 :     stats->anl_context = NULL;
     601          648 :     stats->tupattnum = InvalidAttrNumber;
     602              : 
     603              :     /*
     604              :      * The fields describing the stats->stavalues[n] element types default to
     605              :      * the type of the data being analyzed, but the type-specific typanalyze
     606              :      * function can change them if it wants to store something else.
     607              :      */
     608         3888 :     for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
     609              :     {
     610         3240 :         stats->statypid[i] = stats->attrtypid;
     611         3240 :         stats->statyplen[i] = stats->attrtype->typlen;
     612         3240 :         stats->statypbyval[i] = stats->attrtype->typbyval;
     613         3240 :         stats->statypalign[i] = stats->attrtype->typalign;
     614              :     }
     615              : 
     616              :     /*
     617              :      * Call the type-specific typanalyze function.  If none is specified, use
     618              :      * std_typanalyze().
     619              :      */
     620          648 :     if (OidIsValid(stats->attrtype->typanalyze))
     621           44 :         ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
     622              :                                            PointerGetDatum(stats)));
     623              :     else
     624          604 :         ok = std_typanalyze(stats);
     625              :     /* Not analyzable if typanalyze said no, or left compute_stats unset or minrows non-positive. */
     626          648 :     if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
     627              :     {
     628            2 :         heap_freetuple(typtuple);
     629            2 :         pfree(stats);
     630            2 :         return NULL;
     631              :     }
     632              : 
     633          646 :     return stats;
     634              : }
     635              : 
     636              : /*
     637              :  * examine_expression -- pre-analysis of a single expression
     638              :  *
     639              :  * Determine whether the expression is analyzable; if so, create and initialize
     640              :  * a VacAttrStats struct for it.  If not, return NULL.
                      :  *
                      :  * 'expr' must not be NULL (asserted); its type, typmod and collation are
                      :  * recorded in the struct.  'stattarget' is stored as the statistics target,
                      :  * and the current memory context is recorded as the analysis context.  The
                      :  * pg_type row is fetched as a private copy because stats->attrtype points
                      :  * into it; the copy is kept on success and is released, together with the
                      :  * struct, only when we return NULL.  A failed pg_type lookup raises an
                      :  * error.
     641              :  */
     642              : static VacAttrStats *
     643          646 : examine_expression(Node *expr, int stattarget)
     644              : {
     645              :     HeapTuple   typtuple;
     646              :     VacAttrStats *stats;
     647              :     int         i;
     648              :     bool        ok;
     649              : 
     650              :     Assert(expr != NULL);
     651              : 
     652              :     /*
     653              :      * Create the VacAttrStats struct.
     654              :      */
     655          646 :     stats = palloc0_object(VacAttrStats);
     656              : 
     657              :     /*
     658              :      * We can't have statistics target specified for the expression, so we
     659              :      * could use either the default_statistics_target, or the target computed
     660              :      * for the extended statistics. The second option seems more reasonable.
     661              :      */
     662          646 :     stats->attstattarget = stattarget;
     663              : 
     664              :     /*
     665              :      * When analyzing an expression, believe the expression tree's type.
     666              :      */
     667          646 :     stats->attrtypid = exprType(expr);
     668          646 :     stats->attrtypmod = exprTypmod(expr);
     669              : 
     670              :     /*
     671              :      * We don't allow collation to be specified in CREATE STATISTICS, so we
     672              :      * have to use the collation specified for the expression. It's possible
     673              :      * to specify the collation in the expression "(col COLLATE "en_US")" in
     674              :      * which case exprCollation() does the right thing.
     675              :      */
     676          646 :     stats->attrcollid = exprCollation(expr);
     677              : 
     678          646 :     typtuple = SearchSysCacheCopy1(TYPEOID,
     679              :                                    ObjectIdGetDatum(stats->attrtypid));
     680          646 :     if (!HeapTupleIsValid(typtuple))
     681            0 :         elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
     682              : 
     683          646 :     stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
     684          646 :     stats->anl_context = CurrentMemoryContext;   /* XXX should be using
     685              :                                                  * something else? */
     686          646 :     stats->tupattnum = InvalidAttrNumber;
     687              : 
     688              :     /*
     689              :      * The fields describing the stats->stavalues[n] element types default to
     690              :      * the type of the data being analyzed, but the type-specific typanalyze
     691              :      * function can change them if it wants to store something else.
     692              :      */
     693         3876 :     for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
     694              :     {
     695         3230 :         stats->statypid[i] = stats->attrtypid;
     696         3230 :         stats->statyplen[i] = stats->attrtype->typlen;
     697         3230 :         stats->statypbyval[i] = stats->attrtype->typbyval;
     698         3230 :         stats->statypalign[i] = stats->attrtype->typalign;
     699              :     }
     700              : 
     701              :     /*
     702              :      * Call the type-specific typanalyze function.  If none is specified, use
     703              :      * std_typanalyze().
     704              :      */
     705          646 :     if (OidIsValid(stats->attrtype->typanalyze))
     706           42 :         ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
     707              :                                            PointerGetDatum(stats)));
     708              :     else
     709          604 :         ok = std_typanalyze(stats);
     710              :     /* Not analyzable if typanalyze said no, or left compute_stats unset or minrows non-positive. */
     711          646 :     if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
     712              :     {
     713            0 :         heap_freetuple(typtuple);
     714            0 :         pfree(stats);
     715            0 :         return NULL;
     716              :     }
     717              : 
     718          646 :     return stats;
     719              : }
     720              : 
     721              : /*
     722              :  * Using 'vacatts' of size 'nvacatts' as input data, return a newly-built
     723              :  * VacAttrStats array which holds first the items corresponding to the
     724              :  * attributes indicated by 'attrs' (matched on tupattnum, in bitmap order),
     725              :  * and then one freshly examined item for each expression in 'exprs'.  If we
     726              :  * don't have all of the per-column stats available, or an expression turns
                      :  * out not to be analyzable, then we return NULL to indicate to the caller
                      :  * that the stats should not be built.
                      :  *
                      :  * The entries for plain attributes point at the caller's 'vacatts' items;
                      :  * only the array itself is freed on the failure paths.
                      :  *
                      :  * NOTE(review): when 'exprs' is not empty, vacatts[0] is dereferenced
                      :  * without looking at 'nvacatts'; presumably callers always pass at least
                      :  * one entry -- confirm.
     727              :  */
     728              : static VacAttrStats **
     729          768 : lookup_var_attr_stats(Bitmapset *attrs, List *exprs,
     730              :                       int nvacatts, VacAttrStats **vacatts)
     731              : {
     732          768 :     int         i = 0;
     733          768 :     int         x = -1;
     734              :     int         natts;
     735              :     VacAttrStats **stats;
     736              :     ListCell   *lc;
     737              : 
     738          768 :     natts = bms_num_members(attrs) + list_length(exprs);
     739              : 
     740          768 :     stats = (VacAttrStats **) palloc(natts * sizeof(VacAttrStats *));
     741              : 
     742              :     /* lookup VacAttrStats info for the requested columns (same attnum) */
     743         1950 :     while ((x = bms_next_member(attrs, x)) >= 0)
     744              :     {
     745              :         int         j;
     746              : 
     747         1200 :         stats[i] = NULL;
     748         3424 :         for (j = 0; j < nvacatts; j++)
     749              :         {
     750         3406 :             if (x == vacatts[j]->tupattnum)
     751              :             {
     752         1182 :                 stats[i] = vacatts[j];
     753         1182 :                 break;
     754              :             }
     755              :         }
     756              : 
     757         1200 :         if (!stats[i])
     758              :         {
     759              :             /*
     760              :              * Looks like stats were not gathered for one of the columns
     761              :              * required. We'll be unable to build the extended stats without
     762              :              * this column.
     763              :              */
     764           18 :             pfree(stats);
     765           18 :             return NULL;
     766              :         }
     767              : 
     768         1182 :         i++;
     769              :     }
     770              : 
     771              :     /* also add info for expressions */
     772         1396 :     foreach(lc, exprs)
     773              :     {
     774          648 :         Node       *expr = (Node *) lfirst(lc);
     775              : 
     776          648 :         stats[i] = examine_attribute(expr);
     777              : 
     778              :         /*
     779              :          * If the expression has been found as non-analyzable, give up.  We
     780              :          * will not be able to build extended stats with it.
     781              :          */
     782          648 :         if (stats[i] == NULL)
     783              :         {
     784            2 :             pfree(stats);
     785            2 :             return NULL;
     786              :         }
     787              : 
     788              :         /*
     789              :          * XXX We need tuple descriptor later, and we just grab it from
     790              :          * stats[0]->tupDesc (see e.g. statext_mcv_build). But as coded
     791              :          * examine_attribute does not set that, so just grab it from the first
     792              :          * vacatts element.
     793              :          */
     794          646 :         stats[i]->tupDesc = vacatts[0]->tupDesc;
     795              : 
     796          646 :         i++;
     797              :     }
     798              : 
     799          748 :     return stats;
     800              : }
     801              : 
     802              : /*
     803              :  * statext_store
     804              :  *  Serializes the statistics and stores them into the pg_statistic_ext_data
     805              :  *  tuple.
                      :  *
                      :  * 'statOid' and 'inh' are stored in the stxoid and stxdinherit columns and
                      :  * are also used to remove any previously stored row.  Each of 'ndistinct',
                      :  * 'dependencies' and 'mcv' may be NULL, and 'exprs' may be (Datum) 0; the
                      :  * corresponding column is then stored as NULL.  A column is also stored as
                      :  * NULL when its serialization routine returns NULL.  'stats' is only
                      :  * passed on to the MCV serialization routine.
     806              :  */
     807              : static void
     808          370 : statext_store(Oid statOid, bool inh,
     809              :               MVNDistinct *ndistinct, MVDependencies *dependencies,
     810              :               MCVList *mcv, Datum exprs, VacAttrStats **stats)
     811              : {
     812              :     Relation    pg_stextdata;
     813              :     HeapTuple   stup;
     814              :     Datum       values[Natts_pg_statistic_ext_data];
     815              :     bool        nulls[Natts_pg_statistic_ext_data];
     816              : 
     817          370 :     pg_stextdata = table_open(StatisticExtDataRelationId, RowExclusiveLock);
     818              :     /* Start with every column NULL; the ones we have data for are filled in below. */
     819          370 :     memset(nulls, true, sizeof(nulls));
     820          370 :     memset(values, 0, sizeof(values));
     821              : 
     822              :     /* basic info */
     823          370 :     values[Anum_pg_statistic_ext_data_stxoid - 1] = ObjectIdGetDatum(statOid);
     824          370 :     nulls[Anum_pg_statistic_ext_data_stxoid - 1] = false;
     825              : 
     826          370 :     values[Anum_pg_statistic_ext_data_stxdinherit - 1] = BoolGetDatum(inh);
     827          370 :     nulls[Anum_pg_statistic_ext_data_stxdinherit - 1] = false;
     828              : 
     829              :     /*
     830              :      * Construct a new pg_statistic_ext_data tuple, replacing the calculated
     831              :      * stats.
     832              :      */
     833          370 :     if (ndistinct != NULL)
     834              :     {
     835          181 :         bytea      *data = statext_ndistinct_serialize(ndistinct);
     836              : 
     837          181 :         nulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = (data == NULL);
     838          181 :         values[Anum_pg_statistic_ext_data_stxdndistinct - 1] = PointerGetDatum(data);
     839              :     }
     840              : 
     841          370 :     if (dependencies != NULL)
     842              :     {
     843          134 :         bytea      *data = statext_dependencies_serialize(dependencies);
     844              : 
     845          134 :         nulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = (data == NULL);
     846          134 :         values[Anum_pg_statistic_ext_data_stxddependencies - 1] = PointerGetDatum(data);
     847              :     }
     848          370 :     if (mcv != NULL)
     849              :     {
     850          201 :         bytea      *data = statext_mcv_serialize(mcv, stats);
     851              : 
     852          201 :         nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = (data == NULL);
     853          201 :         values[Anum_pg_statistic_ext_data_stxdmcv - 1] = PointerGetDatum(data);
     854              :     }
     855          370 :     if (exprs != (Datum) 0)
     856              :     {
     857          179 :         nulls[Anum_pg_statistic_ext_data_stxdexpr - 1] = false;
     858          179 :         values[Anum_pg_statistic_ext_data_stxdexpr - 1] = exprs;
     859              :     }
     860              : 
     861              :     /*
     862              :      * Delete the old tuple if it exists, and insert a new one. It's easier
     863              :      * than trying to update or insert, based on various conditions.
     864              :      */
     865          370 :     RemoveStatisticsDataById(statOid, inh);
     866              : 
     867              :     /* form and insert a new tuple */
     868          370 :     stup = heap_form_tuple(RelationGetDescr(pg_stextdata), values, nulls);
     869          370 :     CatalogTupleInsert(pg_stextdata, stup);
     870              : 
     871          370 :     heap_freetuple(stup);
     872              : 
     873          370 :     table_close(pg_stextdata, RowExclusiveLock);
     874          370 : }
     875              : 
     876              : /*
                      :  * multi_sort_init
                      :  *    Initialize multi-dimensional sort.
                      :  *
                      :  * Allocates a zeroed MultiSortSupportData with room for 'ndims' trailing
                      :  * SortSupportData entries and records 'ndims' in it.  Callers must ask for
                      :  * at least two dimensions (asserted).  The per-dimension entries are left
                      :  * zeroed here.
                      :  */
     877              : MultiSortSupport
     878         1482 : multi_sort_init(int ndims)
     879              : {
     880              :     MultiSortSupport mss;
     881              : 
     882              :     Assert(ndims >= 2);
     883              :     /* header up to the ssup[] member, plus ndims array elements */
     884         1482 :     mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
     885         1482 :                                      + sizeof(SortSupportData) * ndims);
     886              : 
     887         1482 :     mss->ndims = ndims;
     888              : 
     889         1482 :     return mss;
     890              : }
     891              : 
     892              : /*
     893              :  * Prepare sort support info using the given sort operator and collation
     894              :  * at the position 'sortdim'
                      :  *
                      :  * The entry is set up to use the current memory context and to sort NULLs
                      :  * last, and is then completed from the ordering operator 'oper'.
                      :  * 'sortdim' indexes mss->ssup directly and is not range-checked here.
     895              :  */
     896              : void
     897         3540 : multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
     898              :                          Oid oper, Oid collation)
     899              : {
     900         3540 :     SortSupport ssup = &mss->ssup[sortdim];
     901              : 
     902         3540 :     ssup->ssup_cxt = CurrentMemoryContext;
     903         3540 :     ssup->ssup_collation = collation;
     904         3540 :     ssup->ssup_nulls_first = false;
     905              : 
     906         3540 :     PrepareSortSupportFromOrderingOp(oper, ssup);
     907         3540 : }
     908              : 
     909              : /*
                      :  * multi_sort_compare
                      :  *    Compare all the dimensions in the selected order.
                      :  *
                      :  * 'a' and 'b' point to SortItem structs and 'arg' is the MultiSortSupport
                      :  * describing the dimensions.  Dimensions 0 .. ndims-1 are compared in turn
                      :  * and the first nonzero comparator result is returned; 0 means the items
                      :  * are equal in every dimension.
                      :  */
     910              : int
     911     14787190 : multi_sort_compare(const void *a, const void *b, void *arg)
     912              : {
     913     14787190 :     MultiSortSupport mss = (MultiSortSupport) arg;
     914     14787190 :     const SortItem *ia = a;
     915     14787190 :     const SortItem *ib = b;
     916              :     int         i;
     917              : 
     918     26184154 :     for (i = 0; i < mss->ndims; i++)
     919              :     {
     920              :         int         compare;
     921              : 
     922     23414890 :         compare = ApplySortComparator(ia->values[i], ia->isnull[i],
     923     23414890 :                                       ib->values[i], ib->isnull[i],
     924     23414890 :                                       &mss->ssup[i]);
     925              : 
     926     23414890 :         if (compare != 0)
     927     12017926 :             return compare;
     928              :     }
     929              : 
     930              :     /* equal by default */
     931      2769264 :     return 0;
     932              : }
     933              : 
     934              : /*
                      :  * multi_sort_compare_dim
                      :  *    Compare selected dimension.
                      :  *
                      :  * Applies the sort comparator of dimension 'dim' to the values and null
                      :  * flags that items 'a' and 'b' hold at that position, and returns its
                      :  * result.  'dim' is not range-checked here.
                      :  */
     935              : int
     936       982576 : multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b,
     937              :                        MultiSortSupport mss)
     938              : {
     939      1965152 :     return ApplySortComparator(a->values[dim], a->isnull[dim],
     940       982576 :                                b->values[dim], b->isnull[dim],
     941       982576 :                                &mss->ssup[dim]);
     942              : }
     943              : /*
                      :  * multi_sort_compare_dims
                      :  *    Compare a contiguous range of dimensions.
                      :  *
                      :  * Dimensions 'start' through 'end' (both inclusive) of items 'a' and 'b'
                      :  * are compared in ascending order.  The first nonzero comparator result is
                      :  * returned; 0 means the items are equal in the whole range (and is also
                      :  * what an empty range, start > end, yields).  The dimension indexes are
                      :  * not range-checked here.
                      :  */
     944              : int
     945      1005176 : multi_sort_compare_dims(int start, int end,
     946              :                         const SortItem *a, const SortItem *b,
     947              :                         MultiSortSupport mss)
     948              : {
     949              :     int         dim;
     950              : 
     951      2272140 :     for (dim = start; dim <= end; dim++)
     952              :     {
     953      1289564 :         int         r = ApplySortComparator(a->values[dim], a->isnull[dim],
     954      1289564 :                                             b->values[dim], b->isnull[dim],
     955      1289564 :                                             &mss->ssup[dim]);
     956              : 
     957      1289564 :         if (r != 0)
     958        22600 :             return r;
     959              :     }
     960              : 
     961       982576 :     return 0;
     962              : }
     963              : /*
                      :  * compare_scalars_simple
                      :  *    Comparator taking pointers to Datums.
                      :  *
                      :  * 'a' and 'b' are dereferenced as pointers to Datum and 'arg' is used as
                      :  * the SortSupport; the actual comparison is done by
                      :  * compare_datums_simple().
                      :  */
     964              : int
     965       144178 : compare_scalars_simple(const void *a, const void *b, void *arg)
     966              : {
     967       144178 :     return compare_datums_simple(*(const Datum *) a,
     968              :                                  *(const Datum *) b,
     969              :                                  (SortSupport) arg);
     970              : }
     971              : /*
                      :  * compare_datums_simple
                      :  *    Compare two Datums with the given sort support.
                      :  *
                      :  * Both values are passed to the comparator as non-NULL.
                      :  */
     972              : int
     973       163423 : compare_datums_simple(Datum a, Datum b, SortSupport ssup)
     974              : {
     975       163423 :     return ApplySortComparator(a, false, b, false, ssup);
     976              : }
     977              : 
     978              : /*
     979              :  * build_attnums_array
     980              :  *      Transforms a bitmap into an array of AttrNumber values.
     981              :  *
     982              :  * This is used for extended statistics only, so all the attributes must be
     983              :  * user-defined. That means offsetting by FirstLowInvalidHeapAttributeNumber
     984              :  * is not necessary here (and when querying the bitmap).
                      :  *
                      :  * Each member 'j' of 'attrs' produces the attribute number (j - nexprs), in
                      :  * ascending member order.  The result is a newly allocated array with one
                      :  * element per bitmap member; when 'numattrs' is not NULL, that count is
                      :  * also stored in *numattrs.  The validity of the resulting attribute
                      :  * numbers is checked by assertions only.
                      :  *
                      :  * NOTE(review): the subtraction suggests the bitmap members were stored
                      :  * shifted up by 'nexprs' -- confirm against the code building the bitmap.
     985              :  */
     986              : AttrNumber *
     987            0 : build_attnums_array(Bitmapset *attrs, int nexprs, int *numattrs)
     988              : {
     989              :     int         i,
     990              :                 j;
     991              :     AttrNumber *attnums;
     992            0 :     int         num = bms_num_members(attrs);
     993              : 
     994            0 :     if (numattrs)
     995            0 :         *numattrs = num;
     996              : 
     997              :     /* build attnums from the bitmapset */
     998            0 :     attnums = palloc_array(AttrNumber, num);
     999            0 :     i = 0;
    1000            0 :     j = -1;
    1001            0 :     while ((j = bms_next_member(attrs, j)) >= 0)
    1002              :     {
    1003            0 :         int         attnum = (j - nexprs);
    1004              : 
    1005              :         /*
    1006              :          * Make sure the bitmap contains only user-defined attributes. As
    1007              :          * bitmaps can't contain negative values, this can be violated in two
    1008              :          * ways. Firstly, the bitmap might contain 0 as a member, and secondly
    1009              :          * the integer value might be larger than MaxAttrNumber.
    1010              :          */
    1011              :         Assert(AttributeNumberIsValid(attnum));
    1012              :         Assert(attnum <= MaxAttrNumber);
    1013              :         Assert(attnum >= (-nexprs));
    1014              : 
    1015            0 :         attnums[i++] = (AttrNumber) attnum;
    1016              : 
    1017              :         /* protect against overflows */
    1018              :         Assert(i <= num);
    1019              :     }
    1020              : 
    1021            0 :     return attnums;
    1022              : }
    1023              : 
    1024              : /*
    1025              :  * build_sorted_items
    1026              :  *      build a sorted array of SortItem with values from rows
    1027              :  *
    1028              :  * Note: The items, their values and their null flags are allocated in a
    1029              :  * single chunk, so that the caller can simply pfree the return value to
                      :  * release all of it.  The local typlen cache and any detoasted copies of
                      :  * values are separate allocations which are not released here.
                      :  *
                      :  * For every sample row in 'data', the values of the 'numattrs' attributes
                      :  * listed in 'attnums' are collected; each attnum is located by searching
                      :  * data->attnums and must be present there (asserted).  A row is skipped
                      :  * entirely as soon as one of its non-null varlena values is found to be
                      :  * wider than WIDTH_THRESHOLD; other non-null varlena values are detoasted.
                      :  *
                      :  * The number of rows kept is returned in *nitems.  If no row is kept, the
                      :  * chunk is freed and NULL is returned.  Otherwise the kept items are
                      :  * sorted with multi_sort_compare using 'mss' and the array is returned.
    1030              :  */
    1031              : SortItem *
    1032          953 : build_sorted_items(StatsBuildData *data, int *nitems,
    1033              :                    MultiSortSupport mss,
    1034              :                    int numattrs, AttrNumber *attnums)
    1035              : {
    1036              :     int         i,
    1037              :                 j,
    1038              :                 nrows;
    1039          953 :     int         nvalues = data->numrows * numattrs;
    1040              :     Size        len;
    1041              :     SortItem   *items;
    1042              :     Datum      *values;
    1043              :     bool       *isnull;
    1044              :     char       *ptr;
    1045              :     int        *typlen;
    1046              : 
    1047              :     /* Compute the total amount of memory we need (both items and values). */
    1048          953 :     len = MAXALIGN(data->numrows * sizeof(SortItem)) +
    1049          953 :         nvalues * (sizeof(Datum) + sizeof(bool));
    1050              : 
    1051              :     /* Allocate the memory and split it into the pieces. */
    1052          953 :     ptr = palloc0(len);
    1053              : 
    1054              :     /* items to sort */
    1055          953 :     items = (SortItem *) ptr;
    1056              :     /* MAXALIGN ensures that the following Datums are suitably aligned */
    1057          953 :     ptr += MAXALIGN(data->numrows * sizeof(SortItem));
    1058              : 
    1059              :     /* values and null flags */
    1060          953 :     values = (Datum *) ptr;
    1061          953 :     ptr += nvalues * sizeof(Datum);
    1062              : 
    1063          953 :     isnull = (bool *) ptr;
    1064          953 :     ptr += nvalues * sizeof(bool);
    1065              : 
    1066              :     /* make sure we consumed the whole buffer exactly */
    1067              :     Assert((ptr - (char *) items) == len);
    1068              : 
    1069              :     /* fix the pointers to Datum and bool arrays */
    1070          953 :     nrows = 0;
    1071      1330209 :     for (i = 0; i < data->numrows; i++)
    1072              :     {
    1073      1329256 :         items[nrows].values = &values[nrows * numattrs];
    1074      1329256 :         items[nrows].isnull = &isnull[nrows * numattrs];
    1075              : 
    1076      1329256 :         nrows++;
    1077              :     }
    1078              : 
    1079              :     /* build a local cache of typlen for all attributes */
    1080          953 :     typlen = palloc_array(int, data->nattnums);
    1081         3759 :     for (i = 0; i < data->nattnums; i++)
    1082         2806 :         typlen[i] = get_typlen(data->stats[i]->attrtypid);
    1083              :     /* 'i' walks the sample rows, 'nrows' counts the items actually kept */
    1084          953 :     nrows = 0;
    1085      1330209 :     for (i = 0; i < data->numrows; i++)
    1086              :     {
    1087      1329256 :         bool        toowide = false;
    1088              : 
    1089              :         /* load the values/null flags from sample rows */
    1090      4573704 :         for (j = 0; j < numattrs; j++)
    1091              :         {
    1092              :             Datum       value;
    1093              :             bool        isnull; /* shadows the outer isnull array */
    1094              :             int         attlen;
    1095      3244448 :             AttrNumber  attnum = attnums[j];
    1096              : 
    1097              :             int         idx;
    1098              : 
    1099              :             /* match attnum to the pre-calculated data */
    1100      6395528 :             for (idx = 0; idx < data->nattnums; idx++)
    1101              :             {
    1102      6395528 :                 if (attnum == data->attnums[idx])
    1103      3244448 :                     break;
    1104              :             }
    1105              : 
    1106              :             Assert(idx < data->nattnums);
    1107              : 
    1108      3244448 :             value = data->values[idx][i];
    1109      3244448 :             isnull = data->nulls[idx][i];
    1110      3244448 :             attlen = typlen[idx];
    1111              : 
    1112              :             /*
    1113              :              * If this is a varlena value, check if it's too wide and if yes
    1114              :              * then skip the whole item. Otherwise detoast the value.
    1115              :              *
    1116              :              * XXX It may happen that we've already detoasted some preceding
    1117              :              * values for the current item. We don't bother to cleanup those
    1118              :              * on the assumption that those are small (below WIDTH_THRESHOLD)
    1119              :              * and will be discarded at the end of analyze.
    1120              :              */
    1121      3244448 :             if ((!isnull) && (attlen == -1))
    1122              :             {
    1123       991216 :                 if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
    1124              :                 {
    1125            0 :                     toowide = true;
    1126            0 :                     break;
    1127              :                 }
    1128              : 
    1129       991216 :                 value = PointerGetDatum(PG_DETOAST_DATUM(value));
    1130              :             }
    1131              : 
    1132      3244448 :             items[nrows].values[j] = value;
    1133      3244448 :             items[nrows].isnull[j] = isnull;
    1134              :         }
    1135              :         /* a skipped row leaves nrows unchanged, so the next row reuses this slot */
    1136      1329256 :         if (toowide)
    1137            0 :             continue;
    1138              : 
    1139      1329256 :         nrows++;
    1140              :     }
    1141              : 
    1142              :     /* store the actual number of items (ignoring the too-wide ones) */
    1143          953 :     *nitems = nrows;
    1144              : 
    1145              :     /* all items were too wide */
    1146          953 :     if (nrows == 0)
    1147              :     {
    1148              :         /* everything is allocated as a single chunk */
    1149            0 :         pfree(items);
    1150            0 :         return NULL;
    1151              :     }
    1152              : 
    1153              :     /* do the sort, using the multi-sort */
    1154          953 :     qsort_interruptible(items, nrows, sizeof(SortItem),
    1155              :                         multi_sort_compare, mss);
    1156              : 
    1157          953 :     return items;
    1158              : }
    1159              : 
    1160              : /*
    1161              :  * has_stats_of_kind
    1162              :  *      Check whether the list contains a statistics object of the given kind
    1163              :  */
    1164              : bool
    1165         4210 : has_stats_of_kind(List *stats, char requiredkind)
    1166              : {
    1167              :     ListCell   *l;
    1168              : 
    1169         6945 :     foreach(l, stats)
    1170              :     {
    1171         4855 :         StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
    1172              : 
    1173         4855 :         if (stat->kind == requiredkind)
    1174         2120 :             return true;
    1175              :     }
    1176              : 
    1177         2090 :     return false;
    1178              : }
    1179              : 
    1180              : /*
    1181              :  * stat_find_expression
    1182              :  *      Search for an expression in a statistics object's list of expressions.
    1183              :  *
    1184              :  * Returns the zero-based index of the first list entry that equal() matches,
    1185              :  * or -1 if not found.
    1186              :  */
    1187              : static int
    1188          535 : stat_find_expression(StatisticExtInfo *stat, Node *expr)
    1189              : {
    1190              :     ListCell   *lc;
    1191              :     int         idx;
    1192              : 
    1193          535 :     idx = 0;
    1194          995 :     foreach(lc, stat->exprs)
    1195              :     {
    1196          910 :         Node       *stat_expr = (Node *) lfirst(lc);
    1197              : 
    1198          910 :         if (equal(stat_expr, expr))
    1199          450 :             return idx;
    1200          460 :         idx++;
    1201              :     }
    1202              : 
    1203              :     /* Expression not found */
    1204           85 :     return -1;
    1205              : }
    1206              : 
    1207              : /*
    1208              :  * stat_covers_expressions
    1209              :  *      Test whether a statistics object covers all expressions in a list.
    1210              :  *
    1211              :  * Returns true if all expressions are covered.  If expr_idxs is non-NULL, the
    1212              :  * indexes found are added to it (even if we end up returning false).
    1213              :  */
    1214              : static bool
    1215         2635 : stat_covers_expressions(StatisticExtInfo *stat, List *exprs,
    1216              :                         Bitmapset **expr_idxs)
    1217              : {
    1218              :     ListCell   *lc;
    1219              : 
    1220         3085 :     foreach(lc, exprs)
    1221              :     {
    1222          535 :         Node       *expr = (Node *) lfirst(lc);
    1223              :         int         expr_idx;
    1224              : 
    1225          535 :         expr_idx = stat_find_expression(stat, expr);
    1226          535 :         if (expr_idx == -1)
    1227           85 :             return false;
    1228              : 
    1229          450 :         if (expr_idxs != NULL)
    1230          225 :             *expr_idxs = bms_add_member(*expr_idxs, expr_idx);
    1231              :     }
    1232              : 
    1233              :     /* If we reach here, all expressions are covered */
    1234         2550 :     return true;
    1235              : }
    1236              : 
    1237              : /*
    1238              :  * choose_best_statistics
    1239              :  *      Look for and return statistics with the specified 'requiredkind' whose
    1240              :  *      keys match at least two of the given attnums/expressions.  Return NULL
    1241              :  *      if there's no match.
    1242              :  *
    1243              :  * The current selection criterion is very simple - we choose the statistics
    1244              :  * object referencing the most attributes in covered (and still unestimated)
    1245              :  * clauses, breaking ties in favor of objects with fewer keys overall.
    1246              :  *
    1247              :  * The clause_attnums is an array of bitmaps, storing attnums for individual
    1248              :  * clauses. A NULL element means the clause is either incompatible or already
    1249              :  * estimated.  Objects whose inherit flag differs from 'inh' are skipped.
    1250              :  *
    1251              :  * XXX If multiple statistics objects tie on both criteria, then which object
    1252              :  * is chosen depends on the order that they appear in the stats list. Perhaps
    1253              :  * further tiebreakers are needed.
    1254              :  */
    1255              : StatisticExtInfo *
    1256         1145 : choose_best_statistics(List *stats, char requiredkind, bool inh,
    1257              :                        Bitmapset **clause_attnums, List **clause_exprs,
    1258              :                        int nclauses)
    1259              : {
    1260              :     ListCell   *lc;
    1261         1145 :     StatisticExtInfo *best_match = NULL;
    1262         1145 :     int         best_num_matched = 2;   /* goal #1: maximize */
    1263         1145 :     int         best_match_keys = (STATS_MAX_DIMENSIONS + 1);   /* goal #2: minimize */
    1264              : 
    1265         3015 :     foreach(lc, stats)
    1266              :     {
    1267              :         int         i;
    1268         1870 :         StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
    1269         1870 :         Bitmapset  *matched_attnums = NULL;
    1270         1870 :         Bitmapset  *matched_exprs = NULL;
    1271              :         int         num_matched;
    1272              :         int         numkeys;
    1273              : 
    1274              :         /* skip statistics that are not of the correct type */
    1275         1870 :         if (info->kind != requiredkind)
    1276          410 :             continue;
    1277              : 
    1278              :         /* skip statistics with mismatching inheritance flag */
    1279         1460 :         if (info->inherit != inh)
    1280           20 :             continue;
    1281              : 
    1282              :         /*
    1283              :          * Collect attributes and expressions in remaining (unestimated)
    1284              :          * clauses fully covered by this statistic object.
    1285              :          *
    1286              :          * We know already estimated clauses have both clause_attnums and
    1287              :          * clause_exprs set to NULL. We leave the pointers NULL if already
    1288              :          * estimated, or we reset them to NULL after estimating the clause.
    1289              :          */
    1290         4890 :         for (i = 0; i < nclauses; i++)
    1291              :         {
    1292         3450 :             Bitmapset  *expr_idxs = NULL;
    1293              : 
    1294              :             /* ignore incompatible/estimated clauses */
    1295         3450 :             if (!clause_attnums[i] && !clause_exprs[i])
    1296         2090 :                 continue;
    1297              : 
    1298              :             /* ignore clauses that are not covered by this object */
    1299         1715 :             if (!bms_is_subset(clause_attnums[i], info->keys) ||
    1300         1440 :                 !stat_covers_expressions(info, clause_exprs[i], &expr_idxs))
    1301          355 :                 continue;
    1302              : 
    1303              :             /* record attnums and indexes of expressions covered */
    1304         1360 :             matched_attnums = bms_add_members(matched_attnums, clause_attnums[i]);
    1305         1360 :             matched_exprs = bms_add_members(matched_exprs, expr_idxs);
    1306              :         }
    1307              : 
    1308         1440 :         num_matched = bms_num_members(matched_attnums) + bms_num_members(matched_exprs);
    1309              : 
    1310         1440 :         bms_free(matched_attnums);
    1311         1440 :         bms_free(matched_exprs);
    1312              : 
    1313              :         /*
    1314              :          * save the actual number of keys in the stats so that we can choose
    1315              :          * the narrowest stats with the most matching keys.
    1316              :          */
    1317         1440 :         numkeys = bms_num_members(info->keys) + list_length(info->exprs);
    1318              : 
    1319              :         /*
    1320              :          * Use this object when it increases the number of matched attributes
    1321              :          * and expressions or when it matches the same number of attributes
    1322              :          * and expressions but these stats have fewer keys than any previous
    1323              :          * match.
    1324              :          */
    1325         1440 :         if (num_matched > best_num_matched ||
    1326          355 :             (num_matched == best_num_matched && numkeys < best_match_keys))
    1327              :         {
    1328          515 :             best_match = info;
    1329          515 :             best_num_matched = num_matched;
    1330          515 :             best_match_keys = numkeys;
    1331              :         }
    1332              :     }
    1333              : 
    1334         1145 :     return best_match;
    1335              : }
    1336              : 
    1337              : /*
    1338              :  * statext_is_compatible_clause_internal
    1339              :  *      Determines if the clause is compatible with MCV lists.
    1340              :  *
    1341              :  * To be compatible, the given clause must be a combination of supported
    1342              :  * clauses built from Vars or sub-expressions (where a sub-expression is
    1343              :  * something that exactly matches an expression found in statistics objects).
    1344              :  * This function recursively examines the clause and extracts any
    1345              :  * sub-expressions that will need to be matched against statistics.
    1346              :  *
    1347              :  * Currently, we only support the following types of clauses:
    1348              :  *
    1349              :  * (a) OpExprs of the form (Var/Expr op Const), or (Const op Var/Expr), where
    1350              :  * the op is one of ("=", "<>", "<", "<=", ">", ">=")
    1351              :  *
    1352              :  * (b) (Var/Expr IS [NOT] NULL)
    1353              :  *
    1354              :  * (c) combinations using AND/OR/NOT
    1355              :  *
    1356              :  * (d) ScalarArrayOpExprs of the form (Var/Expr op ANY (Const)) or
    1357              :  * (Var/Expr op ALL (Const))
    1358              :  *
    1359              :  * In the future, the range of supported clauses may be expanded to more
    1360              :  * complex cases, for example (Var op Var).
    1361              :  *
    1362              :  * Arguments:
    1363              :  * clause: (sub)clause to be inspected (bare clause, not a RestrictInfo)
    1364              :  * relid: rel that all Vars in clause must belong to
    1365              :  * *attnums: input/output parameter collecting attribute numbers of all
    1366              :  *      mentioned Vars.  Note that we do not offset the attribute numbers,
    1367              :  *      so we can't cope with system columns.
    1368              :  * *exprs: input/output parameter collecting primitive subclauses within
    1369              :  *      the clause tree
    1370              :  * *leakproof: input/output parameter recording the leakproofness of the
    1371              :  *      clause tree.  This should be true initially, and will be set to false
    1372              :  *      if any OpExpr/ScalarArrayOpExpr operator function is not leakproof.
    1373              :  *
    1374              :  * Returns false if there is something we definitively can't handle.
    1375              :  * On true return, we can proceed to match the *exprs against statistics.
    1376              :  */
    1377              : static bool
    1378         2905 : statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
    1379              :                                       Index relid, Bitmapset **attnums,
    1380              :                                       List **exprs, bool *leakproof)
    1381              : {
    1382              :     /* Look inside any binary-compatible relabeling (as in examine_variable) */
    1383         2905 :     if (IsA(clause, RelabelType))
    1384            0 :         clause = (Node *) ((RelabelType *) clause)->arg;
    1385              : 
    1386              :     /* plain Var references (boolean Vars or recursive checks) */
    1387         2905 :     if (IsA(clause, Var))
    1388              :     {
    1389         1290 :         Var        *var = (Var *) clause;
    1390              : 
    1391              :         /* Ensure var is from the correct relation */
    1392         1290 :         if (var->varno != relid)
    1393            0 :             return false;
    1394              : 
    1395              :         /* we also better ensure the Var is from the current level */
    1396         1290 :         if (var->varlevelsup > 0)
    1397            0 :             return false;
    1398              : 
    1399              :         /*
    1400              :          * Also reject system attributes and whole-row Vars (we don't allow
    1401              :          * stats on those).
    1402              :          */
    1403         1290 :         if (!AttrNumberIsForUserDefinedAttr(var->varattno))
    1404            0 :             return false;
    1405              : 
    1406              :         /* OK, record the attnum for later permissions checks. */
    1407         1290 :         *attnums = bms_add_member(*attnums, var->varattno);
    1408              : 
    1409         1290 :         return true;
    1410              :     }
    1411              : 
    1412              :     /* (Var/Expr op Const) or (Const op Var/Expr) */
    1413         1615 :     if (is_opclause(clause))
    1414              :     {
    1415         1180 :         OpExpr     *expr = (OpExpr *) clause;
    1416              :         Node       *clause_expr;
    1417              : 
    1418              :         /* Only expressions with two arguments are considered compatible. */
    1419         1180 :         if (list_length(expr->args) != 2)
    1420            0 :             return false;
    1421              : 
    1422              :         /* Check if the expression has the right shape */
    1423         1180 :         if (!examine_opclause_args(expr->args, &clause_expr, NULL, NULL))
    1424            0 :             return false;
    1425              : 
    1426              :         /*
    1427              :          * If it's not one of the supported operators ("=", "<", ">", etc.),
    1428              :          * just ignore the clause, as it's not compatible with MCV lists.
    1429              :          *
    1430              :          * This uses the function for estimating selectivity, not the operator
    1431              :          * directly (a bit awkward, but well ...).
    1432              :          */
    1433         1180 :         switch (get_oprrest(expr->opno))
    1434              :         {
    1435         1180 :             case F_EQSEL:
    1436              :             case F_NEQSEL:
    1437              :             case F_SCALARLTSEL:
    1438              :             case F_SCALARLESEL:
    1439              :             case F_SCALARGTSEL:
    1440              :             case F_SCALARGESEL:
    1441              :                 /* supported, will continue with inspection of the Var/Expr */
    1442         1180 :                 break;
    1443              : 
    1444            0 :             default:
    1445              :                 /* other estimators are considered unknown/unsupported */
    1446            0 :                 return false;
    1447              :         }
    1448              : 
    1449              :         /* Check if the operator is leakproof */
    1450         1180 :         if (*leakproof)
    1451         1170 :             *leakproof = get_func_leakproof(get_opcode(expr->opno));
    1452              : 
    1453              :         /* Check (Var op Const) or (Const op Var) clauses by recursing. */
    1454         1180 :         if (IsA(clause_expr, Var))
    1455          960 :             return statext_is_compatible_clause_internal(root, clause_expr,
    1456              :                                                          relid, attnums,
    1457              :                                                          exprs, leakproof);
    1458              : 
    1459              :         /* Otherwise we have (Expr op Const) or (Const op Expr). */
    1460          220 :         *exprs = lappend(*exprs, clause_expr);
    1461          220 :         return true;
    1462              :     }
    1463              : 
    1464              :     /* Var/Expr IN Array */
    1465          435 :     if (IsA(clause, ScalarArrayOpExpr))
    1466              :     {
    1467          240 :         ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
    1468              :         Node       *clause_expr;
    1469              :         bool        expronleft;
    1470              : 
    1471              :         /* Only expressions with two arguments are considered compatible. */
    1472          240 :         if (list_length(expr->args) != 2)
    1473            0 :             return false;
    1474              : 
    1475              :         /* Check if the expression has the right shape (one Var/Expr, one Const) */
    1476          240 :         if (!examine_opclause_args(expr->args, &clause_expr, NULL, &expronleft))
    1477            0 :             return false;
    1478              : 
    1479              :         /* We only support Var/Expr on left, Const on right */
    1480          240 :         if (!expronleft)
    1481            5 :             return false;
    1482              : 
    1483              :         /*
    1484              :          * If it's not one of the supported operators ("=", "<", ">", etc.),
    1485              :          * just ignore the clause, as it's not compatible with MCV lists.
    1486              :          *
    1487              :          * This uses the function for estimating selectivity, not the operator
    1488              :          * directly (a bit awkward, but well ...).
    1489              :          */
    1490          235 :         switch (get_oprrest(expr->opno))
    1491              :         {
    1492          235 :             case F_EQSEL:
    1493              :             case F_NEQSEL:
    1494              :             case F_SCALARLTSEL:
    1495              :             case F_SCALARLESEL:
    1496              :             case F_SCALARGTSEL:
    1497              :             case F_SCALARGESEL:
    1498              :                 /* supported, will continue with inspection of the Var/Expr */
    1499          235 :                 break;
    1500              : 
    1501            0 :             default:
    1502              :                 /* other estimators are considered unknown/unsupported */
    1503            0 :                 return false;
    1504              :         }
    1505              : 
    1506              :         /* Check if the operator is leakproof */
    1507          235 :         if (*leakproof)
    1508          235 :             *leakproof = get_func_leakproof(get_opcode(expr->opno));
    1509              : 
    1510              :         /* Check Var IN Array clauses by recursing. */
    1511          235 :         if (IsA(clause_expr, Var))
    1512          190 :             return statext_is_compatible_clause_internal(root, clause_expr,
    1513              :                                                          relid, attnums,
    1514              :                                                          exprs, leakproof);
    1515              : 
    1516              :         /* Otherwise we have Expr IN Array. */
    1517           45 :         *exprs = lappend(*exprs, clause_expr);
    1518           45 :         return true;
    1519              :     }
    1520              : 
    1521              :     /* AND/OR/NOT clause */
    1522          390 :     if (is_andclause(clause) ||
    1523          345 :         is_orclause(clause) ||
    1524          150 :         is_notclause(clause))
    1525              :     {
    1526              :         /*
    1527              :          * AND/OR/NOT-clauses are supported if all sub-clauses are supported
    1528              :          *
    1529              :          * Perhaps we could improve this by handling mixed cases, when some of
    1530              :          * the clauses are supported and some are not. Selectivity for the
    1531              :          * supported subclauses would be computed using extended statistics,
    1532              :          * and the remaining clauses would be estimated using the traditional
    1533              :          * algorithm (product of selectivities).
    1534              :          *
    1535              :          * It however seems overly complex, and in a way we already do that
    1536              :          * because if we reject the whole clause as unsupported here, it will
    1537              :          * be eventually passed to clauselist_selectivity() which does exactly
    1538              :          * this (split into supported/unsupported clauses etc).
    1539              :          */
    1540           70 :         BoolExpr   *expr = (BoolExpr *) clause;
    1541              :         ListCell   *lc;
    1542              : 
    1543          185 :         foreach(lc, expr->args)
    1544              :         {
    1545              :             /*
    1546              :              * If we find an incompatible clause in the arguments, treat the
    1547              :              * whole clause as incompatible.
    1548              :              */
    1549          115 :             if (!statext_is_compatible_clause_internal(root,
    1550          115 :                                                        (Node *) lfirst(lc),
    1551              :                                                        relid, attnums, exprs,
    1552              :                                                        leakproof))
    1553            0 :                 return false;
    1554              :         }
    1555              : 
    1556           70 :         return true;
    1557              :     }
    1558              : 
    1559              :     /* Var/Expr IS NULL */
    1560          125 :     if (IsA(clause, NullTest))
    1561              :     {
    1562          120 :         NullTest   *nt = (NullTest *) clause;
    1563              : 
    1564              :         /* Check Var IS NULL clauses by recursing. */
    1565          120 :         if (IsA(nt->arg, Var))
    1566           75 :             return statext_is_compatible_clause_internal(root,
    1567           75 :                                                          (Node *) (nt->arg),
    1568              :                                                          relid, attnums,
    1569              :                                                          exprs, leakproof);
    1570              : 
    1571              :         /* Otherwise we have Expr IS NULL. */
    1572           45 :         *exprs = lappend(*exprs, nt->arg);
    1573           45 :         return true;
    1574              :     }
    1575              : 
    1576              :     /*
    1577              :      * Treat any other expressions as bare expressions to be matched against
    1578              :      * expressions in statistics objects.
    1579              :      */
    1580            5 :     *exprs = lappend(*exprs, clause);
    1581            5 :     return true;
    1582              : }
    1583              : 
    1584              : /*
    1585              :  * statext_is_compatible_clause
    1586              :  *      Determines if the clause is compatible with MCV lists.
    1587              :  *
    1588              :  * See statext_is_compatible_clause_internal, above, for the basic rules.
    1589              :  * This layer deals with RestrictInfo superstructure and applies permissions
    1590              :  * checks to verify that it's okay to examine all mentioned Vars.
    1591              :  *
    1592              :  * Arguments:
    1593              :  * clause: clause to be inspected (in RestrictInfo form)
    1594              :  * relid: rel that all Vars in clause must belong to
    1595              :  * *attnums: input/output parameter collecting attribute numbers of all
    1596              :  *      mentioned Vars.  Note that we do not offset the attribute numbers,
    1597              :  *      so we can't cope with system columns.
    1598              :  * *exprs: input/output parameter collecting primitive subclauses within
    1599              :  *      the clause tree
    1600              :  *
    1601              :  * Returns false if we definitively can't handle the clause or a permission
    1602              :  * check fails.  On true return, we can match the *exprs against statistics.
    1603              :  */
    1604              : static bool
    1605         1610 : statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
    1606              :                              Bitmapset **attnums, List **exprs)
    1607              : {
    1608              :     RestrictInfo *rinfo;
    1609              :     int         clause_relid;
    1610              :     bool        leakproof;
    1611              : 
    1612              :     /*
    1613              :      * Special-case handling for bare BoolExpr AND clauses, because the
    1614              :      * restrictinfo machinery doesn't build RestrictInfos on top of AND
    1615              :      * clauses.
    1616              :      */
    1617         1610 :     if (is_andclause(clause))
    1618              :     {
    1619           40 :         BoolExpr   *expr = (BoolExpr *) clause;
    1620              :         ListCell   *lc;
    1621              : 
    1622              :         /*
    1623              :          * Check that each sub-clause is compatible.  We expect these to be
    1624              :          * RestrictInfos.
    1625              :          */
    1626          135 :         foreach(lc, expr->args)
    1627              :         {
    1628           95 :             if (!statext_is_compatible_clause(root, (Node *) lfirst(lc),
    1629              :                                               relid, attnums, exprs))
    1630            0 :                 return false;
    1631              :         }
    1632              : 
    1633           40 :         return true;
    1634              :     }
    1635              : 
    1636              :     /* Otherwise it must be a RestrictInfo. */
    1637         1570 :     if (!IsA(clause, RestrictInfo))
    1638            0 :         return false;
    1639         1570 :     rinfo = (RestrictInfo *) clause;
    1640              : 
    1641              :     /* Pseudoconstants are not really interesting here. */
    1642         1570 :     if (rinfo->pseudoconstant)
    1643            5 :         return false;
    1644              : 
    1645              :     /* Clauses referencing other varnos are incompatible. */
    1646         1565 :     if (!bms_get_singleton_member(rinfo->clause_relids, &clause_relid) ||
    1647         1565 :         clause_relid != relid)
    1648            0 :         return false;
    1649              : 
    1650              :     /*
    1651              :      * Check the clause, determine what attributes it references, and whether
    1652              :      * it includes any non-leakproof operators.
    1653              :      */
    1654         1565 :     leakproof = true;
    1655         1565 :     if (!statext_is_compatible_clause_internal(root, (Node *) rinfo->clause,
    1656              :                                                relid, attnums, exprs,
    1657              :                                                &leakproof))
    1658            5 :         return false;
    1659              : 
    1660              :     /*
    1661              :      * If the clause includes any non-leakproof operators, check that the user
    1662              :      * has permission to read all required attributes, otherwise the operators
    1663              :      * might reveal values from the MCV list that the user doesn't have
    1664              :      * permission to see.  We require all rows to be selectable --- there must
    1665              :      * be no securityQuals from security barrier views or RLS policies.  See
    1666              :      * similar code in examine_variable(), examine_simple_variable(), and
    1667              :      * statistic_proc_security_check().
    1668              :      *
    1669              :      * Note that for an inheritance child, the permission checks are performed
    1670              :      * on the inheritance root parent, and whole-table select privilege on the
    1671              :      * parent doesn't guarantee that the user could read all columns of the
    1672              :      * child. Therefore we must check all referenced columns.
    1673              :      */
    1674         1560 :     if (!leakproof)
    1675              :     {
    1676          210 :         Bitmapset  *clause_attnums = NULL;
    1677          210 :         int         attnum = -1;
    1678              : 
    1679              :         /*
    1680              :          * We have to check per-column privileges.  *attnums has the attnums
    1681              :          * for individual Vars we saw, but there may also be Vars within
    1682              :          * subexpressions in *exprs.  We can use pull_varattnos() to extract
    1683              :          * those, but there's an impedance mismatch: attnums returned by
    1684              :          * pull_varattnos() are offset by FirstLowInvalidHeapAttributeNumber,
    1685              :          * while attnums within *attnums aren't.  Convert *attnums to the
    1686              :          * offset style so we can combine the results.
    1687              :          */
    1688          410 :         while ((attnum = bms_next_member(*attnums, attnum)) >= 0)
    1689              :         {
    1690          200 :             clause_attnums =
    1691          200 :                 bms_add_member(clause_attnums,
    1692              :                                attnum - FirstLowInvalidHeapAttributeNumber);
    1693              :         }
    1694              : 
    1695              :         /* Now merge attnums from *exprs into clause_attnums */
    1696          210 :         if (*exprs != NIL)
    1697           40 :             pull_varattnos((Node *) *exprs, relid, &clause_attnums);
    1698              : 
    1699              :         /* Must have permission to read all rows from these columns */
    1700          210 :         if (!all_rows_selectable(root, relid, clause_attnums))
    1701          190 :             return false;
    1702              :     }
    1703              : 
    1704              :     /* If we reach here, the clause is OK */
    1705         1370 :     return true;
    1706              : }
    1707              : 
    1708              : /*
    1709              :  * statext_mcv_clauselist_selectivity
    1710              :  *      Estimate clauses using the best multi-column statistics.
    1711              :  *
    1712              :  * Applies available extended (multi-column) statistics on a table. There may
    1713              :  * be multiple applicable statistics (with respect to the clauses), in which
    1714              :  * case we use a greedy approach. In each round we select the best statistic on
    1715              :  * a table (measured by the number of attributes extracted from the clauses
    1716              :  * and covered by it), and compute the selectivity for the supplied clauses.
    1717              :  * We repeat this process with the remaining clauses (if any), until none of
    1718              :  * the available statistics can be used.
    1719              :  *
    1720              :  * One of the main challenges with using MCV lists is how to extrapolate the
    1721              :  * estimate to the data not covered by the MCV list. To do that, we compute
    1722              :  * not only the "MCV selectivity" (selectivities for MCV items matching the
    1723              :  * supplied clauses), but also the following related selectivities:
    1724              :  *
    1725              :  * - simple selectivity:  Computed without extended statistics, i.e. as if the
    1726              :  * columns/clauses were independent.
    1727              :  *
    1728              :  * - base selectivity:  Similar to simple selectivity, but is computed using
    1729              :  * the extended statistic by adding up the base frequencies (that we compute
    1730              :  * and store for each MCV item) of matching MCV items.
    1731              :  *
    1732              :  * - total selectivity: Selectivity covered by the whole MCV list.
    1733              :  *
    1734              :  * These are passed to mcv_combine_selectivities() which combines them to
    1735              :  * produce a selectivity estimate that makes use of both per-column statistics
    1736              :  * and the multi-column MCV statistics.
    1737              :  *
    1738              :  * 'estimatedclauses' is an input/output parameter.  We set bits for the
    1739              :  * 0-based 'clauses' indexes we estimate for and also skip clause items that
    1740              :  * already have a bit set.
                      :  *
                      :  * 'is_or' tells us whether 'clauses' is an ORed list (the result starts at
                      :  * 0.0 and per-statistics results are combined as s1 + s2 - s1 * s2) or an
                      :  * implicitly-ANDed list (the result starts at 1.0 and per-statistics
                      :  * results are multiplied in).  If 'rel' has no MCV statistics at all, that
                      :  * starting value is returned and 'estimatedclauses' is left untouched.
                      :  *
                      :  * 'varRelid', 'jointype' and 'sjinfo' are not interpreted here; they are
                      :  * only passed through to the selectivity functions we call.
    1741              :  */
    1742              : static Selectivity
    1743         2170 : statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
    1744              :                                    JoinType jointype, SpecialJoinInfo *sjinfo,
    1745              :                                    RelOptInfo *rel, Bitmapset **estimatedclauses,
    1746              :                                    bool is_or)
    1747              : {
    1748              :     ListCell   *l;
    1749              :     Bitmapset **list_attnums;   /* attnums extracted from the clause */
    1750              :     List      **list_exprs;     /* expressions matched to any statistic */
    1751              :     int         listidx;
    1752         2170 :     Selectivity sel = (is_or) ? 0.0 : 1.0;
    1753         2170 :     RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
    1754              : 
    1755              :     /* check whether there are any stats that might be useful for us */
    1756         2170 :     if (!has_stats_of_kind(rel->statlist, STATS_EXT_MCV))
    1757         1540 :         return sel;
    1758              : 
    1759          630 :     list_attnums = palloc_array(Bitmapset *, list_length(clauses));
    1760              : 
    1761              :     /* expressions extracted from complex expressions */
    1762          630 :     list_exprs = palloc_array(List *, list_length(clauses));
    1763              : 
    1764              :     /*
    1765              :      * Pre-process the clauses list to extract the attnums and expressions
    1766              :      * seen in each item.  We need to determine if there are any clauses which
    1767              :      * will be useful for selectivity estimations with extended stats.  Along
    1768              :      * the way we'll record all of the attnums and expressions for each clause
    1769              :      * in lists which we'll reference later so we don't need to repeat the
    1770              :      * same work again.
    1771              :      *
    1772              :      * We also skip clauses that we already estimated using different types of
    1773              :      * statistics (we treat them as incompatible).
    1774              :      */
    1775          630 :     listidx = 0;
    1776         2145 :     foreach(l, clauses)
    1777              :     {
    1778         1515 :         Node       *clause = (Node *) lfirst(l);
    1779         1515 :         Bitmapset  *attnums = NULL;
    1780         1515 :         List       *exprs = NIL;
    1781              : 
    1782         3030 :         if (!bms_is_member(listidx, *estimatedclauses) &&
    1783         1515 :             statext_is_compatible_clause(root, clause, rel->relid, &attnums, &exprs))
    1784              :         {
    1785         1315 :             list_attnums[listidx] = attnums;
    1786         1315 :             list_exprs[listidx] = exprs;
    1787              :         }
    1788              :         else
    1789              :         {
    1790          200 :             list_attnums[listidx] = NULL;
    1791          200 :             list_exprs[listidx] = NIL;
    1792              :         }
    1793              : 
    1794         1515 :         listidx++;
    1795              :     }
    1796              : 
    1797              :     /* apply as many extended statistics as possible */
    1798              :     while (true)
    1799          515 :     {
    1800              :         StatisticExtInfo *stat;
    1801              :         List       *stat_clauses;
    1802              :         Bitmapset  *simple_clauses;
    1803              : 
    1804              :         /* find the best suited statistics object for these attnums */
    1805         1145 :         stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV, rte->inh,
    1806              :                                       list_attnums, list_exprs,
    1807              :                                       list_length(clauses));
    1808              : 
    1809              :         /*
    1810              :          * if no (additional) matching stats could be found then we've nothing
    1811              :          * to do
    1812              :          */
    1813         1145 :         if (!stat)
    1814          630 :             break;
    1815              : 
    1816              :         /* Ensure choose_best_statistics produced an expected stats type. */
    1817              :         Assert(stat->kind == STATS_EXT_MCV);
    1818              : 
    1819              :         /* now filter the clauses to be estimated using the selected MCV */
    1820          515 :         stat_clauses = NIL;
    1821              : 
    1822              :         /* record which clauses are simple (single column or expression) */
    1823          515 :         simple_clauses = NULL;
    1824              : 
    1825          515 :         listidx = -1;
    1826         1790 :         foreach(l, clauses)
    1827              :         {
    1828              :             /* Increment the index before we decide whether to skip the clause. */
    1829         1275 :             listidx++;
    1830              : 
    1831              :             /*
    1832              :              * Ignore clauses from which we did not extract any attnums or
    1833              :              * expressions (this needs to be consistent with what we do in
    1834              :              * choose_best_statistics).
    1835              :              *
    1836              :              * This also eliminates already estimated clauses - both those
    1837              :              * estimated before and during applying extended statistics.
    1838              :              *
    1839              :              * XXX This check is needed because both bms_is_subset and
    1840              :              * stat_covers_expressions return true for empty attnums and
    1841              :              * expressions.
    1842              :              */
    1843         1275 :             if (!list_attnums[listidx] && !list_exprs[listidx])
    1844           30 :                 continue;
    1845              : 
    1846              :             /*
    1847              :              * The clause was not estimated yet, and we've extracted either
    1848              :              * attnums or expressions from it. Ignore it if it's not fully
    1849              :              * covered by the chosen statistics object.
    1850              :              *
    1851              :              * We need to check both attributes and expressions, and reject if
    1852              :              * either is not covered.
    1853              :              */
    1854         1245 :             if (!bms_is_subset(list_attnums[listidx], stat->keys) ||
    1855         1195 :                 !stat_covers_expressions(stat, list_exprs[listidx], NULL))
    1856           55 :                 continue;
    1857              : 
    1858              :             /*
    1859              :              * Now we know the clause is compatible (we have either attnums or
    1860              :              * expressions extracted from it), and was not estimated yet.
    1861              :              */
    1862              : 
    1863              :             /*
                      :              * Record simple clauses (single column or expression).  The bit
                      :              * index is the clause's position within stat_clauses (taken
                      :              * before the clause is appended below), not within 'clauses'.
                      :              */
    1864         1415 :             if ((list_attnums[listidx] == NULL &&
    1865          225 :                  list_length(list_exprs[listidx]) == 1) ||
    1866         1930 :                 (list_exprs[listidx] == NIL &&
    1867          965 :                  bms_membership(list_attnums[listidx]) == BMS_SINGLETON))
    1868         1140 :                 simple_clauses = bms_add_member(simple_clauses,
    1869              :                                                 list_length(stat_clauses));
    1870              : 
    1871              :             /* add clause to list and mark it as estimated */
    1872         1190 :             stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
    1873         1190 :             *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
    1874              : 
    1875              :             /*
    1876              :              * Reset the pointers, so that choose_best_statistics knows this
    1877              :              * clause was estimated and does not consider it again.
    1878              :              */
    1879         1190 :             bms_free(list_attnums[listidx]);
    1880         1190 :             list_attnums[listidx] = NULL;
    1881              : 
    1882         1190 :             list_free(list_exprs[listidx]);
    1883         1190 :             list_exprs[listidx] = NULL;
    1884              :         }
    1885              : 
    1886          515 :         if (is_or)
    1887              :         {
    1888           80 :             bool       *or_matches = NULL;
    1889           80 :             Selectivity simple_or_sel = 0.0,
    1890           80 :                         stat_sel = 0.0;
    1891              :             MCVList    *mcv_list;
    1892              : 
    1893              :             /* Load the MCV list stored in the statistics object */
    1894           80 :             mcv_list = statext_mcv_load(stat->statOid, rte->inh);
    1895              : 
    1896              :             /*
    1897              :              * Compute the selectivity of the ORed list of clauses covered by
    1898              :              * this statistics object by estimating each in turn and combining
    1899              :              * them using the formula P(A OR B) = P(A) + P(B) - P(A AND B).
    1900              :              * This allows us to use the multivariate MCV stats to better
    1901              :              * estimate the individual terms and their overlap.
    1902              :              *
    1903              :              * Each time we iterate this formula, the clause "A" above is
    1904              :              * equal to all the clauses processed so far, combined with "OR".
                      :              *
                      :              * From here on listidx counts positions within stat_clauses,
                      :              * matching the bit indexes recorded in simple_clauses.
    1905              :              */
    1906           80 :             listidx = 0;
    1907          280 :             foreach(l, stat_clauses)
    1908              :             {
    1909          200 :                 Node       *clause = (Node *) lfirst(l);
    1910              :                 Selectivity simple_sel,
    1911              :                             overlap_simple_sel,
    1912              :                             mcv_sel,
    1913              :                             mcv_basesel,
    1914              :                             overlap_mcvsel,
    1915              :                             overlap_basesel,
    1916              :                             mcv_totalsel,
    1917              :                             clause_sel,
    1918              :                             overlap_sel;
    1919              : 
    1920              :                 /*
    1921              :                  * "Simple" selectivity of the next clause and its overlap
    1922              :                  * with any of the previous clauses.  These are our initial
    1923              :                  * estimates of P(B) and P(A AND B), assuming independence of
    1924              :                  * columns/clauses.
    1925              :                  */
    1926          200 :                 simple_sel = clause_selectivity_ext(root, clause, varRelid,
    1927              :                                                     jointype, sjinfo, false);
    1928              : 
    1929          200 :                 overlap_simple_sel = simple_or_sel * simple_sel;
    1930              : 
    1931              :                 /*
    1932              :                  * New "simple" selectivity of all clauses seen so far,
    1933              :                  * assuming independence.
    1934              :                  */
    1935          200 :                 simple_or_sel += simple_sel - overlap_simple_sel;
    1936          200 :                 CLAMP_PROBABILITY(simple_or_sel);
    1937              : 
    1938              :                 /*
    1939              :                  * Multi-column estimate of this clause using MCV statistics,
    1940              :                  * along with base and total selectivities, and corresponding
    1941              :                  * selectivities for the overlap term P(A AND B).
    1942              :                  */
    1943          200 :                 mcv_sel = mcv_clause_selectivity_or(root, stat, mcv_list,
    1944              :                                                     clause, &or_matches,
    1945              :                                                     &mcv_basesel,
    1946              :                                                     &overlap_mcvsel,
    1947              :                                                     &overlap_basesel,
    1948              :                                                     &mcv_totalsel);
    1949              : 
    1950              :                 /*
    1951              :                  * Combine the simple and multi-column estimates.
    1952              :                  *
    1953              :                  * If this clause is a simple single-column clause, then we
    1954              :                  * just use the simple selectivity estimate for it, since the
    1955              :                  * multi-column statistics are unlikely to improve on that
    1956              :                  * (and in fact could make it worse).  For the overlap, we
    1957              :                  * always make use of the multi-column statistics.
    1958              :                  */
    1959          200 :                 if (bms_is_member(listidx, simple_clauses))
    1960          160 :                     clause_sel = simple_sel;
    1961              :                 else
    1962           40 :                     clause_sel = mcv_combine_selectivities(simple_sel,
    1963              :                                                            mcv_sel,
    1964              :                                                            mcv_basesel,
    1965              :                                                            mcv_totalsel);
    1966              : 
    1967          200 :                 overlap_sel = mcv_combine_selectivities(overlap_simple_sel,
    1968              :                                                         overlap_mcvsel,
    1969              :                                                         overlap_basesel,
    1970              :                                                         mcv_totalsel);
    1971              : 
    1972              :                 /* Factor these into the result for this statistics object */
    1973          200 :                 stat_sel += clause_sel - overlap_sel;
    1974          200 :                 CLAMP_PROBABILITY(stat_sel);
    1975              : 
    1976          200 :                 listidx++;
    1977              :             }
    1978              : 
    1979              :             /*
    1980              :              * Factor the result for this statistics object into the overall
    1981              :              * result.  We treat the results from each separate statistics
    1982              :              * object as independent of one another.
    1983              :              */
    1984           80 :             sel = sel + stat_sel - sel * stat_sel;
    1985              :         }
    1986              :         else                    /* Implicitly-ANDed list of clauses */
    1987              :         {
    1988              :             Selectivity simple_sel,
    1989              :                         mcv_sel,
    1990              :                         mcv_basesel,
    1991              :                         mcv_totalsel,
    1992              :                         stat_sel;
    1993              : 
    1994              :             /*
    1995              :              * "Simple" selectivity, i.e. without any extended statistics,
    1996              :              * essentially assuming independence of the columns/clauses.
    1997              :              */
    1998          435 :             simple_sel = clauselist_selectivity_ext(root, stat_clauses,
    1999              :                                                     varRelid, jointype,
    2000              :                                                     sjinfo, false);
    2001              : 
    2002              :             /*
    2003              :              * Multi-column estimate using MCV statistics, along with base and
    2004              :              * total selectivities.
    2005              :              */
    2006          435 :             mcv_sel = mcv_clauselist_selectivity(root, stat, stat_clauses,
    2007              :                                                  varRelid, jointype, sjinfo,
    2008              :                                                  rel, &mcv_basesel,
    2009              :                                                  &mcv_totalsel);
    2010              : 
    2011              :             /* Combine the simple and multi-column estimates. */
    2012          435 :             stat_sel = mcv_combine_selectivities(simple_sel,
    2013              :                                                  mcv_sel,
    2014              :                                                  mcv_basesel,
    2015              :                                                  mcv_totalsel);
    2016              : 
    2017              :             /* Factor this into the overall result */
    2018          435 :             sel *= stat_sel;
    2019              :         }
    2020              :     }
    2021              : 
    2022          630 :     return sel;
    2023              : }
    2024              : 
    2025              : /*
    2026              :  * statext_clauselist_selectivity
    2027              :  *      Estimate clauses using the best multi-column statistics.
                      :  *
                      :  * Multivariate MCV lists are applied first.  For an ORed list (is_or) that
                      :  * result is returned as-is; otherwise it is multiplied by the estimate
                      :  * that functional dependencies give for the clauses not yet marked in
                      :  * 'estimatedclauses'.
                      :  *
                      :  * 'estimatedclauses' is handed to both steps, so bits set by the MCV step
                      :  * are visible to the functional-dependencies step.
    2028              :  */
    2029              : Selectivity
    2030         2170 : statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
    2031              :                                JoinType jointype, SpecialJoinInfo *sjinfo,
    2032              :                                RelOptInfo *rel, Bitmapset **estimatedclauses,
    2033              :                                bool is_or)
    2034              : {
    2035              :     Selectivity sel;
    2036              : 
    2037              :     /* First, try estimating clauses using a multivariate MCV list. */
    2038         2170 :     sel = statext_mcv_clauselist_selectivity(root, clauses, varRelid, jointype,
    2039              :                                              sjinfo, rel, estimatedclauses, is_or);
    2040              : 
    2041              :     /*
    2042              :      * Functional dependencies only work for clauses connected by AND, so for
    2043              :      * OR clauses we're done.
    2044              :      */
    2045         2170 :     if (is_or)
    2046          130 :         return sel;
    2047              : 
    2048              :     /*
    2049              :      * Then, apply functional dependencies on the remaining clauses by calling
    2050              :      * dependencies_clauselist_selectivity.  Pass 'estimatedclauses' so the
    2051              :      * function can properly skip clauses already estimated above.
    2052              :      *
    2053              :      * The reasoning for applying dependencies last is that the more complex
    2054              :      * stats can track more complex correlations between the attributes, and
    2055              :      * so may be considered more reliable.
    2056              :      *
    2057              :      * For example, MCV list can give us an exact selectivity for values in
    2058              :      * two columns, while functional dependencies can only provide information
    2059              :      * about the overall strength of the dependency.
    2060              :      */
    2061         2040 :     sel *= dependencies_clauselist_selectivity(root, clauses, varRelid,
    2062              :                                                jointype, sjinfo, rel,
    2063              :                                                estimatedclauses);
    2064              : 
    2065         2040 :     return sel;
    2066              : }
    2067              : 
    2068              : /*
    2069              :  * examine_opclause_args
    2070              :  *      Split an operator expression's arguments into Expr and Const parts.
    2071              :  *
    2072              :  * Attempts to match the arguments to either (Expr op Const) or (Const op
    2073              :  * Expr), possibly with a RelabelType on top. When the expression matches this
    2074              :  * form, returns true, otherwise returns false.
    2075              :  *
    2076              :  * Optionally returns pointers to the extracted Expr/Const nodes, when passed
    2077              :  * non-null pointers (exprp, cstp and expronleftp). The expronleftp flag
    2078              :  * specifies on which side of the operator we found the expression node.
                      :  *
                      :  * Only a single RelabelType is stripped from each argument.  The right
                      :  * argument is tested for Const first, so for (Const op Const) the left
                      :  * argument is reported as the expression.  The non-Const side is not
                      :  * inspected any further here.  On a false return none of the output
                      :  * pointers are written.
    2079              :  */
    2080              : bool
    2081         2555 : examine_opclause_args(List *args, Node **exprp, Const **cstp,
    2082              :                       bool *expronleftp)
    2083              : {
    2084              :     Node       *expr;
    2085              :     Const      *cst;
    2086              :     bool        expronleft;
    2087              :     Node       *leftop,
    2088              :                *rightop;
    2089              : 
    2090              :     /* enforced by statext_is_compatible_clause_internal */
    2091              :     Assert(list_length(args) == 2);
    2092              : 
    2093         2555 :     leftop = linitial(args);
    2094         2555 :     rightop = lsecond(args);
    2095              : 
    2096              :     /* strip RelabelType from either side of the expression */
    2097         2555 :     if (IsA(leftop, RelabelType))
    2098          270 :         leftop = (Node *) ((RelabelType *) leftop)->arg;
    2099              : 
    2100         2555 :     if (IsA(rightop, RelabelType))
    2101           50 :         rightop = (Node *) ((RelabelType *) rightop)->arg;
    2102              : 
    2103         2555 :     if (IsA(rightop, Const))
    2104              :     {
    2105         2420 :         expr = leftop;
    2106         2420 :         cst = (Const *) rightop;
    2107         2420 :         expronleft = true;
    2108              :     }
    2109          135 :     else if (IsA(leftop, Const))
    2110              :     {
    2111          135 :         expr = rightop;
    2112          135 :         cst = (Const *) leftop;
    2113          135 :         expronleft = false;
    2114              :     }
    2115              :     else
    2116            0 :         return false;
    2117              : 
    2118              :     /* return pointers to the extracted parts if requested */
    2119         2555 :     if (exprp)
    2120         2555 :         *exprp = expr;
    2121              : 
    2122         2555 :     if (cstp)
    2123         1135 :         *cstp = cst;
    2124              : 
    2125         2555 :     if (expronleftp)
    2126         1375 :         *expronleftp = expronleft;
    2127              : 
    2128         2555 :     return true;
    2129              : }
    2130              : 
    2131              : 
    2132              : /*
    2133              :  * Compute statistics about expressions of a relation.
    2134              :  */
    2135              : static void
    2136          179 : compute_expr_stats(Relation onerel, AnlExprData *exprdata, int nexprs,
    2137              :                    HeapTuple *rows, int numrows)
    2138              : {
    2139              :     MemoryContext expr_context,
    2140              :                 old_context;
    2141              :     int         ind,
    2142              :                 i;
    2143              : 
    2144          179 :     expr_context = AllocSetContextCreate(CurrentMemoryContext,
    2145              :                                          "Analyze Expression",
    2146              :                                          ALLOCSET_DEFAULT_SIZES);
    2147          179 :     old_context = MemoryContextSwitchTo(expr_context);
    2148              : 
    2149          502 :     for (ind = 0; ind < nexprs; ind++)
    2150              :     {
    2151          323 :         AnlExprData *thisdata = &exprdata[ind];
    2152          323 :         VacAttrStats *stats = thisdata->vacattrstat;
    2153          323 :         Node       *expr = thisdata->expr;
    2154              :         TupleTableSlot *slot;
    2155              :         EState     *estate;
    2156              :         ExprContext *econtext;
    2157              :         Datum      *exprvals;
    2158              :         bool       *exprnulls;
    2159              :         ExprState  *exprstate;
    2160              :         int         tcnt;
    2161              : 
    2162              :         /* Are we still in the main context? */
    2163              :         Assert(CurrentMemoryContext == expr_context);
    2164              : 
    2165              :         /*
    2166              :          * Need an EState for evaluation of expressions.  Create it in the
    2167              :          * per-expression context to be sure it gets cleaned up at the bottom
    2168              :          * of the loop.
    2169              :          */
    2170          323 :         estate = CreateExecutorState();
    2171          323 :         econtext = GetPerTupleExprContext(estate);
    2172              : 
    2173              :         /* Set up expression evaluation state */
    2174          323 :         exprstate = ExecPrepareExpr((Expr *) expr, estate);
    2175              : 
    2176              :         /* Need a slot to hold the current heap tuple, too */
    2177          323 :         slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel),
    2178              :                                         &TTSOpsHeapTuple);
    2179              : 
    2180              :         /* Arrange for econtext's scan tuple to be the tuple under test */
    2181          323 :         econtext->ecxt_scantuple = slot;
    2182              : 
    2183              :         /* Compute and save expression values */
    2184          323 :         exprvals = (Datum *) palloc(numrows * sizeof(Datum));
    2185          323 :         exprnulls = (bool *) palloc(numrows * sizeof(bool));
    2186              : 
    2187          323 :         tcnt = 0;
    2188       310114 :         for (i = 0; i < numrows; i++)
    2189              :         {
    2190              :             Datum       datum;
    2191              :             bool        isnull;
    2192              : 
    2193              :             /*
    2194              :              * Reset the per-tuple context each time, to reclaim any cruft
    2195              :              * left behind by evaluating the statistics expressions.
    2196              :              */
    2197       309791 :             ResetExprContext(econtext);
    2198              : 
    2199              :             /* Set up for expression evaluation */
    2200       309791 :             ExecStoreHeapTuple(rows[i], slot, false);
    2201              : 
    2202              :             /*
    2203              :              * Evaluate the expression. We do this in the per-tuple context so
    2204              :              * as not to leak memory, and then copy the result into the
    2205              :              * context created at the beginning of this function.
    2206              :              */
    2207       309791 :             datum = ExecEvalExprSwitchContext(exprstate,
    2208       309791 :                                               GetPerTupleExprContext(estate),
    2209              :                                               &isnull);
    2210       309791 :             if (isnull)
    2211              :             {
    2212            8 :                 exprvals[tcnt] = (Datum) 0;
    2213            8 :                 exprnulls[tcnt] = true;
    2214              :             }
    2215              :             else
    2216              :             {
    2217              :                 /* Make sure we copy the data into the context. */
    2218              :                 Assert(CurrentMemoryContext == expr_context);
    2219              : 
    2220       619566 :                 exprvals[tcnt] = datumCopy(datum,
    2221       309783 :                                            stats->attrtype->typbyval,
    2222       309783 :                                            stats->attrtype->typlen);
    2223       309783 :                 exprnulls[tcnt] = false;
    2224              :             }
    2225              : 
    2226       309791 :             tcnt++;
    2227              :         }
    2228              : 
    2229              :         /*
    2230              :          * Now we can compute the statistics for the expression columns.
    2231              :          *
    2232              :          * XXX Unlike compute_index_stats we don't need to switch and reset
    2233              :          * memory contexts here, because we're only computing stats for a
    2234              :          * single expression (and not iterating over many indexes), so we just
    2235              :          * do it in expr_context. Note that compute_stats copies the result
    2236              :          * into stats->anl_context, so it does not disappear.
    2237              :          */
    2238          323 :         if (tcnt > 0)
    2239              :         {
    2240              :             AttributeOpts *aopt =
    2241          323 :                 get_attribute_options(onerel->rd_id, stats->tupattnum);
    2242              : 
    2243          323 :             stats->exprvals = exprvals;
    2244          323 :             stats->exprnulls = exprnulls;
    2245          323 :             stats->rowstride = 1;
    2246          323 :             stats->compute_stats(stats,
    2247              :                                  expr_fetch_func,
    2248              :                                  tcnt,
    2249              :                                  tcnt);
    2250              : 
    2251              :             /*
    2252              :              * If the n_distinct option is specified, it overrides the above
    2253              :              * computation.
    2254              :              */
    2255          323 :             if (aopt != NULL && aopt->n_distinct != 0.0)
    2256            0 :                 stats->stadistinct = aopt->n_distinct;
    2257              :         }
    2258              : 
    2259              :         /* And clean up */
    2260          323 :         MemoryContextSwitchTo(expr_context);
    2261              : 
    2262          323 :         ExecDropSingleTupleTableSlot(slot);
    2263          323 :         FreeExecutorState(estate);
    2264          323 :         MemoryContextReset(expr_context);
    2265              :     }
    2266              : 
    2267          179 :     MemoryContextSwitchTo(old_context);
    2268          179 :     MemoryContextDelete(expr_context);
    2269          179 : }
    2270              : 
    2271              : 
    2272              : /*
    2273              :  * Fetch function for analyzing statistics object expressions.
    2274              :  *
    2275              :  * The data is kept in plain Datum/bool arrays rather than tuples, so this just
    2276              :  * indexes exprvals/exprnulls at rownum * rowstride and reports the null flag.
    2277              :  */
    2278              : static Datum
    2279       309804 : expr_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
    2280              : {
    2281              :     int         i;
    2282              : 
    2283              :     /* exprvals and exprnulls are already offset for proper column */
    2284       309804 :     i = rownum * stats->rowstride;
    2285       309804 :     *isNull = stats->exprnulls[i];
    2286       309804 :     return stats->exprvals[i];
    2287              : }
    2288              : 
    2289              : /*
    2290              :  * Build a zeroed AnlExprData array with one entry per expression, each
    2291              :  * holding the expression and its examine_expression() result. As this is not
    2292              :  * tied to a relation (table or index), examine_expression() fakes some fields.
    2293              :  */
    2294              : static AnlExprData *
    2295          179 : build_expr_data(List *exprs, int stattarget)
    2296              : {
    2297              :     int         idx;
    2298          179 :     int         nexprs = list_length(exprs);
    2299              :     AnlExprData *exprdata;
    2300              :     ListCell   *lc;
    2301              : 
    2302          179 :     exprdata = (AnlExprData *) palloc0(nexprs * sizeof(AnlExprData));
    2303              : 
    2304          179 :     idx = 0;
    2305          502 :     foreach(lc, exprs)
    2306              :     {
    2307          323 :         Node       *expr = (Node *) lfirst(lc);
    2308          323 :         AnlExprData *thisdata = &exprdata[idx];
    2309              : 
    2310          323 :         thisdata->expr = expr;
    2311          323 :         thisdata->vacattrstat = examine_expression(expr, stattarget);
    2312          323 :         idx++;
    2313              :     }
    2314              : 
    2315          179 :     return exprdata;
    2316              : }
    2317              : 
    2318              : /* form an array of pg_statistic rows (per update_attstats), NULL element if stats are not valid */
    2319              : static Datum
    2320          179 : serialize_expr_stats(AnlExprData *exprdata, int nexprs)
    2321              : {
    2322              :     int         exprno;
    2323              :     Oid         typOid;
    2324              :     Relation    sd;
    2325              : 
    2326          179 :     ArrayBuildState *astate = NULL;
    2327              : 
    2328          179 :     sd = table_open(StatisticRelationId, RowExclusiveLock);
    2329              : 
    2330              :     /* look up the OID of the composite type of pg_statistic (array element type) */
    2331          179 :     typOid = get_rel_type_id(StatisticRelationId);
    2332          179 :     if (!OidIsValid(typOid))
    2333            0 :         ereport(ERROR,
    2334              :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
    2335              :                  errmsg("relation \"%s\" does not have a composite type",
    2336              :                         "pg_statistic")));
    2337              : 
    2338          502 :     for (exprno = 0; exprno < nexprs; exprno++)
    2339              :     {
    2340              :         int         i,
    2341              :                     k;
    2342          323 :         VacAttrStats *stats = exprdata[exprno].vacattrstat;
    2343              : 
    2344              :         Datum       values[Natts_pg_statistic];
    2345              :         bool        nulls[Natts_pg_statistic];
    2346              :         HeapTuple   stup;
    2347              : 
    2348          323 :         if (!stats->stats_valid)
    2349              :         {
    2350            1 :             astate = accumArrayResult(astate,
    2351              :                                       (Datum) 0,
    2352              :                                       true,
    2353              :                                       typOid,
    2354              :                                       CurrentMemoryContext);
    2355            1 :             continue;
    2356              :         }
    2357              : 
    2358              :         /*
    2359              :          * Construct a new pg_statistic tuple (all columns start out non-null)
    2360              :          */
    2361        10304 :         for (i = 0; i < Natts_pg_statistic; ++i)
    2362              :         {
    2363         9982 :             nulls[i] = false;
    2364              :         }
    2365              : 
    2366          322 :         values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(InvalidOid);
    2367          322 :         values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(InvalidAttrNumber);
    2368          322 :         values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(false);
    2369          322 :         values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
    2370          322 :         values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
    2371          322 :         values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
    2372          322 :         i = Anum_pg_statistic_stakind1 - 1;
    2373         1932 :         for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
    2374              :         {
    2375         1610 :             values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
    2376              :         }
    2377          322 :         i = Anum_pg_statistic_staop1 - 1;
    2378         1932 :         for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
    2379              :         {
    2380         1610 :             values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */
    2381              :         }
    2382          322 :         i = Anum_pg_statistic_stacoll1 - 1;
    2383         1932 :         for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
    2384              :         {
    2385         1610 :             values[i++] = ObjectIdGetDatum(stats->stacoll[k]);   /* stacollN */
    2386              :         }
    2387          322 :         i = Anum_pg_statistic_stanumbers1 - 1;
    2388         1932 :         for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
    2389              :         {
    2390         1610 :             int         nnum = stats->numnumbers[k];
    2391              : 
    2392         1610 :             if (nnum > 0)
    2393              :             {
    2394              :                 int         n;
    2395          572 :                 Datum      *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
    2396              :                 ArrayType  *arry;
    2397              : 
    2398         4322 :                 for (n = 0; n < nnum; n++)
    2399         3750 :                     numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
    2400          572 :                 arry = construct_array_builtin(numdatums, nnum, FLOAT4OID);
    2401          572 :                 values[i++] = PointerGetDatum(arry);    /* stanumbersN */
    2402              :             }
    2403              :             else
    2404              :             {
    2405         1038 :                 nulls[i] = true;
    2406         1038 :                 values[i++] = (Datum) 0;
    2407              :             }
    2408              :         }
    2409          322 :         i = Anum_pg_statistic_stavalues1 - 1;
    2410         1932 :         for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
    2411              :         {
    2412         1610 :             if (stats->numvalues[k] > 0)
    2413              :             {
    2414              :                 ArrayType  *arry;
    2415              : 
    2416          358 :                 arry = construct_array(stats->stavalues[k],
    2417              :                                        stats->numvalues[k],
    2418              :                                        stats->statypid[k],
    2419          358 :                                        stats->statyplen[k],
    2420          358 :                                        stats->statypbyval[k],
    2421          358 :                                        stats->statypalign[k]);
    2422          358 :                 values[i++] = PointerGetDatum(arry);    /* stavaluesN */
    2423              :             }
    2424              :             else
    2425              :             {
    2426         1252 :                 nulls[i] = true;
    2427         1252 :                 values[i++] = (Datum) 0;
    2428              :             }
    2429              :         }
    2430              : 
    2431          322 :         stup = heap_form_tuple(RelationGetDescr(sd), values, nulls);
    2432              : 
    2433          322 :         astate = accumArrayResult(astate,
    2434              :                                   heap_copy_tuple_as_datum(stup, RelationGetDescr(sd)),
    2435              :                                   false,
    2436              :                                   typOid,
    2437              :                                   CurrentMemoryContext);
    2438              :     }
    2439              : 
    2440          179 :     table_close(sd, RowExclusiveLock);
    2441              : 
    2442          179 :     return makeArrayResult(astate, CurrentMemoryContext);
    2443              : }
    2444              : 
    2445              : /*
    2446              :  * Loads the pg_statistic record stored in stxdexpr for the expression
    2447              :  * identified by the supplied index (idx is not range-checked in this function).
    2448              :  *
    2449              :  * Returns a heap_copytuple() copy of the record, or NULL if the array element
    2450              :  * for idx is null. Raises an error if the cache entry or stxdexpr is missing.
    2451              :  */
    2452              : HeapTuple
    2453         1416 : statext_expressions_load(Oid stxoid, bool inh, int idx)
    2454              : {
    2455              :     bool        isnull;
    2456              :     Datum       value;
    2457              :     HeapTuple   htup;
    2458              :     ExpandedArrayHeader *eah;
    2459              :     HeapTupleHeader td;
    2460              :     HeapTupleData tmptup;
    2461              :     HeapTuple   tup;
    2462              : 
    2463         1416 :     htup = SearchSysCache2(STATEXTDATASTXOID,
    2464              :                            ObjectIdGetDatum(stxoid), BoolGetDatum(inh));
    2465         1416 :     if (!HeapTupleIsValid(htup))
    2466            0 :         elog(ERROR, "cache lookup failed for statistics object %u", stxoid);
    2467              : 
    2468         1416 :     value = SysCacheGetAttr(STATEXTDATASTXOID, htup,
    2469              :                             Anum_pg_statistic_ext_data_stxdexpr, &isnull);
    2470         1416 :     if (isnull)
    2471            0 :         elog(ERROR,
    2472              :              "requested statistics kind \"%c\" is not yet built for statistics object %u",
    2473              :              STATS_EXT_EXPRESSIONS, stxoid);
    2474              : 
    2475         1416 :     eah = DatumGetExpandedArray(value);
    2476              : 
    2477         1416 :     deconstruct_expanded_array(eah);
    2478              : 
    2479         1416 :     if (eah->dnulls && eah->dnulls[idx])
    2480              :     {
    2481              :         /* No data found for this expression, give up. */
    2482            1 :         ReleaseSysCache(htup);
    2483            1 :         return NULL;
    2484              :     }
    2485              : 
    2486         1415 :     td = DatumGetHeapTupleHeader(eah->dvalues[idx]);
    2487              : 
    2488              :     /* Build a temporary HeapTuple control structure around the array element */
    2489         1415 :     tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
    2490         1415 :     ItemPointerSetInvalid(&(tmptup.t_self));
    2491         1415 :     tmptup.t_tableOid = InvalidOid;
    2492         1415 :     tmptup.t_data = td;
    2493              : 
    2494         1415 :     tup = heap_copytuple(&tmptup);
    2495              : 
    2496         1415 :     ReleaseSysCache(htup);
    2497              : 
    2498         1415 :     return tup;
    2499              : }
    2500              : 
    2501              : /*
    2502              :  * Collect per-row values for all statistics keys: plain columns are fetched
    2503              :  * with heap_getattr(), expressions are evaluated once per row, so that all
    2504              :  * requested statistics types can reuse them (matters for expensive expressions).
    2505              :  */
    2506              : static StatsBuildData *
    2507          370 : make_build_data(Relation rel, StatExtEntry *stat, int numrows, HeapTuple *rows,
    2508              :                 VacAttrStats **stats, int stattarget)
    2509              : {
    2510              :     /* evaluated expressions */
    2511              :     StatsBuildData *result;
    2512              :     char       *ptr;
    2513              :     Size        len;
    2514              : 
    2515              :     int         i;
    2516              :     int         k;
    2517              :     int         idx;
    2518              :     TupleTableSlot *slot;
    2519              :     EState     *estate;
    2520              :     ExprContext *econtext;
    2521          370 :     List       *exprstates = NIL;
    2522          370 :     int         nkeys = bms_num_members(stat->columns) + list_length(stat->exprs);
    2523              :     ListCell   *lc;
    2524              : 
    2525              :     /* allocate everything as a single chunk, so we can free it easily */
    2526          370 :     len = MAXALIGN(sizeof(StatsBuildData));
    2527          370 :     len += MAXALIGN(sizeof(AttrNumber) * nkeys);    /* attnums */
    2528          370 :     len += MAXALIGN(sizeof(VacAttrStats *) * nkeys);    /* stats */
    2529              : 
    2530              :     /* values */
    2531          370 :     len += MAXALIGN(sizeof(Datum *) * nkeys);
    2532          370 :     len += nkeys * MAXALIGN(sizeof(Datum) * numrows);
    2533              : 
    2534              :     /* nulls */
    2535          370 :     len += MAXALIGN(sizeof(bool *) * nkeys);
    2536          370 :     len += nkeys * MAXALIGN(sizeof(bool) * numrows);
    2537              : 
    2538          370 :     ptr = palloc(len);
    2539              : 
    2540              :     /* set the pointers */
    2541          370 :     result = (StatsBuildData *) ptr;
    2542          370 :     ptr += MAXALIGN(sizeof(StatsBuildData));
    2543              : 
    2544              :     /* attnums */
    2545          370 :     result->attnums = (AttrNumber *) ptr;
    2546          370 :     ptr += MAXALIGN(sizeof(AttrNumber) * nkeys);
    2547              : 
    2548              :     /* stats */
    2549          370 :     result->stats = (VacAttrStats **) ptr;
    2550          370 :     ptr += MAXALIGN(sizeof(VacAttrStats *) * nkeys);
    2551              : 
    2552              :     /* values */
    2553          370 :     result->values = (Datum **) ptr;
    2554          370 :     ptr += MAXALIGN(sizeof(Datum *) * nkeys);
    2555              : 
    2556              :     /* nulls */
    2557          370 :     result->nulls = (bool **) ptr;
    2558          370 :     ptr += MAXALIGN(sizeof(bool *) * nkeys);
    2559              : 
    2560         1270 :     for (i = 0; i < nkeys; i++)
    2561              :     {
    2562          900 :         result->values[i] = (Datum *) ptr;
    2563          900 :         ptr += MAXALIGN(sizeof(Datum) * numrows);
    2564              : 
    2565          900 :         result->nulls[i] = (bool *) ptr;
    2566          900 :         ptr += MAXALIGN(sizeof(bool) * numrows);
    2567              :     }
    2568              : 
    2569              :     Assert((ptr - (char *) result) == len);
    2570              : 
    2571              :     /* we have it allocated, so let's fill the values */
    2572          370 :     result->nattnums = nkeys;
    2573          370 :     result->numrows = numrows;
    2574              : 
    2575              :     /* fill the attribute info - first attributes, then expressions */
    2576          370 :     idx = 0;
    2577          370 :     k = -1;
    2578          947 :     while ((k = bms_next_member(stat->columns, k)) >= 0)
    2579              :     {
    2580          577 :         result->attnums[idx] = k;
    2581          577 :         result->stats[idx] = stats[idx];
    2582              : 
    2583          577 :         idx++;
    2584              :     }
    2585              : 
    2586          370 :     k = -1;                     /* expressions get attnums -1, -2, ... */
    2587          693 :     foreach(lc, stat->exprs)
    2588              :     {
    2589          323 :         Node       *expr = (Node *) lfirst(lc);
    2590              : 
    2591          323 :         result->attnums[idx] = k;
    2592          323 :         result->stats[idx] = examine_expression(expr, stattarget);
    2593              : 
    2594          323 :         idx++;
    2595          323 :         k--;
    2596              :     }
    2597              : 
    2598              :     /* first extract values for all the regular attributes */
    2599       631034 :     for (i = 0; i < numrows; i++)
    2600              :     {
    2601       630664 :         idx = 0;
    2602       630664 :         k = -1;
    2603      2033073 :         while ((k = bms_next_member(stat->columns, k)) >= 0)
    2604              :         {
    2605      2804818 :             result->values[idx][i] = heap_getattr(rows[i], k,
    2606      1402409 :                                                   result->stats[idx]->tupDesc,
    2607      1402409 :                                                   &result->nulls[idx][i]);
    2608              : 
    2609      1402409 :             idx++;
    2610              :         }
    2611              :     }
    2612              : 
    2613              :     /* Need an EState for evaluation of expressions. */
    2614          370 :     estate = CreateExecutorState();
    2615          370 :     econtext = GetPerTupleExprContext(estate);
    2616              : 
    2617              :     /* Need a slot to hold the current heap tuple, too */
    2618          370 :     slot = MakeSingleTupleTableSlot(RelationGetDescr(rel),
    2619              :                                     &TTSOpsHeapTuple);
    2620              : 
    2621              :     /* Arrange for econtext's scan tuple to be the tuple under test */
    2622          370 :     econtext->ecxt_scantuple = slot;
    2623              : 
    2624              :     /* Set up expression evaluation state */
    2625          370 :     exprstates = ExecPrepareExprList(stat->exprs, estate);
    2626              : 
    2627       631034 :     for (i = 0; i < numrows; i++)
    2628              :     {
    2629              :         /*
    2630              :          * Reset the per-tuple context each time, to reclaim any cruft left
    2631              :          * behind by evaluating the statistics object expressions.
    2632              :          */
    2633       630664 :         ResetExprContext(econtext);
    2634              : 
    2635              :         /* Set up for expression evaluation */
    2636       630664 :         ExecStoreHeapTuple(rows[i], slot, false);
    2637              : 
    2638       630664 :         idx = bms_num_members(stat->columns);   /* expressions follow the columns */
    2639       940455 :         foreach(lc, exprstates)
    2640              :         {
    2641              :             Datum       datum;
    2642              :             bool        isnull;
    2643       309791 :             ExprState  *exprstate = (ExprState *) lfirst(lc);
    2644              : 
    2645              :             /*
    2646              :              * XXX This probably leaks memory. Maybe we should use
    2647              :              * ExecEvalExprSwitchContext but then we need to copy the result
    2648              :              * somewhere else (the datum is stored below without copying).
    2649              :              */
    2650       309791 :             datum = ExecEvalExpr(exprstate,
    2651       309791 :                                  GetPerTupleExprContext(estate),
    2652              :                                  &isnull);
    2653       309791 :             if (isnull)
    2654              :             {
    2655            8 :                 result->values[idx][i] = (Datum) 0;
    2656            8 :                 result->nulls[idx][i] = true;
    2657              :             }
    2658              :             else
    2659              :             {
    2660       309783 :                 result->values[idx][i] = datum;
    2661       309783 :                 result->nulls[idx][i] = false;
    2662              :             }
    2663              : 
    2664       309791 :             idx++;
    2665              :         }
    2666              :     }
    2667              : 
    2668          370 :     ExecDropSingleTupleTableSlot(slot);
    2669          370 :     FreeExecutorState(estate);
    2670              : 
    2671          370 :     return result;
    2672              : }
        

Generated by: LCOV version 2.0-1