Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * analyze.c
4 : * the Postgres statistics generator
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/commands/analyze.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <math.h>
18 :
19 : #include "access/detoast.h"
20 : #include "access/genam.h"
21 : #include "access/multixact.h"
22 : #include "access/relation.h"
23 : #include "access/table.h"
24 : #include "access/tableam.h"
25 : #include "access/transam.h"
26 : #include "access/tupconvert.h"
27 : #include "access/visibilitymap.h"
28 : #include "access/xact.h"
29 : #include "catalog/index.h"
30 : #include "catalog/indexing.h"
31 : #include "catalog/pg_inherits.h"
32 : #include "commands/dbcommands.h"
33 : #include "commands/progress.h"
34 : #include "commands/tablecmds.h"
35 : #include "commands/vacuum.h"
36 : #include "common/pg_prng.h"
37 : #include "executor/executor.h"
38 : #include "foreign/fdwapi.h"
39 : #include "miscadmin.h"
40 : #include "nodes/nodeFuncs.h"
41 : #include "parser/parse_oper.h"
42 : #include "parser/parse_relation.h"
43 : #include "pgstat.h"
44 : #include "postmaster/autovacuum.h"
45 : #include "statistics/extended_stats_internal.h"
46 : #include "statistics/statistics.h"
47 : #include "storage/bufmgr.h"
48 : #include "storage/procarray.h"
49 : #include "utils/attoptcache.h"
50 : #include "utils/datum.h"
51 : #include "utils/guc.h"
52 : #include "utils/lsyscache.h"
53 : #include "utils/memutils.h"
54 : #include "utils/pg_rusage.h"
55 : #include "utils/sampling.h"
56 : #include "utils/sortsupport.h"
57 : #include "utils/spccache.h"
58 : #include "utils/syscache.h"
59 : #include "utils/timestamp.h"
60 :
61 :
/* Per-index data for ANALYZE */
typedef struct AnlIndexData
{
	IndexInfo  *indexInfo;		/* BuildIndexInfo result */
	double		tupleFract;		/* fraction of rows for partial index */
	VacAttrStats **vacattrstats;	/* index attrs to analyze */
	int			attr_cnt;		/* number of entries in vacattrstats */
} AnlIndexData;
70 :
71 :
/* Default statistics target (GUC parameter) */
int			default_statistics_target = 100;

/* A few variables that don't seem worth passing around as parameters */
static MemoryContext anl_context = NULL;	/* working context; created and
											 * deleted by do_analyze_rel() */
static BufferAccessStrategy vac_strategy;	/* buffer ring supplied by caller
											 * of analyze_rel() */
79 :
/* Private routines, defined later in this file */
static void do_analyze_rel(Relation onerel,
						   VacuumParams *params, List *va_cols,
						   AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
						   bool inh, bool in_outer_xact, int elevel);
static void compute_index_stats(Relation onerel, double totalrows,
								AnlIndexData *indexdata, int nindexes,
								HeapTuple *rows, int numrows,
								MemoryContext col_context);
static VacAttrStats *examine_attribute(Relation onerel, int attnum,
									   Node *index_expr);
static int	acquire_sample_rows(Relation onerel, int elevel,
								HeapTuple *rows, int targrows,
								double *totalrows, double *totaldeadrows);
static int	compare_rows(const void *a, const void *b, void *arg);
static int	acquire_inherited_sample_rows(Relation onerel, int elevel,
										  HeapTuple *rows, int targrows,
										  double *totalrows, double *totaldeadrows);
static void update_attstats(Oid relid, bool inh,
							int natts, VacAttrStats **vacattrstats);
static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
101 :
102 :
/*
 *	analyze_rel() -- analyze one relation
 *
 * relid identifies the relation to analyze.  If relation is supplied, use
 * the name therein for reporting any failure to open/lock the rel; do not
 * use it once we've successfully opened the rel, since it might be stale.
 *
 * On every exit path the rel is closed; on the success path the
 * ShareUpdateExclusiveLock is retained (closed with NoLock) so that the
 * pg_statistic entries we made cannot be orphaned before commit.
 */
void
analyze_rel(Oid relid, RangeVar *relation,
			VacuumParams *params, List *va_cols, bool in_outer_xact,
			BufferAccessStrategy bstrategy)
{
	Relation	onerel;
	int			elevel;
	AcquireSampleRowsFunc acquirefunc = NULL;
	BlockNumber relpages = 0;

	/* Select logging level */
	if (params->options & VACOPT_VERBOSE)
		elevel = INFO;
	else
		elevel = DEBUG2;

	/* Set up static variables (vac_strategy is consumed by sampling and
	 * index-cleanup code invoked from do_analyze_rel) */
	vac_strategy = bstrategy;

	/*
	 * Check for user-requested abort.
	 */
	CHECK_FOR_INTERRUPTS();

	/*
	 * Open the relation, getting ShareUpdateExclusiveLock to ensure that two
	 * ANALYZEs don't run on it concurrently.  (This also locks out a
	 * concurrent VACUUM, which doesn't matter much at the moment but might
	 * matter if we ever try to accumulate stats on dead tuples.)  If the rel
	 * has been dropped since we last saw it, we don't need to process it.
	 *
	 * Make sure to generate only logs for ANALYZE in this case.
	 */
	onerel = vacuum_open_relation(relid, relation, params->options & ~(VACOPT_VACUUM),
								  params->log_min_duration >= 0,
								  ShareUpdateExclusiveLock);

	/* leave if relation could not be opened or locked */
	if (!onerel)
		return;

	/*
	 * Check if relation needs to be skipped based on privileges.  This check
	 * happens also when building the relation list to analyze for a manual
	 * operation, and needs to be done additionally here as ANALYZE could
	 * happen across multiple transactions where privileges could have changed
	 * in-between.  Make sure to generate only logs for ANALYZE in this case.
	 */
	if (!vacuum_is_permitted_for_relation(RelationGetRelid(onerel),
										  onerel->rd_rel,
										  params->options & ~VACOPT_VACUUM))
	{
		relation_close(onerel, ShareUpdateExclusiveLock);
		return;
	}

	/*
	 * Silently ignore tables that are temp tables of other backends ---
	 * trying to analyze these is rather pointless, since their contents are
	 * probably not up-to-date on disk.  (We don't throw a warning here; it
	 * would just lead to chatter during a database-wide ANALYZE.)
	 */
	if (RELATION_IS_OTHER_TEMP(onerel))
	{
		relation_close(onerel, ShareUpdateExclusiveLock);
		return;
	}

	/*
	 * We can ANALYZE any table except pg_statistic. See update_attstats
	 */
	if (RelationGetRelid(onerel) == StatisticRelationId)
	{
		relation_close(onerel, ShareUpdateExclusiveLock);
		return;
	}

	/*
	 * Check that it's of an analyzable relkind, and set up appropriately.
	 */
	if (onerel->rd_rel->relkind == RELKIND_RELATION ||
		onerel->rd_rel->relkind == RELKIND_MATVIEW)
	{
		/* Regular table, so we'll use the regular row acquisition function */
		acquirefunc = acquire_sample_rows;
		/* Also get regular table's size */
		relpages = RelationGetNumberOfBlocks(onerel);
	}
	else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
	{
		/*
		 * For a foreign table, call the FDW's hook function to see whether it
		 * supports analysis.  The FDW supplies both the acquirefunc and its
		 * own notion of relpages.
		 */
		FdwRoutine *fdwroutine;
		bool		ok = false;

		fdwroutine = GetFdwRoutineForRelation(onerel, false);

		if (fdwroutine->AnalyzeForeignTable != NULL)
			ok = fdwroutine->AnalyzeForeignTable(onerel,
												 &acquirefunc,
												 &relpages);

		if (!ok)
		{
			ereport(WARNING,
					(errmsg("skipping \"%s\" --- cannot analyze this foreign table",
							RelationGetRelationName(onerel))));
			relation_close(onerel, ShareUpdateExclusiveLock);
			return;
		}
	}
	else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		/*
		 * For partitioned tables, we want to do the recursive ANALYZE below.
		 */
	}
	else
	{
		/* No need for a WARNING if we already complained during VACUUM */
		if (!(params->options & VACOPT_VACUUM))
			ereport(WARNING,
					(errmsg("skipping \"%s\" --- cannot analyze non-tables or special system tables",
							RelationGetRelationName(onerel))));
		relation_close(onerel, ShareUpdateExclusiveLock);
		return;
	}

	/*
	 * OK, let's do it.  First, initialize progress reporting.
	 */
	pgstat_progress_start_command(PROGRESS_COMMAND_ANALYZE,
								  RelationGetRelid(onerel));

	/*
	 * Do the normal non-recursive ANALYZE.  We can skip this for partitioned
	 * tables, which don't contain any rows.
	 */
	if (onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
		do_analyze_rel(onerel, params, va_cols, acquirefunc,
					   relpages, false, in_outer_xact, elevel);

	/*
	 * If there are child tables, do recursive ANALYZE.
	 */
	if (onerel->rd_rel->relhassubclass)
		do_analyze_rel(onerel, params, va_cols, acquirefunc, relpages,
					   true, in_outer_xact, elevel);

	/*
	 * Close source relation now, but keep lock so that no one deletes it
	 * before we commit.  (If someone did, they'd fail to clean up the entries
	 * we made in pg_statistic.  Also, releasing the lock before commit would
	 * expose us to concurrent-update failures in update_attstats.)
	 */
	relation_close(onerel, NoLock);

	pgstat_progress_end_command();
}
271 :
/*
 *	do_analyze_rel() -- analyze one relation, recursively or not
 *
 * Note that "acquirefunc" is only relevant for the non-inherited case.
 * For the inherited case, acquire_inherited_sample_rows() determines the
 * appropriate acquirefunc for each child table.
 *
 * Runs with the table owner's userid under SECURITY_RESTRICTED_OPERATION,
 * and with GUC changes confined to a private nest level; both are restored
 * before returning.  All transient allocations go into the module-level
 * anl_context, which is created here and deleted on exit.
 */
static void
do_analyze_rel(Relation onerel, VacuumParams *params,
			   List *va_cols, AcquireSampleRowsFunc acquirefunc,
			   BlockNumber relpages, bool inh, bool in_outer_xact,
			   int elevel)
{
	int			attr_cnt,
				tcnt,
				i,
				ind;
	Relation   *Irel;
	int			nindexes;
	bool		hasindex;
	VacAttrStats **vacattrstats;
	AnlIndexData *indexdata;
	int			targrows,
				numrows,
				minrows;
	double		totalrows,		/* set by the acquire function */
				totaldeadrows;	/* ditto */
	HeapTuple  *rows;
	PGRUsage	ru0;
	TimestampTz starttime = 0;
	MemoryContext caller_context;
	Oid			save_userid;
	int			save_sec_context;
	int			save_nestlevel;
	/* Snapshot page counters so we can report deltas in the autovacuum log */
	int64		AnalyzePageHit = VacuumPageHit;
	int64		AnalyzePageMiss = VacuumPageMiss;
	int64		AnalyzePageDirty = VacuumPageDirty;
	PgStat_Counter startreadtime = 0;
	PgStat_Counter startwritetime = 0;

	if (inh)
		ereport(elevel,
				(errmsg("analyzing \"%s.%s\" inheritance tree",
						get_namespace_name(RelationGetNamespace(onerel)),
						RelationGetRelationName(onerel))));
	else
		ereport(elevel,
				(errmsg("analyzing \"%s.%s\"",
						get_namespace_name(RelationGetNamespace(onerel)),
						RelationGetRelationName(onerel))));

	/*
	 * Set up a working context so that we can easily free whatever junk gets
	 * created.
	 */
	anl_context = AllocSetContextCreate(CurrentMemoryContext,
										"Analyze",
										ALLOCSET_DEFAULT_SIZES);
	caller_context = MemoryContextSwitchTo(anl_context);

	/*
	 * Switch to the table owner's userid, so that any index functions are run
	 * as that user.  Also lock down security-restricted operations and
	 * arrange to make GUC variable changes local to this command.
	 */
	GetUserIdAndSecContext(&save_userid, &save_sec_context);
	SetUserIdAndSecContext(onerel->rd_rel->relowner,
						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
	save_nestlevel = NewGUCNestLevel();
	RestrictSearchPath();

	/* measure elapsed time iff autovacuum logging requires it */
	if (AmAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
	{
		if (track_io_timing)
		{
			startreadtime = pgStatBlockReadTime;
			startwritetime = pgStatBlockWriteTime;
		}

		pg_rusage_init(&ru0);
		starttime = GetCurrentTimestamp();
	}

	/*
	 * Determine which columns to analyze
	 *
	 * Note that system attributes are never analyzed, so we just reject them
	 * at the lookup stage.  We also reject duplicate column mentions.  (We
	 * could alternatively ignore duplicates, but analyzing a column twice
	 * won't work; we'd end up making a conflicting update in pg_statistic.)
	 */
	if (va_cols != NIL)
	{
		Bitmapset  *unique_cols = NULL;
		ListCell   *le;

		vacattrstats = (VacAttrStats **) palloc(list_length(va_cols) *
												sizeof(VacAttrStats *));
		tcnt = 0;
		foreach(le, va_cols)
		{
			char	   *col = strVal(lfirst(le));

			i = attnameAttNum(onerel, col, false);
			if (i == InvalidAttrNumber)
				ereport(ERROR,
						(errcode(ERRCODE_UNDEFINED_COLUMN),
						 errmsg("column \"%s\" of relation \"%s\" does not exist",
								col, RelationGetRelationName(onerel))));
			if (bms_is_member(i, unique_cols))
				ereport(ERROR,
						(errcode(ERRCODE_DUPLICATE_COLUMN),
						 errmsg("column \"%s\" of relation \"%s\" appears more than once",
								col, RelationGetRelationName(onerel))));
			unique_cols = bms_add_member(unique_cols, i);

			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
			if (vacattrstats[tcnt] != NULL)
				tcnt++;
		}
		attr_cnt = tcnt;
	}
	else
	{
		/* No explicit column list: consider every attribute */
		attr_cnt = onerel->rd_att->natts;
		vacattrstats = (VacAttrStats **)
			palloc(attr_cnt * sizeof(VacAttrStats *));
		tcnt = 0;
		for (i = 1; i <= attr_cnt; i++)
		{
			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
			if (vacattrstats[tcnt] != NULL)
				tcnt++;
		}
		attr_cnt = tcnt;
	}

	/*
	 * Open all indexes of the relation, and see if there are any analyzable
	 * columns in the indexes.  We do not analyze index columns if there was
	 * an explicit column list in the ANALYZE command, however.
	 *
	 * If we are doing a recursive scan, we don't want to touch the parent's
	 * indexes at all.  If we're processing a partitioned table, we need to
	 * know if there are any indexes, but we don't want to process them.
	 */
	if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		List	   *idxs = RelationGetIndexList(onerel);

		Irel = NULL;
		nindexes = 0;
		hasindex = idxs != NIL;
		list_free(idxs);
	}
	else if (!inh)
	{
		vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel);
		hasindex = nindexes > 0;
	}
	else
	{
		Irel = NULL;
		nindexes = 0;
		hasindex = false;
	}
	indexdata = NULL;
	if (nindexes > 0)
	{
		indexdata = (AnlIndexData *) palloc0(nindexes * sizeof(AnlIndexData));
		for (ind = 0; ind < nindexes; ind++)
		{
			AnlIndexData *thisdata = &indexdata[ind];
			IndexInfo  *indexInfo;

			thisdata->indexInfo = indexInfo = BuildIndexInfo(Irel[ind]);
			thisdata->tupleFract = 1.0; /* fix later if partial */
			if (indexInfo->ii_Expressions != NIL && va_cols == NIL)
			{
				ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);

				thisdata->vacattrstats = (VacAttrStats **)
					palloc(indexInfo->ii_NumIndexAttrs * sizeof(VacAttrStats *));
				tcnt = 0;
				for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
				{
					int			keycol = indexInfo->ii_IndexAttrNumbers[i];

					if (keycol == 0)
					{
						/* Found an index expression */
						Node	   *indexkey;

						if (indexpr_item == NULL)	/* shouldn't happen */
							elog(ERROR, "too few entries in indexprs list");
						indexkey = (Node *) lfirst(indexpr_item);
						indexpr_item = lnext(indexInfo->ii_Expressions,
											 indexpr_item);
						thisdata->vacattrstats[tcnt] =
							examine_attribute(Irel[ind], i + 1, indexkey);
						if (thisdata->vacattrstats[tcnt] != NULL)
							tcnt++;
					}
				}
				thisdata->attr_cnt = tcnt;
			}
		}
	}

	/*
	 * Determine how many rows we need to sample, using the worst case from
	 * all analyzable columns.  We use a lower bound of 100 rows to avoid
	 * possible overflow in Vitter's algorithm.  (Note: that will also be the
	 * target in the corner case where there are no analyzable columns.)
	 */
	targrows = 100;
	for (i = 0; i < attr_cnt; i++)
	{
		if (targrows < vacattrstats[i]->minrows)
			targrows = vacattrstats[i]->minrows;
	}
	for (ind = 0; ind < nindexes; ind++)
	{
		AnlIndexData *thisdata = &indexdata[ind];

		for (i = 0; i < thisdata->attr_cnt; i++)
		{
			if (targrows < thisdata->vacattrstats[i]->minrows)
				targrows = thisdata->vacattrstats[i]->minrows;
		}
	}

	/*
	 * Look at extended statistics objects too, as those may define custom
	 * statistics target.  So we may need to sample more rows and then build
	 * the statistics with enough detail.
	 */
	minrows = ComputeExtStatisticsRows(onerel, attr_cnt, vacattrstats);

	if (targrows < minrows)
		targrows = minrows;

	/*
	 * Acquire the sample rows
	 */
	rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
	pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
								 inh ? PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS_INH :
								 PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS);
	if (inh)
		numrows = acquire_inherited_sample_rows(onerel, elevel,
												rows, targrows,
												&totalrows, &totaldeadrows);
	else
		numrows = (*acquirefunc) (onerel, elevel,
								  rows, targrows,
								  &totalrows, &totaldeadrows);

	/*
	 * Compute the statistics.  Temporary results during the calculations for
	 * each column are stored in a child context.  The calc routines are
	 * responsible to make sure that whatever they store into the VacAttrStats
	 * structure is allocated in anl_context.
	 *
	 * (numrows == 0 means the table was empty; nothing to compute or store.)
	 */
	if (numrows > 0)
	{
		MemoryContext col_context,
					old_context;

		pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
									 PROGRESS_ANALYZE_PHASE_COMPUTE_STATS);

		col_context = AllocSetContextCreate(anl_context,
											"Analyze Column",
											ALLOCSET_DEFAULT_SIZES);
		old_context = MemoryContextSwitchTo(col_context);

		for (i = 0; i < attr_cnt; i++)
		{
			VacAttrStats *stats = vacattrstats[i];
			AttributeOpts *aopt;

			stats->rows = rows;
			stats->tupDesc = onerel->rd_att;
			stats->compute_stats(stats,
								 std_fetch_func,
								 numrows,
								 totalrows);

			/*
			 * If the appropriate flavor of the n_distinct option is
			 * specified, override with the corresponding value.
			 */
			aopt = get_attribute_options(onerel->rd_id, stats->tupattnum);
			if (aopt != NULL)
			{
				float8		n_distinct;

				n_distinct = inh ? aopt->n_distinct_inherited : aopt->n_distinct;
				if (n_distinct != 0.0)
					stats->stadistinct = n_distinct;
			}

			/* reclaim per-column scratch memory before the next column */
			MemoryContextReset(col_context);
		}

		if (nindexes > 0)
			compute_index_stats(onerel, totalrows,
								indexdata, nindexes,
								rows, numrows,
								col_context);

		MemoryContextSwitchTo(old_context);
		MemoryContextDelete(col_context);

		/*
		 * Emit the completed stats rows into pg_statistic, replacing any
		 * previous statistics for the target columns.  (If there are stats in
		 * pg_statistic for columns we didn't process, we leave them alone.)
		 */
		update_attstats(RelationGetRelid(onerel), inh,
						attr_cnt, vacattrstats);

		for (ind = 0; ind < nindexes; ind++)
		{
			AnlIndexData *thisdata = &indexdata[ind];

			update_attstats(RelationGetRelid(Irel[ind]), false,
							thisdata->attr_cnt, thisdata->vacattrstats);
		}

		/* Build extended statistics (if there are any). */
		BuildRelationExtStatistics(onerel, inh, totalrows, numrows, rows,
								   attr_cnt, vacattrstats);
	}

	pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
								 PROGRESS_ANALYZE_PHASE_FINALIZE_ANALYZE);

	/*
	 * Update pages/tuples stats in pg_class ... but not if we're doing
	 * inherited stats.
	 *
	 * We assume that VACUUM hasn't set pg_class.reltuples already, even
	 * during a VACUUM ANALYZE.  Although VACUUM often updates pg_class,
	 * exceptions exist.  A "VACUUM (ANALYZE, INDEX_CLEANUP OFF)" command will
	 * never update pg_class entries for index relations.  It's also possible
	 * that an individual index's pg_class entry won't be updated during
	 * VACUUM if the index AM returns NULL from its amvacuumcleanup() routine.
	 */
	if (!inh)
	{
		BlockNumber relallvisible;

		if (RELKIND_HAS_STORAGE(onerel->rd_rel->relkind))
			visibilitymap_count(onerel, &relallvisible, NULL);
		else
			relallvisible = 0;

		/* Update pg_class for table relation */
		vac_update_relstats(onerel,
							relpages,
							totalrows,
							relallvisible,
							hasindex,
							InvalidTransactionId,
							InvalidMultiXactId,
							NULL, NULL,
							in_outer_xact);

		/* Same for indexes */
		for (ind = 0; ind < nindexes; ind++)
		{
			AnlIndexData *thisdata = &indexdata[ind];
			double		totalindexrows;

			totalindexrows = ceil(thisdata->tupleFract * totalrows);
			vac_update_relstats(Irel[ind],
								RelationGetNumberOfBlocks(Irel[ind]),
								totalindexrows,
								0,
								false,
								InvalidTransactionId,
								InvalidMultiXactId,
								NULL, NULL,
								in_outer_xact);
		}
	}
	else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		/*
		 * Partitioned tables don't have storage, so we don't set any fields
		 * in their pg_class entries except for reltuples and relhasindex.
		 */
		vac_update_relstats(onerel, -1, totalrows,
							0, hasindex, InvalidTransactionId,
							InvalidMultiXactId,
							NULL, NULL,
							in_outer_xact);
	}

	/*
	 * Now report ANALYZE to the cumulative stats system.  For regular tables,
	 * we do it only if not doing inherited stats.  For partitioned tables, we
	 * only do it for inherited stats. (We're never called for not-inherited
	 * stats on partitioned tables anyway.)
	 *
	 * Reset the changes_since_analyze counter only if we analyzed all
	 * columns; otherwise, there is still work for auto-analyze to do.
	 */
	if (!inh)
		pgstat_report_analyze(onerel, totalrows, totaldeadrows,
							  (va_cols == NIL));
	else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		pgstat_report_analyze(onerel, 0, 0, (va_cols == NIL));

	/*
	 * If this isn't part of VACUUM ANALYZE, let index AMs do cleanup.
	 *
	 * Note that most index AMs perform a no-op as a matter of policy for
	 * amvacuumcleanup() when called in ANALYZE-only mode.  The only exception
	 * among core index AMs is GIN/ginvacuumcleanup().
	 */
	if (!(params->options & VACOPT_VACUUM))
	{
		for (ind = 0; ind < nindexes; ind++)
		{
			IndexBulkDeleteResult *stats;
			IndexVacuumInfo ivinfo;

			ivinfo.index = Irel[ind];
			ivinfo.heaprel = onerel;
			ivinfo.analyze_only = true;
			ivinfo.estimated_count = true;
			ivinfo.message_level = elevel;
			ivinfo.num_heap_tuples = onerel->rd_rel->reltuples;
			ivinfo.strategy = vac_strategy;

			stats = index_vacuum_cleanup(&ivinfo, NULL);

			if (stats)
				pfree(stats);
		}
	}

	/* Done with indexes */
	vac_close_indexes(nindexes, Irel, NoLock);

	/* Log the action if appropriate */
	if (AmAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
	{
		TimestampTz endtime = GetCurrentTimestamp();

		if (params->log_min_duration == 0 ||
			TimestampDifferenceExceeds(starttime, endtime,
									   params->log_min_duration))
		{
			long		delay_in_ms;
			double		read_rate = 0;
			double		write_rate = 0;
			StringInfoData buf;

			/*
			 * Calculate the difference in the Page Hit/Miss/Dirty that
			 * happened as part of the analyze by subtracting out the
			 * pre-analyze values which we saved above.
			 */
			AnalyzePageHit = VacuumPageHit - AnalyzePageHit;
			AnalyzePageMiss = VacuumPageMiss - AnalyzePageMiss;
			AnalyzePageDirty = VacuumPageDirty - AnalyzePageDirty;

			/*
			 * We do not expect an analyze to take > 25 days and it simplifies
			 * things a bit to use TimestampDifferenceMilliseconds.
			 */
			delay_in_ms = TimestampDifferenceMilliseconds(starttime, endtime);

			/*
			 * Note that we are reporting these read/write rates in the same
			 * manner as VACUUM does, which means that while the 'average read
			 * rate' here actually corresponds to page misses and resulting
			 * reads which are also picked up by track_io_timing, if enabled,
			 * the 'average write rate' is actually talking about the rate of
			 * pages being dirtied, not being written out, so it's typical to
			 * have a non-zero 'avg write rate' while I/O timings only reports
			 * reads.
			 *
			 * It's not clear that an ANALYZE will ever result in
			 * FlushBuffer() being called, but we track and support reporting
			 * on I/O write time in case that changes as it's practically free
			 * to do so anyway.
			 */

			if (delay_in_ms > 0)
			{
				read_rate = (double) BLCKSZ * AnalyzePageMiss / (1024 * 1024) /
					(delay_in_ms / 1000.0);
				write_rate = (double) BLCKSZ * AnalyzePageDirty / (1024 * 1024) /
					(delay_in_ms / 1000.0);
			}

			/*
			 * We split this up so we don't emit empty I/O timing values when
			 * track_io_timing isn't enabled.
			 */

			initStringInfo(&buf);
			appendStringInfo(&buf, _("automatic analyze of table \"%s.%s.%s\"\n"),
							 get_database_name(MyDatabaseId),
							 get_namespace_name(RelationGetNamespace(onerel)),
							 RelationGetRelationName(onerel));
			if (track_io_timing)
			{
				double		read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
				double		write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;

				appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
								 read_ms, write_ms);
			}
			appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
							 read_rate, write_rate);
			appendStringInfo(&buf, _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
							 (long long) AnalyzePageHit,
							 (long long) AnalyzePageMiss,
							 (long long) AnalyzePageDirty);
			appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));

			ereport(LOG,
					(errmsg_internal("%s", buf.data)));

			pfree(buf.data);
		}
	}

	/* Roll back any GUC changes executed by index functions */
	AtEOXact_GUC(false, save_nestlevel);

	/* Restore userid and security context */
	SetUserIdAndSecContext(save_userid, save_sec_context);

	/* Restore current context and release memory */
	MemoryContextSwitchTo(caller_context);
	MemoryContextDelete(anl_context);
	anl_context = NULL;
}
818 :
/*
 * Compute statistics about indexes of a relation
 *
 * Called from do_analyze_rel() with the heap sample already collected in
 * rows[0..numrows-1].  For each index that has expression columns and/or a
 * partial-index predicate, this evaluates the predicate/expressions against
 * the sampled rows, estimates tupleFract for partial indexes, and runs the
 * per-column compute_stats routines on the saved expression values.
 * col_context is the per-column scratch context owned by the caller.
 */
static void
compute_index_stats(Relation onerel, double totalrows,
					AnlIndexData *indexdata, int nindexes,
					HeapTuple *rows, int numrows,
					MemoryContext col_context)
{
	MemoryContext ind_context,
				old_context;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	int			ind,
				i;

	ind_context = AllocSetContextCreate(anl_context,
										"Analyze Index",
										ALLOCSET_DEFAULT_SIZES);
	old_context = MemoryContextSwitchTo(ind_context);

	for (ind = 0; ind < nindexes; ind++)
	{
		AnlIndexData *thisdata = &indexdata[ind];
		IndexInfo  *indexInfo = thisdata->indexInfo;
		int			attr_cnt = thisdata->attr_cnt;
		TupleTableSlot *slot;
		EState	   *estate;
		ExprContext *econtext;
		ExprState  *predicate;
		Datum	   *exprvals;
		bool	   *exprnulls;
		int			numindexrows,
					tcnt,
					rowno;
		double		totalindexrows;

		/* Ignore index if no columns to analyze and not partial */
		if (attr_cnt == 0 && indexInfo->ii_Predicate == NIL)
			continue;

		/*
		 * Need an EState for evaluation of index expressions and
		 * partial-index predicates.  Create it in the per-index context to be
		 * sure it gets cleaned up at the bottom of the loop.
		 */
		estate = CreateExecutorState();
		econtext = GetPerTupleExprContext(estate);
		/* Need a slot to hold the current heap tuple, too */
		slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel),
										&TTSOpsHeapTuple);

		/* Arrange for econtext's scan tuple to be the tuple under test */
		econtext->ecxt_scantuple = slot;

		/* Set up execution state for predicate. */
		predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);

		/* Compute and save index expression values */
		exprvals = (Datum *) palloc(numrows * attr_cnt * sizeof(Datum));
		exprnulls = (bool *) palloc(numrows * attr_cnt * sizeof(bool));
		numindexrows = 0;
		tcnt = 0;				/* running index into exprvals/exprnulls */
		for (rowno = 0; rowno < numrows; rowno++)
		{
			HeapTuple	heapTuple = rows[rowno];

			vacuum_delay_point();

			/*
			 * Reset the per-tuple context each time, to reclaim any cruft
			 * left behind by evaluating the predicate or index expressions.
			 */
			ResetExprContext(econtext);

			/* Set up for predicate or expression evaluation */
			ExecStoreHeapTuple(heapTuple, slot, false);

			/* If index is partial, check predicate */
			if (predicate != NULL)
			{
				if (!ExecQual(predicate, econtext))
					continue;
			}
			numindexrows++;

			if (attr_cnt > 0)
			{
				/*
				 * Evaluate the index row to compute expression values.  We
				 * could do this by hand, but FormIndexDatum is convenient.
				 */
				FormIndexDatum(indexInfo,
							   slot,
							   estate,
							   values,
							   isnull);

				/*
				 * Save just the columns we care about.  We copy the values
				 * into ind_context from the estate's per-tuple context.
				 */
				for (i = 0; i < attr_cnt; i++)
				{
					VacAttrStats *stats = thisdata->vacattrstats[i];
					int			attnum = stats->tupattnum;

					if (isnull[attnum - 1])
					{
						exprvals[tcnt] = (Datum) 0;
						exprnulls[tcnt] = true;
					}
					else
					{
						/* datumCopy: value must outlive the per-tuple reset */
						exprvals[tcnt] = datumCopy(values[attnum - 1],
												   stats->attrtype->typbyval,
												   stats->attrtype->typlen);
						exprnulls[tcnt] = false;
					}
					tcnt++;
				}
			}
		}

		/*
		 * Having counted the number of rows that pass the predicate in the
		 * sample, we can estimate the total number of rows in the index.
		 */
		thisdata->tupleFract = (double) numindexrows / (double) numrows;
		totalindexrows = ceil(thisdata->tupleFract * totalrows);

		/*
		 * Now we can compute the statistics for the expression columns.
		 */
		if (numindexrows > 0)
		{
			MemoryContextSwitchTo(col_context);
			for (i = 0; i < attr_cnt; i++)
			{
				VacAttrStats *stats = thisdata->vacattrstats[i];

				/* row i's value for column i lives at stride attr_cnt */
				stats->exprvals = exprvals + i;
				stats->exprnulls = exprnulls + i;
				stats->rowstride = attr_cnt;
				stats->compute_stats(stats,
									 ind_fetch_func,
									 numindexrows,
									 totalindexrows);

				MemoryContextReset(col_context);
			}
		}

		/* And clean up */
		MemoryContextSwitchTo(ind_context);

		ExecDropSingleTupleTableSlot(slot);
		FreeExecutorState(estate);
		MemoryContextReset(ind_context);
	}

	MemoryContextSwitchTo(old_context);
	MemoryContextDelete(ind_context);
}
983 :
/*
 * examine_attribute -- pre-analysis of a single column
 *
 * Determine whether the column is analyzable; if so, create and initialize
 * a VacAttrStats struct for it.  If not, return NULL.
 *
 * If index_expr isn't NULL, then we're trying to analyze an expression index,
 * and index_expr is the expression tree representing the column's data.
 */
static VacAttrStats *
examine_attribute(Relation onerel, int attnum, Node *index_expr)
{
	Form_pg_attribute attr = TupleDescAttr(onerel->rd_att, attnum - 1);
	int			attstattarget;
	HeapTuple	atttuple;
	Datum		dat;
	bool		isnull;
	HeapTuple	typtuple;
	VacAttrStats *stats;
	int			i;
	bool		ok;

	/* Never analyze dropped columns */
	if (attr->attisdropped)
		return NULL;

	/*
	 * Get attstattarget value.  Set to -1 if null.  (Analyze functions expect
	 * -1 to mean use default_statistics_target; see for example
	 * std_typanalyze.)
	 */
	atttuple = SearchSysCache2(ATTNUM, ObjectIdGetDatum(RelationGetRelid(onerel)), Int16GetDatum(attnum));
	if (!HeapTupleIsValid(atttuple))
		elog(ERROR, "cache lookup failed for attribute %d of relation %u",
			 attnum, RelationGetRelid(onerel));
	dat = SysCacheGetAttr(ATTNUM, atttuple, Anum_pg_attribute_attstattarget, &isnull);
	attstattarget = isnull ? -1 : DatumGetInt16(dat);
	ReleaseSysCache(atttuple);

	/* Don't analyze column if user has specified not to */
	if (attstattarget == 0)
		return NULL;

	/*
	 * Create the VacAttrStats struct.
	 */
	stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
	stats->attstattarget = attstattarget;

	/*
	 * When analyzing an expression index, believe the expression tree's type
	 * not the column datatype --- the latter might be the opckeytype storage
	 * type of the opclass, which is not interesting for our purposes.  (Note:
	 * if we did anything with non-expression index columns, we'd need to
	 * figure out where to get the correct type info from, but for now that's
	 * not a problem.)  It's not clear whether anyone will care about the
	 * typmod, but we store that too just in case.
	 */
	if (index_expr)
	{
		stats->attrtypid = exprType(index_expr);
		stats->attrtypmod = exprTypmod(index_expr);

		/*
		 * If a collation has been specified for the index column, use that in
		 * preference to anything else; but if not, fall back to whatever we
		 * can get from the expression.
		 */
		if (OidIsValid(onerel->rd_indcollation[attnum - 1]))
			stats->attrcollid = onerel->rd_indcollation[attnum - 1];
		else
			stats->attrcollid = exprCollation(index_expr);
	}
	else
	{
		stats->attrtypid = attr->atttypid;
		stats->attrtypmod = attr->atttypmod;
		stats->attrcollid = attr->attcollation;
	}

	/*
	 * Fetch a copy of the type's pg_type row; stats->attrtype points into
	 * that copied tuple, so it stays valid after cache invalidations.
	 */
	typtuple = SearchSysCacheCopy1(TYPEOID,
								   ObjectIdGetDatum(stats->attrtypid));
	if (!HeapTupleIsValid(typtuple))
		elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
	stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
	stats->anl_context = anl_context;
	stats->tupattnum = attnum;

	/*
	 * The fields describing the stats->stavalues[n] element types default to
	 * the type of the data being analyzed, but the type-specific typanalyze
	 * function can change them if it wants to store something else.
	 */
	for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
	{
		stats->statypid[i] = stats->attrtypid;
		stats->statyplen[i] = stats->attrtype->typlen;
		stats->statypbyval[i] = stats->attrtype->typbyval;
		stats->statypalign[i] = stats->attrtype->typalign;
	}

	/*
	 * Call the type-specific typanalyze function.  If none is specified, use
	 * std_typanalyze().
	 */
	if (OidIsValid(stats->attrtype->typanalyze))
		ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
										   PointerGetDatum(stats)));
	else
		ok = std_typanalyze(stats);

	/* If the typanalyze function rejected the column, clean up and bail */
	if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
	{
		heap_freetuple(typtuple);
		pfree(stats);
		return NULL;
	}

	return stats;
}
1104 :
1105 : /*
1106 : * Read stream callback returning the next BlockNumber as chosen by the
1107 : * BlockSampling algorithm.
1108 : */
1109 : static BlockNumber
1110 109512 : block_sampling_read_stream_next(ReadStream *stream,
1111 : void *callback_private_data,
1112 : void *per_buffer_data)
1113 : {
1114 109512 : BlockSamplerData *bs = callback_private_data;
1115 :
1116 109512 : return BlockSampler_HasMore(bs) ? BlockSampler_Next(bs) : InvalidBlockNumber;
1117 : }
1118 :
/*
 * acquire_sample_rows -- acquire a random sample of rows from the table
 *
 * Selected rows are returned in the caller-allocated array rows[], which
 * must have at least targrows entries.
 * The actual number of rows selected is returned as the function result.
 * We also estimate the total numbers of live and dead rows in the table,
 * and return them into *totalrows and *totaldeadrows, respectively.
 *
 * The returned list of tuples is in order by physical position in the table.
 * (We will rely on this later to derive correlation estimates.)
 *
 * As of May 2004 we use a new two-stage method:  Stage one selects up
 * to targrows random blocks (or all blocks, if there aren't so many).
 * Stage two scans these blocks and uses the Vitter algorithm to create
 * a random sample of targrows rows (or less, if there are less in the
 * sample of blocks).  The two stages are executed simultaneously: each
 * block is processed as soon as stage one returns its number and while
 * the rows are read stage two controls which ones are to be inserted
 * into the sample.
 *
 * Although every row has an equal chance of ending up in the final
 * sample, this sampling method is not perfect: not every possible
 * sample has an equal chance of being selected.  For large relations
 * the number of different blocks represented by the sample tends to be
 * too small.  We can live with that for now.  Improvements are welcome.
 *
 * An important property of this sampling method is that because we do
 * look at a statistically unbiased set of blocks, we should get
 * unbiased estimates of the average numbers of live and dead rows per
 * block.  The previous sampling method put too much credence in the row
 * density near the start of the table.
 */
static int
acquire_sample_rows(Relation onerel, int elevel,
					HeapTuple *rows, int targrows,
					double *totalrows, double *totaldeadrows)
{
	int			numrows = 0;	/* # rows now in reservoir */
	double		samplerows = 0; /* total # rows collected */
	double		liverows = 0;	/* # live rows seen */
	double		deadrows = 0;	/* # dead rows seen */
	double		rowstoskip = -1;	/* -1 means not set yet */
	uint32		randseed;		/* Seed for block sampler(s) */
	BlockNumber totalblocks;
	TransactionId OldestXmin;
	BlockSamplerData bs;
	ReservoirStateData rstate;
	TupleTableSlot *slot;
	TableScanDesc scan;
	BlockNumber nblocks;
	BlockNumber blksdone = 0;
	ReadStream *stream;

	Assert(targrows > 0);

	totalblocks = RelationGetNumberOfBlocks(onerel);

	/* Need a cutoff xmin for HeapTupleSatisfiesVacuum */
	OldestXmin = GetOldestNonRemovableTransactionId(onerel);

	/* Prepare for sampling block numbers */
	randseed = pg_prng_uint32(&pg_global_prng_state);
	nblocks = BlockSampler_Init(&bs, totalblocks, targrows, randseed);

	/* Report sampling block numbers */
	pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_TOTAL,
								 nblocks);

	/* Prepare for sampling rows */
	reservoir_init_selection_state(&rstate, targrows);

	scan = table_beginscan_analyze(onerel);
	slot = table_slot_create(onerel, NULL);

	/* Feed the sampled block numbers through the read-stream machinery */
	stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
										vac_strategy,
										scan->rs_rd,
										MAIN_FORKNUM,
										block_sampling_read_stream_next,
										&bs,
										0);

	/* Outer loop over blocks to sample */
	while (table_scan_analyze_next_block(scan, stream))
	{
		vacuum_delay_point();

		while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
		{
			/*
			 * The first targrows sample rows are simply copied into the
			 * reservoir. Then we start replacing tuples in the sample until
			 * we reach the end of the relation. This algorithm is from Jeff
			 * Vitter's paper (see full citation in utils/misc/sampling.c). It
			 * works by repeatedly computing the number of tuples to skip
			 * before selecting a tuple, which replaces a randomly chosen
			 * element of the reservoir (current set of tuples). At all times
			 * the reservoir is a true random sample of the tuples we've
			 * passed over so far, so when we fall off the end of the relation
			 * we're done.
			 */
			if (numrows < targrows)
				rows[numrows++] = ExecCopySlotHeapTuple(slot);
			else
			{
				/*
				 * t in Vitter's paper is the number of records already
				 * processed.  If we need to compute a new S value, we must
				 * use the not-yet-incremented value of samplerows as t.
				 */
				if (rowstoskip < 0)
					rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows);

				if (rowstoskip <= 0)
				{
					/*
					 * Found a suitable tuple, so save it, replacing one old
					 * tuple at random
					 */
					int			k = (int) (targrows * sampler_random_fract(&rstate.randstate));

					Assert(k >= 0 && k < targrows);
					heap_freetuple(rows[k]);
					rows[k] = ExecCopySlotHeapTuple(slot);
				}

				rowstoskip -= 1;
			}

			samplerows += 1;
		}

		pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_DONE,
									 ++blksdone);
	}

	read_stream_end(stream);

	ExecDropSingleTupleTableSlot(slot);
	table_endscan(scan);

	/*
	 * If we didn't find as many tuples as we wanted then we're done. No sort
	 * is needed, since they're already in order.
	 *
	 * Otherwise we need to sort the collected tuples by position
	 * (itempointer). It's not worth worrying about corner cases where the
	 * tuples are already sorted.
	 */
	if (numrows == targrows)
		qsort_interruptible(rows, numrows, sizeof(HeapTuple),
							compare_rows, NULL);

	/*
	 * Estimate total numbers of live and dead rows in relation, extrapolating
	 * on the assumption that the average tuple density in pages we didn't
	 * scan is the same as in the pages we did scan.  Since what we scanned is
	 * a random sample of the pages in the relation, this should be a good
	 * assumption.
	 */
	if (bs.m > 0)
	{
		*totalrows = floor((liverows / bs.m) * totalblocks + 0.5);
		*totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5);
	}
	else
	{
		/* No blocks were actually sampled (empty relation) */
		*totalrows = 0.0;
		*totaldeadrows = 0.0;
	}

	/*
	 * Emit some interesting relation info
	 */
	ereport(elevel,
			(errmsg("\"%s\": scanned %d of %u pages, "
					"containing %.0f live rows and %.0f dead rows; "
					"%d rows in sample, %.0f estimated total rows",
					RelationGetRelationName(onerel),
					bs.m, totalblocks,
					liverows, deadrows,
					numrows, *totalrows)));

	return numrows;
}
1305 :
1306 : /*
1307 : * Comparator for sorting rows[] array
1308 : */
1309 : static int
1310 3910946 : compare_rows(const void *a, const void *b, void *arg)
1311 : {
1312 3910946 : HeapTuple ha = *(const HeapTuple *) a;
1313 3910946 : HeapTuple hb = *(const HeapTuple *) b;
1314 3910946 : BlockNumber ba = ItemPointerGetBlockNumber(&ha->t_self);
1315 3910946 : OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self);
1316 3910946 : BlockNumber bb = ItemPointerGetBlockNumber(&hb->t_self);
1317 3910946 : OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self);
1318 :
1319 3910946 : if (ba < bb)
1320 897460 : return -1;
1321 3013486 : if (ba > bb)
1322 844358 : return 1;
1323 2169128 : if (oa < ob)
1324 1414690 : return -1;
1325 754438 : if (oa > ob)
1326 754438 : return 1;
1327 0 : return 0;
1328 : }
1329 :
1330 :
/*
 * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree
 *
 * This has the same API as acquire_sample_rows, except that rows are
 * collected from all inheritance children as well as the specified table.
 * We fail and return zero if there are no inheritance children, or if all
 * children are foreign tables that don't support ANALYZE.
 */
static int
acquire_inherited_sample_rows(Relation onerel, int elevel,
							  HeapTuple *rows, int targrows,
							  double *totalrows, double *totaldeadrows)
{
	List	   *tableOIDs;
	Relation   *rels;
	AcquireSampleRowsFunc *acquirefuncs;
	double	   *relblocks;
	double		totalblocks;
	int			numrows,
				nrels,
				i;
	ListCell   *lc;
	bool		has_child;

	/* Initialize output parameters to zero now, in case we exit early */
	*totalrows = 0;
	*totaldeadrows = 0;

	/*
	 * Find all members of inheritance set.  We only need AccessShareLock on
	 * the children.
	 */
	tableOIDs =
		find_all_inheritors(RelationGetRelid(onerel), AccessShareLock, NULL);

	/*
	 * Check that there's at least one descendant, else fail.  This could
	 * happen despite analyze_rel's relhassubclass check, if table once had a
	 * child but no longer does.  In that case, we can clear the
	 * relhassubclass field so as not to make the same mistake again later.
	 * (This is safe because we hold ShareUpdateExclusiveLock.)
	 */
	if (list_length(tableOIDs) < 2)
	{
		/* CCI because we already updated the pg_class row in this command */
		CommandCounterIncrement();
		SetRelationHasSubclass(RelationGetRelid(onerel), false);
		ereport(elevel,
				(errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no child tables",
						get_namespace_name(RelationGetNamespace(onerel)),
						RelationGetRelationName(onerel))));
		return 0;
	}

	/*
	 * Identify acquirefuncs to use, and count blocks in all the relations.
	 * The result could overflow BlockNumber, so we use double arithmetic.
	 */
	rels = (Relation *) palloc(list_length(tableOIDs) * sizeof(Relation));
	acquirefuncs = (AcquireSampleRowsFunc *)
		palloc(list_length(tableOIDs) * sizeof(AcquireSampleRowsFunc));
	relblocks = (double *) palloc(list_length(tableOIDs) * sizeof(double));
	totalblocks = 0;
	nrels = 0;
	has_child = false;
	/* First pass: open each member and decide how (or whether) to sample it */
	foreach(lc, tableOIDs)
	{
		Oid			childOID = lfirst_oid(lc);
		Relation	childrel;
		AcquireSampleRowsFunc acquirefunc = NULL;
		BlockNumber relpages = 0;

		/* We already got the needed lock */
		childrel = table_open(childOID, NoLock);

		/* Ignore if temp table of another backend */
		if (RELATION_IS_OTHER_TEMP(childrel))
		{
			/* ... but release the lock on it */
			Assert(childrel != onerel);
			table_close(childrel, AccessShareLock);
			continue;
		}

		/* Check table type (MATVIEW can't happen, but might as well allow) */
		if (childrel->rd_rel->relkind == RELKIND_RELATION ||
			childrel->rd_rel->relkind == RELKIND_MATVIEW)
		{
			/* Regular table, so use the regular row acquisition function */
			acquirefunc = acquire_sample_rows;
			relpages = RelationGetNumberOfBlocks(childrel);
		}
		else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
		{
			/*
			 * For a foreign table, call the FDW's hook function to see
			 * whether it supports analysis.
			 */
			FdwRoutine *fdwroutine;
			bool		ok = false;

			fdwroutine = GetFdwRoutineForRelation(childrel, false);

			if (fdwroutine->AnalyzeForeignTable != NULL)
				ok = fdwroutine->AnalyzeForeignTable(childrel,
													 &acquirefunc,
													 &relpages);

			if (!ok)
			{
				/* ignore, but release the lock on it */
				Assert(childrel != onerel);
				table_close(childrel, AccessShareLock);
				continue;
			}
		}
		else
		{
			/*
			 * ignore, but release the lock on it.  don't try to unlock the
			 * passed-in relation
			 */
			Assert(childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
			if (childrel != onerel)
				table_close(childrel, AccessShareLock);
			else
				table_close(childrel, NoLock);
			continue;
		}

		/* OK, we'll process this child */
		has_child = true;
		rels[nrels] = childrel;
		acquirefuncs[nrels] = acquirefunc;
		relblocks[nrels] = (double) relpages;
		totalblocks += (double) relpages;
		nrels++;
	}

	/*
	 * If we don't have at least one child table to consider, fail.  If the
	 * relation is a partitioned table, it's not counted as a child table.
	 */
	if (!has_child)
	{
		ereport(elevel,
				(errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no analyzable child tables",
						get_namespace_name(RelationGetNamespace(onerel)),
						RelationGetRelationName(onerel))));
		return 0;
	}

	/*
	 * Now sample rows from each relation, proportionally to its fraction of
	 * the total block count.  (This might be less than desirable if the child
	 * rels have radically different free-space percentages, but it's not
	 * clear that it's worth working harder.)
	 */
	pgstat_progress_update_param(PROGRESS_ANALYZE_CHILD_TABLES_TOTAL,
								 nrels);
	numrows = 0;
	for (i = 0; i < nrels; i++)
	{
		Relation	childrel = rels[i];
		AcquireSampleRowsFunc acquirefunc = acquirefuncs[i];
		double		childblocks = relblocks[i];

		/*
		 * Report progress.  The sampling function will normally report blocks
		 * done/total, but we need to reset them to 0 here, so that they don't
		 * show an old value until that.
		 */
		{
			const int	progress_index[] = {
				PROGRESS_ANALYZE_CURRENT_CHILD_TABLE_RELID,
				PROGRESS_ANALYZE_BLOCKS_DONE,
				PROGRESS_ANALYZE_BLOCKS_TOTAL
			};
			const int64 progress_vals[] = {
				RelationGetRelid(childrel),
				0,
				0,
			};

			pgstat_progress_update_multi_param(3, progress_index, progress_vals);
		}

		if (childblocks > 0)
		{
			int			childtargrows;

			childtargrows = (int) rint(targrows * childblocks / totalblocks);
			/* Make sure we don't overrun due to roundoff error */
			childtargrows = Min(childtargrows, targrows - numrows);
			if (childtargrows > 0)
			{
				int			childrows;
				double		trows,
							tdrows;

				/* Fetch a random sample of the child's rows */
				childrows = (*acquirefunc) (childrel, elevel,
											rows + numrows, childtargrows,
											&trows, &tdrows);

				/* We may need to convert from child's rowtype to parent's */
				if (childrows > 0 &&
					!equalRowTypes(RelationGetDescr(childrel),
								   RelationGetDescr(onerel)))
				{
					TupleConversionMap *map;

					map = convert_tuples_by_name(RelationGetDescr(childrel),
												 RelationGetDescr(onerel));
					if (map != NULL)
					{
						int			j;

						/* Convert each sampled tuple to the parent's rowtype */
						for (j = 0; j < childrows; j++)
						{
							HeapTuple	newtup;

							newtup = execute_attr_map_tuple(rows[numrows + j], map);
							heap_freetuple(rows[numrows + j]);
							rows[numrows + j] = newtup;
						}
						free_conversion_map(map);
					}
				}

				/* And add to counts */
				numrows += childrows;
				*totalrows += trows;
				*totaldeadrows += tdrows;
			}
		}

		/*
		 * Note: we cannot release the child-table locks, since we may have
		 * pointers to their TOAST tables in the sampled rows.
		 */
		table_close(childrel, NoLock);
		pgstat_progress_update_param(PROGRESS_ANALYZE_CHILD_TABLES_DONE,
									 i + 1);
	}

	return numrows;
}
1579 :
1580 :
/*
 * update_attstats() -- update attribute statistics for one relation
 *
 * Statistics are stored in several places: the pg_class row for the
 * relation has stats about the whole relation, and there is a
 * pg_statistic row for each (non-system) attribute that has ever
 * been analyzed.  The pg_class values are updated by VACUUM, not here.
 *
 * pg_statistic rows are just added or updated normally.  This means
 * that pg_statistic will probably contain some deleted rows at the
 * completion of a vacuum cycle, unless it happens to get vacuumed last.
 *
 * To keep things simple, we punt for pg_statistic, and don't try
 * to compute or store rows for pg_statistic itself in pg_statistic.
 * This could possibly be made to work, but it's not worth the trouble.
 * Note analyze_rel() has seen to it that we won't come here when
 * vacuuming pg_statistic itself.
 *
 * Note: there would be a race condition here if two backends could
 * ANALYZE the same table concurrently.  Presently, we lock that out
 * by taking a self-exclusive lock on the relation in analyze_rel().
 */
static void
update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
{
	Relation	sd;
	int			attno;
	CatalogIndexState indstate = NULL;

	if (natts <= 0)
		return;					/* nothing to do */

	sd = table_open(StatisticRelationId, RowExclusiveLock);

	for (attno = 0; attno < natts; attno++)
	{
		VacAttrStats *stats = vacattrstats[attno];
		HeapTuple	stup,
					oldtup;
		int			i,
					k,
					n;
		Datum		values[Natts_pg_statistic];
		bool		nulls[Natts_pg_statistic];
		bool		replaces[Natts_pg_statistic];

		/* Ignore attr if we weren't able to collect stats */
		if (!stats->stats_valid)
			continue;

		/*
		 * Construct a new pg_statistic tuple
		 */
		for (i = 0; i < Natts_pg_statistic; ++i)
		{
			nulls[i] = false;
			replaces[i] = true;
		}

		values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(relid);
		values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(stats->tupattnum);
		values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh);
		values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
		values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
		values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
		/* The slot columns (stakindN etc.) are laid out consecutively */
		i = Anum_pg_statistic_stakind1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
		}
		i = Anum_pg_statistic_staop1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			values[i++] = ObjectIdGetDatum(stats->staop[k]);	/* staopN */
		}
		i = Anum_pg_statistic_stacoll1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			values[i++] = ObjectIdGetDatum(stats->stacoll[k]);	/* stacollN */
		}
		i = Anum_pg_statistic_stanumbers1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			int			nnum = stats->numnumbers[k];

			if (nnum > 0)
			{
				/* Build a float4[] array from this slot's numbers */
				Datum	   *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
				ArrayType  *arry;

				for (n = 0; n < nnum; n++)
					numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
				arry = construct_array_builtin(numdatums, nnum, FLOAT4OID);
				values[i++] = PointerGetDatum(arry);	/* stanumbersN */
			}
			else
			{
				nulls[i] = true;
				values[i++] = (Datum) 0;
			}
		}
		i = Anum_pg_statistic_stavalues1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			if (stats->numvalues[k] > 0)
			{
				ArrayType  *arry;

				arry = construct_array(stats->stavalues[k],
									   stats->numvalues[k],
									   stats->statypid[k],
									   stats->statyplen[k],
									   stats->statypbyval[k],
									   stats->statypalign[k]);
				values[i++] = PointerGetDatum(arry);	/* stavaluesN */
			}
			else
			{
				nulls[i] = true;
				values[i++] = (Datum) 0;
			}
		}

		/* Is there already a pg_statistic tuple for this attribute? */
		oldtup = SearchSysCache3(STATRELATTINH,
								 ObjectIdGetDatum(relid),
								 Int16GetDatum(stats->tupattnum),
								 BoolGetDatum(inh));

		/* Open index information when we know we need it */
		if (indstate == NULL)
			indstate = CatalogOpenIndexes(sd);

		if (HeapTupleIsValid(oldtup))
		{
			/* Yes, replace it */
			stup = heap_modify_tuple(oldtup,
									 RelationGetDescr(sd),
									 values,
									 nulls,
									 replaces);
			ReleaseSysCache(oldtup);
			CatalogTupleUpdateWithInfo(sd, &stup->t_self, stup, indstate);
		}
		else
		{
			/* No, insert new tuple */
			stup = heap_form_tuple(RelationGetDescr(sd), values, nulls);
			CatalogTupleInsertWithInfo(sd, stup, indstate);
		}

		heap_freetuple(stup);
	}

	if (indstate != NULL)
		CatalogCloseIndexes(indstate);
	table_close(sd, RowExclusiveLock);
}
1739 :
1740 : /*
1741 : * Standard fetch function for use by compute_stats subroutines.
1742 : *
1743 : * This exists to provide some insulation between compute_stats routines
1744 : * and the actual storage of the sample data.
1745 : */
1746 : static Datum
1747 59908308 : std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
1748 : {
1749 59908308 : int attnum = stats->tupattnum;
1750 59908308 : HeapTuple tuple = stats->rows[rownum];
1751 59908308 : TupleDesc tupDesc = stats->tupDesc;
1752 :
1753 59908308 : return heap_getattr(tuple, attnum, tupDesc, isNull);
1754 : }
1755 :
1756 : /*
1757 : * Fetch function for analyzing index expressions.
1758 : *
1759 : * We have not bothered to construct index tuples, instead the data is
1760 : * just in Datum arrays.
1761 : */
1762 : static Datum
1763 74460 : ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
1764 : {
1765 : int i;
1766 :
1767 : /* exprvals and exprnulls are already offset for proper column */
1768 74460 : i = rownum * stats->rowstride;
1769 74460 : *isNull = stats->exprnulls[i];
1770 74460 : return stats->exprvals[i];
1771 : }
1772 :
1773 :
1774 : /*==========================================================================
1775 : *
1776 : * Code below this point represents the "standard" type-specific statistics
1777 : * analysis algorithms. This code can be replaced on a per-data-type basis
1778 : * by setting a nonzero value in pg_type.typanalyze.
1779 : *
1780 : *==========================================================================
1781 : */
1782 :
1783 :
1784 : /*
1785 : * To avoid consuming too much memory during analysis and/or too much space
1786 : * in the resulting pg_statistic rows, we ignore varlena datums that are wider
1787 : * than WIDTH_THRESHOLD (after detoasting!). This is legitimate for MCV
1788 : * and distinct-value calculations since a wide value is unlikely to be
1789 : * duplicated at all, much less be a most-common value. For the same reason,
1790 : * ignoring wide values will not affect our estimates of histogram bin
1791 : * boundaries very much.
1792 : */
1793 : #define WIDTH_THRESHOLD 1024
1794 :
1795 : #define swapInt(a,b) do {int _tmp; _tmp=a; a=b; b=_tmp;} while(0)
1796 : #define swapDatum(a,b) do {Datum _tmp; _tmp=a; a=b; b=_tmp;} while(0)
1797 :
/*
 * Extra information used by the default analysis routines
 */
typedef struct
{
	int			count;			/* # of duplicates */
	int			first;			/* values[] index of first occurrence */
} ScalarMCVItem;

/* Passed as the "arg" of compare_scalars() during the sample sort */
typedef struct
{
	SortSupport ssup;			/* comparator setup for the "<" operator */
	int		   *tupnoLink;		/* duplicate-chain workspace; see
								 * compute_scalar_stats() */
} CompareScalarsContext;
1812 :
1813 :
/* Forward declarations for the standard per-type statistics machinery */
static void compute_trivial_stats(VacAttrStatsP stats,
								  AnalyzeAttrFetchFunc fetchfunc,
								  int samplerows,
								  double totalrows);
static void compute_distinct_stats(VacAttrStatsP stats,
								   AnalyzeAttrFetchFunc fetchfunc,
								   int samplerows,
								   double totalrows);
static void compute_scalar_stats(VacAttrStatsP stats,
								 AnalyzeAttrFetchFunc fetchfunc,
								 int samplerows,
								 double totalrows);
static int	compare_scalars(const void *a, const void *b, void *arg);
static int	compare_mcvs(const void *a, const void *b, void *arg);
static int	analyze_mcv_list(int *mcv_counts,
							 int num_mcv,
							 double stadistinct,
							 double stanullfrac,
							 int samplerows,
							 double totalrows);
1834 :
1835 :
1836 : /*
1837 : * std_typanalyze -- the default type-specific typanalyze function
1838 : */
1839 : bool
1840 88040 : std_typanalyze(VacAttrStats *stats)
1841 : {
1842 : Oid ltopr;
1843 : Oid eqopr;
1844 : StdAnalyzeData *mystats;
1845 :
1846 : /* If the attstattarget column is negative, use the default value */
1847 88040 : if (stats->attstattarget < 0)
1848 87458 : stats->attstattarget = default_statistics_target;
1849 :
1850 : /* Look for default "<" and "=" operators for column's type */
1851 88040 : get_sort_group_operators(stats->attrtypid,
1852 : false, false, false,
1853 : <opr, &eqopr, NULL,
1854 : NULL);
1855 :
1856 : /* Save the operator info for compute_stats routines */
1857 88040 : mystats = (StdAnalyzeData *) palloc(sizeof(StdAnalyzeData));
1858 88040 : mystats->eqopr = eqopr;
1859 88040 : mystats->eqfunc = OidIsValid(eqopr) ? get_opcode(eqopr) : InvalidOid;
1860 88040 : mystats->ltopr = ltopr;
1861 88040 : stats->extra_data = mystats;
1862 :
1863 : /*
1864 : * Determine which standard statistics algorithm to use
1865 : */
1866 88040 : if (OidIsValid(eqopr) && OidIsValid(ltopr))
1867 : {
1868 : /* Seems to be a scalar datatype */
1869 85378 : stats->compute_stats = compute_scalar_stats;
1870 : /*--------------------
1871 : * The following choice of minrows is based on the paper
1872 : * "Random sampling for histogram construction: how much is enough?"
1873 : * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in
1874 : * Proceedings of ACM SIGMOD International Conference on Management
1875 : * of Data, 1998, Pages 436-447. Their Corollary 1 to Theorem 5
1876 : * says that for table size n, histogram size k, maximum relative
1877 : * error in bin size f, and error probability gamma, the minimum
1878 : * random sample size is
1879 : * r = 4 * k * ln(2*n/gamma) / f^2
1880 : * Taking f = 0.5, gamma = 0.01, n = 10^6 rows, we obtain
1881 : * r = 305.82 * k
1882 : * Note that because of the log function, the dependence on n is
1883 : * quite weak; even at n = 10^12, a 300*k sample gives <= 0.66
1884 : * bin size error with probability 0.99. So there's no real need to
1885 : * scale for n, which is a good thing because we don't necessarily
1886 : * know it at this point.
1887 : *--------------------
1888 : */
1889 85378 : stats->minrows = 300 * stats->attstattarget;
1890 : }
1891 2662 : else if (OidIsValid(eqopr))
1892 : {
1893 : /* We can still recognize distinct values */
1894 2302 : stats->compute_stats = compute_distinct_stats;
1895 : /* Might as well use the same minrows as above */
1896 2302 : stats->minrows = 300 * stats->attstattarget;
1897 : }
1898 : else
1899 : {
1900 : /* Can't do much but the trivial stuff */
1901 360 : stats->compute_stats = compute_trivial_stats;
1902 : /* Might as well use the same minrows as above */
1903 360 : stats->minrows = 300 * stats->attstattarget;
1904 : }
1905 :
1906 88040 : return true;
1907 : }
1908 :
1909 :
1910 : /*
1911 : * compute_trivial_stats() -- compute very basic column statistics
1912 : *
1913 : * We use this when we cannot find a hash "=" operator for the datatype.
1914 : *
1915 : * We determine the fraction of non-null rows and the average datum width.
1916 : */
1917 : static void
1918 234 : compute_trivial_stats(VacAttrStatsP stats,
1919 : AnalyzeAttrFetchFunc fetchfunc,
1920 : int samplerows,
1921 : double totalrows)
1922 : {
1923 : int i;
1924 234 : int null_cnt = 0;
1925 234 : int nonnull_cnt = 0;
1926 234 : double total_width = 0;
1927 468 : bool is_varlena = (!stats->attrtype->typbyval &&
1928 234 : stats->attrtype->typlen == -1);
1929 468 : bool is_varwidth = (!stats->attrtype->typbyval &&
1930 234 : stats->attrtype->typlen < 0);
1931 :
1932 686584 : for (i = 0; i < samplerows; i++)
1933 : {
1934 : Datum value;
1935 : bool isnull;
1936 :
1937 686350 : vacuum_delay_point();
1938 :
1939 686350 : value = fetchfunc(stats, i, &isnull);
1940 :
1941 : /* Check for null/nonnull */
1942 686350 : if (isnull)
1943 : {
1944 419600 : null_cnt++;
1945 419600 : continue;
1946 : }
1947 266750 : nonnull_cnt++;
1948 :
1949 : /*
1950 : * If it's a variable-width field, add up widths for average width
1951 : * calculation. Note that if the value is toasted, we use the toasted
1952 : * width. We don't bother with this calculation if it's a fixed-width
1953 : * type.
1954 : */
1955 266750 : if (is_varlena)
1956 : {
1957 34166 : total_width += VARSIZE_ANY(DatumGetPointer(value));
1958 : }
1959 232584 : else if (is_varwidth)
1960 : {
1961 : /* must be cstring */
1962 0 : total_width += strlen(DatumGetCString(value)) + 1;
1963 : }
1964 : }
1965 :
1966 : /* We can only compute average width if we found some non-null values. */
1967 234 : if (nonnull_cnt > 0)
1968 : {
1969 104 : stats->stats_valid = true;
1970 : /* Do the simple null-frac and width stats */
1971 104 : stats->stanullfrac = (double) null_cnt / (double) samplerows;
1972 104 : if (is_varwidth)
1973 38 : stats->stawidth = total_width / (double) nonnull_cnt;
1974 : else
1975 66 : stats->stawidth = stats->attrtype->typlen;
1976 104 : stats->stadistinct = 0.0; /* "unknown" */
1977 : }
1978 130 : else if (null_cnt > 0)
1979 : {
1980 : /* We found only nulls; assume the column is entirely null */
1981 130 : stats->stats_valid = true;
1982 130 : stats->stanullfrac = 1.0;
1983 130 : if (is_varwidth)
1984 130 : stats->stawidth = 0; /* "unknown" */
1985 : else
1986 0 : stats->stawidth = stats->attrtype->typlen;
1987 130 : stats->stadistinct = 0.0; /* "unknown" */
1988 : }
1989 234 : }
1990 :
1991 :
1992 : /*
1993 : * compute_distinct_stats() -- compute column statistics including ndistinct
1994 : *
1995 : * We use this when we can find only an "=" operator for the datatype.
1996 : *
1997 : * We determine the fraction of non-null rows, the average width, the
1998 : * most common values, and the (estimated) number of distinct values.
1999 : *
2000 : * The most common values are determined by brute force: we keep a list
2001 : * of previously seen values, ordered by number of times seen, as we scan
2002 : * the samples. A newly seen value is inserted just after the last
2003 : * multiply-seen value, causing the bottommost (oldest) singly-seen value
2004 : * to drop off the list. The accuracy of this method, and also its cost,
2005 : * depend mainly on the length of the list we are willing to keep.
2006 : */
static void
compute_distinct_stats(VacAttrStatsP stats,
					   AnalyzeAttrFetchFunc fetchfunc,
					   int samplerows,
					   double totalrows)
{
	int			i;
	int			null_cnt = 0;
	int			nonnull_cnt = 0;
	int			toowide_cnt = 0;	/* # of over-WIDTH_THRESHOLD values skipped */
	double		total_width = 0;
	bool		is_varlena = (!stats->attrtype->typbyval &&
							  stats->attrtype->typlen == -1);
	bool		is_varwidth = (!stats->attrtype->typbyval &&
							   stats->attrtype->typlen < 0);
	FmgrInfo	f_cmpeq;		/* call info for the "=" operator's function */
	typedef struct
	{
		Datum		value;		/* a distinct value seen in the sample */
		int			count;		/* # of occurrences so far */
	} TrackItem;
	TrackItem  *track;			/* list kept sorted by descending count */
	int			track_cnt,
				track_max;
	int			num_mcv = stats->attstattarget;
	StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;

	/*
	 * We track up to 2*n values for an n-element MCV list; but at least 10
	 */
	track_max = 2 * num_mcv;
	if (track_max < 10)
		track_max = 10;
	track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
	track_cnt = 0;

	fmgr_info(mystats->eqfunc, &f_cmpeq);

	for (i = 0; i < samplerows; i++)
	{
		Datum		value;
		bool		isnull;
		bool		match;
		int			firstcount1,	/* index of first count==1 track entry */
					j;

		vacuum_delay_point();

		value = fetchfunc(stats, i, &isnull);

		/* Check for null/nonnull */
		if (isnull)
		{
			null_cnt++;
			continue;
		}
		nonnull_cnt++;

		/*
		 * If it's a variable-width field, add up widths for average width
		 * calculation.  Note that if the value is toasted, we use the toasted
		 * width.  We don't bother with this calculation if it's a fixed-width
		 * type.
		 */
		if (is_varlena)
		{
			total_width += VARSIZE_ANY(DatumGetPointer(value));

			/*
			 * If the value is toasted, we want to detoast it just once to
			 * avoid repeated detoastings and resultant excess memory usage
			 * during the comparisons.  Also, check to see if the value is
			 * excessively wide, and if so don't detoast at all --- just
			 * ignore the value.
			 */
			if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
			{
				toowide_cnt++;
				continue;
			}
			value = PointerGetDatum(PG_DETOAST_DATUM(value));
		}
		else if (is_varwidth)
		{
			/* must be cstring */
			total_width += strlen(DatumGetCString(value)) + 1;
		}

		/*
		 * See if the value matches anything we're already tracking.  While
		 * scanning, also remember the position of the first singly-seen
		 * entry before the match point (firstcount1), which is where a new
		 * value would be inserted.
		 */
		match = false;
		firstcount1 = track_cnt;
		for (j = 0; j < track_cnt; j++)
		{
			if (DatumGetBool(FunctionCall2Coll(&f_cmpeq,
											   stats->attrcollid,
											   value, track[j].value)))
			{
				match = true;
				break;
			}
			if (j < firstcount1 && track[j].count == 1)
				firstcount1 = j;
		}

		if (match)
		{
			/* Found a match */
			track[j].count++;
			/* This value may now need to "bubble up" in the track list */
			while (j > 0 && track[j].count > track[j - 1].count)
			{
				swapDatum(track[j].value, track[j - 1].value);
				swapInt(track[j].count, track[j - 1].count);
				j--;
			}
		}
		else
		{
			/*
			 * No match.  Insert at head of count-1 list, shifting later
			 * entries down; if the list was full, the bottommost (oldest)
			 * singly-seen value silently drops off the end.
			 */
			if (track_cnt < track_max)
				track_cnt++;
			for (j = track_cnt - 1; j > firstcount1; j--)
			{
				track[j].value = track[j - 1].value;
				track[j].count = track[j - 1].count;
			}
			if (firstcount1 < track_cnt)
			{
				track[firstcount1].value = value;
				track[firstcount1].count = 1;
			}
		}
	}

	/* We can only compute real stats if we found some non-null values. */
	if (nonnull_cnt > 0)
	{
		int			nmultiple,	/* # of track values seen more than once */
					summultiple;	/* total occurrences of those values */

		stats->stats_valid = true;
		/* Do the simple null-frac and width stats */
		stats->stanullfrac = (double) null_cnt / (double) samplerows;
		if (is_varwidth)
			stats->stawidth = total_width / (double) nonnull_cnt;
		else
			stats->stawidth = stats->attrtype->typlen;

		/* Count the number of values we found multiple times */
		summultiple = 0;
		for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
		{
			if (track[nmultiple].count == 1)
				break;
			summultiple += track[nmultiple].count;
		}

		if (nmultiple == 0)
		{
			/*
			 * If we found no repeated non-null values, assume it's a unique
			 * column; but be sure to discount for any nulls we found.
			 */
			stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
		}
		else if (track_cnt < track_max && toowide_cnt == 0 &&
				 nmultiple == track_cnt)
		{
			/*
			 * Our track list includes every value in the sample, and every
			 * value appeared more than once.  Assume the column has just
			 * these values.  (This case is meant to address columns with
			 * small, fixed sets of possible values, such as boolean or enum
			 * columns.  If there are any values that appear just once in the
			 * sample, including too-wide values, we should assume that that's
			 * not what we're dealing with.)
			 */
			stats->stadistinct = track_cnt;
		}
		else
		{
			/*----------
			 * Estimate the number of distinct values using the estimator
			 * proposed by Haas and Stokes in IBM Research Report RJ 10025:
			 *		n*d / (n - f1 + f1*n/N)
			 * where f1 is the number of distinct values that occurred
			 * exactly once in our sample of n rows (from a total of N),
			 * and d is the total number of distinct values in the sample.
			 * This is their Duj1 estimator; the other estimators they
			 * recommend are considerably more complex, and are numerically
			 * very unstable when n is much smaller than N.
			 *
			 * In this calculation, we consider only non-nulls.  We used to
			 * include rows with null values in the n and N counts, but that
			 * leads to inaccurate answers in columns with many nulls, and
			 * it's intuitively bogus anyway considering the desired result is
			 * the number of distinct non-null values.
			 *
			 * We assume (not very reliably!) that all the multiply-occurring
			 * values are reflected in the final track[] list, and the other
			 * nonnull values all appeared but once.  (XXX this usually
			 * results in a drastic overestimate of ndistinct.  Can we do
			 * any better?)
			 *----------
			 */
			int			f1 = nonnull_cnt - summultiple;
			int			d = f1 + nmultiple;
			double		n = samplerows - null_cnt;
			double		N = totalrows * (1.0 - stats->stanullfrac);
			double		stadistinct;

			/* N == 0 shouldn't happen, but just in case ... */
			if (N > 0)
				stadistinct = (n * d) / ((n - f1) + f1 * n / N);
			else
				stadistinct = 0;

			/* Clamp to sane range in case of roundoff error */
			if (stadistinct < d)
				stadistinct = d;
			if (stadistinct > N)
				stadistinct = N;
			/* And round to integer */
			stats->stadistinct = floor(stadistinct + 0.5);
		}

		/*
		 * If we estimated the number of distinct values at more than 10% of
		 * the total row count (a very arbitrary limit), then assume that
		 * stadistinct should scale with the row count rather than be a fixed
		 * value.
		 */
		if (stats->stadistinct > 0.1 * totalrows)
			stats->stadistinct = -(stats->stadistinct / totalrows);

		/*
		 * Decide how many values are worth storing as most-common values. If
		 * we are able to generate a complete MCV list (all the values in the
		 * sample will fit, and we think these are all the ones in the table),
		 * then do so.  Otherwise, store only those values that are
		 * significantly more common than the values not in the list.
		 *
		 * Note: the first of these cases is meant to address columns with
		 * small, fixed sets of possible values, such as boolean or enum
		 * columns.  If we can *completely* represent the column population by
		 * an MCV list that will fit into the stats target, then we should do
		 * so and thus provide the planner with complete information.  But if
		 * the MCV list is not complete, it's generally worth being more
		 * selective, and not just filling it all the way up to the stats
		 * target.
		 */
		if (track_cnt < track_max && toowide_cnt == 0 &&
			stats->stadistinct > 0 &&
			track_cnt <= num_mcv)
		{
			/* Track list includes all values seen, and all will fit */
			num_mcv = track_cnt;
		}
		else
		{
			int		   *mcv_counts;

			/* Incomplete list; decide how many values are worth keeping */
			if (num_mcv > track_cnt)
				num_mcv = track_cnt;

			if (num_mcv > 0)
			{
				mcv_counts = (int *) palloc(num_mcv * sizeof(int));
				for (i = 0; i < num_mcv; i++)
					mcv_counts[i] = track[i].count;

				num_mcv = analyze_mcv_list(mcv_counts, num_mcv,
										   stats->stadistinct,
										   stats->stanullfrac,
										   samplerows, totalrows);
			}
		}

		/* Generate MCV slot entry */
		if (num_mcv > 0)
		{
			MemoryContext old_context;
			Datum	   *mcv_values;
			float4	   *mcv_freqs;

			/* Must copy the target values into anl_context */
			old_context = MemoryContextSwitchTo(stats->anl_context);
			mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
			mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
			for (i = 0; i < num_mcv; i++)
			{
				mcv_values[i] = datumCopy(track[i].value,
										  stats->attrtype->typbyval,
										  stats->attrtype->typlen);
				mcv_freqs[i] = (double) track[i].count / (double) samplerows;
			}
			MemoryContextSwitchTo(old_context);

			stats->stakind[0] = STATISTIC_KIND_MCV;
			stats->staop[0] = mystats->eqopr;
			stats->stacoll[0] = stats->attrcollid;
			stats->stanumbers[0] = mcv_freqs;
			stats->numnumbers[0] = num_mcv;
			stats->stavalues[0] = mcv_values;
			stats->numvalues[0] = num_mcv;

			/*
			 * Accept the defaults for stats->statypid and others. They have
			 * been set before we were called (see vacuum.h)
			 */
		}
	}
	else if (null_cnt > 0)
	{
		/* We found only nulls; assume the column is entirely null */
		stats->stats_valid = true;
		stats->stanullfrac = 1.0;
		if (is_varwidth)
			stats->stawidth = 0;	/* "unknown" */
		else
			stats->stawidth = stats->attrtype->typlen;
		stats->stadistinct = 0.0;	/* "unknown" */
	}

	/* We don't need to bother cleaning up any of our temporary palloc's */
}
2336 :
2337 :
2338 : /*
2339 : * compute_scalar_stats() -- compute column statistics
2340 : *
2341 : * We use this when we can find "=" and "<" operators for the datatype.
2342 : *
2343 : * We determine the fraction of non-null rows, the average width, the
2344 : * most common values, the (estimated) number of distinct values, the
2345 : * distribution histogram, and the correlation of physical to logical order.
2346 : *
2347 : * The desired stats can be determined fairly easily after sorting the
2348 : * data values into order.
2349 : */
2350 : static void
2351 62174 : compute_scalar_stats(VacAttrStatsP stats,
2352 : AnalyzeAttrFetchFunc fetchfunc,
2353 : int samplerows,
2354 : double totalrows)
2355 : {
2356 : int i;
2357 62174 : int null_cnt = 0;
2358 62174 : int nonnull_cnt = 0;
2359 62174 : int toowide_cnt = 0;
2360 62174 : double total_width = 0;
2361 77470 : bool is_varlena = (!stats->attrtype->typbyval &&
2362 15296 : stats->attrtype->typlen == -1);
2363 77470 : bool is_varwidth = (!stats->attrtype->typbyval &&
2364 15296 : stats->attrtype->typlen < 0);
2365 : double corr_xysum;
2366 : SortSupportData ssup;
2367 : ScalarItem *values;
2368 62174 : int values_cnt = 0;
2369 : int *tupnoLink;
2370 : ScalarMCVItem *track;
2371 62174 : int track_cnt = 0;
2372 62174 : int num_mcv = stats->attstattarget;
2373 62174 : int num_bins = stats->attstattarget;
2374 62174 : StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
2375 :
2376 62174 : values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem));
2377 62174 : tupnoLink = (int *) palloc(samplerows * sizeof(int));
2378 62174 : track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
2379 :
2380 62174 : memset(&ssup, 0, sizeof(ssup));
2381 62174 : ssup.ssup_cxt = CurrentMemoryContext;
2382 62174 : ssup.ssup_collation = stats->attrcollid;
2383 62174 : ssup.ssup_nulls_first = false;
2384 :
2385 : /*
2386 : * For now, don't perform abbreviated key conversion, because full values
2387 : * are required for MCV slot generation. Supporting that optimization
2388 : * would necessitate teaching compare_scalars() to call a tie-breaker.
2389 : */
2390 62174 : ssup.abbreviate = false;
2391 :
2392 62174 : PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);
2393 :
2394 : /* Initial scan to find sortable values */
2395 54945738 : for (i = 0; i < samplerows; i++)
2396 : {
2397 : Datum value;
2398 : bool isnull;
2399 :
2400 54883564 : vacuum_delay_point();
2401 :
2402 54883564 : value = fetchfunc(stats, i, &isnull);
2403 :
2404 : /* Check for null/nonnull */
2405 54883564 : if (isnull)
2406 : {
2407 7412138 : null_cnt++;
2408 7437938 : continue;
2409 : }
2410 47471426 : nonnull_cnt++;
2411 :
2412 : /*
2413 : * If it's a variable-width field, add up widths for average width
2414 : * calculation. Note that if the value is toasted, we use the toasted
2415 : * width. We don't bother with this calculation if it's a fixed-width
2416 : * type.
2417 : */
2418 47471426 : if (is_varlena)
2419 : {
2420 5457992 : total_width += VARSIZE_ANY(DatumGetPointer(value));
2421 :
2422 : /*
2423 : * If the value is toasted, we want to detoast it just once to
2424 : * avoid repeated detoastings and resultant excess memory usage
2425 : * during the comparisons. Also, check to see if the value is
2426 : * excessively wide, and if so don't detoast at all --- just
2427 : * ignore the value.
2428 : */
2429 5457992 : if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
2430 : {
2431 25800 : toowide_cnt++;
2432 25800 : continue;
2433 : }
2434 5432192 : value = PointerGetDatum(PG_DETOAST_DATUM(value));
2435 : }
2436 42013434 : else if (is_varwidth)
2437 : {
2438 : /* must be cstring */
2439 0 : total_width += strlen(DatumGetCString(value)) + 1;
2440 : }
2441 :
2442 : /* Add it to the list to be sorted */
2443 47445626 : values[values_cnt].value = value;
2444 47445626 : values[values_cnt].tupno = values_cnt;
2445 47445626 : tupnoLink[values_cnt] = values_cnt;
2446 47445626 : values_cnt++;
2447 : }
2448 :
2449 : /* We can only compute real stats if we found some sortable values. */
2450 62174 : if (values_cnt > 0)
2451 : {
2452 : int ndistinct, /* # distinct values in sample */
2453 : nmultiple, /* # that appear multiple times */
2454 : num_hist,
2455 : dups_cnt;
2456 57930 : int slot_idx = 0;
2457 : CompareScalarsContext cxt;
2458 :
2459 : /* Sort the collected values */
2460 57930 : cxt.ssup = &ssup;
2461 57930 : cxt.tupnoLink = tupnoLink;
2462 57930 : qsort_interruptible(values, values_cnt, sizeof(ScalarItem),
2463 : compare_scalars, &cxt);
2464 :
2465 : /*
2466 : * Now scan the values in order, find the most common ones, and also
2467 : * accumulate ordering-correlation statistics.
2468 : *
2469 : * To determine which are most common, we first have to count the
2470 : * number of duplicates of each value. The duplicates are adjacent in
2471 : * the sorted list, so a brute-force approach is to compare successive
2472 : * datum values until we find two that are not equal. However, that
2473 : * requires N-1 invocations of the datum comparison routine, which are
2474 : * completely redundant with work that was done during the sort. (The
2475 : * sort algorithm must at some point have compared each pair of items
2476 : * that are adjacent in the sorted order; otherwise it could not know
2477 : * that it's ordered the pair correctly.) We exploit this by having
2478 : * compare_scalars remember the highest tupno index that each
2479 : * ScalarItem has been found equal to. At the end of the sort, a
2480 : * ScalarItem's tupnoLink will still point to itself if and only if it
2481 : * is the last item of its group of duplicates (since the group will
2482 : * be ordered by tupno).
2483 : */
2484 57930 : corr_xysum = 0;
2485 57930 : ndistinct = 0;
2486 57930 : nmultiple = 0;
2487 57930 : dups_cnt = 0;
2488 47503556 : for (i = 0; i < values_cnt; i++)
2489 : {
2490 47445626 : int tupno = values[i].tupno;
2491 :
2492 47445626 : corr_xysum += ((double) i) * ((double) tupno);
2493 47445626 : dups_cnt++;
2494 47445626 : if (tupnoLink[tupno] == tupno)
2495 : {
2496 : /* Reached end of duplicates of this value */
2497 9531386 : ndistinct++;
2498 9531386 : if (dups_cnt > 1)
2499 : {
2500 842892 : nmultiple++;
2501 842892 : if (track_cnt < num_mcv ||
2502 348778 : dups_cnt > track[track_cnt - 1].count)
2503 : {
2504 : /*
2505 : * Found a new item for the mcv list; find its
2506 : * position, bubbling down old items if needed. Loop
2507 : * invariant is that j points at an empty/ replaceable
2508 : * slot.
2509 : */
2510 : int j;
2511 :
2512 564590 : if (track_cnt < num_mcv)
2513 494114 : track_cnt++;
2514 7255882 : for (j = track_cnt - 1; j > 0; j--)
2515 : {
2516 7192482 : if (dups_cnt <= track[j - 1].count)
2517 501190 : break;
2518 6691292 : track[j].count = track[j - 1].count;
2519 6691292 : track[j].first = track[j - 1].first;
2520 : }
2521 564590 : track[j].count = dups_cnt;
2522 564590 : track[j].first = i + 1 - dups_cnt;
2523 : }
2524 : }
2525 9531386 : dups_cnt = 0;
2526 : }
2527 : }
2528 :
2529 57930 : stats->stats_valid = true;
2530 : /* Do the simple null-frac and width stats */
2531 57930 : stats->stanullfrac = (double) null_cnt / (double) samplerows;
2532 57930 : if (is_varwidth)
2533 8520 : stats->stawidth = total_width / (double) nonnull_cnt;
2534 : else
2535 49410 : stats->stawidth = stats->attrtype->typlen;
2536 :
2537 57930 : if (nmultiple == 0)
2538 : {
2539 : /*
2540 : * If we found no repeated non-null values, assume it's a unique
2541 : * column; but be sure to discount for any nulls we found.
2542 : */
2543 15724 : stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
2544 : }
2545 42206 : else if (toowide_cnt == 0 && nmultiple == ndistinct)
2546 : {
2547 : /*
2548 : * Every value in the sample appeared more than once. Assume the
2549 : * column has just these values. (This case is meant to address
2550 : * columns with small, fixed sets of possible values, such as
2551 : * boolean or enum columns. If there are any values that appear
2552 : * just once in the sample, including too-wide values, we should
2553 : * assume that that's not what we're dealing with.)
2554 : */
2555 26038 : stats->stadistinct = ndistinct;
2556 : }
2557 : else
2558 : {
2559 : /*----------
2560 : * Estimate the number of distinct values using the estimator
2561 : * proposed by Haas and Stokes in IBM Research Report RJ 10025:
2562 : * n*d / (n - f1 + f1*n/N)
2563 : * where f1 is the number of distinct values that occurred
2564 : * exactly once in our sample of n rows (from a total of N),
2565 : * and d is the total number of distinct values in the sample.
2566 : * This is their Duj1 estimator; the other estimators they
2567 : * recommend are considerably more complex, and are numerically
2568 : * very unstable when n is much smaller than N.
2569 : *
2570 : * In this calculation, we consider only non-nulls. We used to
2571 : * include rows with null values in the n and N counts, but that
2572 : * leads to inaccurate answers in columns with many nulls, and
2573 : * it's intuitively bogus anyway considering the desired result is
2574 : * the number of distinct non-null values.
2575 : *
2576 : * Overwidth values are assumed to have been distinct.
2577 : *----------
2578 : */
2579 16168 : int f1 = ndistinct - nmultiple + toowide_cnt;
2580 16168 : int d = f1 + nmultiple;
2581 16168 : double n = samplerows - null_cnt;
2582 16168 : double N = totalrows * (1.0 - stats->stanullfrac);
2583 : double stadistinct;
2584 :
2585 : /* N == 0 shouldn't happen, but just in case ... */
2586 16168 : if (N > 0)
2587 16168 : stadistinct = (n * d) / ((n - f1) + f1 * n / N);
2588 : else
2589 0 : stadistinct = 0;
2590 :
2591 : /* Clamp to sane range in case of roundoff error */
2592 16168 : if (stadistinct < d)
2593 760 : stadistinct = d;
2594 16168 : if (stadistinct > N)
2595 0 : stadistinct = N;
2596 : /* And round to integer */
2597 16168 : stats->stadistinct = floor(stadistinct + 0.5);
2598 : }
2599 :
2600 : /*
2601 : * If we estimated the number of distinct values at more than 10% of
2602 : * the total row count (a very arbitrary limit), then assume that
2603 : * stadistinct should scale with the row count rather than be a fixed
2604 : * value.
2605 : */
2606 57930 : if (stats->stadistinct > 0.1 * totalrows)
2607 11936 : stats->stadistinct = -(stats->stadistinct / totalrows);
2608 :
2609 : /*
2610 : * Decide how many values are worth storing as most-common values. If
2611 : * we are able to generate a complete MCV list (all the values in the
2612 : * sample will fit, and we think these are all the ones in the table),
2613 : * then do so. Otherwise, store only those values that are
2614 : * significantly more common than the values not in the list.
2615 : *
2616 : * Note: the first of these cases is meant to address columns with
2617 : * small, fixed sets of possible values, such as boolean or enum
2618 : * columns. If we can *completely* represent the column population by
2619 : * an MCV list that will fit into the stats target, then we should do
2620 : * so and thus provide the planner with complete information. But if
2621 : * the MCV list is not complete, it's generally worth being more
2622 : * selective, and not just filling it all the way up to the stats
2623 : * target.
2624 : */
2625 57930 : if (track_cnt == ndistinct && toowide_cnt == 0 &&
2626 25434 : stats->stadistinct > 0 &&
2627 : track_cnt <= num_mcv)
2628 : {
2629 : /* Track list includes all values seen, and all will fit */
2630 22934 : num_mcv = track_cnt;
2631 : }
2632 : else
2633 : {
2634 : int *mcv_counts;
2635 :
2636 : /* Incomplete list; decide how many values are worth keeping */
2637 34996 : if (num_mcv > track_cnt)
2638 31704 : num_mcv = track_cnt;
2639 :
2640 34996 : if (num_mcv > 0)
2641 : {
2642 19272 : mcv_counts = (int *) palloc(num_mcv * sizeof(int));
2643 388272 : for (i = 0; i < num_mcv; i++)
2644 369000 : mcv_counts[i] = track[i].count;
2645 :
2646 19272 : num_mcv = analyze_mcv_list(mcv_counts, num_mcv,
2647 19272 : stats->stadistinct,
2648 19272 : stats->stanullfrac,
2649 : samplerows, totalrows);
2650 : }
2651 : }
2652 :
2653 : /* Generate MCV slot entry */
2654 57930 : if (num_mcv > 0)
2655 : {
2656 : MemoryContext old_context;
2657 : Datum *mcv_values;
2658 : float4 *mcv_freqs;
2659 :
2660 : /* Must copy the target values into anl_context */
2661 42158 : old_context = MemoryContextSwitchTo(stats->anl_context);
2662 42158 : mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
2663 42158 : mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
2664 536082 : for (i = 0; i < num_mcv; i++)
2665 : {
2666 987848 : mcv_values[i] = datumCopy(values[track[i].first].value,
2667 493924 : stats->attrtype->typbyval,
2668 493924 : stats->attrtype->typlen);
2669 493924 : mcv_freqs[i] = (double) track[i].count / (double) samplerows;
2670 : }
2671 42158 : MemoryContextSwitchTo(old_context);
2672 :
2673 42158 : stats->stakind[slot_idx] = STATISTIC_KIND_MCV;
2674 42158 : stats->staop[slot_idx] = mystats->eqopr;
2675 42158 : stats->stacoll[slot_idx] = stats->attrcollid;
2676 42158 : stats->stanumbers[slot_idx] = mcv_freqs;
2677 42158 : stats->numnumbers[slot_idx] = num_mcv;
2678 42158 : stats->stavalues[slot_idx] = mcv_values;
2679 42158 : stats->numvalues[slot_idx] = num_mcv;
2680 :
2681 : /*
2682 : * Accept the defaults for stats->statypid and others. They have
2683 : * been set before we were called (see vacuum.h)
2684 : */
2685 42158 : slot_idx++;
2686 : }
2687 :
2688 : /*
2689 : * Generate a histogram slot entry if there are at least two distinct
2690 : * values not accounted for in the MCV list. (This ensures the
2691 : * histogram won't collapse to empty or a singleton.)
2692 : */
2693 57930 : num_hist = ndistinct - num_mcv;
2694 57930 : if (num_hist > num_bins)
2695 9764 : num_hist = num_bins + 1;
2696 57930 : if (num_hist >= 2)
2697 : {
2698 : MemoryContext old_context;
2699 : Datum *hist_values;
2700 : int nvals;
2701 : int pos,
2702 : posfrac,
2703 : delta,
2704 : deltafrac;
2705 :
2706 : /* Sort the MCV items into position order to speed next loop */
2707 25860 : qsort_interruptible(track, num_mcv, sizeof(ScalarMCVItem),
2708 : compare_mcvs, NULL);
2709 :
2710 : /*
2711 : * Collapse out the MCV items from the values[] array.
2712 : *
2713 : * Note we destroy the values[] array here... but we don't need it
2714 : * for anything more. We do, however, still need values_cnt.
2715 : * nvals will be the number of remaining entries in values[].
2716 : */
2717 25860 : if (num_mcv > 0)
2718 : {
2719 : int src,
2720 : dest;
2721 : int j;
2722 :
2723 13582 : src = dest = 0;
2724 13582 : j = 0; /* index of next interesting MCV item */
2725 483034 : while (src < values_cnt)
2726 : {
2727 : int ncopy;
2728 :
2729 469452 : if (j < num_mcv)
2730 : {
2731 459208 : int first = track[j].first;
2732 :
2733 459208 : if (src >= first)
2734 : {
2735 : /* advance past this MCV item */
2736 336912 : src = first + track[j].count;
2737 336912 : j++;
2738 336912 : continue;
2739 : }
2740 122296 : ncopy = first - src;
2741 : }
2742 : else
2743 10244 : ncopy = values_cnt - src;
2744 132540 : memmove(&values[dest], &values[src],
2745 : ncopy * sizeof(ScalarItem));
2746 132540 : src += ncopy;
2747 132540 : dest += ncopy;
2748 : }
2749 13582 : nvals = dest;
2750 : }
2751 : else
2752 12278 : nvals = values_cnt;
2753 : Assert(nvals >= num_hist);
2754 :
2755 : /* Must copy the target values into anl_context */
2756 25860 : old_context = MemoryContextSwitchTo(stats->anl_context);
2757 25860 : hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
2758 :
2759 : /*
2760 : * The object of this loop is to copy the first and last values[]
2761 : * entries along with evenly-spaced values in between. So the
2762 : * i'th value is values[(i * (nvals - 1)) / (num_hist - 1)]. But
2763 : * computing that subscript directly risks integer overflow when
2764 : * the stats target is more than a couple thousand. Instead we
2765 : * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking
2766 : * the integral and fractional parts of the sum separately.
2767 : */
2768 25860 : delta = (nvals - 1) / (num_hist - 1);
2769 25860 : deltafrac = (nvals - 1) % (num_hist - 1);
2770 25860 : pos = posfrac = 0;
2771 :
2772 1393202 : for (i = 0; i < num_hist; i++)
2773 : {
2774 2734684 : hist_values[i] = datumCopy(values[pos].value,
2775 1367342 : stats->attrtype->typbyval,
2776 1367342 : stats->attrtype->typlen);
2777 1367342 : pos += delta;
2778 1367342 : posfrac += deltafrac;
2779 1367342 : if (posfrac >= (num_hist - 1))
2780 : {
2781 : /* fractional part exceeds 1, carry to integer part */
2782 421698 : pos++;
2783 421698 : posfrac -= (num_hist - 1);
2784 : }
2785 : }
2786 :
2787 25860 : MemoryContextSwitchTo(old_context);
2788 :
2789 25860 : stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;
2790 25860 : stats->staop[slot_idx] = mystats->ltopr;
2791 25860 : stats->stacoll[slot_idx] = stats->attrcollid;
2792 25860 : stats->stavalues[slot_idx] = hist_values;
2793 25860 : stats->numvalues[slot_idx] = num_hist;
2794 :
2795 : /*
2796 : * Accept the defaults for stats->statypid and others. They have
2797 : * been set before we were called (see vacuum.h)
2798 : */
2799 25860 : slot_idx++;
2800 : }
2801 :
2802 : /* Generate a correlation entry if there are multiple values */
2803 57930 : if (values_cnt > 1)
2804 : {
2805 : MemoryContext old_context;
2806 : float4 *corrs;
2807 : double corr_xsum,
2808 : corr_x2sum;
2809 :
2810 : /* Must copy the target values into anl_context */
2811 54436 : old_context = MemoryContextSwitchTo(stats->anl_context);
2812 54436 : corrs = (float4 *) palloc(sizeof(float4));
2813 54436 : MemoryContextSwitchTo(old_context);
2814 :
2815 : /*----------
2816 : * Since we know the x and y value sets are both
2817 : * 0, 1, ..., values_cnt-1
2818 : * we have sum(x) = sum(y) =
2819 : * (values_cnt-1)*values_cnt / 2
2820 : * and sum(x^2) = sum(y^2) =
2821 : * (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6.
2822 : *----------
2823 : */
2824 54436 : corr_xsum = ((double) (values_cnt - 1)) *
2825 54436 : ((double) values_cnt) / 2.0;
2826 54436 : corr_x2sum = ((double) (values_cnt - 1)) *
2827 54436 : ((double) values_cnt) * (double) (2 * values_cnt - 1) / 6.0;
2828 :
2829 : /* And the correlation coefficient reduces to */
2830 54436 : corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) /
2831 54436 : (values_cnt * corr_x2sum - corr_xsum * corr_xsum);
2832 :
2833 54436 : stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
2834 54436 : stats->staop[slot_idx] = mystats->ltopr;
2835 54436 : stats->stacoll[slot_idx] = stats->attrcollid;
2836 54436 : stats->stanumbers[slot_idx] = corrs;
2837 54436 : stats->numnumbers[slot_idx] = 1;
2838 54436 : slot_idx++;
2839 : }
2840 : }
2841 4244 : else if (nonnull_cnt > 0)
2842 : {
2843 : /* We found some non-null values, but they were all too wide */
2844 : Assert(nonnull_cnt == toowide_cnt);
2845 256 : stats->stats_valid = true;
2846 : /* Do the simple null-frac and width stats */
2847 256 : stats->stanullfrac = (double) null_cnt / (double) samplerows;
2848 256 : if (is_varwidth)
2849 256 : stats->stawidth = total_width / (double) nonnull_cnt;
2850 : else
2851 0 : stats->stawidth = stats->attrtype->typlen;
2852 : /* Assume all too-wide values are distinct, so it's a unique column */
2853 256 : stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
2854 : }
2855 3988 : else if (null_cnt > 0)
2856 : {
2857 : /* We found only nulls; assume the column is entirely null */
2858 3988 : stats->stats_valid = true;
2859 3988 : stats->stanullfrac = 1.0;
2860 3988 : if (is_varwidth)
2861 3438 : stats->stawidth = 0; /* "unknown" */
2862 : else
2863 550 : stats->stawidth = stats->attrtype->typlen;
2864 3988 : stats->stadistinct = 0.0; /* "unknown" */
2865 : }
2866 :
2867 : /* We don't need to bother cleaning up any of our temporary palloc's */
2868 62174 : }
2869 :
2870 : /*
2871 : * Comparator for sorting ScalarItems
2872 : *
2873 : * Aside from sorting the items, we update the tupnoLink[] array
2874 : * whenever two ScalarItems are found to contain equal datums. The array
2875 : * is indexed by tupno; for each ScalarItem, it contains the highest
2876 : * tupno that that item's datum has been found to be equal to. This allows
2877 : * us to avoid additional comparisons in compute_scalar_stats().
2878 : */
2879 : static int
2880 435950520 : compare_scalars(const void *a, const void *b, void *arg)
2881 : {
2882 435950520 : Datum da = ((const ScalarItem *) a)->value;
2883 435950520 : int ta = ((const ScalarItem *) a)->tupno;
2884 435950520 : Datum db = ((const ScalarItem *) b)->value;
2885 435950520 : int tb = ((const ScalarItem *) b)->tupno;
2886 435950520 : CompareScalarsContext *cxt = (CompareScalarsContext *) arg;
2887 : int compare;
2888 :
2889 435950520 : compare = ApplySortComparator(da, false, db, false, cxt->ssup);
2890 435950520 : if (compare != 0)
2891 163647080 : return compare;
2892 :
2893 : /*
2894 : * The two datums are equal, so update cxt->tupnoLink[].
2895 : */
2896 272303440 : if (cxt->tupnoLink[ta] < tb)
2897 39588252 : cxt->tupnoLink[ta] = tb;
2898 272303440 : if (cxt->tupnoLink[tb] < ta)
2899 2853706 : cxt->tupnoLink[tb] = ta;
2900 :
2901 : /*
2902 : * For equal datums, sort by tupno
2903 : */
2904 272303440 : return ta - tb;
2905 : }
2906 :
2907 : /*
2908 : * Comparator for sorting ScalarMCVItems by position
2909 : */
2910 : static int
2911 1723592 : compare_mcvs(const void *a, const void *b, void *arg)
2912 : {
2913 1723592 : int da = ((const ScalarMCVItem *) a)->first;
2914 1723592 : int db = ((const ScalarMCVItem *) b)->first;
2915 :
2916 1723592 : return da - db;
2917 : }
2918 :
2919 : /*
2920 : * Analyze the list of common values in the sample and decide how many are
2921 : * worth storing in the table's MCV list.
2922 : *
2923 : * mcv_counts is assumed to be a list of the counts of the most common values
2924 : * seen in the sample, starting with the most common. The return value is the
2925 : * number that are significantly more common than the values not in the list,
2926 : * and which are therefore deemed worth storing in the table's MCV list.
2927 : */
2928 : static int
2929 19732 : analyze_mcv_list(int *mcv_counts,
2930 : int num_mcv,
2931 : double stadistinct,
2932 : double stanullfrac,
2933 : int samplerows,
2934 : double totalrows)
2935 : {
2936 : double ndistinct_table;
2937 : double sumcount;
2938 : int i;
2939 :
2940 : /*
2941 : * If the entire table was sampled, keep the whole list. This also
2942 : * protects us against division by zero in the code below.
2943 : */
2944 19732 : if (samplerows == totalrows || totalrows <= 1.0)
2945 18916 : return num_mcv;
2946 :
2947 : /* Re-extract the estimated number of distinct nonnull values in table */
2948 816 : ndistinct_table = stadistinct;
2949 816 : if (ndistinct_table < 0)
2950 154 : ndistinct_table = -ndistinct_table * totalrows;
2951 :
2952 : /*
2953 : * Exclude the least common values from the MCV list, if they are not
2954 : * significantly more common than the estimated selectivity they would
2955 : * have if they weren't in the list. All non-MCV values are assumed to be
2956 : * equally common, after taking into account the frequencies of all the
2957 : * values in the MCV list and the number of nulls (c.f. eqsel()).
2958 : *
2959 : * Here sumcount tracks the total count of all but the last (least common)
2960 : * value in the MCV list, allowing us to determine the effect of excluding
2961 : * that value from the list.
2962 : *
2963 : * Note that we deliberately do this by removing values from the full
2964 : * list, rather than starting with an empty list and adding values,
2965 : * because the latter approach can fail to add any values if all the most
2966 : * common values have around the same frequency and make up the majority
2967 : * of the table, so that the overall average frequency of all values is
2968 : * roughly the same as that of the common values. This would lead to any
2969 : * uncommon values being significantly overestimated.
2970 : */
2971 816 : sumcount = 0.0;
2972 1718 : for (i = 0; i < num_mcv - 1; i++)
2973 902 : sumcount += mcv_counts[i];
2974 :
2975 958 : while (num_mcv > 0)
2976 : {
2977 : double selec,
2978 : otherdistinct,
2979 : N,
2980 : n,
2981 : K,
2982 : variance,
2983 : stddev;
2984 :
2985 : /*
2986 : * Estimated selectivity the least common value would have if it
2987 : * wasn't in the MCV list (c.f. eqsel()).
2988 : */
2989 958 : selec = 1.0 - sumcount / samplerows - stanullfrac;
2990 958 : if (selec < 0.0)
2991 0 : selec = 0.0;
2992 958 : if (selec > 1.0)
2993 0 : selec = 1.0;
2994 958 : otherdistinct = ndistinct_table - (num_mcv - 1);
2995 958 : if (otherdistinct > 1)
2996 958 : selec /= otherdistinct;
2997 :
2998 : /*
2999 : * If the value is kept in the MCV list, its population frequency is
3000 : * assumed to equal its sample frequency. We use the lower end of a
3001 : * textbook continuity-corrected Wald-type confidence interval to
3002 : * determine if that is significantly more common than the non-MCV
3003 : * frequency --- specifically we assume the population frequency is
3004 : * highly likely to be within around 2 standard errors of the sample
3005 : * frequency, which equates to an interval of 2 standard deviations
3006 : * either side of the sample count, plus an additional 0.5 for the
3007 : * continuity correction. Since we are sampling without replacement,
3008 : * this is a hypergeometric distribution.
3009 : *
3010 : * XXX: Empirically, this approach seems to work quite well, but it
3011 : * may be worth considering more advanced techniques for estimating
3012 : * the confidence interval of the hypergeometric distribution.
3013 : */
3014 958 : N = totalrows;
3015 958 : n = samplerows;
3016 958 : K = N * mcv_counts[num_mcv - 1] / n;
3017 958 : variance = n * K * (N - K) * (N - n) / (N * N * (N - 1));
3018 958 : stddev = sqrt(variance);
3019 :
3020 958 : if (mcv_counts[num_mcv - 1] > selec * samplerows + 2 * stddev + 0.5)
3021 : {
3022 : /*
3023 : * The value is significantly more common than the non-MCV
3024 : * selectivity would suggest. Keep it, and all the other more
3025 : * common values in the list.
3026 : */
3027 762 : break;
3028 : }
3029 : else
3030 : {
3031 : /* Discard this value and consider the next least common value */
3032 196 : num_mcv--;
3033 196 : if (num_mcv == 0)
3034 54 : break;
3035 142 : sumcount -= mcv_counts[num_mcv - 1];
3036 : }
3037 : }
3038 816 : return num_mcv;
3039 : }
|