Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * analyze.c
4 : * the Postgres statistics generator
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/commands/analyze.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <math.h>
18 :
19 : #include "access/detoast.h"
20 : #include "access/genam.h"
21 : #include "access/multixact.h"
22 : #include "access/relation.h"
23 : #include "access/sysattr.h"
24 : #include "access/table.h"
25 : #include "access/tableam.h"
26 : #include "access/transam.h"
27 : #include "access/tupconvert.h"
28 : #include "access/visibilitymap.h"
29 : #include "access/xact.h"
30 : #include "catalog/catalog.h"
31 : #include "catalog/index.h"
32 : #include "catalog/indexing.h"
33 : #include "catalog/pg_collation.h"
34 : #include "catalog/pg_inherits.h"
35 : #include "catalog/pg_namespace.h"
36 : #include "catalog/pg_statistic_ext.h"
37 : #include "commands/dbcommands.h"
38 : #include "commands/progress.h"
39 : #include "commands/tablecmds.h"
40 : #include "commands/vacuum.h"
41 : #include "common/pg_prng.h"
42 : #include "executor/executor.h"
43 : #include "foreign/fdwapi.h"
44 : #include "miscadmin.h"
45 : #include "nodes/nodeFuncs.h"
46 : #include "parser/parse_oper.h"
47 : #include "parser/parse_relation.h"
48 : #include "pgstat.h"
49 : #include "postmaster/autovacuum.h"
50 : #include "statistics/extended_stats_internal.h"
51 : #include "statistics/statistics.h"
52 : #include "storage/bufmgr.h"
53 : #include "storage/lmgr.h"
54 : #include "storage/proc.h"
55 : #include "storage/procarray.h"
56 : #include "utils/acl.h"
57 : #include "utils/attoptcache.h"
58 : #include "utils/builtins.h"
59 : #include "utils/datum.h"
60 : #include "utils/fmgroids.h"
61 : #include "utils/guc.h"
62 : #include "utils/lsyscache.h"
63 : #include "utils/memutils.h"
64 : #include "utils/pg_rusage.h"
65 : #include "utils/sampling.h"
66 : #include "utils/sortsupport.h"
67 : #include "utils/spccache.h"
68 : #include "utils/syscache.h"
69 : #include "utils/timestamp.h"
70 :
71 :
72 : /* Per-index data for ANALYZE */
73 : typedef struct AnlIndexData
74 : {
75 : IndexInfo *indexInfo; /* BuildIndexInfo result */
76 : double tupleFract; /* fraction of rows for partial index */
77 : VacAttrStats **vacattrstats; /* index attrs to analyze */
78 : int attr_cnt; /* number of entries in vacattrstats */
79 : } AnlIndexData;
80 :
80 :
81 :
82 : /* Default statistics target (GUC parameter) */
83 : int default_statistics_target = 100;
84 :
85 : /* A few variables that don't seem worth passing around as parameters */
/* Working memory context; created at entry to do_analyze_rel and deleted
 * (and reset to NULL) before it returns. */
86 : static MemoryContext anl_context = NULL;
/* Buffer access strategy passed in by the caller of analyze_rel(); used
 * later when running index cleanup in do_analyze_rel. */
87 : static BufferAccessStrategy vac_strategy;
88 :
89 :
90 : static void do_analyze_rel(Relation onerel,
91 : VacuumParams *params, List *va_cols,
92 : AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
93 : bool inh, bool in_outer_xact, int elevel);
94 : static void compute_index_stats(Relation onerel, double totalrows,
95 : AnlIndexData *indexdata, int nindexes,
96 : HeapTuple *rows, int numrows,
97 : MemoryContext col_context);
98 : static VacAttrStats *examine_attribute(Relation onerel, int attnum,
99 : Node *index_expr);
100 : static int acquire_sample_rows(Relation onerel, int elevel,
101 : HeapTuple *rows, int targrows,
102 : double *totalrows, double *totaldeadrows);
103 : static int compare_rows(const void *a, const void *b, void *arg);
104 : static int acquire_inherited_sample_rows(Relation onerel, int elevel,
105 : HeapTuple *rows, int targrows,
106 : double *totalrows, double *totaldeadrows);
107 : static void update_attstats(Oid relid, bool inh,
108 : int natts, VacAttrStats **vacattrstats);
109 : static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
110 : static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
111 :
112 :
113 : /*
114 : * analyze_rel() -- analyze one relation
115 : *
116 : * relid identifies the relation to analyze. If relation is supplied, use
117 : * the name therein for reporting any failure to open/lock the rel; do not
118 : * use it once we've successfully opened the rel, since it might be stale.
119 : */
120 : void
121 47960 : analyze_rel(Oid relid, RangeVar *relation,
122 : VacuumParams *params, List *va_cols, bool in_outer_xact,
123 : BufferAccessStrategy bstrategy)
124 : {
125 : Relation onerel;
126 : int elevel;
/* Stays NULL unless the relkind supports direct row sampling (regular
 * table/matview, or an FDW that provides a sampler); for partitioned
 * tables it remains NULL and only the inherited-stats pass runs. */
127 47960 : AcquireSampleRowsFunc acquirefunc = NULL;
128 47960 : BlockNumber relpages = 0;
129 :
130 : /* Select logging level */
131 47960 : if (params->options & VACOPT_VERBOSE)
132 0 : elevel = INFO;
133 : else
134 47960 : elevel = DEBUG2;
135 :
136 : /* Set up static variables */
137 47960 : vac_strategy = bstrategy;
138 :
139 : /*
140 : * Check for user-requested abort.
141 : */
142 47960 : CHECK_FOR_INTERRUPTS();
143 :
144 : /*
145 : * Open the relation, getting ShareUpdateExclusiveLock to ensure that two
146 : * ANALYZEs don't run on it concurrently. (This also locks out a
147 : * concurrent VACUUM, which doesn't matter much at the moment but might
148 : * matter if we ever try to accumulate stats on dead tuples.) If the rel
149 : * has been dropped since we last saw it, we don't need to process it.
150 : *
151 : * Make sure to generate only logs for ANALYZE in this case.
152 : */
153 47960 : onerel = vacuum_open_relation(relid, relation, params->options & ~(VACOPT_VACUUM),
154 47960 : params->log_min_duration >= 0,
155 : ShareUpdateExclusiveLock);
156 :
157 : /* leave if relation could not be opened or locked */
158 47960 : if (!onerel)
159 678 : return;
160 :
161 : /*
162 : * Check if relation needs to be skipped based on privileges. This check
163 : * happens also when building the relation list to analyze for a manual
164 : * operation, and needs to be done additionally here as ANALYZE could
165 : * happen across multiple transactions where privileges could have changed
166 : * in-between. Make sure to generate only logs for ANALYZE in this case.
167 : */
168 47944 : if (!vacuum_is_permitted_for_relation(RelationGetRelid(onerel),
169 : onerel->rd_rel,
170 47944 : params->options & VACOPT_ANALYZE))
171 : {
172 18 : relation_close(onerel, ShareUpdateExclusiveLock);
173 18 : return;
174 : }
175 :
176 : /*
177 : * Silently ignore tables that are temp tables of other backends ---
178 : * trying to analyze these is rather pointless, since their contents are
179 : * probably not up-to-date on disk. (We don't throw a warning here; it
180 : * would just lead to chatter during a database-wide ANALYZE.)
181 : */
182 47926 : if (RELATION_IS_OTHER_TEMP(onerel))
183 : {
184 0 : relation_close(onerel, ShareUpdateExclusiveLock);
185 0 : return;
186 : }
187 :
188 : /*
189 : * We can ANALYZE any table except pg_statistic. See update_attstats
190 : */
191 47926 : if (RelationGetRelid(onerel) == StatisticRelationId)
192 : {
193 644 : relation_close(onerel, ShareUpdateExclusiveLock);
194 644 : return;
195 : }
196 :
197 : /*
198 : * Check that it's of an analyzable relkind, and set up appropriately.
199 : */
200 47282 : if (onerel->rd_rel->relkind == RELKIND_RELATION ||
201 694 : onerel->rd_rel->relkind == RELKIND_MATVIEW)
202 : {
203 : /* Regular table, so we'll use the regular row acquisition function */
204 46588 : acquirefunc = acquire_sample_rows;
205 : /* Also get regular table's size */
206 46588 : relpages = RelationGetNumberOfBlocks(onerel);
207 : }
208 694 : else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
209 : {
210 : /*
211 : * For a foreign table, call the FDW's hook function to see whether it
212 : * supports analysis.
213 : */
214 : FdwRoutine *fdwroutine;
215 50 : bool ok = false;
216 :
217 50 : fdwroutine = GetFdwRoutineForRelation(onerel, false);
218 :
219 50 : if (fdwroutine->AnalyzeForeignTable != NULL)
220 50 : ok = fdwroutine->AnalyzeForeignTable(onerel,
221 : &acquirefunc,
222 : &relpages);
223 :
224 50 : if (!ok)
225 : {
226 0 : ereport(WARNING,
227 : (errmsg("skipping \"%s\" --- cannot analyze this foreign table",
228 : RelationGetRelationName(onerel))));
229 0 : relation_close(onerel, ShareUpdateExclusiveLock);
230 0 : return;
231 : }
232 : }
233 644 : else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
234 : {
235 : /*
236 : * For partitioned tables, we want to do the recursive ANALYZE below.
237 : */
238 : }
239 : else
240 : {
241 : /* No need for a WARNING if we already complained during VACUUM */
242 0 : if (!(params->options & VACOPT_VACUUM))
243 0 : ereport(WARNING,
244 : (errmsg("skipping \"%s\" --- cannot analyze non-tables or special system tables",
245 : RelationGetRelationName(onerel))));
246 0 : relation_close(onerel, ShareUpdateExclusiveLock);
247 0 : return;
248 : }
249 :
250 : /*
251 : * OK, let's do it. First, initialize progress reporting.
252 : */
253 47282 : pgstat_progress_start_command(PROGRESS_COMMAND_ANALYZE,
254 : RelationGetRelid(onerel));
255 :
256 : /*
257 : * Do the normal non-recursive ANALYZE. We can skip this for partitioned
258 : * tables, which don't contain any rows.
259 : */
260 47282 : if (onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
261 46638 : do_analyze_rel(onerel, params, va_cols, acquirefunc,
262 : relpages, false, in_outer_xact, elevel);
263 :
264 : /*
265 : * If there are child tables, do recursive ANALYZE.
266 : */
/* (The inherited pass ignores acquirefunc; it picks a sampler per child.) */
267 47240 : if (onerel->rd_rel->relhassubclass)
268 698 : do_analyze_rel(onerel, params, va_cols, acquirefunc, relpages,
269 : true, in_outer_xact, elevel);
270 :
271 : /*
272 : * Close source relation now, but keep lock so that no one deletes it
273 : * before we commit. (If someone did, they'd fail to clean up the entries
274 : * we made in pg_statistic. Also, releasing the lock before commit would
275 : * expose us to concurrent-update failures in update_attstats.)
276 : */
277 47222 : relation_close(onerel, NoLock);
278 :
279 47222 : pgstat_progress_end_command();
280 : }
281 :
281 :
282 : /*
283 : * do_analyze_rel() -- analyze one relation, recursively or not
284 : *
285 : * Note that "acquirefunc" is only relevant for the non-inherited case.
286 : * For the inherited case, acquire_inherited_sample_rows() determines the
287 : * appropriate acquirefunc for each child table.
288 : */
289 : static void
290 47336 : do_analyze_rel(Relation onerel, VacuumParams *params,
291 : List *va_cols, AcquireSampleRowsFunc acquirefunc,
292 : BlockNumber relpages, bool inh, bool in_outer_xact,
293 : int elevel)
294 : {
295 : int attr_cnt,
296 : tcnt,
297 : i,
298 : ind;
299 : Relation *Irel;
300 : int nindexes;
301 : bool hasindex;
302 : VacAttrStats **vacattrstats;
303 : AnlIndexData *indexdata;
304 : int targrows,
305 : numrows,
306 : minrows;
307 : double totalrows,
308 : totaldeadrows;
309 : HeapTuple *rows;
310 : PGRUsage ru0;
311 47336 : TimestampTz starttime = 0;
312 : MemoryContext caller_context;
313 : Oid save_userid;
314 : int save_sec_context;
315 : int save_nestlevel;
/* Snapshot the global page counters now so we can report the deltas
 * attributable to this ANALYZE in the autovacuum log message below. */
316 47336 : int64 AnalyzePageHit = VacuumPageHit;
317 47336 : int64 AnalyzePageMiss = VacuumPageMiss;
318 47336 : int64 AnalyzePageDirty = VacuumPageDirty;
319 47336 : PgStat_Counter startreadtime = 0;
320 47336 : PgStat_Counter startwritetime = 0;
321 :
322 47336 : if (inh)
323 698 : ereport(elevel,
324 : (errmsg("analyzing \"%s.%s\" inheritance tree",
325 : get_namespace_name(RelationGetNamespace(onerel)),
326 : RelationGetRelationName(onerel))));
327 : else
328 46638 : ereport(elevel,
329 : (errmsg("analyzing \"%s.%s\"",
330 : get_namespace_name(RelationGetNamespace(onerel)),
331 : RelationGetRelationName(onerel))));
332 :
333 : /*
334 : * Set up a working context so that we can easily free whatever junk gets
335 : * created.
336 : */
337 47336 : anl_context = AllocSetContextCreate(CurrentMemoryContext,
338 : "Analyze",
339 : ALLOCSET_DEFAULT_SIZES);
340 47336 : caller_context = MemoryContextSwitchTo(anl_context);
341 :
342 : /*
343 : * Switch to the table owner's userid, so that any index functions are run
344 : * as that user. Also lock down security-restricted operations and
345 : * arrange to make GUC variable changes local to this command.
346 : */
347 47336 : GetUserIdAndSecContext(&save_userid, &save_sec_context);
348 47336 : SetUserIdAndSecContext(onerel->rd_rel->relowner,
349 : save_sec_context | SECURITY_RESTRICTED_OPERATION);
350 47336 : save_nestlevel = NewGUCNestLevel();
351 :
352 : /* measure elapsed time iff autovacuum logging requires it */
353 47336 : if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
354 : {
355 350 : if (track_io_timing)
356 : {
357 0 : startreadtime = pgStatBlockReadTime;
358 0 : startwritetime = pgStatBlockWriteTime;
359 : }
360 :
361 350 : pg_rusage_init(&ru0);
362 350 : starttime = GetCurrentTimestamp();
363 : }
364 :
365 : /*
366 : * Determine which columns to analyze
367 : *
368 : * Note that system attributes are never analyzed, so we just reject them
369 : * at the lookup stage. We also reject duplicate column mentions. (We
370 : * could alternatively ignore duplicates, but analyzing a column twice
371 : * won't work; we'd end up making a conflicting update in pg_statistic.)
372 : */
373 47336 : if (va_cols != NIL)
374 : {
375 94 : Bitmapset *unique_cols = NULL;
376 : ListCell *le;
377 :
378 94 : vacattrstats = (VacAttrStats **) palloc(list_length(va_cols) *
379 : sizeof(VacAttrStats *));
380 94 : tcnt = 0;
381 164 : foreach(le, va_cols)
382 : {
383 120 : char *col = strVal(lfirst(le));
384 :
385 120 : i = attnameAttNum(onerel, col, false);
386 120 : if (i == InvalidAttrNumber)
387 38 : ereport(ERROR,
388 : (errcode(ERRCODE_UNDEFINED_COLUMN),
389 : errmsg("column \"%s\" of relation \"%s\" does not exist",
390 : col, RelationGetRelationName(onerel))));
391 82 : if (bms_is_member(i, unique_cols))
392 12 : ereport(ERROR,
393 : (errcode(ERRCODE_DUPLICATE_COLUMN),
394 : errmsg("column \"%s\" of relation \"%s\" appears more than once",
395 : col, RelationGetRelationName(onerel))));
396 70 : unique_cols = bms_add_member(unique_cols, i);
397 :
398 70 : vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
399 70 : if (vacattrstats[tcnt] != NULL)
400 70 : tcnt++;
401 : }
402 44 : attr_cnt = tcnt;
403 : }
404 : else
405 : {
406 47242 : attr_cnt = onerel->rd_att->natts;
407 : vacattrstats = (VacAttrStats **)
408 47242 : palloc(attr_cnt * sizeof(VacAttrStats *));
409 47242 : tcnt = 0;
410 438370 : for (i = 1; i <= attr_cnt; i++)
411 : {
412 391128 : vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
413 391128 : if (vacattrstats[tcnt] != NULL)
414 391116 : tcnt++;
415 : }
416 47242 : attr_cnt = tcnt;
417 : }
418 :
419 : /*
420 : * Open all indexes of the relation, and see if there are any analyzable
421 : * columns in the indexes. We do not analyze index columns if there was
422 : * an explicit column list in the ANALYZE command, however.
423 : *
424 : * If we are doing a recursive scan, we don't want to touch the parent's
425 : * indexes at all. If we're processing a partitioned table, we need to
426 : * know if there are any indexes, but we don't want to process them.
427 : */
428 47286 : if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
429 : {
430 626 : List *idxs = RelationGetIndexList(onerel);
431 :
432 626 : Irel = NULL;
433 626 : nindexes = 0;
434 626 : hasindex = idxs != NIL;
435 626 : list_free(idxs);
436 : }
437 46660 : else if (!inh)
438 : {
439 46606 : vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel);
440 46606 : hasindex = nindexes > 0;
441 : }
442 : else
443 : {
444 54 : Irel = NULL;
445 54 : nindexes = 0;
446 54 : hasindex = false;
447 : }
448 47286 : indexdata = NULL;
449 47286 : if (nindexes > 0)
450 : {
451 41762 : indexdata = (AnlIndexData *) palloc0(nindexes * sizeof(AnlIndexData));
452 122526 : for (ind = 0; ind < nindexes; ind++)
453 : {
454 80766 : AnlIndexData *thisdata = &indexdata[ind];
455 : IndexInfo *indexInfo;
456 :
457 80766 : thisdata->indexInfo = indexInfo = BuildIndexInfo(Irel[ind]);
458 80764 : thisdata->tupleFract = 1.0; /* fix later if partial */
459 80764 : if (indexInfo->ii_Expressions != NIL && va_cols == NIL)
460 : {
461 62 : ListCell *indexpr_item = list_head(indexInfo->ii_Expressions);
462 :
463 62 : thisdata->vacattrstats = (VacAttrStats **)
464 62 : palloc(indexInfo->ii_NumIndexAttrs * sizeof(VacAttrStats *));
465 62 : tcnt = 0;
466 128 : for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
467 : {
468 66 : int keycol = indexInfo->ii_IndexAttrNumbers[i];
469 :
470 66 : if (keycol == 0)
471 : {
472 : /* Found an index expression */
473 : Node *indexkey;
474 :
475 62 : if (indexpr_item == NULL) /* shouldn't happen */
476 0 : elog(ERROR, "too few entries in indexprs list");
477 62 : indexkey = (Node *) lfirst(indexpr_item);
478 62 : indexpr_item = lnext(indexInfo->ii_Expressions,
479 : indexpr_item);
480 124 : thisdata->vacattrstats[tcnt] =
481 62 : examine_attribute(Irel[ind], i + 1, indexkey);
482 62 : if (thisdata->vacattrstats[tcnt] != NULL)
483 62 : tcnt++;
484 : }
485 : }
486 62 : thisdata->attr_cnt = tcnt;
487 : }
488 : }
489 : }
490 :
491 : /*
492 : * Determine how many rows we need to sample, using the worst case from
493 : * all analyzable columns. We use a lower bound of 100 rows to avoid
494 : * possible overflow in Vitter's algorithm. (Note: that will also be the
495 : * target in the corner case where there are no analyzable columns.)
496 : */
497 47284 : targrows = 100;
498 438444 : for (i = 0; i < attr_cnt; i++)
499 : {
500 391160 : if (targrows < vacattrstats[i]->minrows)
501 47278 : targrows = vacattrstats[i]->minrows;
502 : }
503 128048 : for (ind = 0; ind < nindexes; ind++)
504 : {
505 80764 : AnlIndexData *thisdata = &indexdata[ind];
506 :
507 80826 : for (i = 0; i < thisdata->attr_cnt; i++)
508 : {
509 62 : if (targrows < thisdata->vacattrstats[i]->minrows)
510 0 : targrows = thisdata->vacattrstats[i]->minrows;
511 : }
512 : }
513 :
514 : /*
515 : * Look at extended statistics objects too, as those may define custom
516 : * statistics target. So we may need to sample more rows and then build
517 : * the statistics with enough detail.
518 : */
519 47284 : minrows = ComputeExtStatisticsRows(onerel, attr_cnt, vacattrstats);
520 :
521 47284 : if (targrows < minrows)
522 0 : targrows = minrows;
523 :
524 : /*
525 : * Acquire the sample rows
526 : */
/* The acquire function fills rows[] and sets totalrows/totaldeadrows to
 * whole-table estimates; numrows is the actual sample size obtained. */
527 47284 : rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
528 47284 : pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
529 : inh ? PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS_INH :
530 : PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS);
531 47284 : if (inh)
532 680 : numrows = acquire_inherited_sample_rows(onerel, elevel,
533 : rows, targrows,
534 : &totalrows, &totaldeadrows);
535 : else
536 46604 : numrows = (*acquirefunc) (onerel, elevel,
537 : rows, targrows,
538 : &totalrows, &totaldeadrows);
539 :
540 : /*
541 : * Compute the statistics. Temporary results during the calculations for
542 : * each column are stored in a child context. The calc routines are
543 : * responsible to make sure that whatever they store into the VacAttrStats
544 : * structure is allocated in anl_context.
545 : */
/* An empty sample skips pg_statistic updates entirely; pg_class relstats
 * are still refreshed further below. */
546 47282 : if (numrows > 0)
547 : {
548 : MemoryContext col_context,
549 : old_context;
550 :
551 28248 : pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
552 : PROGRESS_ANALYZE_PHASE_COMPUTE_STATS);
553 :
554 28248 : col_context = AllocSetContextCreate(anl_context,
555 : "Analyze Column",
556 : ALLOCSET_DEFAULT_SIZES);
557 28248 : old_context = MemoryContextSwitchTo(col_context);
558 :
559 304700 : for (i = 0; i < attr_cnt; i++)
560 : {
561 276452 : VacAttrStats *stats = vacattrstats[i];
562 : AttributeOpts *aopt;
563 :
564 276452 : stats->rows = rows;
565 276452 : stats->tupDesc = onerel->rd_att;
566 276452 : stats->compute_stats(stats,
567 : std_fetch_func,
568 : numrows,
569 : totalrows);
570 :
571 : /*
572 : * If the appropriate flavor of the n_distinct option is
573 : * specified, override with the corresponding value.
574 : */
575 276452 : aopt = get_attribute_options(onerel->rd_id, stats->attr->attnum);
576 276452 : if (aopt != NULL)
577 : {
578 : float8 n_distinct;
579 :
580 6 : n_distinct = inh ? aopt->n_distinct_inherited : aopt->n_distinct;
581 6 : if (n_distinct != 0.0)
582 6 : stats->stadistinct = n_distinct;
583 : }
584 :
585 276452 : MemoryContextResetAndDeleteChildren(col_context);
586 : }
587 :
588 28248 : if (nindexes > 0)
589 23022 : compute_index_stats(onerel, totalrows,
590 : indexdata, nindexes,
591 : rows, numrows,
592 : col_context);
593 :
594 28242 : MemoryContextSwitchTo(old_context);
595 28242 : MemoryContextDelete(col_context);
596 :
597 : /*
598 : * Emit the completed stats rows into pg_statistic, replacing any
599 : * previous statistics for the target columns. (If there are stats in
600 : * pg_statistic for columns we didn't process, we leave them alone.)
601 : */
602 28242 : update_attstats(RelationGetRelid(onerel), inh,
603 : attr_cnt, vacattrstats);
604 :
605 75428 : for (ind = 0; ind < nindexes; ind++)
606 : {
607 47186 : AnlIndexData *thisdata = &indexdata[ind];
608 :
609 47186 : update_attstats(RelationGetRelid(Irel[ind]), false,
610 : thisdata->attr_cnt, thisdata->vacattrstats);
611 : }
612 :
613 : /* Build extended statistics (if there are any). */
614 28242 : BuildRelationExtStatistics(onerel, inh, totalrows, numrows, rows,
615 : attr_cnt, vacattrstats);
616 : }
617 :
618 47276 : pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
619 : PROGRESS_ANALYZE_PHASE_FINALIZE_ANALYZE);
620 :
621 : /*
622 : * Update pages/tuples stats in pg_class ... but not if we're doing
623 : * inherited stats.
624 : *
625 : * We assume that VACUUM hasn't set pg_class.reltuples already, even
626 : * during a VACUUM ANALYZE. Although VACUUM often updates pg_class,
627 : * exceptions exist. A "VACUUM (ANALYZE, INDEX_CLEANUP OFF)" command will
628 : * never update pg_class entries for index relations. It's also possible
629 : * that an individual index's pg_class entry won't be updated during
630 : * VACUUM if the index AM returns NULL from its amvacuumcleanup() routine.
631 : */
632 47276 : if (!inh)
633 : {
634 : BlockNumber relallvisible;
635 :
636 46596 : visibilitymap_count(onerel, &relallvisible, NULL);
637 :
638 : /* Update pg_class for table relation */
639 46596 : vac_update_relstats(onerel,
640 : relpages,
641 : totalrows,
642 : relallvisible,
643 : hasindex,
644 : InvalidTransactionId,
645 : InvalidMultiXactId,
646 : NULL, NULL,
647 : in_outer_xact);
648 :
649 : /* Same for indexes */
650 127348 : for (ind = 0; ind < nindexes; ind++)
651 : {
652 80752 : AnlIndexData *thisdata = &indexdata[ind];
653 : double totalindexrows;
654 :
655 80752 : totalindexrows = ceil(thisdata->tupleFract * totalrows);
656 80752 : vac_update_relstats(Irel[ind],
657 80752 : RelationGetNumberOfBlocks(Irel[ind]),
658 : totalindexrows,
659 : 0,
660 : false,
661 : InvalidTransactionId,
662 : InvalidMultiXactId,
663 : NULL, NULL,
664 : in_outer_xact);
665 : }
666 : }
667 680 : else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
668 : {
669 : /*
670 : * Partitioned tables don't have storage, so we don't set any fields
671 : * in their pg_class entries except for reltuples and relhasindex.
672 : */
673 626 : vac_update_relstats(onerel, -1, totalrows,
674 : 0, hasindex, InvalidTransactionId,
675 : InvalidMultiXactId,
676 : NULL, NULL,
677 : in_outer_xact);
678 : }
679 :
680 : /*
681 : * Now report ANALYZE to the cumulative stats system. For regular tables,
682 : * we do it only if not doing inherited stats. For partitioned tables, we
683 : * only do it for inherited stats. (We're never called for not-inherited
684 : * stats on partitioned tables anyway.)
685 : *
686 : * Reset the changes_since_analyze counter only if we analyzed all
687 : * columns; otherwise, there is still work for auto-analyze to do.
688 : */
689 47276 : if (!inh)
690 46596 : pgstat_report_analyze(onerel, totalrows, totaldeadrows,
691 : (va_cols == NIL));
692 680 : else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
693 626 : pgstat_report_analyze(onerel, 0, 0, (va_cols == NIL));
694 :
695 : /*
696 : * If this isn't part of VACUUM ANALYZE, let index AMs do cleanup.
697 : *
698 : * Note that most index AMs perform a no-op as a matter of policy for
699 : * amvacuumcleanup() when called in ANALYZE-only mode. The only exception
700 : * among core index AMs is GIN/ginvacuumcleanup().
701 : */
702 47276 : if (!(params->options & VACOPT_VACUUM))
703 : {
704 125754 : for (ind = 0; ind < nindexes; ind++)
705 : {
706 : IndexBulkDeleteResult *stats;
707 : IndexVacuumInfo ivinfo;
708 :
709 79504 : ivinfo.index = Irel[ind];
710 79504 : ivinfo.heaprel = onerel;
711 79504 : ivinfo.analyze_only = true;
712 79504 : ivinfo.estimated_count = true;
713 79504 : ivinfo.message_level = elevel;
714 79504 : ivinfo.num_heap_tuples = onerel->rd_rel->reltuples;
715 79504 : ivinfo.strategy = vac_strategy;
716 :
717 79504 : stats = index_vacuum_cleanup(&ivinfo, NULL);
718 :
719 79504 : if (stats)
720 0 : pfree(stats);
721 : }
722 : }
723 :
724 : /* Done with indexes */
725 47276 : vac_close_indexes(nindexes, Irel, NoLock);
726 :
727 : /* Log the action if appropriate */
728 47276 : if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
729 : {
730 350 : TimestampTz endtime = GetCurrentTimestamp();
731 :
732 516 : if (params->log_min_duration == 0 ||
733 166 : TimestampDifferenceExceeds(starttime, endtime,
734 : params->log_min_duration))
735 : {
736 : long delay_in_ms;
737 184 : double read_rate = 0;
738 184 : double write_rate = 0;
739 : StringInfoData buf;
740 :
741 : /*
742 : * Calculate the difference in the Page Hit/Miss/Dirty that
743 : * happened as part of the analyze by subtracting out the
744 : * pre-analyze values which we saved above.
745 : */
746 184 : AnalyzePageHit = VacuumPageHit - AnalyzePageHit;
747 184 : AnalyzePageMiss = VacuumPageMiss - AnalyzePageMiss;
748 184 : AnalyzePageDirty = VacuumPageDirty - AnalyzePageDirty;
749 :
750 : /*
751 : * We do not expect an analyze to take > 25 days and it simplifies
752 : * things a bit to use TimestampDifferenceMilliseconds.
753 : */
754 184 : delay_in_ms = TimestampDifferenceMilliseconds(starttime, endtime);
755 :
756 : /*
757 : * Note that we are reporting these read/write rates in the same
758 : * manner as VACUUM does, which means that while the 'average read
759 : * rate' here actually corresponds to page misses and resulting
760 : * reads which are also picked up by track_io_timing, if enabled,
761 : * the 'average write rate' is actually talking about the rate of
762 : * pages being dirtied, not being written out, so it's typical to
763 : * have a non-zero 'avg write rate' while I/O timings only reports
764 : * reads.
765 : *
766 : * It's not clear that an ANALYZE will ever result in
767 : * FlushBuffer() being called, but we track and support reporting
768 : * on I/O write time in case that changes as it's practically free
769 : * to do so anyway.
770 : */
771 :
772 184 : if (delay_in_ms > 0)
773 : {
774 184 : read_rate = (double) BLCKSZ * AnalyzePageMiss / (1024 * 1024) /
775 184 : (delay_in_ms / 1000.0);
776 184 : write_rate = (double) BLCKSZ * AnalyzePageDirty / (1024 * 1024) /
777 184 : (delay_in_ms / 1000.0);
778 : }
779 :
780 : /*
781 : * We split this up so we don't emit empty I/O timing values when
782 : * track_io_timing isn't enabled.
783 : */
784 :
785 184 : initStringInfo(&buf);
786 184 : appendStringInfo(&buf, _("automatic analyze of table \"%s.%s.%s\"\n"),
787 : get_database_name(MyDatabaseId),
788 184 : get_namespace_name(RelationGetNamespace(onerel)),
789 184 : RelationGetRelationName(onerel));
790 184 : if (track_io_timing)
791 : {
792 0 : double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
793 0 : double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
794 :
795 0 : appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
796 : read_ms, write_ms);
797 : }
798 184 : appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
799 : read_rate, write_rate);
800 184 : appendStringInfo(&buf, _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
801 : (long long) AnalyzePageHit,
802 : (long long) AnalyzePageMiss,
803 : (long long) AnalyzePageDirty);
804 184 : appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
805 :
806 184 : ereport(LOG,
807 : (errmsg_internal("%s", buf.data)));
808 :
809 184 : pfree(buf.data);
810 : }
811 : }
812 :
813 : /* Roll back any GUC changes executed by index functions */
814 47276 : AtEOXact_GUC(false, save_nestlevel);
815 :
816 : /* Restore userid and security context */
817 47276 : SetUserIdAndSecContext(save_userid, save_sec_context);
818 :
819 : /* Restore current context and release memory */
820 47276 : MemoryContextSwitchTo(caller_context);
821 47276 : MemoryContextDelete(anl_context);
822 47276 : anl_context = NULL;
823 47276 : }
824 :
825 : /*
826 : * Compute statistics about indexes of a relation
827 : */
828 : static void
829 23022 : compute_index_stats(Relation onerel, double totalrows,
830 : AnlIndexData *indexdata, int nindexes,
831 : HeapTuple *rows, int numrows,
832 : MemoryContext col_context)
833 : {
834 : MemoryContext ind_context,
835 : old_context;
836 : Datum values[INDEX_MAX_KEYS];
837 : bool isnull[INDEX_MAX_KEYS];
838 : int ind,
839 : i;
840 :
841 23022 : ind_context = AllocSetContextCreate(anl_context,
842 : "Analyze Index",
843 : ALLOCSET_DEFAULT_SIZES);
844 23022 : old_context = MemoryContextSwitchTo(ind_context);
845 :
846 70214 : for (ind = 0; ind < nindexes; ind++)
847 : {
848 47198 : AnlIndexData *thisdata = &indexdata[ind];
849 47198 : IndexInfo *indexInfo = thisdata->indexInfo;
850 47198 : int attr_cnt = thisdata->attr_cnt;
851 : TupleTableSlot *slot;
852 : EState *estate;
853 : ExprContext *econtext;
854 : ExprState *predicate;
855 : Datum *exprvals;
856 : bool *exprnulls;
857 : int numindexrows,
858 : tcnt,
859 : rowno;
860 : double totalindexrows;
861 :
862 : /* Ignore index if no columns to analyze and not partial */
863 47198 : if (attr_cnt == 0 && indexInfo->ii_Predicate == NIL)
864 47102 : continue;
865 :
866 : /*
867 : * Need an EState for evaluation of index expressions and
868 : * partial-index predicates. Create it in the per-index context to be
869 : * sure it gets cleaned up at the bottom of the loop.
870 : */
871 96 : estate = CreateExecutorState();
872 96 : econtext = GetPerTupleExprContext(estate);
873 : /* Need a slot to hold the current heap tuple, too */
874 96 : slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel),
875 : &TTSOpsHeapTuple);
876 :
877 : /* Arrange for econtext's scan tuple to be the tuple under test */
878 96 : econtext->ecxt_scantuple = slot;
879 :
880 : /* Set up execution state for predicate. */
881 96 : predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
882 :
883 : /* Compute and save index expression values */
884 96 : exprvals = (Datum *) palloc(numrows * attr_cnt * sizeof(Datum));
885 96 : exprnulls = (bool *) palloc(numrows * attr_cnt * sizeof(bool));
886 96 : numindexrows = 0;
887 96 : tcnt = 0;
888 212574 : for (rowno = 0; rowno < numrows; rowno++)
889 : {
890 212484 : HeapTuple heapTuple = rows[rowno];
891 :
892 212484 : vacuum_delay_point();
893 :
894 : /*
895 : * Reset the per-tuple context each time, to reclaim any cruft
896 : * left behind by evaluating the predicate or index expressions.
897 : */
898 212484 : ResetExprContext(econtext);
899 :
900 : /* Set up for predicate or expression evaluation */
901 212484 : ExecStoreHeapTuple(heapTuple, slot, false);
902 :
903 : /* If index is partial, check predicate */
904 212484 : if (predicate != NULL)
905 : {
906 58048 : if (!ExecQual(predicate, econtext))
907 21332 : continue;
908 : }
909 191152 : numindexrows++;
910 :
911 191152 : if (attr_cnt > 0)
912 : {
913 : /*
914 : * Evaluate the index row to compute expression values. We
915 : * could do this by hand, but FormIndexDatum is convenient.
916 : */
917 154436 : FormIndexDatum(indexInfo,
918 : slot,
919 : estate,
920 : values,
921 : isnull);
922 :
923 : /*
924 : * Save just the columns we care about. We copy the values
925 : * into ind_context from the estate's per-tuple context.
926 : */
927 308860 : for (i = 0; i < attr_cnt; i++)
928 : {
929 154430 : VacAttrStats *stats = thisdata->vacattrstats[i];
930 154430 : int attnum = stats->attr->attnum;
931 :
932 154430 : if (isnull[attnum - 1])
933 : {
934 0 : exprvals[tcnt] = (Datum) 0;
935 0 : exprnulls[tcnt] = true;
936 : }
937 : else
938 : {
939 308860 : exprvals[tcnt] = datumCopy(values[attnum - 1],
940 154430 : stats->attrtype->typbyval,
941 154430 : stats->attrtype->typlen);
942 154430 : exprnulls[tcnt] = false;
943 : }
944 154430 : tcnt++;
945 : }
946 : }
947 : }
948 :
949 : /*
950 : * Having counted the number of rows that pass the predicate in the
951 : * sample, we can estimate the total number of rows in the index.
952 : */
953 90 : thisdata->tupleFract = (double) numindexrows / (double) numrows;
954 90 : totalindexrows = ceil(thisdata->tupleFract * totalrows);
955 :
956 : /*
957 : * Now we can compute the statistics for the expression columns.
958 : */
959 90 : if (numindexrows > 0)
960 : {
961 84 : MemoryContextSwitchTo(col_context);
962 134 : for (i = 0; i < attr_cnt; i++)
963 : {
964 50 : VacAttrStats *stats = thisdata->vacattrstats[i];
965 :
966 50 : stats->exprvals = exprvals + i;
967 50 : stats->exprnulls = exprnulls + i;
968 50 : stats->rowstride = attr_cnt;
969 50 : stats->compute_stats(stats,
970 : ind_fetch_func,
971 : numindexrows,
972 : totalindexrows);
973 :
974 50 : MemoryContextResetAndDeleteChildren(col_context);
975 : }
976 : }
977 :
978 : /* And clean up */
979 90 : MemoryContextSwitchTo(ind_context);
980 :
981 90 : ExecDropSingleTupleTableSlot(slot);
982 90 : FreeExecutorState(estate);
983 90 : MemoryContextResetAndDeleteChildren(ind_context);
984 : }
985 :
986 23016 : MemoryContextSwitchTo(old_context);
987 23016 : MemoryContextDelete(ind_context);
988 23016 : }
989 :
990 : /*
991 : * examine_attribute -- pre-analysis of a single column
992 : *
993 : * Determine whether the column is analyzable; if so, create and initialize
994 : * a VacAttrStats struct for it. If not, return NULL.
995 : *
996 : * If index_expr isn't NULL, then we're trying to analyze an expression index,
997 : * and index_expr is the expression tree representing the column's data.
998 : */
static VacAttrStats *
examine_attribute(Relation onerel, int attnum, Node *index_expr)
{
	Form_pg_attribute attr = TupleDescAttr(onerel->rd_att, attnum - 1);
	HeapTuple	typtuple;
	VacAttrStats *stats;
	int			i;
	bool		ok;

	/* Never analyze dropped columns */
	if (attr->attisdropped)
		return NULL;

	/* Don't analyze column if user has specified not to */
	if (attr->attstattarget == 0)
		return NULL;

	/*
	 * Create the VacAttrStats struct.  Note that we only have a copy of the
	 * fixed fields of the pg_attribute tuple.
	 */
	stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
	stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
	memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE);

	/*
	 * When analyzing an expression index, believe the expression tree's type
	 * not the column datatype --- the latter might be the opckeytype storage
	 * type of the opclass, which is not interesting for our purposes.  (Note:
	 * if we did anything with non-expression index columns, we'd need to
	 * figure out where to get the correct type info from, but for now that's
	 * not a problem.)	It's not clear whether anyone will care about the
	 * typmod, but we store that too just in case.
	 */
	if (index_expr)
	{
		stats->attrtypid = exprType(index_expr);
		stats->attrtypmod = exprTypmod(index_expr);

		/*
		 * If a collation has been specified for the index column, use that in
		 * preference to anything else; but if not, fall back to whatever we
		 * can get from the expression.
		 */
		if (OidIsValid(onerel->rd_indcollation[attnum - 1]))
			stats->attrcollid = onerel->rd_indcollation[attnum - 1];
		else
			stats->attrcollid = exprCollation(index_expr);
	}
	else
	{
		/* Plain column: take type/typmod/collation from pg_attribute */
		stats->attrtypid = attr->atttypid;
		stats->attrtypmod = attr->atttypmod;
		stats->attrcollid = attr->attcollation;
	}

	/*
	 * Fetch a copy of the pg_type row for the column's (or expression's)
	 * datatype; the copy is ours to keep for the life of the stats struct.
	 */
	typtuple = SearchSysCacheCopy1(TYPEOID,
								   ObjectIdGetDatum(stats->attrtypid));
	if (!HeapTupleIsValid(typtuple))
		elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
	stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
	stats->anl_context = anl_context;
	/* remember which column of the sampled tuples this attribute is */
	stats->tupattnum = attnum;

	/*
	 * The fields describing the stats->stavalues[n] element types default to
	 * the type of the data being analyzed, but the type-specific typanalyze
	 * function can change them if it wants to store something else.
	 */
	for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
	{
		stats->statypid[i] = stats->attrtypid;
		stats->statyplen[i] = stats->attrtype->typlen;
		stats->statypbyval[i] = stats->attrtype->typbyval;
		stats->statypalign[i] = stats->attrtype->typalign;
	}

	/*
	 * Call the type-specific typanalyze function.  If none is specified, use
	 * std_typanalyze().
	 */
	if (OidIsValid(stats->attrtype->typanalyze))
		ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
										   PointerGetDatum(stats)));
	else
		ok = std_typanalyze(stats);

	/*
	 * If the typanalyze function refused, or failed to fill in the required
	 * callback/minrows fields, free everything and report "not analyzable".
	 */
	if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
	{
		heap_freetuple(typtuple);
		pfree(stats->attr);
		pfree(stats);
		return NULL;
	}

	return stats;
}
1096 :
1097 : /*
1098 : * acquire_sample_rows -- acquire a random sample of rows from the table
1099 : *
1100 : * Selected rows are returned in the caller-allocated array rows[], which
1101 : * must have at least targrows entries.
1102 : * The actual number of rows selected is returned as the function result.
1103 : * We also estimate the total numbers of live and dead rows in the table,
1104 : * and return them into *totalrows and *totaldeadrows, respectively.
1105 : *
1106 : * The returned list of tuples is in order by physical position in the table.
1107 : * (We will rely on this later to derive correlation estimates.)
1108 : *
1109 : * As of May 2004 we use a new two-stage method: Stage one selects up
1110 : * to targrows random blocks (or all blocks, if there aren't so many).
1111 : * Stage two scans these blocks and uses the Vitter algorithm to create
1112 : * a random sample of targrows rows (or less, if there are less in the
1113 : * sample of blocks). The two stages are executed simultaneously: each
1114 : * block is processed as soon as stage one returns its number and while
1115 : * the rows are read stage two controls which ones are to be inserted
1116 : * into the sample.
1117 : *
1118 : * Although every row has an equal chance of ending up in the final
1119 : * sample, this sampling method is not perfect: not every possible
1120 : * sample has an equal chance of being selected. For large relations
1121 : * the number of different blocks represented by the sample tends to be
1122 : * too small. We can live with that for now. Improvements are welcome.
1123 : *
1124 : * An important property of this sampling method is that because we do
1125 : * look at a statistically unbiased set of blocks, we should get
1126 : * unbiased estimates of the average numbers of live and dead rows per
1127 : * block. The previous sampling method put too much credence in the row
1128 : * density near the start of the table.
1129 : */
static int
acquire_sample_rows(Relation onerel, int elevel,
					HeapTuple *rows, int targrows,
					double *totalrows, double *totaldeadrows)
{
	int			numrows = 0;	/* # rows now in reservoir */
	double		samplerows = 0; /* total # rows collected */
	double		liverows = 0;	/* # live rows seen */
	double		deadrows = 0;	/* # dead rows seen */
	double		rowstoskip = -1;	/* -1 means not set yet */
	uint32		randseed;		/* Seed for block sampler(s) */
	BlockNumber totalblocks;
	TransactionId OldestXmin;
	BlockSamplerData bs;
	ReservoirStateData rstate;
	TupleTableSlot *slot;
	TableScanDesc scan;
	BlockNumber nblocks;
	BlockNumber blksdone = 0;
#ifdef USE_PREFETCH
	int			prefetch_maximum = 0;	/* blocks to prefetch if enabled */
	BlockSamplerData prefetch_bs;
#endif

	Assert(targrows > 0);

	totalblocks = RelationGetNumberOfBlocks(onerel);

	/* Need a cutoff xmin for HeapTupleSatisfiesVacuum */
	OldestXmin = GetOldestNonRemovableTransactionId(onerel);

	/* Prepare for sampling block numbers */
	randseed = pg_prng_uint32(&pg_global_prng_state);
	nblocks = BlockSampler_Init(&bs, totalblocks, targrows, randseed);

#ifdef USE_PREFETCH
	prefetch_maximum = get_tablespace_maintenance_io_concurrency(onerel->rd_rel->reltablespace);

	/*
	 * Create another BlockSampler, using the same seed, for prefetching.
	 * Same seed means it will visit exactly the same block sequence as the
	 * main sampler, just run ahead of it.
	 */
	if (prefetch_maximum)
		(void) BlockSampler_Init(&prefetch_bs, totalblocks, targrows, randseed);
#endif

	/* Report sampling block numbers */
	pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_TOTAL,
								 nblocks);

	/* Prepare for sampling rows */
	reservoir_init_selection_state(&rstate, targrows);

	scan = table_beginscan_analyze(onerel);
	slot = table_slot_create(onerel, NULL);

#ifdef USE_PREFETCH

	/*
	 * If we are doing prefetching, then go ahead and tell the kernel about
	 * the first set of pages we are going to want.  This also moves our
	 * iterator out ahead of the main one being used, where we will keep it so
	 * that we're always pre-fetching out prefetch_maximum number of blocks
	 * ahead.
	 */
	if (prefetch_maximum)
	{
		for (int i = 0; i < prefetch_maximum; i++)
		{
			BlockNumber prefetch_block;

			if (!BlockSampler_HasMore(&prefetch_bs))
				break;

			prefetch_block = BlockSampler_Next(&prefetch_bs);
			PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, prefetch_block);
		}
	}
#endif

	/* Outer loop over blocks to sample */
	while (BlockSampler_HasMore(&bs))
	{
		bool		block_accepted;
		BlockNumber targblock = BlockSampler_Next(&bs);
#ifdef USE_PREFETCH
		BlockNumber prefetch_targblock = InvalidBlockNumber;

		/*
		 * Make sure that every time the main BlockSampler is moved forward
		 * that our prefetch BlockSampler also gets moved forward, so that we
		 * always stay out ahead.
		 */
		if (prefetch_maximum && BlockSampler_HasMore(&prefetch_bs))
			prefetch_targblock = BlockSampler_Next(&prefetch_bs);
#endif

		vacuum_delay_point();

		block_accepted = table_scan_analyze_next_block(scan, targblock, vac_strategy);

#ifdef USE_PREFETCH

		/*
		 * When pre-fetching, after we get a block, tell the kernel about the
		 * next one we will want, if there's any left.
		 *
		 * We want to do this even if the table_scan_analyze_next_block() call
		 * above decides against analyzing the block it picked.
		 */
		if (prefetch_maximum && prefetch_targblock != InvalidBlockNumber)
			PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, prefetch_targblock);
#endif

		/*
		 * Don't analyze if table_scan_analyze_next_block() indicated this
		 * block is unsuitable for analyzing.
		 */
		if (!block_accepted)
			continue;

		while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
		{
			/*
			 * The first targrows sample rows are simply copied into the
			 * reservoir.  Then we start replacing tuples in the sample until
			 * we reach the end of the relation.  This algorithm is from Jeff
			 * Vitter's paper (see full citation in utils/misc/sampling.c). It
			 * works by repeatedly computing the number of tuples to skip
			 * before selecting a tuple, which replaces a randomly chosen
			 * element of the reservoir (current set of tuples).  At all times
			 * the reservoir is a true random sample of the tuples we've
			 * passed over so far, so when we fall off the end of the relation
			 * we're done.
			 */
			if (numrows < targrows)
				rows[numrows++] = ExecCopySlotHeapTuple(slot);
			else
			{
				/*
				 * t in Vitter's paper is the number of records already
				 * processed.  If we need to compute a new S value, we must
				 * use the not-yet-incremented value of samplerows as t.
				 */
				if (rowstoskip < 0)
					rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows);

				if (rowstoskip <= 0)
				{
					/*
					 * Found a suitable tuple, so save it, replacing one old
					 * tuple at random
					 */
					int			k = (int) (targrows * sampler_random_fract(&rstate.randstate));

					Assert(k >= 0 && k < targrows);
					heap_freetuple(rows[k]);
					rows[k] = ExecCopySlotHeapTuple(slot);
				}

				rowstoskip -= 1;
			}

			samplerows += 1;
		}

		pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_DONE,
									 ++blksdone);
	}

	ExecDropSingleTupleTableSlot(slot);
	table_endscan(scan);

	/*
	 * If we didn't find as many tuples as we wanted then we're done. No sort
	 * is needed, since they're already in order.
	 *
	 * Otherwise we need to sort the collected tuples by position
	 * (itempointer). It's not worth worrying about corner cases where the
	 * tuples are already sorted.
	 */
	if (numrows == targrows)
		qsort_interruptible(rows, numrows, sizeof(HeapTuple),
							compare_rows, NULL);

	/*
	 * Estimate total numbers of live and dead rows in relation, extrapolating
	 * on the assumption that the average tuple density in pages we didn't
	 * scan is the same as in the pages we did scan.  Since what we scanned is
	 * a random sample of the pages in the relation, this should be a good
	 * assumption.
	 *
	 * (bs.m is used as the count of pages actually examined -- NOTE(review):
	 * that relies on BlockSampler's bookkeeping; confirm against sampling.c.)
	 */
	if (bs.m > 0)
	{
		*totalrows = floor((liverows / bs.m) * totalblocks + 0.5);
		*totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5);
	}
	else
	{
		*totalrows = 0.0;
		*totaldeadrows = 0.0;
	}

	/*
	 * Emit some interesting relation info
	 */
	ereport(elevel,
			(errmsg("\"%s\": scanned %d of %u pages, "
					"containing %.0f live rows and %.0f dead rows; "
					"%d rows in sample, %.0f estimated total rows",
					RelationGetRelationName(onerel),
					bs.m, totalblocks,
					liverows, deadrows,
					numrows, *totalrows)));

	return numrows;
}
1343 :
1344 : /*
1345 : * Comparator for sorting rows[] array
1346 : */
1347 : static int
1348 3930352 : compare_rows(const void *a, const void *b, void *arg)
1349 : {
1350 3930352 : HeapTuple ha = *(const HeapTuple *) a;
1351 3930352 : HeapTuple hb = *(const HeapTuple *) b;
1352 3930352 : BlockNumber ba = ItemPointerGetBlockNumber(&ha->t_self);
1353 3930352 : OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self);
1354 3930352 : BlockNumber bb = ItemPointerGetBlockNumber(&hb->t_self);
1355 3930352 : OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self);
1356 :
1357 3930352 : if (ba < bb)
1358 877412 : return -1;
1359 3052940 : if (ba > bb)
1360 872158 : return 1;
1361 2180782 : if (oa < ob)
1362 1427748 : return -1;
1363 753034 : if (oa > ob)
1364 753034 : return 1;
1365 0 : return 0;
1366 : }
1367 :
1368 :
1369 : /*
1370 : * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree
1371 : *
1372 : * This has the same API as acquire_sample_rows, except that rows are
1373 : * collected from all inheritance children as well as the specified table.
1374 : * We fail and return zero if there are no inheritance children, or if all
1375 : * children are foreign tables that don't support ANALYZE.
1376 : */
static int
acquire_inherited_sample_rows(Relation onerel, int elevel,
							  HeapTuple *rows, int targrows,
							  double *totalrows, double *totaldeadrows)
{
	List	   *tableOIDs;
	Relation   *rels;
	AcquireSampleRowsFunc *acquirefuncs;
	double	   *relblocks;
	double		totalblocks;
	int			numrows,
				nrels,
				i;
	ListCell   *lc;
	bool		has_child;

	/* Initialize output parameters to zero now, in case we exit early */
	*totalrows = 0;
	*totaldeadrows = 0;

	/*
	 * Find all members of inheritance set.  We only need AccessShareLock on
	 * the children.
	 */
	tableOIDs =
		find_all_inheritors(RelationGetRelid(onerel), AccessShareLock, NULL);

	/*
	 * Check that there's at least one descendant, else fail.  This could
	 * happen despite analyze_rel's relhassubclass check, if table once had a
	 * child but no longer does.  In that case, we can clear the
	 * relhassubclass field so as not to make the same mistake again later.
	 * (This is safe because we hold ShareUpdateExclusiveLock.)
	 */
	if (list_length(tableOIDs) < 2)
	{
		/* CCI because we already updated the pg_class row in this command */
		CommandCounterIncrement();
		SetRelationHasSubclass(RelationGetRelid(onerel), false);
		ereport(elevel,
				(errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no child tables",
						get_namespace_name(RelationGetNamespace(onerel)),
						RelationGetRelationName(onerel))));
		return 0;
	}

	/*
	 * Identify acquirefuncs to use, and count blocks in all the relations.
	 * The result could overflow BlockNumber, so we use double arithmetic.
	 */
	rels = (Relation *) palloc(list_length(tableOIDs) * sizeof(Relation));
	acquirefuncs = (AcquireSampleRowsFunc *)
		palloc(list_length(tableOIDs) * sizeof(AcquireSampleRowsFunc));
	relblocks = (double *) palloc(list_length(tableOIDs) * sizeof(double));
	totalblocks = 0;
	nrels = 0;
	has_child = false;
	foreach(lc, tableOIDs)
	{
		Oid			childOID = lfirst_oid(lc);
		Relation	childrel;
		AcquireSampleRowsFunc acquirefunc = NULL;
		BlockNumber relpages = 0;

		/* We already got the needed lock */
		childrel = table_open(childOID, NoLock);

		/* Ignore if temp table of another backend */
		if (RELATION_IS_OTHER_TEMP(childrel))
		{
			/* ... but release the lock on it */
			Assert(childrel != onerel);
			table_close(childrel, AccessShareLock);
			continue;
		}

		/* Check table type (MATVIEW can't happen, but might as well allow) */
		if (childrel->rd_rel->relkind == RELKIND_RELATION ||
			childrel->rd_rel->relkind == RELKIND_MATVIEW)
		{
			/* Regular table, so use the regular row acquisition function */
			acquirefunc = acquire_sample_rows;
			relpages = RelationGetNumberOfBlocks(childrel);
		}
		else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
		{
			/*
			 * For a foreign table, call the FDW's hook function to see
			 * whether it supports analysis.
			 */
			FdwRoutine *fdwroutine;
			bool		ok = false;

			fdwroutine = GetFdwRoutineForRelation(childrel, false);

			if (fdwroutine->AnalyzeForeignTable != NULL)
				ok = fdwroutine->AnalyzeForeignTable(childrel,
													 &acquirefunc,
													 &relpages);

			if (!ok)
			{
				/* ignore, but release the lock on it */
				Assert(childrel != onerel);
				table_close(childrel, AccessShareLock);
				continue;
			}
		}
		else
		{
			/*
			 * ignore, but release the lock on it.  don't try to unlock the
			 * passed-in relation
			 */
			Assert(childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
			if (childrel != onerel)
				table_close(childrel, AccessShareLock);
			else
				table_close(childrel, NoLock);
			continue;
		}

		/* OK, we'll process this child */
		has_child = true;
		rels[nrels] = childrel;
		acquirefuncs[nrels] = acquirefunc;
		relblocks[nrels] = (double) relpages;
		totalblocks += (double) relpages;
		nrels++;
	}

	/*
	 * If we don't have at least one child table to consider, fail.  If the
	 * relation is a partitioned table, it's not counted as a child table.
	 */
	if (!has_child)
	{
		ereport(elevel,
				(errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no analyzable child tables",
						get_namespace_name(RelationGetNamespace(onerel)),
						RelationGetRelationName(onerel))));
		return 0;
	}

	/*
	 * Now sample rows from each relation, proportionally to its fraction of
	 * the total block count.  (This might be less than desirable if the child
	 * rels have radically different free-space percentages, but it's not
	 * clear that it's worth working harder.)
	 */
	pgstat_progress_update_param(PROGRESS_ANALYZE_CHILD_TABLES_TOTAL,
								 nrels);
	numrows = 0;
	for (i = 0; i < nrels; i++)
	{
		Relation	childrel = rels[i];
		AcquireSampleRowsFunc acquirefunc = acquirefuncs[i];
		double		childblocks = relblocks[i];

		pgstat_progress_update_param(PROGRESS_ANALYZE_CURRENT_CHILD_TABLE_RELID,
									 RelationGetRelid(childrel));

		if (childblocks > 0)
		{
			int			childtargrows;

			childtargrows = (int) rint(targrows * childblocks / totalblocks);
			/* Make sure we don't overrun due to roundoff error */
			childtargrows = Min(childtargrows, targrows - numrows);
			if (childtargrows > 0)
			{
				int			childrows;
				double		trows,
							tdrows;

				/* Fetch a random sample of the child's rows */
				childrows = (*acquirefunc) (childrel, elevel,
											rows + numrows, childtargrows,
											&trows, &tdrows);

				/* We may need to convert from child's rowtype to parent's */
				if (childrows > 0 &&
					!equalTupleDescs(RelationGetDescr(childrel),
									 RelationGetDescr(onerel)))
				{
					TupleConversionMap *map;

					map = convert_tuples_by_name(RelationGetDescr(childrel),
												 RelationGetDescr(onerel));
					if (map != NULL)
					{
						int			j;

						/* Convert each sampled tuple in place, freeing the old one */
						for (j = 0; j < childrows; j++)
						{
							HeapTuple	newtup;

							newtup = execute_attr_map_tuple(rows[numrows + j], map);
							heap_freetuple(rows[numrows + j]);
							rows[numrows + j] = newtup;
						}
						free_conversion_map(map);
					}
				}

				/* And add to counts */
				numrows += childrows;
				*totalrows += trows;
				*totaldeadrows += tdrows;
			}
		}

		/*
		 * Note: we cannot release the child-table locks, since we may have
		 * pointers to their TOAST tables in the sampled rows.
		 */
		table_close(childrel, NoLock);
		pgstat_progress_update_param(PROGRESS_ANALYZE_CHILD_TABLES_DONE,
									 i + 1);
	}

	return numrows;
}
1600 :
1601 :
1602 : /*
1603 : * update_attstats() -- update attribute statistics for one relation
1604 : *
1605 : * Statistics are stored in several places: the pg_class row for the
1606 : * relation has stats about the whole relation, and there is a
1607 : * pg_statistic row for each (non-system) attribute that has ever
1608 : * been analyzed. The pg_class values are updated by VACUUM, not here.
1609 : *
1610 : * pg_statistic rows are just added or updated normally. This means
1611 : * that pg_statistic will probably contain some deleted rows at the
1612 : * completion of a vacuum cycle, unless it happens to get vacuumed last.
1613 : *
1614 : * To keep things simple, we punt for pg_statistic, and don't try
1615 : * to compute or store rows for pg_statistic itself in pg_statistic.
1616 : * This could possibly be made to work, but it's not worth the trouble.
1617 : * Note analyze_rel() has seen to it that we won't come here when
1618 : * vacuuming pg_statistic itself.
1619 : *
1620 : * Note: there would be a race condition here if two backends could
1621 : * ANALYZE the same table concurrently. Presently, we lock that out
1622 : * by taking a self-exclusive lock on the relation in analyze_rel().
1623 : */
static void
update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
{
	Relation	sd;
	int			attno;
	CatalogIndexState indstate = NULL;

	if (natts <= 0)
		return;					/* nothing to do */

	sd = table_open(StatisticRelationId, RowExclusiveLock);

	for (attno = 0; attno < natts; attno++)
	{
		VacAttrStats *stats = vacattrstats[attno];
		HeapTuple	stup,
					oldtup;
		int			i,
					k,
					n;
		Datum		values[Natts_pg_statistic];
		bool		nulls[Natts_pg_statistic];
		bool		replaces[Natts_pg_statistic];

		/* Ignore attr if we weren't able to collect stats */
		if (!stats->stats_valid)
			continue;

		/*
		 * Construct a new pg_statistic tuple
		 */
		for (i = 0; i < Natts_pg_statistic; ++i)
		{
			nulls[i] = false;
			replaces[i] = true;
		}

		/* Fixed (non-slot) columns first */
		values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(relid);
		values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(stats->attr->attnum);
		values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh);
		values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
		values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
		values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);

		/*
		 * Then the per-slot column groups: the stakindN / staopN / stacollN /
		 * stanumbersN / stavaluesN columns are laid out consecutively in the
		 * catalog, so each group is filled by advancing i across
		 * STATISTIC_NUM_SLOTS columns.
		 */
		i = Anum_pg_statistic_stakind1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
		}
		i = Anum_pg_statistic_staop1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			values[i++] = ObjectIdGetDatum(stats->staop[k]);	/* staopN */
		}
		i = Anum_pg_statistic_stacoll1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			values[i++] = ObjectIdGetDatum(stats->stacoll[k]);	/* stacollN */
		}
		i = Anum_pg_statistic_stanumbers1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			int			nnum = stats->numnumbers[k];

			if (nnum > 0)
			{
				/* Wrap the float4 stats numbers in a float4[] array datum */
				Datum	   *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
				ArrayType  *arry;

				for (n = 0; n < nnum; n++)
					numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
				arry = construct_array_builtin(numdatums, nnum, FLOAT4OID);
				values[i++] = PointerGetDatum(arry);	/* stanumbersN */
			}
			else
			{
				/* Unused slot: store NULL */
				nulls[i] = true;
				values[i++] = (Datum) 0;
			}
		}
		i = Anum_pg_statistic_stavalues1 - 1;
		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
		{
			if (stats->numvalues[k] > 0)
			{
				ArrayType  *arry;

				/* Element type info was set up by the typanalyze function */
				arry = construct_array(stats->stavalues[k],
									   stats->numvalues[k],
									   stats->statypid[k],
									   stats->statyplen[k],
									   stats->statypbyval[k],
									   stats->statypalign[k]);
				values[i++] = PointerGetDatum(arry);	/* stavaluesN */
			}
			else
			{
				nulls[i] = true;
				values[i++] = (Datum) 0;
			}
		}

		/* Is there already a pg_statistic tuple for this attribute? */
		oldtup = SearchSysCache3(STATRELATTINH,
								 ObjectIdGetDatum(relid),
								 Int16GetDatum(stats->attr->attnum),
								 BoolGetDatum(inh));

		/* Open index information when we know we need it */
		if (indstate == NULL)
			indstate = CatalogOpenIndexes(sd);

		if (HeapTupleIsValid(oldtup))
		{
			/* Yes, replace it */
			stup = heap_modify_tuple(oldtup,
									 RelationGetDescr(sd),
									 values,
									 nulls,
									 replaces);
			ReleaseSysCache(oldtup);
			CatalogTupleUpdateWithInfo(sd, &stup->t_self, stup, indstate);
		}
		else
		{
			/* No, insert new tuple */
			stup = heap_form_tuple(RelationGetDescr(sd), values, nulls);
			CatalogTupleInsertWithInfo(sd, stup, indstate);
		}

		heap_freetuple(stup);
	}

	if (indstate != NULL)
		CatalogCloseIndexes(indstate);
	table_close(sd, RowExclusiveLock);
}
1760 :
1761 : /*
1762 : * Standard fetch function for use by compute_stats subroutines.
1763 : *
1764 : * This exists to provide some insulation between compute_stats routines
1765 : * and the actual storage of the sample data.
1766 : */
1767 : static Datum
1768 232781572 : std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
1769 : {
1770 232781572 : int attnum = stats->tupattnum;
1771 232781572 : HeapTuple tuple = stats->rows[rownum];
1772 232781572 : TupleDesc tupDesc = stats->tupDesc;
1773 :
1774 232781572 : return heap_getattr(tuple, attnum, tupDesc, isNull);
1775 : }
1776 :
1777 : /*
1778 : * Fetch function for analyzing index expressions.
1779 : *
1780 : * We have not bothered to construct index tuples, instead the data is
1781 : * just in Datum arrays.
1782 : */
1783 : static Datum
1784 154430 : ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
1785 : {
1786 : int i;
1787 :
1788 : /* exprvals and exprnulls are already offset for proper column */
1789 154430 : i = rownum * stats->rowstride;
1790 154430 : *isNull = stats->exprnulls[i];
1791 154430 : return stats->exprvals[i];
1792 : }
1793 :
1794 :
1795 : /*==========================================================================
1796 : *
1797 : * Code below this point represents the "standard" type-specific statistics
1798 : * analysis algorithms. This code can be replaced on a per-data-type basis
1799 : * by setting a nonzero value in pg_type.typanalyze.
1800 : *
1801 : *==========================================================================
1802 : */
1803 :
1804 :
1805 : /*
1806 : * To avoid consuming too much memory during analysis and/or too much space
1807 : * in the resulting pg_statistic rows, we ignore varlena datums that are wider
1808 : * than WIDTH_THRESHOLD (after detoasting!). This is legitimate for MCV
1809 : * and distinct-value calculations since a wide value is unlikely to be
1810 : * duplicated at all, much less be a most-common value. For the same reason,
1811 : * ignoring wide values will not affect our estimates of histogram bin
1812 : * boundaries very much.
1813 : */
#define WIDTH_THRESHOLD 1024

/*
 * Swap helpers for the MCV track list.  NB: these macros evaluate their
 * arguments multiple times; callers must pass only simple lvalues (as the
 * uses in compute_distinct_stats do).
 */
#define swapInt(a,b)	do {int _tmp; _tmp=a; a=b; b=_tmp;} while(0)
#define swapDatum(a,b)	do {Datum _tmp; _tmp=a; a=b; b=_tmp;} while(0)

/*
 * Extra information used by the default analysis routines
 */
typedef struct
{
	int			count;			/* # of duplicates */
	int			first;			/* values[] index of first occurrence */
} ScalarMCVItem;

/* Callback context for compare_scalars() during the stats sort */
typedef struct
{
	SortSupport ssup;			/* sort support for the column's "<" op */
	int		   *tupnoLink;		/* duplicate-chain links, see compute_scalar_stats */
} CompareScalarsContext;


static void compute_trivial_stats(VacAttrStatsP stats,
								  AnalyzeAttrFetchFunc fetchfunc,
								  int samplerows,
								  double totalrows);
static void compute_distinct_stats(VacAttrStatsP stats,
								   AnalyzeAttrFetchFunc fetchfunc,
								   int samplerows,
								   double totalrows);
static void compute_scalar_stats(VacAttrStatsP stats,
								 AnalyzeAttrFetchFunc fetchfunc,
								 int samplerows,
								 double totalrows);
static int	compare_scalars(const void *a, const void *b, void *arg);
static int	compare_mcvs(const void *a, const void *b, void *arg);
static int	analyze_mcv_list(int *mcv_counts,
							 int num_mcv,
							 double stadistinct,
							 double stanullfrac,
							 int samplerows,
							 double totalrows);
1855 :
1856 :
1857 : /*
1858 : * std_typanalyze -- the default type-specific typanalyze function
1859 : */
1860 : bool
1861 392362 : std_typanalyze(VacAttrStats *stats)
1862 : {
1863 392362 : Form_pg_attribute attr = stats->attr;
1864 : Oid ltopr;
1865 : Oid eqopr;
1866 : StdAnalyzeData *mystats;
1867 :
1868 : /* If the attstattarget column is negative, use the default value */
1869 : /* NB: it is okay to scribble on stats->attr since it's a copy */
1870 392362 : if (attr->attstattarget < 0)
1871 391780 : attr->attstattarget = default_statistics_target;
1872 :
1873 : /* Look for default "<" and "=" operators for column's type */
1874 392362 : get_sort_group_operators(stats->attrtypid,
1875 : false, false, false,
1876 : <opr, &eqopr, NULL,
1877 : NULL);
1878 :
1879 : /* Save the operator info for compute_stats routines */
1880 392362 : mystats = (StdAnalyzeData *) palloc(sizeof(StdAnalyzeData));
1881 392362 : mystats->eqopr = eqopr;
1882 392362 : mystats->eqfunc = OidIsValid(eqopr) ? get_opcode(eqopr) : InvalidOid;
1883 392362 : mystats->ltopr = ltopr;
1884 392362 : stats->extra_data = mystats;
1885 :
1886 : /*
1887 : * Determine which standard statistics algorithm to use
1888 : */
1889 392362 : if (OidIsValid(eqopr) && OidIsValid(ltopr))
1890 : {
1891 : /* Seems to be a scalar datatype */
1892 379260 : stats->compute_stats = compute_scalar_stats;
1893 : /*--------------------
1894 : * The following choice of minrows is based on the paper
1895 : * "Random sampling for histogram construction: how much is enough?"
1896 : * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in
1897 : * Proceedings of ACM SIGMOD International Conference on Management
1898 : * of Data, 1998, Pages 436-447. Their Corollary 1 to Theorem 5
1899 : * says that for table size n, histogram size k, maximum relative
1900 : * error in bin size f, and error probability gamma, the minimum
1901 : * random sample size is
1902 : * r = 4 * k * ln(2*n/gamma) / f^2
1903 : * Taking f = 0.5, gamma = 0.01, n = 10^6 rows, we obtain
1904 : * r = 305.82 * k
1905 : * Note that because of the log function, the dependence on n is
1906 : * quite weak; even at n = 10^12, a 300*k sample gives <= 0.66
1907 : * bin size error with probability 0.99. So there's no real need to
1908 : * scale for n, which is a good thing because we don't necessarily
1909 : * know it at this point.
1910 : *--------------------
1911 : */
1912 379260 : stats->minrows = 300 * attr->attstattarget;
1913 : }
1914 13102 : else if (OidIsValid(eqopr))
1915 : {
1916 : /* We can still recognize distinct values */
1917 11644 : stats->compute_stats = compute_distinct_stats;
1918 : /* Might as well use the same minrows as above */
1919 11644 : stats->minrows = 300 * attr->attstattarget;
1920 : }
1921 : else
1922 : {
1923 : /* Can't do much but the trivial stuff */
1924 1458 : stats->compute_stats = compute_trivial_stats;
1925 : /* Might as well use the same minrows as above */
1926 1458 : stats->minrows = 300 * attr->attstattarget;
1927 : }
1928 :
1929 392362 : return true;
1930 : }
1931 :
1932 :
1933 : /*
1934 : * compute_trivial_stats() -- compute very basic column statistics
1935 : *
1936 : * We use this when we cannot find a hash "=" operator for the datatype.
1937 : *
1938 : * We determine the fraction of non-null rows and the average datum width.
1939 : */
1940 : static void
1941 814 : compute_trivial_stats(VacAttrStatsP stats,
1942 : AnalyzeAttrFetchFunc fetchfunc,
1943 : int samplerows,
1944 : double totalrows)
1945 : {
1946 : int i;
1947 814 : int null_cnt = 0;
1948 814 : int nonnull_cnt = 0;
1949 814 : double total_width = 0;
1950 1628 : bool is_varlena = (!stats->attrtype->typbyval &&
1951 814 : stats->attrtype->typlen == -1);
1952 1628 : bool is_varwidth = (!stats->attrtype->typbyval &&
1953 814 : stats->attrtype->typlen < 0);
1954 :
1955 2598276 : for (i = 0; i < samplerows; i++)
1956 : {
1957 : Datum value;
1958 : bool isnull;
1959 :
1960 2597462 : vacuum_delay_point();
1961 :
1962 2597462 : value = fetchfunc(stats, i, &isnull);
1963 :
1964 : /* Check for null/nonnull */
1965 2597462 : if (isnull)
1966 : {
1967 2036746 : null_cnt++;
1968 2036746 : continue;
1969 : }
1970 560716 : nonnull_cnt++;
1971 :
1972 : /*
1973 : * If it's a variable-width field, add up widths for average width
1974 : * calculation. Note that if the value is toasted, we use the toasted
1975 : * width. We don't bother with this calculation if it's a fixed-width
1976 : * type.
1977 : */
1978 560716 : if (is_varlena)
1979 : {
1980 125316 : total_width += VARSIZE_ANY(DatumGetPointer(value));
1981 : }
1982 435400 : else if (is_varwidth)
1983 : {
1984 : /* must be cstring */
1985 0 : total_width += strlen(DatumGetCString(value)) + 1;
1986 : }
1987 : }
1988 :
1989 : /* We can only compute average width if we found some non-null values. */
1990 814 : if (nonnull_cnt > 0)
1991 : {
1992 162 : stats->stats_valid = true;
1993 : /* Do the simple null-frac and width stats */
1994 162 : stats->stanullfrac = (double) null_cnt / (double) samplerows;
1995 162 : if (is_varwidth)
1996 64 : stats->stawidth = total_width / (double) nonnull_cnt;
1997 : else
1998 98 : stats->stawidth = stats->attrtype->typlen;
1999 162 : stats->stadistinct = 0.0; /* "unknown" */
2000 : }
2001 652 : else if (null_cnt > 0)
2002 : {
2003 : /* We found only nulls; assume the column is entirely null */
2004 652 : stats->stats_valid = true;
2005 652 : stats->stanullfrac = 1.0;
2006 652 : if (is_varwidth)
2007 652 : stats->stawidth = 0; /* "unknown" */
2008 : else
2009 0 : stats->stawidth = stats->attrtype->typlen;
2010 652 : stats->stadistinct = 0.0; /* "unknown" */
2011 : }
2012 814 : }
2013 :
2014 :
2015 : /*
2016 : * compute_distinct_stats() -- compute column statistics including ndistinct
2017 : *
2018 : * We use this when we can find only an "=" operator for the datatype.
2019 : *
2020 : * We determine the fraction of non-null rows, the average width, the
2021 : * most common values, and the (estimated) number of distinct values.
2022 : *
2023 : * The most common values are determined by brute force: we keep a list
2024 : * of previously seen values, ordered by number of times seen, as we scan
2025 : * the samples. A newly seen value is inserted just after the last
2026 : * multiply-seen value, causing the bottommost (oldest) singly-seen value
2027 : * to drop off the list. The accuracy of this method, and also its cost,
2028 : * depend mainly on the length of the list we are willing to keep.
2029 : */
static void
compute_distinct_stats(VacAttrStatsP stats,
					   AnalyzeAttrFetchFunc fetchfunc,
					   int samplerows,
					   double totalrows)
{
	int			i;
	int			null_cnt = 0;
	int			nonnull_cnt = 0;
	int			toowide_cnt = 0;
	double		total_width = 0;
	bool		is_varlena = (!stats->attrtype->typbyval &&
							  stats->attrtype->typlen == -1);
	bool		is_varwidth = (!stats->attrtype->typbyval &&
							   stats->attrtype->typlen < 0);
	FmgrInfo	f_cmpeq;
	typedef struct
	{
		Datum		value;
		int			count;
	} TrackItem;
	TrackItem  *track;			/* candidate MCVs, ordered by descending count */
	int			track_cnt,
				track_max;
	int			num_mcv = stats->attr->attstattarget;
	StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;

	/*
	 * We track up to 2*n values for an n-element MCV list; but at least 10
	 */
	track_max = 2 * num_mcv;
	if (track_max < 10)
		track_max = 10;
	track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
	track_cnt = 0;

	fmgr_info(mystats->eqfunc, &f_cmpeq);

	for (i = 0; i < samplerows; i++)
	{
		Datum		value;
		bool		isnull;
		bool		match;
		int			firstcount1,
					j;

		vacuum_delay_point();

		value = fetchfunc(stats, i, &isnull);

		/* Check for null/nonnull */
		if (isnull)
		{
			null_cnt++;
			continue;
		}
		nonnull_cnt++;

		/*
		 * If it's a variable-width field, add up widths for average width
		 * calculation.  Note that if the value is toasted, we use the toasted
		 * width.  We don't bother with this calculation if it's a fixed-width
		 * type.
		 */
		if (is_varlena)
		{
			total_width += VARSIZE_ANY(DatumGetPointer(value));

			/*
			 * If the value is toasted, we want to detoast it just once to
			 * avoid repeated detoastings and resultant excess memory usage
			 * during the comparisons.  Also, check to see if the value is
			 * excessively wide, and if so don't detoast at all --- just
			 * ignore the value.
			 */
			if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
			{
				toowide_cnt++;
				continue;
			}
			value = PointerGetDatum(PG_DETOAST_DATUM(value));
		}
		else if (is_varwidth)
		{
			/* must be cstring */
			total_width += strlen(DatumGetCString(value)) + 1;
		}

		/*
		 * See if the value matches anything we're already tracking.
		 * firstcount1 ends up as the index of the first track entry whose
		 * count is 1 (i.e. the head of the singly-seen tail of the list);
		 * that's where a newly seen value will be inserted.
		 */
		match = false;
		firstcount1 = track_cnt;
		for (j = 0; j < track_cnt; j++)
		{
			if (DatumGetBool(FunctionCall2Coll(&f_cmpeq,
											   stats->attrcollid,
											   value, track[j].value)))
			{
				match = true;
				break;
			}
			if (j < firstcount1 && track[j].count == 1)
				firstcount1 = j;
		}

		if (match)
		{
			/* Found a match */
			track[j].count++;
			/* This value may now need to "bubble up" in the track list */
			while (j > 0 && track[j].count > track[j - 1].count)
			{
				swapDatum(track[j].value, track[j - 1].value);
				swapInt(track[j].count, track[j - 1].count);
				j--;
			}
		}
		else
		{
			/* No match.  Insert at head of count-1 list */
			if (track_cnt < track_max)
				track_cnt++;
			/* shift the count-1 entries down one slot to make room ... */
			for (j = track_cnt - 1; j > firstcount1; j--)
			{
				track[j].value = track[j - 1].value;
				track[j].count = track[j - 1].count;
			}
			/* ... dropping the oldest singly-seen value if the list was full */
			if (firstcount1 < track_cnt)
			{
				track[firstcount1].value = value;
				track[firstcount1].count = 1;
			}
		}
	}

	/* We can only compute real stats if we found some non-null values. */
	if (nonnull_cnt > 0)
	{
		int			nmultiple,
					summultiple;

		stats->stats_valid = true;
		/* Do the simple null-frac and width stats */
		stats->stanullfrac = (double) null_cnt / (double) samplerows;
		if (is_varwidth)
			stats->stawidth = total_width / (double) nonnull_cnt;
		else
			stats->stawidth = stats->attrtype->typlen;

		/* Count the number of values we found multiple times */
		summultiple = 0;
		for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
		{
			if (track[nmultiple].count == 1)
				break;
			summultiple += track[nmultiple].count;
		}

		if (nmultiple == 0)
		{
			/*
			 * If we found no repeated non-null values, assume it's a unique
			 * column; but be sure to discount for any nulls we found.
			 */
			stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
		}
		else if (track_cnt < track_max && toowide_cnt == 0 &&
				 nmultiple == track_cnt)
		{
			/*
			 * Our track list includes every value in the sample, and every
			 * value appeared more than once.  Assume the column has just
			 * these values.  (This case is meant to address columns with
			 * small, fixed sets of possible values, such as boolean or enum
			 * columns.  If there are any values that appear just once in the
			 * sample, including too-wide values, we should assume that that's
			 * not what we're dealing with.)
			 */
			stats->stadistinct = track_cnt;
		}
		else
		{
			/*----------
			 * Estimate the number of distinct values using the estimator
			 * proposed by Haas and Stokes in IBM Research Report RJ 10025:
			 *		n*d / (n - f1 + f1*n/N)
			 * where f1 is the number of distinct values that occurred
			 * exactly once in our sample of n rows (from a total of N),
			 * and d is the total number of distinct values in the sample.
			 * This is their Duj1 estimator; the other estimators they
			 * recommend are considerably more complex, and are numerically
			 * very unstable when n is much smaller than N.
			 *
			 * In this calculation, we consider only non-nulls.  We used to
			 * include rows with null values in the n and N counts, but that
			 * leads to inaccurate answers in columns with many nulls, and
			 * it's intuitively bogus anyway considering the desired result is
			 * the number of distinct non-null values.
			 *
			 * We assume (not very reliably!) that all the multiply-occurring
			 * values are reflected in the final track[] list, and the other
			 * nonnull values all appeared but once.  (XXX this usually
			 * results in a drastic overestimate of ndistinct.  Can we do
			 * any better?)
			 *----------
			 */
			int			f1 = nonnull_cnt - summultiple;
			int			d = f1 + nmultiple;
			double		n = samplerows - null_cnt;
			double		N = totalrows * (1.0 - stats->stanullfrac);
			double		stadistinct;

			/* N == 0 shouldn't happen, but just in case ... */
			if (N > 0)
				stadistinct = (n * d) / ((n - f1) + f1 * n / N);
			else
				stadistinct = 0;

			/* Clamp to sane range in case of roundoff error */
			if (stadistinct < d)
				stadistinct = d;
			if (stadistinct > N)
				stadistinct = N;
			/* And round to integer */
			stats->stadistinct = floor(stadistinct + 0.5);
		}

		/*
		 * If we estimated the number of distinct values at more than 10% of
		 * the total row count (a very arbitrary limit), then assume that
		 * stadistinct should scale with the row count rather than be a fixed
		 * value.
		 */
		if (stats->stadistinct > 0.1 * totalrows)
			stats->stadistinct = -(stats->stadistinct / totalrows);

		/*
		 * Decide how many values are worth storing as most-common values. If
		 * we are able to generate a complete MCV list (all the values in the
		 * sample will fit, and we think these are all the ones in the table),
		 * then do so.  Otherwise, store only those values that are
		 * significantly more common than the values not in the list.
		 *
		 * Note: the first of these cases is meant to address columns with
		 * small, fixed sets of possible values, such as boolean or enum
		 * columns.  If we can *completely* represent the column population by
		 * an MCV list that will fit into the stats target, then we should do
		 * so and thus provide the planner with complete information.  But if
		 * the MCV list is not complete, it's generally worth being more
		 * selective, and not just filling it all the way up to the stats
		 * target.
		 */
		if (track_cnt < track_max && toowide_cnt == 0 &&
			stats->stadistinct > 0 &&
			track_cnt <= num_mcv)
		{
			/* Track list includes all values seen, and all will fit */
			num_mcv = track_cnt;
		}
		else
		{
			int		   *mcv_counts;

			/* Incomplete list; decide how many values are worth keeping */
			if (num_mcv > track_cnt)
				num_mcv = track_cnt;

			if (num_mcv > 0)
			{
				mcv_counts = (int *) palloc(num_mcv * sizeof(int));
				for (i = 0; i < num_mcv; i++)
					mcv_counts[i] = track[i].count;

				num_mcv = analyze_mcv_list(mcv_counts, num_mcv,
										   stats->stadistinct,
										   stats->stanullfrac,
										   samplerows, totalrows);
			}
		}

		/* Generate MCV slot entry */
		if (num_mcv > 0)
		{
			MemoryContext old_context;
			Datum	   *mcv_values;
			float4	   *mcv_freqs;

			/* Must copy the target values into anl_context */
			old_context = MemoryContextSwitchTo(stats->anl_context);
			mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
			mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
			for (i = 0; i < num_mcv; i++)
			{
				mcv_values[i] = datumCopy(track[i].value,
										  stats->attrtype->typbyval,
										  stats->attrtype->typlen);
				mcv_freqs[i] = (double) track[i].count / (double) samplerows;
			}
			MemoryContextSwitchTo(old_context);

			stats->stakind[0] = STATISTIC_KIND_MCV;
			stats->staop[0] = mystats->eqopr;
			stats->stacoll[0] = stats->attrcollid;
			stats->stanumbers[0] = mcv_freqs;
			stats->numnumbers[0] = num_mcv;
			stats->stavalues[0] = mcv_values;
			stats->numvalues[0] = num_mcv;

			/*
			 * Accept the defaults for stats->statypid and others. They have
			 * been set before we were called (see vacuum.h)
			 */
		}
	}
	else if (null_cnt > 0)
	{
		/* We found only nulls; assume the column is entirely null */
		stats->stats_valid = true;
		stats->stanullfrac = 1.0;
		if (is_varwidth)
			stats->stawidth = 0;	/* "unknown" */
		else
			stats->stawidth = stats->attrtype->typlen;
		stats->stadistinct = 0.0;	/* "unknown" */
	}

	/* We don't need to bother cleaning up any of our temporary palloc's */
}
2359 :
2360 :
2361 : /*
2362 : * compute_scalar_stats() -- compute column statistics
2363 : *
2364 : * We use this when we can find "=" and "<" operators for the datatype.
2365 : *
2366 : * We determine the fraction of non-null rows, the average width, the
2367 : * most common values, the (estimated) number of distinct values, the
2368 : * distribution histogram, and the correlation of physical to logical order.
2369 : *
2370 : * The desired stats can be determined fairly easily after sorting the
2371 : * data values into order.
2372 : */
2373 : static void
2374 267514 : compute_scalar_stats(VacAttrStatsP stats,
2375 : AnalyzeAttrFetchFunc fetchfunc,
2376 : int samplerows,
2377 : double totalrows)
2378 : {
2379 : int i;
2380 267514 : int null_cnt = 0;
2381 267514 : int nonnull_cnt = 0;
2382 267514 : int toowide_cnt = 0;
2383 267514 : double total_width = 0;
2384 330782 : bool is_varlena = (!stats->attrtype->typbyval &&
2385 63268 : stats->attrtype->typlen == -1);
2386 330782 : bool is_varwidth = (!stats->attrtype->typbyval &&
2387 63268 : stats->attrtype->typlen < 0);
2388 : double corr_xysum;
2389 : SortSupportData ssup;
2390 : ScalarItem *values;
2391 267514 : int values_cnt = 0;
2392 : int *tupnoLink;
2393 : ScalarMCVItem *track;
2394 267514 : int track_cnt = 0;
2395 267514 : int num_mcv = stats->attr->attstattarget;
2396 267514 : int num_bins = stats->attr->attstattarget;
2397 267514 : StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
2398 :
2399 267514 : values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem));
2400 267514 : tupnoLink = (int *) palloc(samplerows * sizeof(int));
2401 267514 : track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
2402 :
2403 267514 : memset(&ssup, 0, sizeof(ssup));
2404 267514 : ssup.ssup_cxt = CurrentMemoryContext;
2405 267514 : ssup.ssup_collation = stats->attrcollid;
2406 267514 : ssup.ssup_nulls_first = false;
2407 :
2408 : /*
2409 : * For now, don't perform abbreviated key conversion, because full values
2410 : * are required for MCV slot generation. Supporting that optimization
2411 : * would necessitate teaching compare_scalars() to call a tie-breaker.
2412 : */
2413 267514 : ssup.abbreviate = false;
2414 :
2415 267514 : PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);
2416 :
2417 : /* Initial scan to find sortable values */
2418 209251352 : for (i = 0; i < samplerows; i++)
2419 : {
2420 : Datum value;
2421 : bool isnull;
2422 :
2423 208983838 : vacuum_delay_point();
2424 :
2425 208983838 : value = fetchfunc(stats, i, &isnull);
2426 :
2427 : /* Check for null/nonnull */
2428 208983838 : if (isnull)
2429 : {
2430 27156664 : null_cnt++;
2431 27286710 : continue;
2432 : }
2433 181827174 : nonnull_cnt++;
2434 :
2435 : /*
2436 : * If it's a variable-width field, add up widths for average width
2437 : * calculation. Note that if the value is toasted, we use the toasted
2438 : * width. We don't bother with this calculation if it's a fixed-width
2439 : * type.
2440 : */
2441 181827174 : if (is_varlena)
2442 : {
2443 15920786 : total_width += VARSIZE_ANY(DatumGetPointer(value));
2444 :
2445 : /*
2446 : * If the value is toasted, we want to detoast it just once to
2447 : * avoid repeated detoastings and resultant excess memory usage
2448 : * during the comparisons. Also, check to see if the value is
2449 : * excessively wide, and if so don't detoast at all --- just
2450 : * ignore the value.
2451 : */
2452 15920786 : if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
2453 : {
2454 130046 : toowide_cnt++;
2455 130046 : continue;
2456 : }
2457 15790740 : value = PointerGetDatum(PG_DETOAST_DATUM(value));
2458 : }
2459 165906388 : else if (is_varwidth)
2460 : {
2461 : /* must be cstring */
2462 0 : total_width += strlen(DatumGetCString(value)) + 1;
2463 : }
2464 :
2465 : /* Add it to the list to be sorted */
2466 181697128 : values[values_cnt].value = value;
2467 181697128 : values[values_cnt].tupno = values_cnt;
2468 181697128 : tupnoLink[values_cnt] = values_cnt;
2469 181697128 : values_cnt++;
2470 : }
2471 :
2472 : /* We can only compute real stats if we found some sortable values. */
2473 267514 : if (values_cnt > 0)
2474 : {
2475 : int ndistinct, /* # distinct values in sample */
2476 : nmultiple, /* # that appear multiple times */
2477 : num_hist,
2478 : dups_cnt;
2479 250540 : int slot_idx = 0;
2480 : CompareScalarsContext cxt;
2481 :
2482 : /* Sort the collected values */
2483 250540 : cxt.ssup = &ssup;
2484 250540 : cxt.tupnoLink = tupnoLink;
2485 250540 : qsort_interruptible(values, values_cnt, sizeof(ScalarItem),
2486 : compare_scalars, &cxt);
2487 :
2488 : /*
2489 : * Now scan the values in order, find the most common ones, and also
2490 : * accumulate ordering-correlation statistics.
2491 : *
2492 : * To determine which are most common, we first have to count the
2493 : * number of duplicates of each value. The duplicates are adjacent in
2494 : * the sorted list, so a brute-force approach is to compare successive
2495 : * datum values until we find two that are not equal. However, that
2496 : * requires N-1 invocations of the datum comparison routine, which are
2497 : * completely redundant with work that was done during the sort. (The
2498 : * sort algorithm must at some point have compared each pair of items
2499 : * that are adjacent in the sorted order; otherwise it could not know
2500 : * that it's ordered the pair correctly.) We exploit this by having
2501 : * compare_scalars remember the highest tupno index that each
2502 : * ScalarItem has been found equal to. At the end of the sort, a
2503 : * ScalarItem's tupnoLink will still point to itself if and only if it
2504 : * is the last item of its group of duplicates (since the group will
2505 : * be ordered by tupno).
2506 : */
2507 250540 : corr_xysum = 0;
2508 250540 : ndistinct = 0;
2509 250540 : nmultiple = 0;
2510 250540 : dups_cnt = 0;
2511 181947668 : for (i = 0; i < values_cnt; i++)
2512 : {
2513 181697128 : int tupno = values[i].tupno;
2514 :
2515 181697128 : corr_xysum += ((double) i) * ((double) tupno);
2516 181697128 : dups_cnt++;
2517 181697128 : if (tupnoLink[tupno] == tupno)
2518 : {
2519 : /* Reached end of duplicates of this value */
2520 30628900 : ndistinct++;
2521 30628900 : if (dups_cnt > 1)
2522 : {
2523 3252712 : nmultiple++;
2524 3252712 : if (track_cnt < num_mcv ||
2525 1094810 : dups_cnt > track[track_cnt - 1].count)
2526 : {
2527 : /*
2528 : * Found a new item for the mcv list; find its
2529 : * position, bubbling down old items if needed. Loop
2530 : * invariant is that j points at an empty/ replaceable
2531 : * slot.
2532 : */
2533 : int j;
2534 :
2535 2519890 : if (track_cnt < num_mcv)
2536 2157902 : track_cnt++;
2537 38333068 : for (j = track_cnt - 1; j > 0; j--)
2538 : {
2539 38040692 : if (dups_cnt <= track[j - 1].count)
2540 2227514 : break;
2541 35813178 : track[j].count = track[j - 1].count;
2542 35813178 : track[j].first = track[j - 1].first;
2543 : }
2544 2519890 : track[j].count = dups_cnt;
2545 2519890 : track[j].first = i + 1 - dups_cnt;
2546 : }
2547 : }
2548 30628900 : dups_cnt = 0;
2549 : }
2550 : }
2551 :
2552 250540 : stats->stats_valid = true;
2553 : /* Do the simple null-frac and width stats */
2554 250540 : stats->stanullfrac = (double) null_cnt / (double) samplerows;
2555 250540 : if (is_varwidth)
2556 32656 : stats->stawidth = total_width / (double) nonnull_cnt;
2557 : else
2558 217884 : stats->stawidth = stats->attrtype->typlen;
2559 :
2560 250540 : if (nmultiple == 0)
2561 : {
2562 : /*
2563 : * If we found no repeated non-null values, assume it's a unique
2564 : * column; but be sure to discount for any nulls we found.
2565 : */
2566 61004 : stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
2567 : }
2568 189536 : else if (toowide_cnt == 0 && nmultiple == ndistinct)
2569 : {
2570 : /*
2571 : * Every value in the sample appeared more than once. Assume the
2572 : * column has just these values. (This case is meant to address
2573 : * columns with small, fixed sets of possible values, such as
2574 : * boolean or enum columns. If there are any values that appear
2575 : * just once in the sample, including too-wide values, we should
2576 : * assume that that's not what we're dealing with.)
2577 : */
2578 109974 : stats->stadistinct = ndistinct;
2579 : }
2580 : else
2581 : {
2582 : /*----------
2583 : * Estimate the number of distinct values using the estimator
2584 : * proposed by Haas and Stokes in IBM Research Report RJ 10025:
2585 : * n*d / (n - f1 + f1*n/N)
2586 : * where f1 is the number of distinct values that occurred
2587 : * exactly once in our sample of n rows (from a total of N),
2588 : * and d is the total number of distinct values in the sample.
2589 : * This is their Duj1 estimator; the other estimators they
2590 : * recommend are considerably more complex, and are numerically
2591 : * very unstable when n is much smaller than N.
2592 : *
2593 : * In this calculation, we consider only non-nulls. We used to
2594 : * include rows with null values in the n and N counts, but that
2595 : * leads to inaccurate answers in columns with many nulls, and
2596 : * it's intuitively bogus anyway considering the desired result is
2597 : * the number of distinct non-null values.
2598 : *
2599 : * Overwidth values are assumed to have been distinct.
2600 : *----------
2601 : */
2602 79562 : int f1 = ndistinct - nmultiple + toowide_cnt;
2603 79562 : int d = f1 + nmultiple;
2604 79562 : double n = samplerows - null_cnt;
2605 79562 : double N = totalrows * (1.0 - stats->stanullfrac);
2606 : double stadistinct;
2607 :
2608 : /* N == 0 shouldn't happen, but just in case ... */
2609 79562 : if (N > 0)
2610 79562 : stadistinct = (n * d) / ((n - f1) + f1 * n / N);
2611 : else
2612 0 : stadistinct = 0;
2613 :
2614 : /* Clamp to sane range in case of roundoff error */
2615 79562 : if (stadistinct < d)
2616 2062 : stadistinct = d;
2617 79562 : if (stadistinct > N)
2618 0 : stadistinct = N;
2619 : /* And round to integer */
2620 79562 : stats->stadistinct = floor(stadistinct + 0.5);
2621 : }
2622 :
2623 : /*
2624 : * If we estimated the number of distinct values at more than 10% of
2625 : * the total row count (a very arbitrary limit), then assume that
2626 : * stadistinct should scale with the row count rather than be a fixed
2627 : * value.
2628 : */
2629 250540 : if (stats->stadistinct > 0.1 * totalrows)
2630 53222 : stats->stadistinct = -(stats->stadistinct / totalrows);
2631 :
2632 : /*
2633 : * Decide how many values are worth storing as most-common values. If
2634 : * we are able to generate a complete MCV list (all the values in the
2635 : * sample will fit, and we think these are all the ones in the table),
2636 : * then do so. Otherwise, store only those values that are
2637 : * significantly more common than the values not in the list.
2638 : *
2639 : * Note: the first of these cases is meant to address columns with
2640 : * small, fixed sets of possible values, such as boolean or enum
2641 : * columns. If we can *completely* represent the column population by
2642 : * an MCV list that will fit into the stats target, then we should do
2643 : * so and thus provide the planner with complete information. But if
2644 : * the MCV list is not complete, it's generally worth being more
2645 : * selective, and not just filling it all the way up to the stats
2646 : * target.
2647 : */
2648 250540 : if (track_cnt == ndistinct && toowide_cnt == 0 &&
2649 109340 : stats->stadistinct > 0 &&
2650 : track_cnt <= num_mcv)
2651 : {
2652 : /* Track list includes all values seen, and all will fit */
2653 101670 : num_mcv = track_cnt;
2654 : }
2655 : else
2656 : {
2657 : int *mcv_counts;
2658 :
2659 : /* Incomplete list; decide how many values are worth keeping */
2660 148870 : if (num_mcv > track_cnt)
2661 139798 : num_mcv = track_cnt;
2662 :
2663 148870 : if (num_mcv > 0)
2664 : {
2665 87866 : mcv_counts = (int *) palloc(num_mcv * sizeof(int));
2666 1844356 : for (i = 0; i < num_mcv; i++)
2667 1756490 : mcv_counts[i] = track[i].count;
2668 :
2669 87866 : num_mcv = analyze_mcv_list(mcv_counts, num_mcv,
2670 87866 : stats->stadistinct,
2671 87866 : stats->stanullfrac,
2672 : samplerows, totalrows);
2673 : }
2674 : }
2675 :
2676 : /* Generate MCV slot entry */
2677 250540 : if (num_mcv > 0)
2678 : {
2679 : MemoryContext old_context;
2680 : Datum *mcv_values;
2681 : float4 *mcv_freqs;
2682 :
2683 : /* Must copy the target values into anl_context */
2684 189482 : old_context = MemoryContextSwitchTo(stats->anl_context);
2685 189482 : mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
2686 189482 : mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
2687 2347196 : for (i = 0; i < num_mcv; i++)
2688 : {
2689 4315428 : mcv_values[i] = datumCopy(values[track[i].first].value,
2690 2157714 : stats->attrtype->typbyval,
2691 2157714 : stats->attrtype->typlen);
2692 2157714 : mcv_freqs[i] = (double) track[i].count / (double) samplerows;
2693 : }
2694 189482 : MemoryContextSwitchTo(old_context);
2695 :
2696 189482 : stats->stakind[slot_idx] = STATISTIC_KIND_MCV;
2697 189482 : stats->staop[slot_idx] = mystats->eqopr;
2698 189482 : stats->stacoll[slot_idx] = stats->attrcollid;
2699 189482 : stats->stanumbers[slot_idx] = mcv_freqs;
2700 189482 : stats->numnumbers[slot_idx] = num_mcv;
2701 189482 : stats->stavalues[slot_idx] = mcv_values;
2702 189482 : stats->numvalues[slot_idx] = num_mcv;
2703 :
2704 : /*
2705 : * Accept the defaults for stats->statypid and others. They have
2706 : * been set before we were called (see vacuum.h)
2707 : */
2708 189482 : slot_idx++;
2709 : }
2710 :
2711 : /*
2712 : * Generate a histogram slot entry if there are at least two distinct
2713 : * values not accounted for in the MCV list. (This ensures the
2714 : * histogram won't collapse to empty or a singleton.)
2715 : */
2716 250540 : num_hist = ndistinct - num_mcv;
2717 250540 : if (num_hist > num_bins)
2718 34868 : num_hist = num_bins + 1;
2719 250540 : if (num_hist >= 2)
2720 : {
2721 : MemoryContext old_context;
2722 : Datum *hist_values;
2723 : int nvals;
2724 : int pos,
2725 : posfrac,
2726 : delta,
2727 : deltafrac;
2728 :
2729 : /* Sort the MCV items into position order to speed next loop */
2730 106664 : qsort_interruptible(track, num_mcv, sizeof(ScalarMCVItem),
2731 : compare_mcvs, NULL);
2732 :
2733 : /*
2734 : * Collapse out the MCV items from the values[] array.
2735 : *
2736 : * Note we destroy the values[] array here... but we don't need it
2737 : * for anything more. We do, however, still need values_cnt.
2738 : * nvals will be the number of remaining entries in values[].
2739 : */
2740 106664 : if (num_mcv > 0)
2741 : {
2742 : int src,
2743 : dest;
2744 : int j;
2745 :
2746 65290 : src = dest = 0;
2747 65290 : j = 0; /* index of next interesting MCV item */
2748 2440126 : while (src < values_cnt)
2749 : {
2750 : int ncopy;
2751 :
2752 2374836 : if (j < num_mcv)
2753 : {
2754 2327552 : int first = track[j].first;
2755 :
2756 2327552 : if (src >= first)
2757 : {
2758 : /* advance past this MCV item */
2759 1676706 : src = first + track[j].count;
2760 1676706 : j++;
2761 1676706 : continue;
2762 : }
2763 650846 : ncopy = first - src;
2764 : }
2765 : else
2766 47284 : ncopy = values_cnt - src;
2767 698130 : memmove(&values[dest], &values[src],
2768 : ncopy * sizeof(ScalarItem));
2769 698130 : src += ncopy;
2770 698130 : dest += ncopy;
2771 : }
2772 65290 : nvals = dest;
2773 : }
2774 : else
2775 41374 : nvals = values_cnt;
2776 : Assert(nvals >= num_hist);
2777 :
2778 : /* Must copy the target values into anl_context */
2779 106664 : old_context = MemoryContextSwitchTo(stats->anl_context);
2780 106664 : hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
2781 :
2782 : /*
2783 : * The object of this loop is to copy the first and last values[]
2784 : * entries along with evenly-spaced values in between. So the
2785 : * i'th value is values[(i * (nvals - 1)) / (num_hist - 1)]. But
2786 : * computing that subscript directly risks integer overflow when
2787 : * the stats target is more than a couple thousand. Instead we
2788 : * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking
2789 : * the integral and fractional parts of the sum separately.
2790 : */
2791 106664 : delta = (nvals - 1) / (num_hist - 1);
2792 106664 : deltafrac = (nvals - 1) % (num_hist - 1);
2793 106664 : pos = posfrac = 0;
2794 :
2795 5058460 : for (i = 0; i < num_hist; i++)
2796 : {
2797 9903592 : hist_values[i] = datumCopy(values[pos].value,
2798 4951796 : stats->attrtype->typbyval,
2799 4951796 : stats->attrtype->typlen);
2800 4951796 : pos += delta;
2801 4951796 : posfrac += deltafrac;
2802 4951796 : if (posfrac >= (num_hist - 1))
2803 : {
2804 : /* fractional part exceeds 1, carry to integer part */
2805 1664058 : pos++;
2806 1664058 : posfrac -= (num_hist - 1);
2807 : }
2808 : }
2809 :
2810 106664 : MemoryContextSwitchTo(old_context);
2811 :
2812 106664 : stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;
2813 106664 : stats->staop[slot_idx] = mystats->ltopr;
2814 106664 : stats->stacoll[slot_idx] = stats->attrcollid;
2815 106664 : stats->stavalues[slot_idx] = hist_values;
2816 106664 : stats->numvalues[slot_idx] = num_hist;
2817 :
2818 : /*
2819 : * Accept the defaults for stats->statypid and others. They have
2820 : * been set before we were called (see vacuum.h)
2821 : */
2822 106664 : slot_idx++;
2823 : }
2824 :
2825 : /* Generate a correlation entry if there are multiple values */
2826 250540 : if (values_cnt > 1)
2827 : {
2828 : MemoryContext old_context;
2829 : float4 *corrs;
2830 : double corr_xsum,
2831 : corr_x2sum;
2832 :
2833 : /* Must copy the target values into anl_context */
2834 230856 : old_context = MemoryContextSwitchTo(stats->anl_context);
2835 230856 : corrs = (float4 *) palloc(sizeof(float4));
2836 230856 : MemoryContextSwitchTo(old_context);
2837 :
2838 : /*----------
2839 : * Since we know the x and y value sets are both
2840 : * 0, 1, ..., values_cnt-1
2841 : * we have sum(x) = sum(y) =
2842 : * (values_cnt-1)*values_cnt / 2
2843 : * and sum(x^2) = sum(y^2) =
2844 : * (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6.
2845 : *----------
2846 : */
2847 230856 : corr_xsum = ((double) (values_cnt - 1)) *
2848 230856 : ((double) values_cnt) / 2.0;
2849 230856 : corr_x2sum = ((double) (values_cnt - 1)) *
2850 230856 : ((double) values_cnt) * (double) (2 * values_cnt - 1) / 6.0;
2851 :
2852 : /* And the correlation coefficient reduces to */
2853 230856 : corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) /
2854 230856 : (values_cnt * corr_x2sum - corr_xsum * corr_xsum);
2855 :
2856 230856 : stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
2857 230856 : stats->staop[slot_idx] = mystats->ltopr;
2858 230856 : stats->stacoll[slot_idx] = stats->attrcollid;
2859 230856 : stats->stanumbers[slot_idx] = corrs;
2860 230856 : stats->numnumbers[slot_idx] = 1;
2861 230856 : slot_idx++;
2862 : }
2863 : }
2864 16974 : else if (nonnull_cnt > 0)
2865 : {
2866 : /* We found some non-null values, but they were all too wide */
2867 : Assert(nonnull_cnt == toowide_cnt);
2868 1296 : stats->stats_valid = true;
2869 : /* Do the simple null-frac and width stats */
2870 1296 : stats->stanullfrac = (double) null_cnt / (double) samplerows;
2871 1296 : if (is_varwidth)
2872 1296 : stats->stawidth = total_width / (double) nonnull_cnt;
2873 : else
2874 0 : stats->stawidth = stats->attrtype->typlen;
2875 : /* Assume all too-wide values are distinct, so it's a unique column */
2876 1296 : stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
2877 : }
2878 15678 : else if (null_cnt > 0)
2879 : {
2880 : /* We found only nulls; assume the column is entirely null */
2881 15678 : stats->stats_valid = true;
2882 15678 : stats->stanullfrac = 1.0;
2883 15678 : if (is_varwidth)
2884 14730 : stats->stawidth = 0; /* "unknown" */
2885 : else
2886 948 : stats->stawidth = stats->attrtype->typlen;
2887 15678 : stats->stadistinct = 0.0; /* "unknown" */
2888 : }
2889 :
2890 : /* We don't need to bother cleaning up any of our temporary palloc's */
2891 267514 : }
2892 :
2893 : /*
2894 : * Comparator for sorting ScalarItems
2895 : *
2896 : * Aside from sorting the items, we update the tupnoLink[] array
2897 : * whenever two ScalarItems are found to contain equal datums. The array
2898 : * is indexed by tupno; for each ScalarItem, it contains the highest
2899 : * tupno that that item's datum has been found to be equal to. This allows
2900 : * us to avoid additional comparisons in compute_scalar_stats().
2901 : */
2902 : static int
2903 1581016358 : compare_scalars(const void *a, const void *b, void *arg)
2904 : {
2905 1581016358 : Datum da = ((const ScalarItem *) a)->value;
2906 1581016358 : int ta = ((const ScalarItem *) a)->tupno;
2907 1581016358 : Datum db = ((const ScalarItem *) b)->value;
2908 1581016358 : int tb = ((const ScalarItem *) b)->tupno;
2909 1581016358 : CompareScalarsContext *cxt = (CompareScalarsContext *) arg;
2910 : int compare;
2911 :
2912 1581016358 : compare = ApplySortComparator(da, false, db, false, cxt->ssup);
2913 1581016358 : if (compare != 0)
2914 526561626 : return compare;
2915 :
2916 : /*
2917 : * The two datums are equal, so update cxt->tupnoLink[].
2918 : */
2919 1054454732 : if (cxt->tupnoLink[ta] < tb)
2920 161602040 : cxt->tupnoLink[ta] = tb;
2921 1054454732 : if (cxt->tupnoLink[tb] < ta)
2922 10518912 : cxt->tupnoLink[tb] = ta;
2923 :
2924 : /*
2925 : * For equal datums, sort by tupno
2926 : */
2927 1054454732 : return ta - tb;
2928 : }
2929 :
2930 : /*
2931 : * Comparator for sorting ScalarMCVItems by position
2932 : */
2933 : static int
2934 9137546 : compare_mcvs(const void *a, const void *b, void *arg)
2935 : {
2936 9137546 : int da = ((const ScalarMCVItem *) a)->first;
2937 9137546 : int db = ((const ScalarMCVItem *) b)->first;
2938 :
2939 9137546 : return da - db;
2940 : }
2941 :
2942 : /*
2943 : * Analyze the list of common values in the sample and decide how many are
2944 : * worth storing in the table's MCV list.
2945 : *
2946 : * mcv_counts is assumed to be a list of the counts of the most common values
2947 : * seen in the sample, starting with the most common. The return value is the
2948 : * number that are significantly more common than the values not in the list,
2949 : * and which are therefore deemed worth storing in the table's MCV list.
2950 : */
2951 : static int
2952 89880 : analyze_mcv_list(int *mcv_counts,
2953 : int num_mcv,
2954 : double stadistinct,
2955 : double stanullfrac,
2956 : int samplerows,
2957 : double totalrows)
2958 : {
2959 : double ndistinct_table;
2960 : double sumcount;
2961 : int i;
2962 :
2963 : /*
2964 : * If the entire table was sampled, keep the whole list. This also
2965 : * protects us against division by zero in the code below.
2966 : */
2967 89880 : if (samplerows == totalrows || totalrows <= 1.0)
2968 89040 : return num_mcv;
2969 :
2970 : /* Re-extract the estimated number of distinct nonnull values in table */
2971 840 : ndistinct_table = stadistinct;
2972 840 : if (ndistinct_table < 0)
2973 152 : ndistinct_table = -ndistinct_table * totalrows;
2974 :
2975 : /*
2976 : * Exclude the least common values from the MCV list, if they are not
2977 : * significantly more common than the estimated selectivity they would
2978 : * have if they weren't in the list. All non-MCV values are assumed to be
2979 : * equally common, after taking into account the frequencies of all the
2980 : * values in the MCV list and the number of nulls (c.f. eqsel()).
2981 : *
2982 : * Here sumcount tracks the total count of all but the last (least common)
2983 : * value in the MCV list, allowing us to determine the effect of excluding
2984 : * that value from the list.
2985 : *
2986 : * Note that we deliberately do this by removing values from the full
2987 : * list, rather than starting with an empty list and adding values,
2988 : * because the latter approach can fail to add any values if all the most
2989 : * common values have around the same frequency and make up the majority
2990 : * of the table, so that the overall average frequency of all values is
2991 : * roughly the same as that of the common values. This would lead to any
2992 : * uncommon values being significantly overestimated.
2993 : */
2994 840 : sumcount = 0.0;
2995 1724 : for (i = 0; i < num_mcv - 1; i++)
2996 884 : sumcount += mcv_counts[i];
2997 :
2998 974 : while (num_mcv > 0)
2999 : {
3000 : double selec,
3001 : otherdistinct,
3002 : N,
3003 : n,
3004 : K,
3005 : variance,
3006 : stddev;
3007 :
3008 : /*
3009 : * Estimated selectivity the least common value would have if it
3010 : * wasn't in the MCV list (c.f. eqsel()).
3011 : */
3012 974 : selec = 1.0 - sumcount / samplerows - stanullfrac;
3013 974 : if (selec < 0.0)
3014 0 : selec = 0.0;
3015 974 : if (selec > 1.0)
3016 0 : selec = 1.0;
3017 974 : otherdistinct = ndistinct_table - (num_mcv - 1);
3018 974 : if (otherdistinct > 1)
3019 974 : selec /= otherdistinct;
3020 :
3021 : /*
3022 : * If the value is kept in the MCV list, its population frequency is
3023 : * assumed to equal its sample frequency. We use the lower end of a
3024 : * textbook continuity-corrected Wald-type confidence interval to
3025 : * determine if that is significantly more common than the non-MCV
3026 : * frequency --- specifically we assume the population frequency is
3027 : * highly likely to be within around 2 standard errors of the sample
3028 : * frequency, which equates to an interval of 2 standard deviations
3029 : * either side of the sample count, plus an additional 0.5 for the
3030 : * continuity correction. Since we are sampling without replacement,
3031 : * this is a hypergeometric distribution.
3032 : *
3033 : * XXX: Empirically, this approach seems to work quite well, but it
3034 : * may be worth considering more advanced techniques for estimating
3035 : * the confidence interval of the hypergeometric distribution.
3036 : */
3037 974 : N = totalrows;
3038 974 : n = samplerows;
3039 974 : K = N * mcv_counts[num_mcv - 1] / n;
3040 974 : variance = n * K * (N - K) * (N - n) / (N * N * (N - 1));
3041 974 : stddev = sqrt(variance);
3042 :
3043 974 : if (mcv_counts[num_mcv - 1] > selec * samplerows + 2 * stddev + 0.5)
3044 : {
3045 : /*
3046 : * The value is significantly more common than the non-MCV
3047 : * selectivity would suggest. Keep it, and all the other more
3048 : * common values in the list.
3049 : */
3050 778 : break;
3051 : }
3052 : else
3053 : {
3054 : /* Discard this value and consider the next least common value */
3055 196 : num_mcv--;
3056 196 : if (num_mcv == 0)
3057 62 : break;
3058 134 : sumcount -= mcv_counts[num_mcv - 1];
3059 : }
3060 : }
3061 840 : return num_mcv;
3062 : }
|