Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * dependencies.c
4 : * POSTGRES functional dependencies
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * src/backend/statistics/dependencies.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include "access/htup_details.h"
17 : #include "catalog/pg_statistic_ext.h"
18 : #include "catalog/pg_statistic_ext_data.h"
19 : #include "nodes/nodeFuncs.h"
20 : #include "optimizer/clauses.h"
21 : #include "optimizer/optimizer.h"
22 : #include "parser/parsetree.h"
23 : #include "statistics/extended_stats_internal.h"
24 : #include "utils/fmgroids.h"
25 : #include "utils/lsyscache.h"
26 : #include "utils/memutils.h"
27 : #include "utils/selfuncs.h"
28 : #include "utils/syscache.h"
29 : #include "utils/typcache.h"
30 :
31 : /* size of the struct header fields (magic, type, ndeps) */
32 : #define SizeOfHeader (3 * sizeof(uint32))
33 :
34 : /* size of a serialized dependency (degree, natts, atts) */
35 : #define SizeOfItem(natts) \
36 : (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
37 :
38 : /* minimal size of a dependency (with two attributes) */
39 : #define MinSizeOfItem SizeOfItem(2)
40 :
41 : /* minimal size of dependencies, when all deps are minimal */
42 : #define MinSizeOfItems(ndeps) \
43 : (SizeOfHeader + (ndeps) * MinSizeOfItem)
44 :
45 : /*
46 : * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
47 : * k-permutations of n elements, except that the order does not matter for the
48 : * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
49 : */
50 : typedef struct DependencyGeneratorData
51 : {
52 : int k; /* size of the dependency */
53 : int n; /* number of possible attributes */
54 : int current; /* next dependency to return (index) */
55 : AttrNumber ndependencies; /* number of dependencies generated */
56 : AttrNumber *dependencies; /* array of pre-generated dependencies */
57 : } DependencyGeneratorData;
58 :
59 : typedef DependencyGeneratorData *DependencyGenerator;
60 :
61 : static void generate_dependencies_recurse(DependencyGenerator state,
62 : int index, AttrNumber start, AttrNumber *current);
63 : static void generate_dependencies(DependencyGenerator state);
64 : static DependencyGenerator DependencyGenerator_init(int n, int k);
65 : static void DependencyGenerator_free(DependencyGenerator state);
66 : static AttrNumber *DependencyGenerator_next(DependencyGenerator state);
67 : static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency);
68 : static bool dependency_is_fully_matched(MVDependency *dependency,
69 : Bitmapset *attnums);
70 : static bool dependency_is_compatible_clause(Node *clause, Index relid,
71 : AttrNumber *attnum);
72 : static bool dependency_is_compatible_expression(Node *clause, Index relid,
73 : List *statlist, Node **expr);
74 : static MVDependency *find_strongest_dependency(MVDependencies **dependencies,
75 : int ndependencies, Bitmapset *attnums);
76 : static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
77 : int varRelid, JoinType jointype,
78 : SpecialJoinInfo *sjinfo,
79 : MVDependency **dependencies,
80 : int ndependencies,
81 : AttrNumber *list_attnums,
82 : Bitmapset **estimatedclauses);
83 :
84 : static void
85 834 : generate_dependencies_recurse(DependencyGenerator state, int index,
86 : AttrNumber start, AttrNumber *current)
87 : {
88 : /*
89 : * The generator handles the first (k-1) elements differently from the
90 : * last element.
91 : */
92 834 : if (index < (state->k - 1))
93 : {
94 : AttrNumber i;
95 :
96 : /*
97 : * The first (k-1) values have to be in ascending order, which we
98 : * generate recursively.
99 : */
100 :
101 978 : for (i = start; i < state->n; i++)
102 : {
103 636 : current[index] = i;
104 636 : generate_dependencies_recurse(state, (index + 1), (i + 1), current);
105 : }
106 : }
107 : else
108 : {
109 : int i;
110 :
111 : /*
112 : * the last element is the implied value, which does not respect the
113 : * ascending order. We just need to check that the value is not in the
114 : * first (k-1) elements.
115 : */
116 :
117 1764 : for (i = 0; i < state->n; i++)
118 : {
119 : int j;
120 1272 : bool match = false;
121 :
122 1272 : current[index] = i;
123 :
124 2196 : for (j = 0; j < index; j++)
125 : {
126 1560 : if (current[j] == i)
127 : {
128 636 : match = true;
129 636 : break;
130 : }
131 : }
132 :
133 : /*
134 : * If the value is not found in the first part of the dependency,
135 : * we're done.
136 : */
137 1272 : if (!match)
138 : {
139 1272 : state->dependencies = (AttrNumber *) repalloc(state->dependencies,
140 636 : state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
141 636 : memcpy(&state->dependencies[(state->k * state->ndependencies)],
142 636 : current, state->k * sizeof(AttrNumber));
143 636 : state->ndependencies++;
144 : }
145 : }
146 : }
147 834 : }
148 :
149 : /* generate all dependencies (k-permutations of n elements) */
150 : static void
151 198 : generate_dependencies(DependencyGenerator state)
152 : {
153 198 : AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k);
154 :
155 198 : generate_dependencies_recurse(state, 0, 0, current);
156 :
157 198 : pfree(current);
158 198 : }
159 :
160 : /*
161 : * initialize the DependencyGenerator of variations, and prebuild the variations
162 : *
163 : * This pre-builds all the variations. We could also generate them in
164 : * DependencyGenerator_next(), but this seems simpler.
165 : */
166 : static DependencyGenerator
167 198 : DependencyGenerator_init(int n, int k)
168 : {
169 : DependencyGenerator state;
170 :
171 : Assert((n >= k) && (k > 0));
172 :
173 : /* allocate the DependencyGenerator state */
174 198 : state = (DependencyGenerator) palloc0(sizeof(DependencyGeneratorData));
175 198 : state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber));
176 :
177 198 : state->ndependencies = 0;
178 198 : state->current = 0;
179 198 : state->k = k;
180 198 : state->n = n;
181 :
182 : /* now actually pre-generate all the variations */
183 198 : generate_dependencies(state);
184 :
185 198 : return state;
186 : }
187 :
188 : /* free the DependencyGenerator state */
189 : static void
190 198 : DependencyGenerator_free(DependencyGenerator state)
191 : {
192 198 : pfree(state->dependencies);
193 198 : pfree(state);
194 198 : }
195 :
196 : /* generate next combination */
197 : static AttrNumber *
198 834 : DependencyGenerator_next(DependencyGenerator state)
199 : {
200 834 : if (state->current == state->ndependencies)
201 198 : return NULL;
202 :
203 636 : return &state->dependencies[state->k * state->current++];
204 : }
205 :
206 :
207 : /*
208 : * validates functional dependency on the data
209 : *
210 : * An actual work horse of detecting functional dependencies. Given a variation
211 : * of k attributes, it checks that the first (k-1) are sufficient to determine
212 : * the last one.
213 : */
214 : static double
215 636 : dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
216 : {
217 : int i,
218 : nitems;
219 : MultiSortSupport mss;
220 : SortItem *items;
221 : AttrNumber *attnums_dep;
222 :
223 : /* counters valid within a group */
224 636 : int group_size = 0;
225 636 : int n_violations = 0;
226 :
227 : /* total number of rows supporting (consistent with) the dependency */
228 636 : int n_supporting_rows = 0;
229 :
230 : /* Make sure we have at least two input attributes. */
231 : Assert(k >= 2);
232 :
233 : /* sort info for all attributes columns */
234 636 : mss = multi_sort_init(k);
235 :
236 : /*
237 : * Translate the array of indexes to regular attnums for the dependency
238 : * (we will need this to identify the columns in StatsBuildData).
239 : */
240 636 : attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber));
241 2052 : for (i = 0; i < k; i++)
242 1416 : attnums_dep[i] = data->attnums[dependency[i]];
243 :
244 : /*
245 : * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
246 : *
247 : * (a) sort the data lexicographically
248 : *
249 : * (b) split the data into groups by first (k-1) columns
250 : *
251 : * (c) for each group count different values in the last column
252 : *
253 : * We use the column data types' default sort operators and collations;
254 : * perhaps at some point it'd be worth using column-specific collations?
255 : */
256 :
257 : /* prepare the sort function for the dimensions */
258 2052 : for (i = 0; i < k; i++)
259 : {
260 1416 : VacAttrStats *colstat = data->stats[dependency[i]];
261 : TypeCacheEntry *type;
262 :
263 1416 : type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
264 1416 : if (type->lt_opr == InvalidOid) /* shouldn't happen */
265 0 : elog(ERROR, "cache lookup failed for ordering operator for type %u",
266 : colstat->attrtypid);
267 :
268 : /* prepare the sort function for this dimension */
269 1416 : multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
270 : }
271 :
272 : /*
273 : * build an array of SortItem(s) sorted using the multi-sort support
274 : *
275 : * XXX This relies on all stats entries pointing to the same tuple
276 : * descriptor. For now that assumption holds, but it might change in the
277 : * future for example if we support statistics on multiple tables.
278 : */
279 636 : items = build_sorted_items(data, &nitems, mss, k, attnums_dep);
280 :
281 : /*
282 : * Walk through the sorted array, split it into rows according to the
283 : * first (k-1) columns. If there's a single value in the last column, we
284 : * count the group as 'supporting' the functional dependency. Otherwise we
285 : * count it as contradicting.
286 : */
287 :
288 : /* start with the first row forming a group */
289 636 : group_size = 1;
290 :
291 : /* loop 1 beyond the end of the array so that we count the final group */
292 1505878 : for (i = 1; i <= nitems; i++)
293 : {
294 : /*
295 : * Check if the group ended, which may be either because we processed
296 : * all the items (i==nitems), or because the i-th item is not equal to
297 : * the preceding one.
298 : */
299 3009848 : if (i == nitems ||
300 1504606 : multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
301 : {
302 : /*
303 : * If no violations were found in the group then track the rows of
304 : * the group as supporting the functional dependency.
305 : */
306 32628 : if (n_violations == 0)
307 19938 : n_supporting_rows += group_size;
308 :
309 : /* Reset counters for the new group */
310 32628 : n_violations = 0;
311 32628 : group_size = 1;
312 32628 : continue;
313 : }
314 : /* first columns match, but the last one does not (so contradicting) */
315 1472614 : else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
316 60102 : n_violations++;
317 :
318 1472614 : group_size++;
319 : }
320 :
321 : /* Compute the 'degree of validity' as (supporting/total). */
322 636 : return (n_supporting_rows * 1.0 / data->numrows);
323 : }
324 :
325 : /*
326 : * detects functional dependencies between groups of columns
327 : *
328 : * Generates all possible subsets of columns (variations) and computes
329 : * the degree of validity for each one. For example when creating statistics
330 : * on three columns (a,b,c) there are 9 possible dependencies
331 : *
332 : * two columns three columns
333 : * ----------- -------------
334 : * (a) -> b (a,b) -> c
335 : * (a) -> c (a,c) -> b
336 : * (b) -> a (b,c) -> a
337 : * (b) -> c
338 : * (c) -> a
339 : * (c) -> b
340 : */
341 : MVDependencies *
342 150 : statext_dependencies_build(StatsBuildData *data)
343 : {
344 : int i,
345 : k;
346 :
347 : /* result */
348 150 : MVDependencies *dependencies = NULL;
349 : MemoryContext cxt;
350 :
351 : Assert(data->nattnums >= 2);
352 :
353 : /* tracks memory allocated by dependency_degree calls */
354 150 : cxt = AllocSetContextCreate(CurrentMemoryContext,
355 : "dependency_degree cxt",
356 : ALLOCSET_DEFAULT_SIZES);
357 :
358 : /*
359 : * We'll try build functional dependencies starting from the smallest ones
360 : * covering just 2 columns, to the largest ones, covering all columns
361 : * included in the statistics object. We start from the smallest ones
362 : * because we want to be able to skip already implied ones.
363 : */
364 348 : for (k = 2; k <= data->nattnums; k++)
365 : {
366 : AttrNumber *dependency; /* array with k elements */
367 :
368 : /* prepare a DependencyGenerator of variation */
369 198 : DependencyGenerator DependencyGenerator = DependencyGenerator_init(data->nattnums, k);
370 :
371 : /* generate all possible variations of k values (out of n) */
372 834 : while ((dependency = DependencyGenerator_next(DependencyGenerator)))
373 : {
374 : double degree;
375 : MVDependency *d;
376 : MemoryContext oldcxt;
377 :
378 : /* release memory used by dependency degree calculation */
379 636 : oldcxt = MemoryContextSwitchTo(cxt);
380 :
381 : /* compute how valid the dependency seems */
382 636 : degree = dependency_degree(data, k, dependency);
383 :
384 636 : MemoryContextSwitchTo(oldcxt);
385 636 : MemoryContextReset(cxt);
386 :
387 : /*
388 : * if the dependency seems entirely invalid, don't store it
389 : */
390 636 : if (degree == 0.0)
391 252 : continue;
392 :
393 384 : d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
394 384 : + k * sizeof(AttrNumber));
395 :
396 : /* copy the dependency (and keep the indexes into stxkeys) */
397 384 : d->degree = degree;
398 384 : d->nattributes = k;
399 1224 : for (i = 0; i < k; i++)
400 840 : d->attributes[i] = data->attnums[dependency[i]];
401 :
402 : /* initialize the list of dependencies */
403 384 : if (dependencies == NULL)
404 : {
405 : dependencies
406 132 : = (MVDependencies *) palloc0(sizeof(MVDependencies));
407 :
408 132 : dependencies->magic = STATS_DEPS_MAGIC;
409 132 : dependencies->type = STATS_DEPS_TYPE_BASIC;
410 132 : dependencies->ndeps = 0;
411 : }
412 :
413 384 : dependencies->ndeps++;
414 384 : dependencies = (MVDependencies *) repalloc(dependencies,
415 : offsetof(MVDependencies, deps)
416 384 : + dependencies->ndeps * sizeof(MVDependency *));
417 :
418 384 : dependencies->deps[dependencies->ndeps - 1] = d;
419 : }
420 :
421 : /*
422 : * we're done with variations of k elements, so free the
423 : * DependencyGenerator
424 : */
425 198 : DependencyGenerator_free(DependencyGenerator);
426 : }
427 :
428 150 : MemoryContextDelete(cxt);
429 :
430 150 : return dependencies;
431 : }
432 :
433 :
434 : /*
435 : * Serialize list of dependencies into a bytea value.
436 : */
437 : bytea *
438 132 : statext_dependencies_serialize(MVDependencies *dependencies)
439 : {
440 : int i;
441 : bytea *output;
442 : char *tmp;
443 : Size len;
444 :
445 : /* we need to store ndeps, with a number of attributes for each one */
446 132 : len = VARHDRSZ + SizeOfHeader;
447 :
448 : /* and also include space for the actual attribute numbers and degrees */
449 516 : for (i = 0; i < dependencies->ndeps; i++)
450 384 : len += SizeOfItem(dependencies->deps[i]->nattributes);
451 :
452 132 : output = (bytea *) palloc0(len);
453 132 : SET_VARSIZE(output, len);
454 :
455 132 : tmp = VARDATA(output);
456 :
457 : /* Store the base struct values (magic, type, ndeps) */
458 132 : memcpy(tmp, &dependencies->magic, sizeof(uint32));
459 132 : tmp += sizeof(uint32);
460 132 : memcpy(tmp, &dependencies->type, sizeof(uint32));
461 132 : tmp += sizeof(uint32);
462 132 : memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
463 132 : tmp += sizeof(uint32);
464 :
465 : /* store number of attributes and attribute numbers for each dependency */
466 516 : for (i = 0; i < dependencies->ndeps; i++)
467 : {
468 384 : MVDependency *d = dependencies->deps[i];
469 :
470 384 : memcpy(tmp, &d->degree, sizeof(double));
471 384 : tmp += sizeof(double);
472 :
473 384 : memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
474 384 : tmp += sizeof(AttrNumber);
475 :
476 384 : memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
477 384 : tmp += sizeof(AttrNumber) * d->nattributes;
478 :
479 : /* protect against overflow */
480 : Assert(tmp <= ((char *) output + len));
481 : }
482 :
483 : /* make sure we've produced exactly the right amount of data */
484 : Assert(tmp == ((char *) output + len));
485 :
486 132 : return output;
487 : }
488 :
489 : /*
490 : * Reads serialized dependencies into MVDependencies structure.
491 : */
492 : MVDependencies *
493 1624 : statext_dependencies_deserialize(bytea *data)
494 : {
495 : int i;
496 : Size min_expected_size;
497 : MVDependencies *dependencies;
498 : char *tmp;
499 :
500 1624 : if (data == NULL)
501 0 : return NULL;
502 :
503 1624 : if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
504 0 : elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
505 : VARSIZE_ANY_EXHDR(data), SizeOfHeader);
506 :
507 : /* read the MVDependencies header */
508 1624 : dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
509 :
510 : /* initialize pointer to the data part (skip the varlena header) */
511 1624 : tmp = VARDATA_ANY(data);
512 :
513 : /* read the header fields and perform basic sanity checks */
514 1624 : memcpy(&dependencies->magic, tmp, sizeof(uint32));
515 1624 : tmp += sizeof(uint32);
516 1624 : memcpy(&dependencies->type, tmp, sizeof(uint32));
517 1624 : tmp += sizeof(uint32);
518 1624 : memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
519 1624 : tmp += sizeof(uint32);
520 :
521 1624 : if (dependencies->magic != STATS_DEPS_MAGIC)
522 0 : elog(ERROR, "invalid dependency magic %d (expected %d)",
523 : dependencies->magic, STATS_DEPS_MAGIC);
524 :
525 1624 : if (dependencies->type != STATS_DEPS_TYPE_BASIC)
526 0 : elog(ERROR, "invalid dependency type %d (expected %d)",
527 : dependencies->type, STATS_DEPS_TYPE_BASIC);
528 :
529 1624 : if (dependencies->ndeps == 0)
530 0 : elog(ERROR, "invalid zero-length item array in MVDependencies");
531 :
532 : /* what minimum bytea size do we expect for those parameters */
533 1624 : min_expected_size = SizeOfItem(dependencies->ndeps);
534 :
535 1624 : if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
536 0 : elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
537 : VARSIZE_ANY_EXHDR(data), min_expected_size);
538 :
539 : /* allocate space for the MCV items */
540 1624 : dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
541 1624 : + (dependencies->ndeps * sizeof(MVDependency *)));
542 :
543 9618 : for (i = 0; i < dependencies->ndeps; i++)
544 : {
545 : double degree;
546 : AttrNumber k;
547 : MVDependency *d;
548 :
549 : /* degree of validity */
550 7994 : memcpy(°ree, tmp, sizeof(double));
551 7994 : tmp += sizeof(double);
552 :
553 : /* number of attributes */
554 7994 : memcpy(&k, tmp, sizeof(AttrNumber));
555 7994 : tmp += sizeof(AttrNumber);
556 :
557 : /* is the number of attributes valid? */
558 : Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
559 :
560 : /* now that we know the number of attributes, allocate the dependency */
561 7994 : d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
562 7994 : + (k * sizeof(AttrNumber)));
563 :
564 7994 : d->degree = degree;
565 7994 : d->nattributes = k;
566 :
567 : /* copy attribute numbers */
568 7994 : memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
569 7994 : tmp += sizeof(AttrNumber) * d->nattributes;
570 :
571 7994 : dependencies->deps[i] = d;
572 :
573 : /* still within the bytea */
574 : Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
575 : }
576 :
577 : /* we should have consumed the whole bytea exactly */
578 : Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
579 :
580 1624 : return dependencies;
581 : }
582 :
583 : /*
584 : * dependency_is_fully_matched
585 : * checks that a functional dependency is fully matched given clauses on
586 : * attributes (assuming the clauses are suitable equality clauses)
587 : */
588 : static bool
589 6156 : dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
590 : {
591 : int j;
592 :
593 : /*
594 : * Check that the dependency actually is fully covered by clauses. We have
595 : * to translate all attribute numbers, as those are referenced
596 : */
597 15642 : for (j = 0; j < dependency->nattributes; j++)
598 : {
599 12558 : int attnum = dependency->attributes[j];
600 :
601 12558 : if (!bms_is_member(attnum, attnums))
602 3072 : return false;
603 : }
604 :
605 3084 : return true;
606 : }
607 :
608 : /*
609 : * statext_dependencies_load
610 : * Load the functional dependencies for the indicated pg_statistic_ext tuple
611 : */
612 : MVDependencies *
613 1608 : statext_dependencies_load(Oid mvoid, bool inh)
614 : {
615 : MVDependencies *result;
616 : bool isnull;
617 : Datum deps;
618 : HeapTuple htup;
619 :
620 1608 : htup = SearchSysCache2(STATEXTDATASTXOID,
621 : ObjectIdGetDatum(mvoid),
622 : BoolGetDatum(inh));
623 1608 : if (!HeapTupleIsValid(htup))
624 0 : elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
625 :
626 1608 : deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
627 : Anum_pg_statistic_ext_data_stxddependencies, &isnull);
628 1608 : if (isnull)
629 0 : elog(ERROR,
630 : "requested statistics kind \"%c\" is not yet built for statistics object %u",
631 : STATS_EXT_DEPENDENCIES, mvoid);
632 :
633 1608 : result = statext_dependencies_deserialize(DatumGetByteaPP(deps));
634 :
635 1608 : ReleaseSysCache(htup);
636 :
637 1608 : return result;
638 : }
639 :
640 : /*
641 : * dependency_is_compatible_clause
642 : * Determines if the clause is compatible with functional dependencies
643 : *
644 : * Only clauses that have the form of equality to a pseudoconstant, or can be
645 : * interpreted that way, are currently accepted. Furthermore the variable
646 : * part of the clause must be a simple Var belonging to the specified
647 : * relation, whose attribute number we return in *attnum on success.
648 : */
649 : static bool
650 3960 : dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
651 : {
652 : Var *var;
653 : Node *clause_expr;
654 :
655 3960 : if (IsA(clause, RestrictInfo))
656 : {
657 3840 : RestrictInfo *rinfo = (RestrictInfo *) clause;
658 :
659 : /* Pseudoconstants are not interesting (they couldn't contain a Var) */
660 3840 : if (rinfo->pseudoconstant)
661 6 : return false;
662 :
663 : /* Clauses referencing multiple, or no, varnos are incompatible */
664 3834 : if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
665 0 : return false;
666 :
667 3834 : clause = (Node *) rinfo->clause;
668 : }
669 :
670 3954 : if (is_opclause(clause))
671 : {
672 : /* If it's an opclause, check for Var = Const or Const = Var. */
673 1446 : OpExpr *expr = (OpExpr *) clause;
674 :
675 : /* Only expressions with two arguments are candidates. */
676 1446 : if (list_length(expr->args) != 2)
677 0 : return false;
678 :
679 : /* Make sure non-selected argument is a pseudoconstant. */
680 1446 : if (is_pseudo_constant_clause(lsecond(expr->args)))
681 1446 : clause_expr = linitial(expr->args);
682 0 : else if (is_pseudo_constant_clause(linitial(expr->args)))
683 0 : clause_expr = lsecond(expr->args);
684 : else
685 0 : return false;
686 :
687 : /*
688 : * If it's not an "=" operator, just ignore the clause, as it's not
689 : * compatible with functional dependencies.
690 : *
691 : * This uses the function for estimating selectivity, not the operator
692 : * directly (a bit awkward, but well ...).
693 : *
694 : * XXX this is pretty dubious; probably it'd be better to check btree
695 : * or hash opclass membership, so as not to be fooled by custom
696 : * selectivity functions, and to be more consistent with decisions
697 : * elsewhere in the planner.
698 : */
699 1446 : if (get_oprrest(expr->opno) != F_EQSEL)
700 36 : return false;
701 :
702 : /* OK to proceed with checking "var" */
703 : }
704 2508 : else if (IsA(clause, ScalarArrayOpExpr))
705 : {
706 : /* If it's a scalar array operator, check for Var IN Const. */
707 2436 : ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
708 :
709 : /*
710 : * Reject ALL() variant, we only care about ANY/IN.
711 : *
712 : * XXX Maybe we should check if all the values are the same, and allow
713 : * ALL in that case? Doesn't seem very practical, though.
714 : */
715 2436 : if (!expr->useOr)
716 36 : return false;
717 :
718 : /* Only expressions with two arguments are candidates. */
719 2400 : if (list_length(expr->args) != 2)
720 0 : return false;
721 :
722 : /*
723 : * We know it's always (Var IN Const), so we assume the var is the
724 : * first argument, and pseudoconstant is the second one.
725 : */
726 2400 : if (!is_pseudo_constant_clause(lsecond(expr->args)))
727 0 : return false;
728 :
729 2400 : clause_expr = linitial(expr->args);
730 :
731 : /*
732 : * If it's not an "=" operator, just ignore the clause, as it's not
733 : * compatible with functional dependencies. The operator is identified
734 : * simply by looking at which function it uses to estimate
735 : * selectivity. That's a bit strange, but it's what other similar
736 : * places do.
737 : */
738 2400 : if (get_oprrest(expr->opno) != F_EQSEL)
739 180 : return false;
740 :
741 : /* OK to proceed with checking "var" */
742 : }
743 72 : else if (is_orclause(clause))
744 : {
745 72 : BoolExpr *bool_expr = (BoolExpr *) clause;
746 : ListCell *lc;
747 :
748 : /* start with no attribute number */
749 72 : *attnum = InvalidAttrNumber;
750 :
751 150 : foreach(lc, bool_expr->args)
752 : {
753 : AttrNumber clause_attnum;
754 :
755 : /*
756 : * Had we found incompatible clause in the arguments, treat the
757 : * whole clause as incompatible.
758 : */
759 120 : if (!dependency_is_compatible_clause((Node *) lfirst(lc),
760 : relid, &clause_attnum))
761 42 : return false;
762 :
763 84 : if (*attnum == InvalidAttrNumber)
764 36 : *attnum = clause_attnum;
765 :
766 : /* ensure all the variables are the same (same attnum) */
767 84 : if (*attnum != clause_attnum)
768 6 : return false;
769 : }
770 :
771 : /* the Var is already checked by the recursive call */
772 30 : return true;
773 : }
774 0 : else if (is_notclause(clause))
775 : {
776 : /*
777 : * "NOT x" can be interpreted as "x = false", so get the argument and
778 : * proceed with seeing if it's a suitable Var.
779 : */
780 0 : clause_expr = (Node *) get_notclausearg(clause);
781 : }
782 : else
783 : {
784 : /*
785 : * A boolean expression "x" can be interpreted as "x = true", so
786 : * proceed with seeing if it's a suitable Var.
787 : */
788 0 : clause_expr = (Node *) clause;
789 : }
790 :
791 : /*
792 : * We may ignore any RelabelType node above the operand. (There won't be
793 : * more than one, since eval_const_expressions has been applied already.)
794 : */
795 3630 : if (IsA(clause_expr, RelabelType))
796 0 : clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
797 :
798 : /* We only support plain Vars for now */
799 3630 : if (!IsA(clause_expr, Var))
800 288 : return false;
801 :
802 : /* OK, we know we have a Var */
803 3342 : var = (Var *) clause_expr;
804 :
805 : /* Ensure Var is from the correct relation */
806 3342 : if (var->varno != relid)
807 0 : return false;
808 :
809 : /* We also better ensure the Var is from the current level */
810 3342 : if (var->varlevelsup != 0)
811 0 : return false;
812 :
813 : /* Also ignore system attributes (we don't allow stats on those) */
814 3342 : if (!AttrNumberIsForUserDefinedAttr(var->varattno))
815 0 : return false;
816 :
817 3342 : *attnum = var->varattno;
818 3342 : return true;
819 : }
820 :
821 : /*
822 : * find_strongest_dependency
823 : * find the strongest dependency on the attributes
824 : *
825 : * When applying functional dependencies, we start with the strongest
826 : * dependencies. That is, we select the dependency that:
827 : *
828 : * (a) has all attributes covered by equality clauses
829 : *
830 : * (b) has the most attributes
831 : *
832 : * (c) has the highest degree of validity
833 : *
834 : * This guarantees that we eliminate the most redundant conditions first
835 : * (see the comment in dependencies_clauselist_selectivity).
836 : */
837 : static MVDependency *
838 3486 : find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
839 : Bitmapset *attnums)
840 : {
841 : int i,
842 : j;
843 3486 : MVDependency *strongest = NULL;
844 :
845 : /* number of attnums in clauses */
846 3486 : int nattnums = bms_num_members(attnums);
847 :
848 : /*
849 : * Iterate over the MVDependency items and find the strongest one from the
850 : * fully-matched dependencies. We do the cheap checks first, before
851 : * matching it against the attnums.
852 : */
853 7008 : for (i = 0; i < ndependencies; i++)
854 : {
855 20280 : for (j = 0; j < dependencies[i]->ndeps; j++)
856 : {
857 16758 : MVDependency *dependency = dependencies[i]->deps[j];
858 :
859 : /*
860 : * Skip dependencies referencing more attributes than available
861 : * clauses, as those can't be fully matched.
862 : */
863 16758 : if (dependency->nattributes > nattnums)
864 10602 : continue;
865 :
866 6156 : if (strongest)
867 : {
868 : /* skip dependencies on fewer attributes than the strongest. */
869 3936 : if (dependency->nattributes < strongest->nattributes)
870 0 : continue;
871 :
872 : /* also skip weaker dependencies when attribute count matches */
873 3936 : if (strongest->nattributes == dependency->nattributes &&
874 3654 : strongest->degree > dependency->degree)
875 0 : continue;
876 : }
877 :
878 : /*
879 : * this dependency is stronger, but we must still check that it's
880 : * fully matched to these attnums. We perform this check last as
881 : * it's slightly more expensive than the previous checks.
882 : */
883 6156 : if (dependency_is_fully_matched(dependency, attnums))
884 3084 : strongest = dependency; /* save new best match */
885 : }
886 : }
887 :
888 3486 : return strongest;
889 : }
890 :
891 : /*
892 : * clauselist_apply_dependencies
893 : * Apply the specified functional dependencies to a list of clauses and
894 : * return the estimated selectivity of the clauses that are compatible
895 : * with any of the given dependencies.
896 : *
897 : * This will estimate all not-already-estimated clauses that are compatible
898 : * with functional dependencies, and which have an attribute mentioned by any
899 : * of the given dependencies (either as an implying or implied attribute).
900 : *
901 : * Given (lists of) clauses on attributes (a,b) and a functional dependency
902 : * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
903 : * using the formula
904 : *
905 : * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
906 : *
907 : * where 'f' is the degree of dependency. This reflects the fact that we
908 : * expect a fraction f of all rows to be consistent with the dependency
909 : * (a=>b), and so have a selectivity of P(a), while the remaining rows are
910 : * treated as independent.
911 : *
912 : * In practice, we use a slightly modified version of this formula, which uses
913 : * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
914 : * should obviously not exceed either column's individual selectivity. I.e.,
915 : * we actually combine selectivities using the formula
916 : *
917 : * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
918 : *
919 : * This can make quite a difference if the specific values matching the
920 : * clauses are not consistent with the functional dependency.
921 : */
922 : static Selectivity
923 1596 : clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
924 : int varRelid, JoinType jointype,
925 : SpecialJoinInfo *sjinfo,
926 : MVDependency **dependencies, int ndependencies,
927 : AttrNumber *list_attnums,
928 : Bitmapset **estimatedclauses)
929 : {
930 : Bitmapset *attnums;
931 : int i;
932 : int j;
933 : int nattrs;
934 : Selectivity *attr_sel;
935 : int attidx;
936 : int listidx;
937 : ListCell *l;
938 : Selectivity s1;
939 :
940 : /*
941 : * Extract the attnums of all implying and implied attributes from all the
942 : * given dependencies. Each of these attributes is expected to have at
943 : * least 1 not-already-estimated compatible clause that we will estimate
944 : * here.
945 : */
946 1596 : attnums = NULL;
947 3486 : for (i = 0; i < ndependencies; i++)
948 : {
949 5952 : for (j = 0; j < dependencies[i]->nattributes; j++)
950 : {
951 4062 : AttrNumber attnum = dependencies[i]->attributes[j];
952 :
953 4062 : attnums = bms_add_member(attnums, attnum);
954 : }
955 : }
956 :
957 : /*
958 : * Compute per-column selectivity estimates for each of these attributes,
959 : * and mark all the corresponding clauses as estimated.
960 : */
961 1596 : nattrs = bms_num_members(attnums);
962 1596 : attr_sel = (Selectivity *) palloc(sizeof(Selectivity) * nattrs);
963 :
964 1596 : attidx = 0;
965 1596 : i = -1;
966 5094 : while ((i = bms_next_member(attnums, i)) >= 0)
967 : {
968 3498 : List *attr_clauses = NIL;
969 : Selectivity simple_sel;
970 :
971 3498 : listidx = -1;
972 11436 : foreach(l, clauses)
973 : {
974 7938 : Node *clause = (Node *) lfirst(l);
975 :
976 7938 : listidx++;
977 7938 : if (list_attnums[listidx] == i)
978 : {
979 3498 : attr_clauses = lappend(attr_clauses, clause);
980 3498 : *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
981 : }
982 : }
983 :
984 3498 : simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid,
985 : jointype, sjinfo, false);
986 3498 : attr_sel[attidx++] = simple_sel;
987 : }
988 :
989 : /*
990 : * Now combine these selectivities using the dependency information. For
991 : * chains of dependencies such as a -> b -> c, the b -> c dependency will
992 : * come before the a -> b dependency in the array, so we traverse the
993 : * array backwards to ensure such chains are computed in the right order.
994 : *
995 : * As explained above, pairs of selectivities are combined using the
996 : * formula
997 : *
998 : * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
999 : *
1000 : * to ensure that the combined selectivity is never greater than either
1001 : * individual selectivity.
1002 : *
1003 : * Where multiple dependencies apply (e.g., a -> b -> c), we use
1004 : * conditional probabilities to compute the overall result as follows:
1005 : *
1006 : * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
1007 : *
1008 : * so we replace the selectivities of all implied attributes with
1009 : * conditional probabilities, that are conditional on all their implying
1010 : * attributes. The selectivities of all other non-implied attributes are
1011 : * left as they are.
1012 : */
1013 3486 : for (i = ndependencies - 1; i >= 0; i--)
1014 : {
1015 1890 : MVDependency *dependency = dependencies[i];
1016 : AttrNumber attnum;
1017 : Selectivity s2;
1018 : double f;
1019 :
1020 : /* Selectivity of all the implying attributes */
1021 1890 : s1 = 1.0;
1022 4062 : for (j = 0; j < dependency->nattributes - 1; j++)
1023 : {
1024 2172 : attnum = dependency->attributes[j];
1025 2172 : attidx = bms_member_index(attnums, attnum);
1026 2172 : s1 *= attr_sel[attidx];
1027 : }
1028 :
1029 : /* Original selectivity of the implied attribute */
1030 1890 : attnum = dependency->attributes[j];
1031 1890 : attidx = bms_member_index(attnums, attnum);
1032 1890 : s2 = attr_sel[attidx];
1033 :
1034 : /*
1035 : * Replace s2 with the conditional probability s2 given s1, computed
1036 : * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
1037 : *
1038 : * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
1039 : *
1040 : * where P(a) = s1, the selectivity of the implying attributes, and
1041 : * P(b) = s2, the selectivity of the implied attribute.
1042 : */
1043 1890 : f = dependency->degree;
1044 :
1045 1890 : if (s1 <= s2)
1046 1782 : attr_sel[attidx] = f + (1 - f) * s2;
1047 : else
1048 108 : attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
1049 : }
1050 :
1051 : /*
1052 : * The overall selectivity of all the clauses on all these attributes is
1053 : * then the product of all the original (non-implied) probabilities and
1054 : * the new conditional (implied) probabilities.
1055 : */
1056 1596 : s1 = 1.0;
1057 5094 : for (i = 0; i < nattrs; i++)
1058 3498 : s1 *= attr_sel[i];
1059 :
1060 1596 : CLAMP_PROBABILITY(s1);
1061 :
1062 1596 : pfree(attr_sel);
1063 1596 : bms_free(attnums);
1064 :
1065 1596 : return s1;
1066 : }
1067 :
1068 : /*
1069 : * dependency_is_compatible_expression
1070 : * Determines if the expression is compatible with functional dependencies
1071 : *
1072 : * Similar to dependency_is_compatible_clause, but doesn't enforce that the
1073 : * expression is a simple Var. On success, return the matching statistics
1074 : * expression into *expr.
1075 : */
1076 : static bool
1077 642 : dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
1078 : {
1079 : ListCell *lc,
1080 : *lc2;
1081 : Node *clause_expr;
1082 :
1083 642 : if (IsA(clause, RestrictInfo))
1084 : {
1085 552 : RestrictInfo *rinfo = (RestrictInfo *) clause;
1086 :
1087 : /* Pseudoconstants are not interesting (they couldn't contain a Var) */
1088 552 : if (rinfo->pseudoconstant)
1089 6 : return false;
1090 :
1091 : /* Clauses referencing multiple, or no, varnos are incompatible */
1092 546 : if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
1093 0 : return false;
1094 :
1095 546 : clause = (Node *) rinfo->clause;
1096 : }
1097 :
1098 636 : if (is_opclause(clause))
1099 : {
1100 : /* If it's an opclause, check for Var = Const or Const = Var. */
1101 204 : OpExpr *expr = (OpExpr *) clause;
1102 :
1103 : /* Only expressions with two arguments are candidates. */
1104 204 : if (list_length(expr->args) != 2)
1105 0 : return false;
1106 :
1107 : /* Make sure non-selected argument is a pseudoconstant. */
1108 204 : if (is_pseudo_constant_clause(lsecond(expr->args)))
1109 204 : clause_expr = linitial(expr->args);
1110 0 : else if (is_pseudo_constant_clause(linitial(expr->args)))
1111 0 : clause_expr = lsecond(expr->args);
1112 : else
1113 0 : return false;
1114 :
1115 : /*
1116 : * If it's not an "=" operator, just ignore the clause, as it's not
1117 : * compatible with functional dependencies.
1118 : *
1119 : * This uses the function for estimating selectivity, not the operator
1120 : * directly (a bit awkward, but well ...).
1121 : *
1122 : * XXX this is pretty dubious; probably it'd be better to check btree
1123 : * or hash opclass membership, so as not to be fooled by custom
1124 : * selectivity functions, and to be more consistent with decisions
1125 : * elsewhere in the planner.
1126 : */
1127 204 : if (get_oprrest(expr->opno) != F_EQSEL)
1128 36 : return false;
1129 :
1130 : /* OK to proceed with checking "var" */
1131 : }
1132 432 : else if (IsA(clause, ScalarArrayOpExpr))
1133 : {
1134 : /* If it's a scalar array operator, check for Var IN Const. */
1135 390 : ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1136 :
1137 : /*
1138 : * Reject ALL() variant, we only care about ANY/IN.
1139 : *
1140 : * FIXME Maybe we should check if all the values are the same, and
1141 : * allow ALL in that case? Doesn't seem very practical, though.
1142 : */
1143 390 : if (!expr->useOr)
1144 36 : return false;
1145 :
1146 : /* Only expressions with two arguments are candidates. */
1147 354 : if (list_length(expr->args) != 2)
1148 0 : return false;
1149 :
1150 : /*
1151 : * We know it's always (Var IN Const), so we assume the var is the
1152 : * first argument, and pseudoconstant is the second one.
1153 : */
1154 354 : if (!is_pseudo_constant_clause(lsecond(expr->args)))
1155 0 : return false;
1156 :
1157 354 : clause_expr = linitial(expr->args);
1158 :
1159 : /*
1160 : * If it's not an "=" operator, just ignore the clause, as it's not
1161 : * compatible with functional dependencies. The operator is identified
1162 : * simply by looking at which function it uses to estimate
1163 : * selectivity. That's a bit strange, but it's what other similar
1164 : * places do.
1165 : */
1166 354 : if (get_oprrest(expr->opno) != F_EQSEL)
1167 180 : return false;
1168 :
1169 : /* OK to proceed with checking "var" */
1170 : }
1171 42 : else if (is_orclause(clause))
1172 : {
1173 42 : BoolExpr *bool_expr = (BoolExpr *) clause;
1174 :
1175 : /* start with no expression (we'll use the first match) */
1176 42 : *expr = NULL;
1177 :
1178 120 : foreach(lc, bool_expr->args)
1179 : {
1180 90 : Node *or_expr = NULL;
1181 :
1182 : /*
1183 : * Had we found incompatible expression in the arguments, treat
1184 : * the whole expression as incompatible.
1185 : */
1186 90 : if (!dependency_is_compatible_expression((Node *) lfirst(lc), relid,
1187 : statlist, &or_expr))
1188 12 : return false;
1189 :
1190 84 : if (*expr == NULL)
1191 36 : *expr = or_expr;
1192 :
1193 : /* ensure all the expressions are the same */
1194 84 : if (!equal(or_expr, *expr))
1195 6 : return false;
1196 : }
1197 :
1198 : /* the expression is already checked by the recursive call */
1199 30 : return true;
1200 : }
1201 0 : else if (is_notclause(clause))
1202 : {
1203 : /*
1204 : * "NOT x" can be interpreted as "x = false", so get the argument and
1205 : * proceed with seeing if it's a suitable Var.
1206 : */
1207 0 : clause_expr = (Node *) get_notclausearg(clause);
1208 : }
1209 : else
1210 : {
1211 : /*
1212 : * A boolean expression "x" can be interpreted as "x = true", so
1213 : * proceed with seeing if it's a suitable Var.
1214 : */
1215 0 : clause_expr = (Node *) clause;
1216 : }
1217 :
1218 : /*
1219 : * We may ignore any RelabelType node above the operand. (There won't be
1220 : * more than one, since eval_const_expressions has been applied already.)
1221 : */
1222 342 : if (IsA(clause_expr, RelabelType))
1223 0 : clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
1224 :
1225 : /*
1226 : * Search for a matching statistics expression.
1227 : */
1228 348 : foreach(lc, statlist)
1229 : {
1230 342 : StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
1231 :
1232 : /* ignore stats without dependencies */
1233 342 : if (info->kind != STATS_EXT_DEPENDENCIES)
1234 0 : continue;
1235 :
1236 558 : foreach(lc2, info->exprs)
1237 : {
1238 552 : Node *stat_expr = (Node *) lfirst(lc2);
1239 :
1240 552 : if (equal(clause_expr, stat_expr))
1241 : {
1242 336 : *expr = stat_expr;
1243 336 : return true;
1244 : }
1245 : }
1246 : }
1247 :
1248 6 : return false;
1249 : }
1250 :
1251 : /*
1252 : * dependencies_clauselist_selectivity
1253 : * Return the estimated selectivity of (a subset of) the given clauses
1254 : * using functional dependency statistics, or 1.0 if no useful functional
1255 : * dependency statistic exists.
1256 : *
1257 : * 'estimatedclauses' is an input/output argument that gets a bit set
1258 : * corresponding to the (zero-based) list index of each clause that is included
1259 : * in the estimated selectivity.
1260 : *
1261 : * Given equality clauses on attributes (a,b) we find the strongest dependency
1262 : * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
1263 : * dependency, we then combine the per-clause selectivities using the formula
1264 : *
1265 : * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
1266 : *
1267 : * where 'f' is the degree of the dependency. (Actually we use a slightly
1268 : * modified version of this formula -- see clauselist_apply_dependencies()).
1269 : *
1270 : * With clauses on more than two attributes, the dependencies are applied
1271 : * recursively, starting with the widest/strongest dependencies. For example
1272 : * P(a,b,c) is first split like this:
1273 : *
1274 : * P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
1275 : *
1276 : * assuming (a,b=>c) is the strongest dependency.
1277 : */
1278 : Selectivity
1279 2436 : dependencies_clauselist_selectivity(PlannerInfo *root,
1280 : List *clauses,
1281 : int varRelid,
1282 : JoinType jointype,
1283 : SpecialJoinInfo *sjinfo,
1284 : RelOptInfo *rel,
1285 : Bitmapset **estimatedclauses)
1286 : {
1287 2436 : Selectivity s1 = 1.0;
1288 : ListCell *l;
1289 2436 : Bitmapset *clauses_attnums = NULL;
1290 : AttrNumber *list_attnums;
1291 : int listidx;
1292 : MVDependencies **func_dependencies;
1293 : int nfunc_dependencies;
1294 : int total_ndeps;
1295 : MVDependency **dependencies;
1296 : int ndependencies;
1297 : int i;
1298 : AttrNumber attnum_offset;
1299 2436 : RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
1300 :
1301 : /* unique expressions */
1302 : Node **unique_exprs;
1303 : int unique_exprs_cnt;
1304 :
1305 : /* check if there's any stats that might be useful for us. */
1306 2436 : if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
1307 648 : return 1.0;
1308 :
1309 1788 : list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
1310 1788 : list_length(clauses));
1311 :
1312 : /*
1313 : * We allocate space as if every clause was a unique expression, although
1314 : * that's probably overkill. Some will be simple column references that
1315 : * we'll translate to attnums, and there might be duplicates. But it's
1316 : * easier and cheaper to just do one allocation than repalloc later.
1317 : */
1318 1788 : unique_exprs = (Node **) palloc(sizeof(Node *) * list_length(clauses));
1319 1788 : unique_exprs_cnt = 0;
1320 :
1321 : /*
1322 : * Pre-process the clauses list to extract the attnums seen in each item.
1323 : * We need to determine if there's any clauses which will be useful for
1324 : * dependency selectivity estimations. Along the way we'll record all of
1325 : * the attnums for each clause in a list which we'll reference later so we
1326 : * don't need to repeat the same work again. We'll also keep track of all
1327 : * attnums seen.
1328 : *
1329 : * We also skip clauses that we already estimated using different types of
1330 : * statistics (we treat them as incompatible).
1331 : *
1332 : * To handle expressions, we assign them negative attnums, as if it was a
1333 : * system attribute (this is fine, as we only allow extended stats on user
1334 : * attributes). And then we offset everything by the number of
1335 : * expressions, so that we can store the values in a bitmapset.
1336 : */
1337 1788 : listidx = 0;
1338 5670 : foreach(l, clauses)
1339 : {
1340 3882 : Node *clause = (Node *) lfirst(l);
1341 : AttrNumber attnum;
1342 3882 : Node *expr = NULL;
1343 :
1344 : /* ignore clause by default */
1345 3882 : list_attnums[listidx] = InvalidAttrNumber;
1346 :
1347 3882 : if (!bms_is_member(listidx, *estimatedclauses))
1348 : {
1349 : /*
1350 : * If it's a simple column reference, just extract the attnum. If
1351 : * it's an expression, assign a negative attnum as if it was a
1352 : * system attribute.
1353 : */
1354 3840 : if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
1355 : {
1356 3288 : list_attnums[listidx] = attnum;
1357 : }
1358 552 : else if (dependency_is_compatible_expression(clause, rel->relid,
1359 : rel->statlist,
1360 : &expr))
1361 : {
1362 : /* special attnum assigned to this expression */
1363 282 : attnum = InvalidAttrNumber;
1364 :
1365 : Assert(expr != NULL);
1366 :
1367 : /* If the expression is duplicate, use the same attnum. */
1368 474 : for (i = 0; i < unique_exprs_cnt; i++)
1369 : {
1370 192 : if (equal(unique_exprs[i], expr))
1371 : {
1372 : /* negative attribute number to expression */
1373 0 : attnum = -(i + 1);
1374 0 : break;
1375 : }
1376 : }
1377 :
1378 : /* not found in the list, so add it */
1379 282 : if (attnum == InvalidAttrNumber)
1380 : {
1381 282 : unique_exprs[unique_exprs_cnt++] = expr;
1382 :
1383 : /* after incrementing the value, to get -1, -2, ... */
1384 282 : attnum = (-unique_exprs_cnt);
1385 : }
1386 :
1387 : /* remember which attnum was assigned to this clause */
1388 282 : list_attnums[listidx] = attnum;
1389 : }
1390 : }
1391 :
1392 3882 : listidx++;
1393 : }
1394 :
1395 : Assert(listidx == list_length(clauses));
1396 :
1397 : /*
1398 : * How much we need to offset the attnums? If there are no expressions,
1399 : * then no offset is needed. Otherwise we need to offset enough for the
1400 : * lowest value (-unique_exprs_cnt) to become 1.
1401 : */
1402 1788 : if (unique_exprs_cnt > 0)
1403 132 : attnum_offset = (unique_exprs_cnt + 1);
1404 : else
1405 1656 : attnum_offset = 0;
1406 :
1407 : /*
1408 : * Now that we know how many expressions there are, we can offset the
1409 : * values just enough to build the bitmapset.
1410 : */
1411 5670 : for (i = 0; i < list_length(clauses); i++)
1412 : {
1413 : AttrNumber attnum;
1414 :
1415 : /* ignore incompatible or already estimated clauses */
1416 3882 : if (list_attnums[i] == InvalidAttrNumber)
1417 312 : continue;
1418 :
1419 : /* make sure the attnum is in the expected range */
1420 : Assert(list_attnums[i] >= (-unique_exprs_cnt));
1421 : Assert(list_attnums[i] <= MaxHeapAttributeNumber);
1422 :
1423 : /* make sure the attnum is positive (valid AttrNumber) */
1424 3570 : attnum = list_attnums[i] + attnum_offset;
1425 :
1426 : /*
1427 : * Either it's a regular attribute, or it's an expression, in which
1428 : * case we must not have seen it before (expressions are unique).
1429 : *
1430 : * XXX Check whether it's a regular attribute has to be done using the
1431 : * original attnum, while the second check has to use the value with
1432 : * an offset.
1433 : */
1434 : Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
1435 : !bms_is_member(attnum, clauses_attnums));
1436 :
1437 : /*
1438 : * Remember the offset attnum, both for attributes and expressions.
1439 : * We'll pass list_attnums to clauselist_apply_dependencies, which
1440 : * uses it to identify clauses in a bitmap. We could also pass the
1441 : * offset, but this is more convenient.
1442 : */
1443 3570 : list_attnums[i] = attnum;
1444 :
1445 3570 : clauses_attnums = bms_add_member(clauses_attnums, attnum);
1446 : }
1447 :
1448 : /*
1449 : * If there's not at least two distinct attnums and expressions, then
1450 : * reject the whole list of clauses. We must return 1.0 so the calling
1451 : * function's selectivity is unaffected.
1452 : */
1453 1788 : if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
1454 : {
1455 192 : bms_free(clauses_attnums);
1456 192 : pfree(list_attnums);
1457 192 : return 1.0;
1458 : }
1459 :
1460 : /*
1461 : * Load all functional dependencies matching at least two parameters. We
1462 : * can simply consider all dependencies at once, without having to search
1463 : * for the best statistics object.
1464 : *
1465 : * To not waste cycles and memory, we deserialize dependencies only for
1466 : * statistics that match at least two attributes. The array is allocated
1467 : * with the assumption that all objects match - we could grow the array to
1468 : * make it just the right size, but it's likely wasteful anyway thanks to
1469 : * moving the freed chunks to freelists etc.
1470 : */
1471 1596 : func_dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
1472 1596 : list_length(rel->statlist));
1473 1596 : nfunc_dependencies = 0;
1474 1596 : total_ndeps = 0;
1475 :
1476 3330 : foreach(l, rel->statlist)
1477 : {
1478 1734 : StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
1479 : int nmatched;
1480 : int nexprs;
1481 : int k;
1482 : MVDependencies *deps;
1483 :
1484 : /* skip statistics that are not of the correct type */
1485 1734 : if (stat->kind != STATS_EXT_DEPENDENCIES)
1486 108 : continue;
1487 :
1488 : /* skip statistics with mismatching stxdinherit value */
1489 1626 : if (stat->inherit != rte->inh)
1490 0 : continue;
1491 :
1492 : /*
1493 : * Count matching attributes - we have to undo the attnum offsets. The
1494 : * input attribute numbers are not offset (expressions are not
1495 : * included in stat->keys, so it's not necessary). But we need to
1496 : * offset it before checking against clauses_attnums.
1497 : */
1498 1626 : nmatched = 0;
1499 1626 : k = -1;
1500 6120 : while ((k = bms_next_member(stat->keys, k)) >= 0)
1501 : {
1502 4494 : AttrNumber attnum = (AttrNumber) k;
1503 :
1504 : /* skip expressions */
1505 4494 : if (!AttrNumberIsForUserDefinedAttr(attnum))
1506 0 : continue;
1507 :
1508 : /* apply the same offset as above */
1509 4494 : attnum += attnum_offset;
1510 :
1511 4494 : if (bms_is_member(attnum, clauses_attnums))
1512 3240 : nmatched++;
1513 : }
1514 :
1515 : /* count matching expressions */
1516 1626 : nexprs = 0;
1517 1884 : for (i = 0; i < unique_exprs_cnt; i++)
1518 : {
1519 : ListCell *lc;
1520 :
1521 1032 : foreach(lc, stat->exprs)
1522 : {
1523 774 : Node *stat_expr = (Node *) lfirst(lc);
1524 :
1525 : /* try to match it */
1526 774 : if (equal(stat_expr, unique_exprs[i]))
1527 258 : nexprs++;
1528 : }
1529 : }
1530 :
1531 : /*
1532 : * Skip objects matching fewer than two attributes/expressions from
1533 : * clauses.
1534 : */
1535 1626 : if (nmatched + nexprs < 2)
1536 18 : continue;
1537 :
1538 1608 : deps = statext_dependencies_load(stat->statOid, rte->inh);
1539 :
1540 : /*
1541 : * The expressions may be represented by different attnums in the
1542 : * stats, we need to remap them to be consistent with the clauses.
1543 : * That will make the later steps (e.g. picking the strongest item and
1544 : * so on) much simpler and cheaper, because it won't need to care
1545 : * about the offset at all.
1546 : *
1547 : * When we're at it, we can ignore dependencies that are not fully
1548 : * matched by clauses (i.e. referencing attributes or expressions that
1549 : * are not in the clauses).
1550 : *
1551 : * We have to do this for all statistics, as long as there are any
1552 : * expressions - we need to shift the attnums in all dependencies.
1553 : *
1554 : * XXX Maybe we should do this always, because it also eliminates some
1555 : * of the dependencies early. It might be cheaper than having to walk
1556 : * the longer list in find_strongest_dependency later, especially as
1557 : * we need to do that repeatedly?
1558 : *
1559 : * XXX We have to do this even when there are no expressions in
1560 : * clauses, otherwise find_strongest_dependency may fail for stats
1561 : * with expressions (due to lookup of negative value in bitmap). So we
1562 : * need to at least filter out those dependencies. Maybe we could do
1563 : * it in a cheaper way (if there are no expr clauses, we can just
1564 : * discard all negative attnums without any lookups).
1565 : */
1566 1608 : if (unique_exprs_cnt > 0 || stat->exprs != NIL)
1567 : {
1568 108 : int ndeps = 0;
1569 :
1570 648 : for (i = 0; i < deps->ndeps; i++)
1571 : {
1572 540 : bool skip = false;
1573 540 : MVDependency *dep = deps->deps[i];
1574 : int j;
1575 :
1576 1506 : for (j = 0; j < dep->nattributes; j++)
1577 : {
1578 : int idx;
1579 : Node *expr;
1580 1230 : AttrNumber unique_attnum = InvalidAttrNumber;
1581 : AttrNumber attnum;
1582 :
1583 : /* undo the per-statistics offset */
1584 1230 : attnum = dep->attributes[j];
1585 :
1586 : /*
1587 : * For regular attributes we can simply check if it
1588 : * matches any clause. If there's no matching clause, we
1589 : * can just ignore it. We need to offset the attnum
1590 : * though.
1591 : */
1592 1230 : if (AttrNumberIsForUserDefinedAttr(attnum))
1593 : {
1594 0 : dep->attributes[j] = attnum + attnum_offset;
1595 :
1596 0 : if (!bms_is_member(dep->attributes[j], clauses_attnums))
1597 : {
1598 0 : skip = true;
1599 0 : break;
1600 : }
1601 :
1602 0 : continue;
1603 : }
1604 :
1605 : /*
1606 : * the attnum should be a valid system attnum (-1, -2,
1607 : * ...)
1608 : */
1609 : Assert(AttributeNumberIsValid(attnum));
1610 :
1611 : /*
1612 : * For expressions, we need to do two translations. First
1613 : * we have to translate the negative attnum to index in
1614 : * the list of expressions (in the statistics object).
1615 : * Then we need to see if there's a matching clause. The
1616 : * index of the unique expression determines the attnum
1617 : * (and we offset it).
1618 : */
1619 1230 : idx = -(1 + attnum);
1620 :
1621 : /* Is the expression index is valid? */
1622 : Assert((idx >= 0) && (idx < list_length(stat->exprs)));
1623 :
1624 1230 : expr = (Node *) list_nth(stat->exprs, idx);
1625 :
1626 : /* try to find the expression in the unique list */
1627 2460 : for (int m = 0; m < unique_exprs_cnt; m++)
1628 : {
1629 : /*
1630 : * found a matching unique expression, use the attnum
1631 : * (derived from index of the unique expression)
1632 : */
1633 2196 : if (equal(unique_exprs[m], expr))
1634 : {
1635 966 : unique_attnum = -(m + 1) + attnum_offset;
1636 966 : break;
1637 : }
1638 : }
1639 :
1640 : /*
1641 : * Found no matching expression, so we can simply skip
1642 : * this dependency, because there's no chance it will be
1643 : * fully covered.
1644 : */
1645 1230 : if (unique_attnum == InvalidAttrNumber)
1646 : {
1647 264 : skip = true;
1648 264 : break;
1649 : }
1650 :
1651 : /* otherwise remap it to the new attnum */
1652 966 : dep->attributes[j] = unique_attnum;
1653 : }
1654 :
1655 : /* if found a matching dependency, keep it */
1656 540 : if (!skip)
1657 : {
1658 : /* maybe we've skipped something earlier, so move it */
1659 276 : if (ndeps != i)
1660 0 : deps->deps[ndeps] = deps->deps[i];
1661 :
1662 276 : ndeps++;
1663 : }
1664 : }
1665 :
1666 108 : deps->ndeps = ndeps;
1667 : }
1668 :
1669 : /*
1670 : * It's possible we've removed all dependencies, in which case we
1671 : * don't bother adding it to the list.
1672 : */
1673 1608 : if (deps->ndeps > 0)
1674 : {
1675 1608 : func_dependencies[nfunc_dependencies] = deps;
1676 1608 : total_ndeps += deps->ndeps;
1677 1608 : nfunc_dependencies++;
1678 : }
1679 : }
1680 :
1681 : /* if no matching stats could be found then we've nothing to do */
1682 1596 : if (nfunc_dependencies == 0)
1683 : {
1684 0 : pfree(func_dependencies);
1685 0 : bms_free(clauses_attnums);
1686 0 : pfree(list_attnums);
1687 0 : pfree(unique_exprs);
1688 0 : return 1.0;
1689 : }
1690 :
1691 : /*
1692 : * Work out which dependencies we can apply, starting with the
1693 : * widest/strongest ones, and proceeding to smaller/weaker ones.
1694 : */
1695 1596 : dependencies = (MVDependency **) palloc(sizeof(MVDependency *) *
1696 : total_ndeps);
1697 1596 : ndependencies = 0;
1698 :
1699 : while (true)
1700 1890 : {
1701 : MVDependency *dependency;
1702 : AttrNumber attnum;
1703 :
1704 : /* the widest/strongest dependency, fully matched by clauses */
1705 3486 : dependency = find_strongest_dependency(func_dependencies,
1706 : nfunc_dependencies,
1707 : clauses_attnums);
1708 3486 : if (!dependency)
1709 1596 : break;
1710 :
1711 1890 : dependencies[ndependencies++] = dependency;
1712 :
1713 : /* Ignore dependencies using this implied attribute in later loops */
1714 1890 : attnum = dependency->attributes[dependency->nattributes - 1];
1715 1890 : clauses_attnums = bms_del_member(clauses_attnums, attnum);
1716 : }
1717 :
1718 : /*
1719 : * If we found applicable dependencies, use them to estimate all
1720 : * compatible clauses on attributes that they refer to.
1721 : */
1722 1596 : if (ndependencies != 0)
1723 1596 : s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
1724 : sjinfo, dependencies, ndependencies,
1725 : list_attnums, estimatedclauses);
1726 :
1727 : /* free deserialized functional dependencies (and then the array) */
1728 3204 : for (i = 0; i < nfunc_dependencies; i++)
1729 1608 : pfree(func_dependencies[i]);
1730 :
1731 1596 : pfree(dependencies);
1732 1596 : pfree(func_dependencies);
1733 1596 : bms_free(clauses_attnums);
1734 1596 : pfree(list_attnums);
1735 1596 : pfree(unique_exprs);
1736 :
1737 1596 : return s1;
1738 : }
|