Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * dependencies.c
4 : * POSTGRES functional dependencies
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * src/backend/statistics/dependencies.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include "access/htup_details.h"
17 : #include "catalog/pg_statistic_ext.h"
18 : #include "catalog/pg_statistic_ext_data.h"
19 : #include "nodes/nodeFuncs.h"
20 : #include "optimizer/clauses.h"
21 : #include "optimizer/optimizer.h"
22 : #include "parser/parsetree.h"
23 : #include "statistics/extended_stats_internal.h"
24 : #include "utils/fmgroids.h"
25 : #include "utils/lsyscache.h"
26 : #include "utils/memutils.h"
27 : #include "utils/selfuncs.h"
28 : #include "utils/syscache.h"
29 : #include "utils/typcache.h"
30 :
31 : /* size of the struct header fields (magic, type, ndeps) */
32 : #define SizeOfHeader (3 * sizeof(uint32))
33 :
34 : /* size of a serialized dependency (degree, natts, atts) */
35 : #define SizeOfItem(natts) \
36 : (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
37 :
38 : /* minimal size of a dependency (with two attributes) */
39 : #define MinSizeOfItem SizeOfItem(2)
40 :
41 : /* minimal size of dependencies, when all deps are minimal */
42 : #define MinSizeOfItems(ndeps) \
43 : (SizeOfHeader + (ndeps) * MinSizeOfItem)
44 :
45 : /*
46 : * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
47 : * k-permutations of n elements, except that the order does not matter for the
48 : * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
49 : */
50 : typedef struct DependencyGeneratorData
51 : {
52 : int k; /* size of the dependency */
53 : int n; /* number of possible attributes */
54 : int current; /* next dependency to return (index) */
55 : AttrNumber ndependencies; /* number of dependencies generated */
56 : AttrNumber *dependencies; /* array of pre-generated dependencies */
57 : } DependencyGeneratorData;
58 :
59 : typedef DependencyGeneratorData *DependencyGenerator;
60 :
61 : static void generate_dependencies_recurse(DependencyGenerator state,
62 : int index, AttrNumber start, AttrNumber *current);
63 : static void generate_dependencies(DependencyGenerator state);
64 : static DependencyGenerator DependencyGenerator_init(int n, int k);
65 : static void DependencyGenerator_free(DependencyGenerator state);
66 : static AttrNumber *DependencyGenerator_next(DependencyGenerator state);
67 : static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency);
68 : static bool dependency_is_fully_matched(MVDependency *dependency,
69 : Bitmapset *attnums);
70 : static bool dependency_is_compatible_clause(Node *clause, Index relid,
71 : AttrNumber *attnum);
72 : static bool dependency_is_compatible_expression(Node *clause, Index relid,
73 : List *statlist, Node **expr);
74 : static MVDependency *find_strongest_dependency(MVDependencies **dependencies,
75 : int ndependencies, Bitmapset *attnums);
76 : static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
77 : int varRelid, JoinType jointype,
78 : SpecialJoinInfo *sjinfo,
79 : MVDependency **dependencies,
80 : int ndependencies,
81 : AttrNumber *list_attnums,
82 : Bitmapset **estimatedclauses);
83 :
84 : static void
85 834 : generate_dependencies_recurse(DependencyGenerator state, int index,
86 : AttrNumber start, AttrNumber *current)
87 : {
88 : /*
89 : * The generator handles the first (k-1) elements differently from the
90 : * last element.
91 : */
92 834 : if (index < (state->k - 1))
93 : {
94 : AttrNumber i;
95 :
96 : /*
97 : * The first (k-1) values have to be in ascending order, which we
98 : * generate recursively.
99 : */
100 :
101 978 : for (i = start; i < state->n; i++)
102 : {
103 636 : current[index] = i;
104 636 : generate_dependencies_recurse(state, (index + 1), (i + 1), current);
105 : }
106 : }
107 : else
108 : {
109 : int i;
110 :
111 : /*
112 : * the last element is the implied value, which does not respect the
113 : * ascending order. We just need to check that the value is not in the
114 : * first (k-1) elements.
115 : */
116 :
117 1764 : for (i = 0; i < state->n; i++)
118 : {
119 : int j;
120 1272 : bool match = false;
121 :
122 1272 : current[index] = i;
123 :
124 2196 : for (j = 0; j < index; j++)
125 : {
126 1560 : if (current[j] == i)
127 : {
128 636 : match = true;
129 636 : break;
130 : }
131 : }
132 :
133 : /*
134 : * If the value is not found in the first part of the dependency,
135 : * we're done.
136 : */
137 1272 : if (!match)
138 : {
139 1272 : state->dependencies = (AttrNumber *) repalloc(state->dependencies,
140 636 : state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
141 636 : memcpy(&state->dependencies[(state->k * state->ndependencies)],
142 636 : current, state->k * sizeof(AttrNumber));
143 636 : state->ndependencies++;
144 : }
145 : }
146 : }
147 834 : }
148 :
149 : /* generate all dependencies (k-permutations of n elements) */
150 : static void
151 198 : generate_dependencies(DependencyGenerator state)
152 : {
153 198 : AttrNumber *current = palloc0_array(AttrNumber, state->k);
154 :
155 198 : generate_dependencies_recurse(state, 0, 0, current);
156 :
157 198 : pfree(current);
158 198 : }
159 :
160 : /*
161 : * initialize the DependencyGenerator of variations, and prebuild the variations
162 : *
163 : * This pre-builds all the variations. We could also generate them in
164 : * DependencyGenerator_next(), but this seems simpler.
165 : */
166 : static DependencyGenerator
167 198 : DependencyGenerator_init(int n, int k)
168 : {
169 : DependencyGenerator state;
170 :
171 : Assert((n >= k) && (k > 0));
172 :
173 : /* allocate the DependencyGenerator state */
174 198 : state = palloc0_object(DependencyGeneratorData);
175 198 : state->dependencies = palloc_array(AttrNumber, k);
176 :
177 198 : state->ndependencies = 0;
178 198 : state->current = 0;
179 198 : state->k = k;
180 198 : state->n = n;
181 :
182 : /* now actually pre-generate all the variations */
183 198 : generate_dependencies(state);
184 :
185 198 : return state;
186 : }
187 :
188 : /* free the DependencyGenerator state */
189 : static void
190 198 : DependencyGenerator_free(DependencyGenerator state)
191 : {
192 198 : pfree(state->dependencies);
193 198 : pfree(state);
194 198 : }
195 :
196 : /* generate next combination */
197 : static AttrNumber *
198 834 : DependencyGenerator_next(DependencyGenerator state)
199 : {
200 834 : if (state->current == state->ndependencies)
201 198 : return NULL;
202 :
203 636 : return &state->dependencies[state->k * state->current++];
204 : }
205 :
206 :
207 : /*
208 : * validates functional dependency on the data
209 : *
210 : * An actual work horse of detecting functional dependencies. Given a variation
211 : * of k attributes, it checks that the first (k-1) are sufficient to determine
212 : * the last one.
213 : */
214 : static double
215 636 : dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
216 : {
217 : int i,
218 : nitems;
219 : MultiSortSupport mss;
220 : SortItem *items;
221 : AttrNumber *attnums_dep;
222 :
223 : /* counters valid within a group */
224 636 : int group_size = 0;
225 636 : int n_violations = 0;
226 :
227 : /* total number of rows supporting (consistent with) the dependency */
228 636 : int n_supporting_rows = 0;
229 :
230 : /* Make sure we have at least two input attributes. */
231 : Assert(k >= 2);
232 :
233 : /* sort info for all attributes columns */
234 636 : mss = multi_sort_init(k);
235 :
236 : /*
237 : * Translate the array of indexes to regular attnums for the dependency
238 : * (we will need this to identify the columns in StatsBuildData).
239 : */
240 636 : attnums_dep = palloc_array(AttrNumber, k);
241 2052 : for (i = 0; i < k; i++)
242 1416 : attnums_dep[i] = data->attnums[dependency[i]];
243 :
244 : /*
245 : * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
246 : *
247 : * (a) sort the data lexicographically
248 : *
249 : * (b) split the data into groups by first (k-1) columns
250 : *
251 : * (c) for each group count different values in the last column
252 : *
253 : * We use the column data types' default sort operators and collations;
254 : * perhaps at some point it'd be worth using column-specific collations?
255 : */
256 :
257 : /* prepare the sort function for the dimensions */
258 2052 : for (i = 0; i < k; i++)
259 : {
260 1416 : VacAttrStats *colstat = data->stats[dependency[i]];
261 : TypeCacheEntry *type;
262 :
263 1416 : type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
264 1416 : if (type->lt_opr == InvalidOid) /* shouldn't happen */
265 0 : elog(ERROR, "cache lookup failed for ordering operator for type %u",
266 : colstat->attrtypid);
267 :
268 : /* prepare the sort function for this dimension */
269 1416 : multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
270 : }
271 :
272 : /*
273 : * build an array of SortItem(s) sorted using the multi-sort support
274 : *
275 : * XXX This relies on all stats entries pointing to the same tuple
276 : * descriptor. For now that assumption holds, but it might change in the
277 : * future for example if we support statistics on multiple tables.
278 : */
279 636 : items = build_sorted_items(data, &nitems, mss, k, attnums_dep);
280 :
281 : /*
282 : * Walk through the sorted array, split it into rows according to the
283 : * first (k-1) columns. If there's a single value in the last column, we
284 : * count the group as 'supporting' the functional dependency. Otherwise we
285 : * count it as contradicting.
286 : */
287 :
288 : /* start with the first row forming a group */
289 636 : group_size = 1;
290 :
291 : /* loop 1 beyond the end of the array so that we count the final group */
292 1505878 : for (i = 1; i <= nitems; i++)
293 : {
294 : /*
295 : * Check if the group ended, which may be either because we processed
296 : * all the items (i==nitems), or because the i-th item is not equal to
297 : * the preceding one.
298 : */
299 3009848 : if (i == nitems ||
300 1504606 : multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
301 : {
302 : /*
303 : * If no violations were found in the group then track the rows of
304 : * the group as supporting the functional dependency.
305 : */
306 32628 : if (n_violations == 0)
307 19938 : n_supporting_rows += group_size;
308 :
309 : /* Reset counters for the new group */
310 32628 : n_violations = 0;
311 32628 : group_size = 1;
312 32628 : continue;
313 : }
314 : /* first columns match, but the last one does not (so contradicting) */
315 1472614 : else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
316 60102 : n_violations++;
317 :
318 1472614 : group_size++;
319 : }
320 :
321 : /* Compute the 'degree of validity' as (supporting/total). */
322 636 : return (n_supporting_rows * 1.0 / data->numrows);
323 : }
324 :
325 : /*
326 : * detects functional dependencies between groups of columns
327 : *
328 : * Generates all possible subsets of columns (variations) and computes
329 : * the degree of validity for each one. For example when creating statistics
330 : * on three columns (a,b,c) there are 9 possible dependencies
331 : *
332 : * two columns three columns
333 : * ----------- -------------
334 : * (a) -> b (a,b) -> c
335 : * (a) -> c (a,c) -> b
336 : * (b) -> a (b,c) -> a
337 : * (b) -> c
338 : * (c) -> a
339 : * (c) -> b
340 : */
341 : MVDependencies *
342 150 : statext_dependencies_build(StatsBuildData *data)
343 : {
344 : int i,
345 : k;
346 :
347 : /* result */
348 150 : MVDependencies *dependencies = NULL;
349 : MemoryContext cxt;
350 :
351 : Assert(data->nattnums >= 2);
352 :
353 : /* tracks memory allocated by dependency_degree calls */
354 150 : cxt = AllocSetContextCreate(CurrentMemoryContext,
355 : "dependency_degree cxt",
356 : ALLOCSET_DEFAULT_SIZES);
357 :
358 : /*
359 : * We'll try build functional dependencies starting from the smallest ones
360 : * covering just 2 columns, to the largest ones, covering all columns
361 : * included in the statistics object. We start from the smallest ones
362 : * because we want to be able to skip already implied ones.
363 : */
364 348 : for (k = 2; k <= data->nattnums; k++)
365 : {
366 : AttrNumber *dependency; /* array with k elements */
367 :
368 : /* prepare a DependencyGenerator of variation */
369 198 : DependencyGenerator DependencyGenerator = DependencyGenerator_init(data->nattnums, k);
370 :
371 : /* generate all possible variations of k values (out of n) */
372 834 : while ((dependency = DependencyGenerator_next(DependencyGenerator)))
373 : {
374 : double degree;
375 : MVDependency *d;
376 : MemoryContext oldcxt;
377 :
378 : /* release memory used by dependency degree calculation */
379 636 : oldcxt = MemoryContextSwitchTo(cxt);
380 :
381 : /* compute how valid the dependency seems */
382 636 : degree = dependency_degree(data, k, dependency);
383 :
384 636 : MemoryContextSwitchTo(oldcxt);
385 636 : MemoryContextReset(cxt);
386 :
387 : /*
388 : * if the dependency seems entirely invalid, don't store it
389 : */
390 636 : if (degree == 0.0)
391 252 : continue;
392 :
393 384 : d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
394 384 : + k * sizeof(AttrNumber));
395 :
396 : /* copy the dependency (and keep the indexes into stxkeys) */
397 384 : d->degree = degree;
398 384 : d->nattributes = k;
399 1224 : for (i = 0; i < k; i++)
400 840 : d->attributes[i] = data->attnums[dependency[i]];
401 :
402 : /* initialize the list of dependencies */
403 384 : if (dependencies == NULL)
404 : {
405 132 : dependencies = palloc0_object(MVDependencies);
406 :
407 132 : dependencies->magic = STATS_DEPS_MAGIC;
408 132 : dependencies->type = STATS_DEPS_TYPE_BASIC;
409 132 : dependencies->ndeps = 0;
410 : }
411 :
412 384 : dependencies->ndeps++;
413 384 : dependencies = (MVDependencies *) repalloc(dependencies,
414 : offsetof(MVDependencies, deps)
415 384 : + dependencies->ndeps * sizeof(MVDependency *));
416 :
417 384 : dependencies->deps[dependencies->ndeps - 1] = d;
418 : }
419 :
420 : /*
421 : * we're done with variations of k elements, so free the
422 : * DependencyGenerator
423 : */
424 198 : DependencyGenerator_free(DependencyGenerator);
425 : }
426 :
427 150 : MemoryContextDelete(cxt);
428 :
429 150 : return dependencies;
430 : }
431 :
432 :
433 : /*
434 : * Serialize list of dependencies into a bytea value.
435 : */
436 : bytea *
437 174 : statext_dependencies_serialize(MVDependencies *dependencies)
438 : {
439 : int i;
440 : bytea *output;
441 : char *tmp;
442 : Size len;
443 :
444 : /* we need to store ndeps, with a number of attributes for each one */
445 174 : len = VARHDRSZ + SizeOfHeader;
446 :
447 : /* and also include space for the actual attribute numbers and degrees */
448 654 : for (i = 0; i < dependencies->ndeps; i++)
449 480 : len += SizeOfItem(dependencies->deps[i]->nattributes);
450 :
451 174 : output = (bytea *) palloc0(len);
452 174 : SET_VARSIZE(output, len);
453 :
454 174 : tmp = VARDATA(output);
455 :
456 : /* Store the base struct values (magic, type, ndeps) */
457 174 : memcpy(tmp, &dependencies->magic, sizeof(uint32));
458 174 : tmp += sizeof(uint32);
459 174 : memcpy(tmp, &dependencies->type, sizeof(uint32));
460 174 : tmp += sizeof(uint32);
461 174 : memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
462 174 : tmp += sizeof(uint32);
463 :
464 : /* store number of attributes and attribute numbers for each dependency */
465 654 : for (i = 0; i < dependencies->ndeps; i++)
466 : {
467 480 : MVDependency *d = dependencies->deps[i];
468 :
469 480 : memcpy(tmp, &d->degree, sizeof(double));
470 480 : tmp += sizeof(double);
471 :
472 480 : memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
473 480 : tmp += sizeof(AttrNumber);
474 :
475 480 : memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
476 480 : tmp += sizeof(AttrNumber) * d->nattributes;
477 :
478 : /* protect against overflow */
479 : Assert(tmp <= ((char *) output + len));
480 : }
481 :
482 : /* make sure we've produced exactly the right amount of data */
483 : Assert(tmp == ((char *) output + len));
484 :
485 174 : return output;
486 : }
487 :
488 : /*
489 : * Reads serialized dependencies into MVDependencies structure.
490 : */
491 : MVDependencies *
492 1660 : statext_dependencies_deserialize(bytea *data)
493 : {
494 : int i;
495 : Size min_expected_size;
496 : MVDependencies *dependencies;
497 : char *tmp;
498 :
499 1660 : if (data == NULL)
500 0 : return NULL;
501 :
502 1660 : if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
503 0 : elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
504 : VARSIZE_ANY_EXHDR(data), SizeOfHeader);
505 :
506 : /* read the MVDependencies header */
507 1660 : dependencies = palloc0_object(MVDependencies);
508 :
509 : /* initialize pointer to the data part (skip the varlena header) */
510 1660 : tmp = VARDATA_ANY(data);
511 :
512 : /* read the header fields and perform basic sanity checks */
513 1660 : memcpy(&dependencies->magic, tmp, sizeof(uint32));
514 1660 : tmp += sizeof(uint32);
515 1660 : memcpy(&dependencies->type, tmp, sizeof(uint32));
516 1660 : tmp += sizeof(uint32);
517 1660 : memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
518 1660 : tmp += sizeof(uint32);
519 :
520 1660 : if (dependencies->magic != STATS_DEPS_MAGIC)
521 0 : elog(ERROR, "invalid dependency magic %d (expected %d)",
522 : dependencies->magic, STATS_DEPS_MAGIC);
523 :
524 1660 : if (dependencies->type != STATS_DEPS_TYPE_BASIC)
525 0 : elog(ERROR, "invalid dependency type %d (expected %d)",
526 : dependencies->type, STATS_DEPS_TYPE_BASIC);
527 :
528 1660 : if (dependencies->ndeps == 0)
529 0 : elog(ERROR, "invalid zero-length item array in MVDependencies");
530 :
531 : /* what minimum bytea size do we expect for those parameters */
532 1660 : min_expected_size = SizeOfItem(dependencies->ndeps);
533 :
534 1660 : if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
535 0 : elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
536 : VARSIZE_ANY_EXHDR(data), min_expected_size);
537 :
538 : /* allocate space for the MCV items */
539 1660 : dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
540 1660 : + (dependencies->ndeps * sizeof(MVDependency *)));
541 :
542 9726 : for (i = 0; i < dependencies->ndeps; i++)
543 : {
544 : double degree;
545 : AttrNumber k;
546 : MVDependency *d;
547 :
548 : /* degree of validity */
549 8066 : memcpy(°ree, tmp, sizeof(double));
550 8066 : tmp += sizeof(double);
551 :
552 : /* number of attributes */
553 8066 : memcpy(&k, tmp, sizeof(AttrNumber));
554 8066 : tmp += sizeof(AttrNumber);
555 :
556 : /* is the number of attributes valid? */
557 : Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
558 :
559 : /* now that we know the number of attributes, allocate the dependency */
560 8066 : d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
561 8066 : + (k * sizeof(AttrNumber)));
562 :
563 8066 : d->degree = degree;
564 8066 : d->nattributes = k;
565 :
566 : /* copy attribute numbers */
567 8066 : memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
568 8066 : tmp += sizeof(AttrNumber) * d->nattributes;
569 :
570 8066 : dependencies->deps[i] = d;
571 :
572 : /* still within the bytea */
573 : Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
574 : }
575 :
576 : /* we should have consumed the whole bytea exactly */
577 : Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
578 :
579 1660 : return dependencies;
580 : }
581 :
582 : /*
583 : * dependency_is_fully_matched
584 : * checks that a functional dependency is fully matched given clauses on
585 : * attributes (assuming the clauses are suitable equality clauses)
586 : */
587 : static bool
588 6156 : dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
589 : {
590 : int j;
591 :
592 : /*
593 : * Check that the dependency actually is fully covered by clauses. We have
594 : * to translate all attribute numbers, as those are referenced
595 : */
596 15642 : for (j = 0; j < dependency->nattributes; j++)
597 : {
598 12558 : int attnum = dependency->attributes[j];
599 :
600 12558 : if (!bms_is_member(attnum, attnums))
601 3072 : return false;
602 : }
603 :
604 3084 : return true;
605 : }
606 :
607 : /*
608 : * statext_dependencies_load
609 : * Load the functional dependencies for the indicated pg_statistic_ext tuple
610 : */
611 : MVDependencies *
612 1608 : statext_dependencies_load(Oid mvoid, bool inh)
613 : {
614 : MVDependencies *result;
615 : bool isnull;
616 : Datum deps;
617 : HeapTuple htup;
618 :
619 1608 : htup = SearchSysCache2(STATEXTDATASTXOID,
620 : ObjectIdGetDatum(mvoid),
621 : BoolGetDatum(inh));
622 1608 : if (!HeapTupleIsValid(htup))
623 0 : elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
624 :
625 1608 : deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
626 : Anum_pg_statistic_ext_data_stxddependencies, &isnull);
627 1608 : if (isnull)
628 0 : elog(ERROR,
629 : "requested statistics kind \"%c\" is not yet built for statistics object %u",
630 : STATS_EXT_DEPENDENCIES, mvoid);
631 :
632 1608 : result = statext_dependencies_deserialize(DatumGetByteaPP(deps));
633 :
634 1608 : ReleaseSysCache(htup);
635 :
636 1608 : return result;
637 : }
638 :
639 : /*
640 : * dependency_is_compatible_clause
641 : * Determines if the clause is compatible with functional dependencies
642 : *
643 : * Only clauses that have the form of equality to a pseudoconstant, or can be
644 : * interpreted that way, are currently accepted. Furthermore the variable
645 : * part of the clause must be a simple Var belonging to the specified
646 : * relation, whose attribute number we return in *attnum on success.
647 : */
648 : static bool
649 3960 : dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
650 : {
651 : Var *var;
652 : Node *clause_expr;
653 :
654 3960 : if (IsA(clause, RestrictInfo))
655 : {
656 3840 : RestrictInfo *rinfo = (RestrictInfo *) clause;
657 :
658 : /* Pseudoconstants are not interesting (they couldn't contain a Var) */
659 3840 : if (rinfo->pseudoconstant)
660 6 : return false;
661 :
662 : /* Clauses referencing multiple, or no, varnos are incompatible */
663 3834 : if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
664 0 : return false;
665 :
666 3834 : clause = (Node *) rinfo->clause;
667 : }
668 :
669 3954 : if (is_opclause(clause))
670 : {
671 : /* If it's an opclause, check for Var = Const or Const = Var. */
672 1446 : OpExpr *expr = (OpExpr *) clause;
673 :
674 : /* Only expressions with two arguments are candidates. */
675 1446 : if (list_length(expr->args) != 2)
676 0 : return false;
677 :
678 : /* Make sure non-selected argument is a pseudoconstant. */
679 1446 : if (is_pseudo_constant_clause(lsecond(expr->args)))
680 1446 : clause_expr = linitial(expr->args);
681 0 : else if (is_pseudo_constant_clause(linitial(expr->args)))
682 0 : clause_expr = lsecond(expr->args);
683 : else
684 0 : return false;
685 :
686 : /*
687 : * If it's not an "=" operator, just ignore the clause, as it's not
688 : * compatible with functional dependencies.
689 : *
690 : * This uses the function for estimating selectivity, not the operator
691 : * directly (a bit awkward, but well ...).
692 : *
693 : * XXX this is pretty dubious; probably it'd be better to check btree
694 : * or hash opclass membership, so as not to be fooled by custom
695 : * selectivity functions, and to be more consistent with decisions
696 : * elsewhere in the planner.
697 : */
698 1446 : if (get_oprrest(expr->opno) != F_EQSEL)
699 36 : return false;
700 :
701 : /* OK to proceed with checking "var" */
702 : }
703 2508 : else if (IsA(clause, ScalarArrayOpExpr))
704 : {
705 : /* If it's a scalar array operator, check for Var IN Const. */
706 2436 : ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
707 :
708 : /*
709 : * Reject ALL() variant, we only care about ANY/IN.
710 : *
711 : * XXX Maybe we should check if all the values are the same, and allow
712 : * ALL in that case? Doesn't seem very practical, though.
713 : */
714 2436 : if (!expr->useOr)
715 36 : return false;
716 :
717 : /* Only expressions with two arguments are candidates. */
718 2400 : if (list_length(expr->args) != 2)
719 0 : return false;
720 :
721 : /*
722 : * We know it's always (Var IN Const), so we assume the var is the
723 : * first argument, and pseudoconstant is the second one.
724 : */
725 2400 : if (!is_pseudo_constant_clause(lsecond(expr->args)))
726 0 : return false;
727 :
728 2400 : clause_expr = linitial(expr->args);
729 :
730 : /*
731 : * If it's not an "=" operator, just ignore the clause, as it's not
732 : * compatible with functional dependencies. The operator is identified
733 : * simply by looking at which function it uses to estimate
734 : * selectivity. That's a bit strange, but it's what other similar
735 : * places do.
736 : */
737 2400 : if (get_oprrest(expr->opno) != F_EQSEL)
738 180 : return false;
739 :
740 : /* OK to proceed with checking "var" */
741 : }
742 72 : else if (is_orclause(clause))
743 : {
744 72 : BoolExpr *bool_expr = (BoolExpr *) clause;
745 : ListCell *lc;
746 :
747 : /* start with no attribute number */
748 72 : *attnum = InvalidAttrNumber;
749 :
750 150 : foreach(lc, bool_expr->args)
751 : {
752 : AttrNumber clause_attnum;
753 :
754 : /*
755 : * Had we found incompatible clause in the arguments, treat the
756 : * whole clause as incompatible.
757 : */
758 120 : if (!dependency_is_compatible_clause((Node *) lfirst(lc),
759 : relid, &clause_attnum))
760 42 : return false;
761 :
762 84 : if (*attnum == InvalidAttrNumber)
763 36 : *attnum = clause_attnum;
764 :
765 : /* ensure all the variables are the same (same attnum) */
766 84 : if (*attnum != clause_attnum)
767 6 : return false;
768 : }
769 :
770 : /* the Var is already checked by the recursive call */
771 30 : return true;
772 : }
773 0 : else if (is_notclause(clause))
774 : {
775 : /*
776 : * "NOT x" can be interpreted as "x = false", so get the argument and
777 : * proceed with seeing if it's a suitable Var.
778 : */
779 0 : clause_expr = (Node *) get_notclausearg(clause);
780 : }
781 : else
782 : {
783 : /*
784 : * A boolean expression "x" can be interpreted as "x = true", so
785 : * proceed with seeing if it's a suitable Var.
786 : */
787 0 : clause_expr = clause;
788 : }
789 :
790 : /*
791 : * We may ignore any RelabelType node above the operand. (There won't be
792 : * more than one, since eval_const_expressions has been applied already.)
793 : */
794 3630 : if (IsA(clause_expr, RelabelType))
795 0 : clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
796 :
797 : /* We only support plain Vars for now */
798 3630 : if (!IsA(clause_expr, Var))
799 288 : return false;
800 :
801 : /* OK, we know we have a Var */
802 3342 : var = (Var *) clause_expr;
803 :
804 : /* Ensure Var is from the correct relation */
805 3342 : if (var->varno != relid)
806 0 : return false;
807 :
808 : /* We also better ensure the Var is from the current level */
809 3342 : if (var->varlevelsup != 0)
810 0 : return false;
811 :
812 : /* Also ignore system attributes (we don't allow stats on those) */
813 3342 : if (!AttrNumberIsForUserDefinedAttr(var->varattno))
814 0 : return false;
815 :
816 3342 : *attnum = var->varattno;
817 3342 : return true;
818 : }
819 :
820 : /*
821 : * find_strongest_dependency
822 : * find the strongest dependency on the attributes
823 : *
824 : * When applying functional dependencies, we start with the strongest
825 : * dependencies. That is, we select the dependency that:
826 : *
827 : * (a) has all attributes covered by equality clauses
828 : *
829 : * (b) has the most attributes
830 : *
831 : * (c) has the highest degree of validity
832 : *
833 : * This guarantees that we eliminate the most redundant conditions first
834 : * (see the comment in dependencies_clauselist_selectivity).
835 : */
836 : static MVDependency *
837 3486 : find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
838 : Bitmapset *attnums)
839 : {
840 : int i,
841 : j;
842 3486 : MVDependency *strongest = NULL;
843 :
844 : /* number of attnums in clauses */
845 3486 : int nattnums = bms_num_members(attnums);
846 :
847 : /*
848 : * Iterate over the MVDependency items and find the strongest one from the
849 : * fully-matched dependencies. We do the cheap checks first, before
850 : * matching it against the attnums.
851 : */
852 7008 : for (i = 0; i < ndependencies; i++)
853 : {
854 20280 : for (j = 0; j < dependencies[i]->ndeps; j++)
855 : {
856 16758 : MVDependency *dependency = dependencies[i]->deps[j];
857 :
858 : /*
859 : * Skip dependencies referencing more attributes than available
860 : * clauses, as those can't be fully matched.
861 : */
862 16758 : if (dependency->nattributes > nattnums)
863 10602 : continue;
864 :
865 6156 : if (strongest)
866 : {
867 : /* skip dependencies on fewer attributes than the strongest. */
868 3936 : if (dependency->nattributes < strongest->nattributes)
869 0 : continue;
870 :
871 : /* also skip weaker dependencies when attribute count matches */
872 3936 : if (strongest->nattributes == dependency->nattributes &&
873 3654 : strongest->degree > dependency->degree)
874 0 : continue;
875 : }
876 :
877 : /*
878 : * this dependency is stronger, but we must still check that it's
879 : * fully matched to these attnums. We perform this check last as
880 : * it's slightly more expensive than the previous checks.
881 : */
882 6156 : if (dependency_is_fully_matched(dependency, attnums))
883 3084 : strongest = dependency; /* save new best match */
884 : }
885 : }
886 :
887 3486 : return strongest;
888 : }
889 :
890 : /*
891 : * clauselist_apply_dependencies
892 : * Apply the specified functional dependencies to a list of clauses and
893 : * return the estimated selectivity of the clauses that are compatible
894 : * with any of the given dependencies.
895 : *
896 : * This will estimate all not-already-estimated clauses that are compatible
897 : * with functional dependencies, and which have an attribute mentioned by any
898 : * of the given dependencies (either as an implying or implied attribute).
899 : *
900 : * Given (lists of) clauses on attributes (a,b) and a functional dependency
901 : * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
902 : * using the formula
903 : *
904 : * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
905 : *
906 : * where 'f' is the degree of dependency. This reflects the fact that we
907 : * expect a fraction f of all rows to be consistent with the dependency
908 : * (a=>b), and so have a selectivity of P(a), while the remaining rows are
909 : * treated as independent.
910 : *
911 : * In practice, we use a slightly modified version of this formula, which uses
912 : * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
913 : * should obviously not exceed either column's individual selectivity. I.e.,
914 : * we actually combine selectivities using the formula
915 : *
916 : * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
917 : *
918 : * This can make quite a difference if the specific values matching the
919 : * clauses are not consistent with the functional dependency.
920 : */
921 : static Selectivity
922 1596 : clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
923 : int varRelid, JoinType jointype,
924 : SpecialJoinInfo *sjinfo,
925 : MVDependency **dependencies, int ndependencies,
926 : AttrNumber *list_attnums,
927 : Bitmapset **estimatedclauses)
928 : {
929 : Bitmapset *attnums;
930 : int i;
931 : int j;
932 : int nattrs;
933 : Selectivity *attr_sel;
934 : int attidx;
935 : int listidx;
936 : ListCell *l;
937 : Selectivity s1;
938 :
939 : /*
940 : * Extract the attnums of all implying and implied attributes from all the
941 : * given dependencies. Each of these attributes is expected to have at
942 : * least 1 not-already-estimated compatible clause that we will estimate
943 : * here.
944 : */
945 1596 : attnums = NULL;
946 3486 : for (i = 0; i < ndependencies; i++)
947 : {
948 5952 : for (j = 0; j < dependencies[i]->nattributes; j++)
949 : {
950 4062 : AttrNumber attnum = dependencies[i]->attributes[j];
951 :
952 4062 : attnums = bms_add_member(attnums, attnum);
953 : }
954 : }
955 :
956 : /*
957 : * Compute per-column selectivity estimates for each of these attributes,
958 : * and mark all the corresponding clauses as estimated.
959 : */
960 1596 : nattrs = bms_num_members(attnums);
961 1596 : attr_sel = palloc_array(Selectivity, nattrs);
962 :
963 1596 : attidx = 0;
964 1596 : i = -1;
965 5094 : while ((i = bms_next_member(attnums, i)) >= 0)
966 : {
967 3498 : List *attr_clauses = NIL;
968 : Selectivity simple_sel;
969 :
970 3498 : listidx = -1;
971 11436 : foreach(l, clauses)
972 : {
973 7938 : Node *clause = (Node *) lfirst(l);
974 :
975 7938 : listidx++;
976 7938 : if (list_attnums[listidx] == i)
977 : {
978 3498 : attr_clauses = lappend(attr_clauses, clause);
979 3498 : *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
980 : }
981 : }
982 :
983 3498 : simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid,
984 : jointype, sjinfo, false);
985 3498 : attr_sel[attidx++] = simple_sel;
986 : }
987 :
988 : /*
989 : * Now combine these selectivities using the dependency information. For
990 : * chains of dependencies such as a -> b -> c, the b -> c dependency will
991 : * come before the a -> b dependency in the array, so we traverse the
992 : * array backwards to ensure such chains are computed in the right order.
993 : *
994 : * As explained above, pairs of selectivities are combined using the
995 : * formula
996 : *
997 : * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
998 : *
999 : * to ensure that the combined selectivity is never greater than either
1000 : * individual selectivity.
1001 : *
1002 : * Where multiple dependencies apply (e.g., a -> b -> c), we use
1003 : * conditional probabilities to compute the overall result as follows:
1004 : *
1005 : * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
1006 : *
1007 : * so we replace the selectivities of all implied attributes with
1008 : * conditional probabilities, that are conditional on all their implying
1009 : * attributes. The selectivities of all other non-implied attributes are
1010 : * left as they are.
1011 : */
1012 3486 : for (i = ndependencies - 1; i >= 0; i--)
1013 : {
1014 1890 : MVDependency *dependency = dependencies[i];
1015 : AttrNumber attnum;
1016 : Selectivity s2;
1017 : double f;
1018 :
1019 : /* Selectivity of all the implying attributes */
1020 1890 : s1 = 1.0;
1021 4062 : for (j = 0; j < dependency->nattributes - 1; j++)
1022 : {
1023 2172 : attnum = dependency->attributes[j];
1024 2172 : attidx = bms_member_index(attnums, attnum);
1025 2172 : s1 *= attr_sel[attidx];
1026 : }
1027 :
1028 : /* Original selectivity of the implied attribute */
1029 1890 : attnum = dependency->attributes[j];
1030 1890 : attidx = bms_member_index(attnums, attnum);
1031 1890 : s2 = attr_sel[attidx];
1032 :
1033 : /*
1034 : * Replace s2 with the conditional probability s2 given s1, computed
1035 : * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
1036 : *
1037 : * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
1038 : *
1039 : * where P(a) = s1, the selectivity of the implying attributes, and
1040 : * P(b) = s2, the selectivity of the implied attribute.
1041 : */
1042 1890 : f = dependency->degree;
1043 :
1044 1890 : if (s1 <= s2)
1045 1782 : attr_sel[attidx] = f + (1 - f) * s2;
1046 : else
1047 108 : attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
1048 : }
1049 :
1050 : /*
1051 : * The overall selectivity of all the clauses on all these attributes is
1052 : * then the product of all the original (non-implied) probabilities and
1053 : * the new conditional (implied) probabilities.
1054 : */
1055 1596 : s1 = 1.0;
1056 5094 : for (i = 0; i < nattrs; i++)
1057 3498 : s1 *= attr_sel[i];
1058 :
1059 1596 : CLAMP_PROBABILITY(s1);
1060 :
1061 1596 : pfree(attr_sel);
1062 1596 : bms_free(attnums);
1063 :
1064 1596 : return s1;
1065 : }
1066 :
1067 : /*
1068 : * dependency_is_compatible_expression
1069 : * Determines if the expression is compatible with functional dependencies
1070 : *
1071 : * Similar to dependency_is_compatible_clause, but doesn't enforce that the
1072 : * expression is a simple Var. On success, return the matching statistics
1073 : * expression into *expr.
1074 : */
1075 : static bool
1076 642 : dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
1077 : {
1078 : ListCell *lc,
1079 : *lc2;
1080 : Node *clause_expr;
1081 :
1082 642 : if (IsA(clause, RestrictInfo))
1083 : {
1084 552 : RestrictInfo *rinfo = (RestrictInfo *) clause;
1085 :
1086 : /* Pseudoconstants are not interesting (they couldn't contain a Var) */
1087 552 : if (rinfo->pseudoconstant)
1088 6 : return false;
1089 :
1090 : /* Clauses referencing multiple, or no, varnos are incompatible */
1091 546 : if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
1092 0 : return false;
1093 :
1094 546 : clause = (Node *) rinfo->clause;
1095 : }
1096 :
1097 636 : if (is_opclause(clause))
1098 : {
1099 : /* If it's an opclause, check for Var = Const or Const = Var. */
1100 204 : OpExpr *expr = (OpExpr *) clause;
1101 :
1102 : /* Only expressions with two arguments are candidates. */
1103 204 : if (list_length(expr->args) != 2)
1104 0 : return false;
1105 :
1106 : /* Make sure non-selected argument is a pseudoconstant. */
1107 204 : if (is_pseudo_constant_clause(lsecond(expr->args)))
1108 204 : clause_expr = linitial(expr->args);
1109 0 : else if (is_pseudo_constant_clause(linitial(expr->args)))
1110 0 : clause_expr = lsecond(expr->args);
1111 : else
1112 0 : return false;
1113 :
1114 : /*
1115 : * If it's not an "=" operator, just ignore the clause, as it's not
1116 : * compatible with functional dependencies.
1117 : *
1118 : * This uses the function for estimating selectivity, not the operator
1119 : * directly (a bit awkward, but well ...).
1120 : *
1121 : * XXX this is pretty dubious; probably it'd be better to check btree
1122 : * or hash opclass membership, so as not to be fooled by custom
1123 : * selectivity functions, and to be more consistent with decisions
1124 : * elsewhere in the planner.
1125 : */
1126 204 : if (get_oprrest(expr->opno) != F_EQSEL)
1127 36 : return false;
1128 :
1129 : /* OK to proceed with checking "var" */
1130 : }
1131 432 : else if (IsA(clause, ScalarArrayOpExpr))
1132 : {
1133 : /* If it's a scalar array operator, check for Var IN Const. */
1134 390 : ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1135 :
1136 : /*
1137 : * Reject ALL() variant, we only care about ANY/IN.
1138 : *
1139 : * FIXME Maybe we should check if all the values are the same, and
1140 : * allow ALL in that case? Doesn't seem very practical, though.
1141 : */
1142 390 : if (!expr->useOr)
1143 36 : return false;
1144 :
1145 : /* Only expressions with two arguments are candidates. */
1146 354 : if (list_length(expr->args) != 2)
1147 0 : return false;
1148 :
1149 : /*
1150 : * We know it's always (Var IN Const), so we assume the var is the
1151 : * first argument, and pseudoconstant is the second one.
1152 : */
1153 354 : if (!is_pseudo_constant_clause(lsecond(expr->args)))
1154 0 : return false;
1155 :
1156 354 : clause_expr = linitial(expr->args);
1157 :
1158 : /*
1159 : * If it's not an "=" operator, just ignore the clause, as it's not
1160 : * compatible with functional dependencies. The operator is identified
1161 : * simply by looking at which function it uses to estimate
1162 : * selectivity. That's a bit strange, but it's what other similar
1163 : * places do.
1164 : */
1165 354 : if (get_oprrest(expr->opno) != F_EQSEL)
1166 180 : return false;
1167 :
1168 : /* OK to proceed with checking "var" */
1169 : }
1170 42 : else if (is_orclause(clause))
1171 : {
1172 42 : BoolExpr *bool_expr = (BoolExpr *) clause;
1173 :
1174 : /* start with no expression (we'll use the first match) */
1175 42 : *expr = NULL;
1176 :
1177 120 : foreach(lc, bool_expr->args)
1178 : {
1179 90 : Node *or_expr = NULL;
1180 :
1181 : /*
1182 : * Had we found incompatible expression in the arguments, treat
1183 : * the whole expression as incompatible.
1184 : */
1185 90 : if (!dependency_is_compatible_expression((Node *) lfirst(lc), relid,
1186 : statlist, &or_expr))
1187 12 : return false;
1188 :
1189 84 : if (*expr == NULL)
1190 36 : *expr = or_expr;
1191 :
1192 : /* ensure all the expressions are the same */
1193 84 : if (!equal(or_expr, *expr))
1194 6 : return false;
1195 : }
1196 :
1197 : /* the expression is already checked by the recursive call */
1198 30 : return true;
1199 : }
1200 0 : else if (is_notclause(clause))
1201 : {
1202 : /*
1203 : * "NOT x" can be interpreted as "x = false", so get the argument and
1204 : * proceed with seeing if it's a suitable Var.
1205 : */
1206 0 : clause_expr = (Node *) get_notclausearg(clause);
1207 : }
1208 : else
1209 : {
1210 : /*
1211 : * A boolean expression "x" can be interpreted as "x = true", so
1212 : * proceed with seeing if it's a suitable Var.
1213 : */
1214 0 : clause_expr = clause;
1215 : }
1216 :
1217 : /*
1218 : * We may ignore any RelabelType node above the operand. (There won't be
1219 : * more than one, since eval_const_expressions has been applied already.)
1220 : */
1221 342 : if (IsA(clause_expr, RelabelType))
1222 0 : clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
1223 :
1224 : /*
1225 : * Search for a matching statistics expression.
1226 : */
1227 348 : foreach(lc, statlist)
1228 : {
1229 342 : StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
1230 :
1231 : /* ignore stats without dependencies */
1232 342 : if (info->kind != STATS_EXT_DEPENDENCIES)
1233 0 : continue;
1234 :
1235 558 : foreach(lc2, info->exprs)
1236 : {
1237 552 : Node *stat_expr = (Node *) lfirst(lc2);
1238 :
1239 552 : if (equal(clause_expr, stat_expr))
1240 : {
1241 336 : *expr = stat_expr;
1242 336 : return true;
1243 : }
1244 : }
1245 : }
1246 :
1247 6 : return false;
1248 : }
1249 :
1250 : /*
1251 : * dependencies_clauselist_selectivity
1252 : * Return the estimated selectivity of (a subset of) the given clauses
1253 : * using functional dependency statistics, or 1.0 if no useful functional
1254 : * dependency statistic exists.
1255 : *
1256 : * 'estimatedclauses' is an input/output argument that gets a bit set
1257 : * corresponding to the (zero-based) list index of each clause that is included
1258 : * in the estimated selectivity.
1259 : *
1260 : * Given equality clauses on attributes (a,b) we find the strongest dependency
1261 : * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
1262 : * dependency, we then combine the per-clause selectivities using the formula
1263 : *
1264 : * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
1265 : *
1266 : * where 'f' is the degree of the dependency. (Actually we use a slightly
1267 : * modified version of this formula -- see clauselist_apply_dependencies()).
1268 : *
1269 : * With clauses on more than two attributes, the dependencies are applied
1270 : * recursively, starting with the widest/strongest dependencies. For example
1271 : * P(a,b,c) is first split like this:
1272 : *
1273 : * P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
1274 : *
1275 : * assuming (a,b=>c) is the strongest dependency.
1276 : */
1277 : Selectivity
1278 2436 : dependencies_clauselist_selectivity(PlannerInfo *root,
1279 : List *clauses,
1280 : int varRelid,
1281 : JoinType jointype,
1282 : SpecialJoinInfo *sjinfo,
1283 : RelOptInfo *rel,
1284 : Bitmapset **estimatedclauses)
1285 : {
1286 2436 : Selectivity s1 = 1.0;
1287 : ListCell *l;
1288 2436 : Bitmapset *clauses_attnums = NULL;
1289 : AttrNumber *list_attnums;
1290 : int listidx;
1291 : MVDependencies **func_dependencies;
1292 : int nfunc_dependencies;
1293 : int total_ndeps;
1294 : MVDependency **dependencies;
1295 : int ndependencies;
1296 : int i;
1297 : AttrNumber attnum_offset;
1298 2436 : RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
1299 :
1300 : /* unique expressions */
1301 : Node **unique_exprs;
1302 : int unique_exprs_cnt;
1303 :
1304 : /* check if there's any stats that might be useful for us. */
1305 2436 : if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
1306 648 : return 1.0;
1307 :
1308 1788 : list_attnums = palloc_array(AttrNumber, list_length(clauses));
1309 :
1310 : /*
1311 : * We allocate space as if every clause was a unique expression, although
1312 : * that's probably overkill. Some will be simple column references that
1313 : * we'll translate to attnums, and there might be duplicates. But it's
1314 : * easier and cheaper to just do one allocation than repalloc later.
1315 : */
1316 1788 : unique_exprs = palloc_array(Node *, list_length(clauses));
1317 1788 : unique_exprs_cnt = 0;
1318 :
1319 : /*
1320 : * Pre-process the clauses list to extract the attnums seen in each item.
1321 : * We need to determine if there's any clauses which will be useful for
1322 : * dependency selectivity estimations. Along the way we'll record all of
1323 : * the attnums for each clause in a list which we'll reference later so we
1324 : * don't need to repeat the same work again. We'll also keep track of all
1325 : * attnums seen.
1326 : *
1327 : * We also skip clauses that we already estimated using different types of
1328 : * statistics (we treat them as incompatible).
1329 : *
1330 : * To handle expressions, we assign them negative attnums, as if it was a
1331 : * system attribute (this is fine, as we only allow extended stats on user
1332 : * attributes). And then we offset everything by the number of
1333 : * expressions, so that we can store the values in a bitmapset.
1334 : */
1335 1788 : listidx = 0;
1336 5670 : foreach(l, clauses)
1337 : {
1338 3882 : Node *clause = (Node *) lfirst(l);
1339 : AttrNumber attnum;
1340 3882 : Node *expr = NULL;
1341 :
1342 : /* ignore clause by default */
1343 3882 : list_attnums[listidx] = InvalidAttrNumber;
1344 :
1345 3882 : if (!bms_is_member(listidx, *estimatedclauses))
1346 : {
1347 : /*
1348 : * If it's a simple column reference, just extract the attnum. If
1349 : * it's an expression, assign a negative attnum as if it was a
1350 : * system attribute.
1351 : */
1352 3840 : if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
1353 : {
1354 3288 : list_attnums[listidx] = attnum;
1355 : }
1356 552 : else if (dependency_is_compatible_expression(clause, rel->relid,
1357 : rel->statlist,
1358 : &expr))
1359 : {
1360 : /* special attnum assigned to this expression */
1361 282 : attnum = InvalidAttrNumber;
1362 :
1363 : Assert(expr != NULL);
1364 :
1365 : /* If the expression is duplicate, use the same attnum. */
1366 474 : for (i = 0; i < unique_exprs_cnt; i++)
1367 : {
1368 192 : if (equal(unique_exprs[i], expr))
1369 : {
1370 : /* negative attribute number to expression */
1371 0 : attnum = -(i + 1);
1372 0 : break;
1373 : }
1374 : }
1375 :
1376 : /* not found in the list, so add it */
1377 282 : if (attnum == InvalidAttrNumber)
1378 : {
1379 282 : unique_exprs[unique_exprs_cnt++] = expr;
1380 :
1381 : /* after incrementing the value, to get -1, -2, ... */
1382 282 : attnum = (-unique_exprs_cnt);
1383 : }
1384 :
1385 : /* remember which attnum was assigned to this clause */
1386 282 : list_attnums[listidx] = attnum;
1387 : }
1388 : }
1389 :
1390 3882 : listidx++;
1391 : }
1392 :
1393 : Assert(listidx == list_length(clauses));
1394 :
1395 : /*
1396 : * How much we need to offset the attnums? If there are no expressions,
1397 : * then no offset is needed. Otherwise we need to offset enough for the
1398 : * lowest value (-unique_exprs_cnt) to become 1.
1399 : */
1400 1788 : if (unique_exprs_cnt > 0)
1401 132 : attnum_offset = (unique_exprs_cnt + 1);
1402 : else
1403 1656 : attnum_offset = 0;
1404 :
1405 : /*
1406 : * Now that we know how many expressions there are, we can offset the
1407 : * values just enough to build the bitmapset.
1408 : */
1409 5670 : for (i = 0; i < list_length(clauses); i++)
1410 : {
1411 : AttrNumber attnum;
1412 :
1413 : /* ignore incompatible or already estimated clauses */
1414 3882 : if (list_attnums[i] == InvalidAttrNumber)
1415 312 : continue;
1416 :
1417 : /* make sure the attnum is in the expected range */
1418 : Assert(list_attnums[i] >= (-unique_exprs_cnt));
1419 : Assert(list_attnums[i] <= MaxHeapAttributeNumber);
1420 :
1421 : /* make sure the attnum is positive (valid AttrNumber) */
1422 3570 : attnum = list_attnums[i] + attnum_offset;
1423 :
1424 : /*
1425 : * Either it's a regular attribute, or it's an expression, in which
1426 : * case we must not have seen it before (expressions are unique).
1427 : *
1428 : * XXX Check whether it's a regular attribute has to be done using the
1429 : * original attnum, while the second check has to use the value with
1430 : * an offset.
1431 : */
1432 : Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
1433 : !bms_is_member(attnum, clauses_attnums));
1434 :
1435 : /*
1436 : * Remember the offset attnum, both for attributes and expressions.
1437 : * We'll pass list_attnums to clauselist_apply_dependencies, which
1438 : * uses it to identify clauses in a bitmap. We could also pass the
1439 : * offset, but this is more convenient.
1440 : */
1441 3570 : list_attnums[i] = attnum;
1442 :
1443 3570 : clauses_attnums = bms_add_member(clauses_attnums, attnum);
1444 : }
1445 :
1446 : /*
1447 : * If there's not at least two distinct attnums and expressions, then
1448 : * reject the whole list of clauses. We must return 1.0 so the calling
1449 : * function's selectivity is unaffected.
1450 : */
1451 1788 : if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
1452 : {
1453 192 : bms_free(clauses_attnums);
1454 192 : pfree(list_attnums);
1455 192 : return 1.0;
1456 : }
1457 :
1458 : /*
1459 : * Load all functional dependencies matching at least two parameters. We
1460 : * can simply consider all dependencies at once, without having to search
1461 : * for the best statistics object.
1462 : *
1463 : * To not waste cycles and memory, we deserialize dependencies only for
1464 : * statistics that match at least two attributes. The array is allocated
1465 : * with the assumption that all objects match - we could grow the array to
1466 : * make it just the right size, but it's likely wasteful anyway thanks to
1467 : * moving the freed chunks to freelists etc.
1468 : */
1469 1596 : func_dependencies = palloc_array(MVDependencies *, list_length(rel->statlist));
1470 1596 : nfunc_dependencies = 0;
1471 1596 : total_ndeps = 0;
1472 :
1473 3330 : foreach(l, rel->statlist)
1474 : {
1475 1734 : StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
1476 : int nmatched;
1477 : int nexprs;
1478 : int k;
1479 : MVDependencies *deps;
1480 :
1481 : /* skip statistics that are not of the correct type */
1482 1734 : if (stat->kind != STATS_EXT_DEPENDENCIES)
1483 108 : continue;
1484 :
1485 : /* skip statistics with mismatching stxdinherit value */
1486 1626 : if (stat->inherit != rte->inh)
1487 0 : continue;
1488 :
1489 : /*
1490 : * Count matching attributes - we have to undo the attnum offsets. The
1491 : * input attribute numbers are not offset (expressions are not
1492 : * included in stat->keys, so it's not necessary). But we need to
1493 : * offset it before checking against clauses_attnums.
1494 : */
1495 1626 : nmatched = 0;
1496 1626 : k = -1;
1497 6120 : while ((k = bms_next_member(stat->keys, k)) >= 0)
1498 : {
1499 4494 : AttrNumber attnum = (AttrNumber) k;
1500 :
1501 : /* skip expressions */
1502 4494 : if (!AttrNumberIsForUserDefinedAttr(attnum))
1503 0 : continue;
1504 :
1505 : /* apply the same offset as above */
1506 4494 : attnum += attnum_offset;
1507 :
1508 4494 : if (bms_is_member(attnum, clauses_attnums))
1509 3240 : nmatched++;
1510 : }
1511 :
1512 : /* count matching expressions */
1513 1626 : nexprs = 0;
1514 1884 : for (i = 0; i < unique_exprs_cnt; i++)
1515 : {
1516 : ListCell *lc;
1517 :
1518 1032 : foreach(lc, stat->exprs)
1519 : {
1520 774 : Node *stat_expr = (Node *) lfirst(lc);
1521 :
1522 : /* try to match it */
1523 774 : if (equal(stat_expr, unique_exprs[i]))
1524 258 : nexprs++;
1525 : }
1526 : }
1527 :
1528 : /*
1529 : * Skip objects matching fewer than two attributes/expressions from
1530 : * clauses.
1531 : */
1532 1626 : if (nmatched + nexprs < 2)
1533 18 : continue;
1534 :
1535 1608 : deps = statext_dependencies_load(stat->statOid, rte->inh);
1536 :
1537 : /*
1538 : * The expressions may be represented by different attnums in the
1539 : * stats, we need to remap them to be consistent with the clauses.
1540 : * That will make the later steps (e.g. picking the strongest item and
1541 : * so on) much simpler and cheaper, because it won't need to care
1542 : * about the offset at all.
1543 : *
1544 : * When we're at it, we can ignore dependencies that are not fully
1545 : * matched by clauses (i.e. referencing attributes or expressions that
1546 : * are not in the clauses).
1547 : *
1548 : * We have to do this for all statistics, as long as there are any
1549 : * expressions - we need to shift the attnums in all dependencies.
1550 : *
1551 : * XXX Maybe we should do this always, because it also eliminates some
1552 : * of the dependencies early. It might be cheaper than having to walk
1553 : * the longer list in find_strongest_dependency later, especially as
1554 : * we need to do that repeatedly?
1555 : *
1556 : * XXX We have to do this even when there are no expressions in
1557 : * clauses, otherwise find_strongest_dependency may fail for stats
1558 : * with expressions (due to lookup of negative value in bitmap). So we
1559 : * need to at least filter out those dependencies. Maybe we could do
1560 : * it in a cheaper way (if there are no expr clauses, we can just
1561 : * discard all negative attnums without any lookups).
1562 : */
1563 1608 : if (unique_exprs_cnt > 0 || stat->exprs != NIL)
1564 : {
1565 108 : int ndeps = 0;
1566 :
1567 648 : for (i = 0; i < deps->ndeps; i++)
1568 : {
1569 540 : bool skip = false;
1570 540 : MVDependency *dep = deps->deps[i];
1571 : int j;
1572 :
1573 1506 : for (j = 0; j < dep->nattributes; j++)
1574 : {
1575 : int idx;
1576 : Node *expr;
1577 1230 : AttrNumber unique_attnum = InvalidAttrNumber;
1578 : AttrNumber attnum;
1579 :
1580 : /* undo the per-statistics offset */
1581 1230 : attnum = dep->attributes[j];
1582 :
1583 : /*
1584 : * For regular attributes we can simply check if it
1585 : * matches any clause. If there's no matching clause, we
1586 : * can just ignore it. We need to offset the attnum
1587 : * though.
1588 : */
1589 1230 : if (AttrNumberIsForUserDefinedAttr(attnum))
1590 : {
1591 0 : dep->attributes[j] = attnum + attnum_offset;
1592 :
1593 0 : if (!bms_is_member(dep->attributes[j], clauses_attnums))
1594 : {
1595 0 : skip = true;
1596 0 : break;
1597 : }
1598 :
1599 0 : continue;
1600 : }
1601 :
1602 : /*
1603 : * the attnum should be a valid system attnum (-1, -2,
1604 : * ...)
1605 : */
1606 : Assert(AttributeNumberIsValid(attnum));
1607 :
1608 : /*
1609 : * For expressions, we need to do two translations. First
1610 : * we have to translate the negative attnum to index in
1611 : * the list of expressions (in the statistics object).
1612 : * Then we need to see if there's a matching clause. The
1613 : * index of the unique expression determines the attnum
1614 : * (and we offset it).
1615 : */
1616 1230 : idx = -(1 + attnum);
1617 :
1618 : /* Is the expression index is valid? */
1619 : Assert((idx >= 0) && (idx < list_length(stat->exprs)));
1620 :
1621 1230 : expr = (Node *) list_nth(stat->exprs, idx);
1622 :
1623 : /* try to find the expression in the unique list */
1624 2460 : for (int m = 0; m < unique_exprs_cnt; m++)
1625 : {
1626 : /*
1627 : * found a matching unique expression, use the attnum
1628 : * (derived from index of the unique expression)
1629 : */
1630 2196 : if (equal(unique_exprs[m], expr))
1631 : {
1632 966 : unique_attnum = -(m + 1) + attnum_offset;
1633 966 : break;
1634 : }
1635 : }
1636 :
1637 : /*
1638 : * Found no matching expression, so we can simply skip
1639 : * this dependency, because there's no chance it will be
1640 : * fully covered.
1641 : */
1642 1230 : if (unique_attnum == InvalidAttrNumber)
1643 : {
1644 264 : skip = true;
1645 264 : break;
1646 : }
1647 :
1648 : /* otherwise remap it to the new attnum */
1649 966 : dep->attributes[j] = unique_attnum;
1650 : }
1651 :
1652 : /* if found a matching dependency, keep it */
1653 540 : if (!skip)
1654 : {
1655 : /* maybe we've skipped something earlier, so move it */
1656 276 : if (ndeps != i)
1657 0 : deps->deps[ndeps] = deps->deps[i];
1658 :
1659 276 : ndeps++;
1660 : }
1661 : }
1662 :
1663 108 : deps->ndeps = ndeps;
1664 : }
1665 :
1666 : /*
1667 : * It's possible we've removed all dependencies, in which case we
1668 : * don't bother adding it to the list.
1669 : */
1670 1608 : if (deps->ndeps > 0)
1671 : {
1672 1608 : func_dependencies[nfunc_dependencies] = deps;
1673 1608 : total_ndeps += deps->ndeps;
1674 1608 : nfunc_dependencies++;
1675 : }
1676 : }
1677 :
1678 : /* if no matching stats could be found then we've nothing to do */
1679 1596 : if (nfunc_dependencies == 0)
1680 : {
1681 0 : pfree(func_dependencies);
1682 0 : bms_free(clauses_attnums);
1683 0 : pfree(list_attnums);
1684 0 : pfree(unique_exprs);
1685 0 : return 1.0;
1686 : }
1687 :
1688 : /*
1689 : * Work out which dependencies we can apply, starting with the
1690 : * widest/strongest ones, and proceeding to smaller/weaker ones.
1691 : */
1692 1596 : dependencies = palloc_array(MVDependency *, total_ndeps);
1693 1596 : ndependencies = 0;
1694 :
1695 : while (true)
1696 1890 : {
1697 : MVDependency *dependency;
1698 : AttrNumber attnum;
1699 :
1700 : /* the widest/strongest dependency, fully matched by clauses */
1701 3486 : dependency = find_strongest_dependency(func_dependencies,
1702 : nfunc_dependencies,
1703 : clauses_attnums);
1704 3486 : if (!dependency)
1705 1596 : break;
1706 :
1707 1890 : dependencies[ndependencies++] = dependency;
1708 :
1709 : /* Ignore dependencies using this implied attribute in later loops */
1710 1890 : attnum = dependency->attributes[dependency->nattributes - 1];
1711 1890 : clauses_attnums = bms_del_member(clauses_attnums, attnum);
1712 : }
1713 :
1714 : /*
1715 : * If we found applicable dependencies, use them to estimate all
1716 : * compatible clauses on attributes that they refer to.
1717 : */
1718 1596 : if (ndependencies != 0)
1719 1596 : s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
1720 : sjinfo, dependencies, ndependencies,
1721 : list_attnums, estimatedclauses);
1722 :
1723 : /* free deserialized functional dependencies (and then the array) */
1724 3204 : for (i = 0; i < nfunc_dependencies; i++)
1725 1608 : pfree(func_dependencies[i]);
1726 :
1727 1596 : pfree(dependencies);
1728 1596 : pfree(func_dependencies);
1729 1596 : bms_free(clauses_attnums);
1730 1596 : pfree(list_attnums);
1731 1596 : pfree(unique_exprs);
1732 :
1733 1596 : return s1;
1734 : }
|