Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * clausesel.c
4 : * Routines to compute clause selectivities
5 : *
6 : * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/path/clausesel.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "nodes/makefuncs.h"
18 : #include "nodes/nodeFuncs.h"
19 : #include "optimizer/clauses.h"
20 : #include "optimizer/cost.h"
21 : #include "optimizer/optimizer.h"
22 : #include "optimizer/pathnode.h"
23 : #include "optimizer/plancat.h"
24 : #include "statistics/statistics.h"
25 : #include "utils/fmgroids.h"
26 : #include "utils/lsyscache.h"
27 : #include "utils/selfuncs.h"
28 :
29 : /*
30 : * Data structure for accumulating info about possible range-query
31 : * clause pairs in clauselist_selectivity.
32 : */
33 : typedef struct RangeQueryClause
34 : {
35 : struct RangeQueryClause *next; /* next in linked list */
36 : Node *var; /* The common variable of the clauses */
37 : bool have_lobound; /* found a low-bound clause yet? */
38 : bool have_hibound; /* found a high-bound clause yet? */
39 : Selectivity lobound; /* Selectivity of a var > something clause */
40 : Selectivity hibound; /* Selectivity of a var < something clause */
41 : } RangeQueryClause;
42 :
43 : static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
44 : bool varonleft, bool isLTsel, Selectivity s2);
45 : static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
46 : List *clauses);
47 : static Selectivity clauselist_selectivity_or(PlannerInfo *root,
48 : List *clauses,
49 : int varRelid,
50 : JoinType jointype,
51 : SpecialJoinInfo *sjinfo,
52 : bool use_extended_stats);
53 :
54 : /****************************************************************************
55 : * ROUTINES TO COMPUTE SELECTIVITIES
56 : ****************************************************************************/
57 :
58 : /*
59 : * clauselist_selectivity -
60 : * Compute the selectivity of an implicitly-ANDed list of boolean
61 : * expression clauses. The list can be empty, in which case 1.0
62 : * must be returned. List elements may be either RestrictInfos
63 : * or bare expression clauses --- the former is preferred since
64 : * it allows caching of results.
65 : *
66 : * See clause_selectivity() for the meaning of the additional parameters.
67 : *
68 : * The basic approach is to apply extended statistics first, on as many
69 : * clauses as possible, in order to capture cross-column dependencies etc.
70 : * The remaining clauses are then estimated by taking the product of their
71 : * selectivities, but that's only right if they have independent
72 : * probabilities, and in reality they are often NOT independent even if they
73 : * only refer to a single column. So, we want to be smarter where we can.
74 : *
75 : * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
76 : * are recognized as possible range query components if they are restriction
77 : * opclauses whose operators have scalarltsel or a related function as their
78 : * restriction selectivity estimator. We pair up clauses of this form that
79 : * refer to the same variable. An unpairable clause of this kind is simply
80 : * multiplied into the selectivity product in the normal way. But when we
81 : * find a pair, we know that the selectivities represent the relative
82 : * positions of the low and high bounds within the column's range, so instead
83 : * of figuring the selectivity as hisel * losel, we can figure it as hisel +
84 : * losel - 1. (To visualize this, see that hisel is the fraction of the range
85 : * below the high bound, while losel is the fraction above the low bound; so
86 : * hisel can be interpreted directly as a 0..1 value but we need to convert
87 : * losel to 1-losel before interpreting it as a value. Then the available
88 : * range is 1-losel to hisel. However, this calculation double-excludes
89 : * nulls, so really we need hisel + losel + null_frac - 1.)
90 : *
91 : * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
92 : * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
93 : * yields an impossible (negative) result.
94 : *
95 : * A free side-effect is that we can recognize redundant inequalities such
96 : * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
97 : *
98 : * Of course this is all very dependent on the behavior of the inequality
99 : * selectivity functions; perhaps some day we can generalize the approach.
100 : */
101 : Selectivity
102 1437324 : clauselist_selectivity(PlannerInfo *root,
103 : List *clauses,
104 : int varRelid,
105 : JoinType jointype,
106 : SpecialJoinInfo *sjinfo)
107 : {
108 1437324 : return clauselist_selectivity_ext(root, clauses, varRelid,
109 : jointype, sjinfo, true);
110 : }
111 :
112 : /*
113 : * clauselist_selectivity_ext -
114 : * Extended version of clauselist_selectivity(). If "use_extended_stats"
115 : * is false, all extended statistics will be ignored, and only per-column
116 : * statistics will be used.
117 : */
118 : Selectivity
119 1440630 : clauselist_selectivity_ext(PlannerInfo *root,
120 : List *clauses,
121 : int varRelid,
122 : JoinType jointype,
123 : SpecialJoinInfo *sjinfo,
124 : bool use_extended_stats)
125 : {
126 1440630 : Selectivity s1 = 1.0;
127 : RelOptInfo *rel;
128 1440630 : Bitmapset *estimatedclauses = NULL;
129 1440630 : RangeQueryClause *rqlist = NULL;
130 : ListCell *l;
131 : int listidx;
132 :
133 : /*
134 : * If there's exactly one clause, just go directly to
135 : * clause_selectivity_ext(). None of what we might do below is relevant.
136 : */
137 1440630 : if (list_length(clauses) == 1)
138 798746 : return clause_selectivity_ext(root, (Node *) linitial(clauses),
139 : varRelid, jointype, sjinfo,
140 : use_extended_stats);
141 :
142 : /*
143 : * Determine if these clauses reference a single relation. If so, and if
144 : * it has extended statistics, try to apply those.
145 : */
146 641884 : rel = find_single_rel_for_clauses(root, clauses);
147 641884 : if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
148 : {
149 : /*
150 : * Estimate as many clauses as possible using extended statistics.
151 : *
152 : * 'estimatedclauses' is populated with the 0-based list position
153 : * index of clauses estimated here, and that should be ignored below.
154 : */
155 992 : s1 = statext_clauselist_selectivity(root, clauses, varRelid,
156 : jointype, sjinfo, rel,
157 : &estimatedclauses, false);
158 : }
159 :
160 : /*
161 : * Apply normal selectivity estimates for remaining clauses. We'll be
162 : * careful to skip any clauses which were already estimated above.
163 : *
164 : * Anything that doesn't look like a potential rangequery clause gets
165 : * multiplied into s1 and forgotten. Anything that does gets inserted into
166 : * an rqlist entry.
167 : */
168 641884 : listidx = -1;
169 1089512 : foreach(l, clauses)
170 : {
171 447628 : Node *clause = (Node *) lfirst(l);
172 : RestrictInfo *rinfo;
173 : Selectivity s2;
174 :
175 447628 : listidx++;
176 :
177 : /*
178 : * Skip this clause if it's already been estimated by some other
179 : * statistics above.
180 : */
181 447628 : if (bms_is_member(listidx, estimatedclauses))
182 2044 : continue;
183 :
184 : /* Compute the selectivity of this clause in isolation */
185 445584 : s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
186 : use_extended_stats);
187 :
188 : /*
189 : * Check for being passed a RestrictInfo.
190 : *
191 : * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
192 : * 0.0; just use that rather than looking for range pairs.
193 : */
194 445584 : if (IsA(clause, RestrictInfo))
195 : {
196 444704 : rinfo = (RestrictInfo *) clause;
197 444704 : if (rinfo->pseudoconstant)
198 : {
199 2924 : s1 = s1 * s2;
200 2924 : continue;
201 : }
202 441780 : clause = (Node *) rinfo->clause;
203 : }
204 : else
205 880 : rinfo = NULL;
206 :
207 : /*
208 : * See if it looks like a restriction clause with a pseudoconstant on
209 : * one side. (Anything more complicated than that might not behave in
210 : * the simple way we are expecting.) Most of the tests here can be
211 : * done more efficiently with rinfo than without.
212 : */
213 442660 : if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
214 : {
215 399606 : OpExpr *expr = (OpExpr *) clause;
216 399606 : bool varonleft = true;
217 : bool ok;
218 :
219 399606 : if (rinfo)
220 : {
221 683870 : ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
222 279570 : (is_pseudo_constant_clause_relids(lsecond(expr->args),
223 5558 : rinfo->right_relids) ||
224 5558 : (varonleft = false,
225 5558 : is_pseudo_constant_clause_relids(linitial(expr->args),
226 : rinfo->left_relids)));
227 : }
228 : else
229 : {
230 1728 : ok = (NumRelids(root, clause) == 1) &&
231 864 : (is_pseudo_constant_clause(lsecond(expr->args)) ||
232 0 : (varonleft = false,
233 0 : is_pseudo_constant_clause(linitial(expr->args))));
234 : }
235 :
236 399606 : if (ok)
237 : {
238 : /*
239 : * If it's not a "<"/"<="/">"/">=" operator, just merge the
240 : * selectivity in generically. But if it's the right oprrest,
241 : * add the clause to rqlist for later processing.
242 : */
243 280176 : switch (get_oprrest(expr->opno))
244 : {
245 8778 : case F_SCALARLTSEL:
246 : case F_SCALARLESEL:
247 8778 : addRangeClause(&rqlist, clause,
248 : varonleft, true, s2);
249 8778 : break;
250 47236 : case F_SCALARGTSEL:
251 : case F_SCALARGESEL:
252 47236 : addRangeClause(&rqlist, clause,
253 : varonleft, false, s2);
254 47236 : break;
255 224162 : default:
256 : /* Just merge the selectivity in generically */
257 224162 : s1 = s1 * s2;
258 224162 : break;
259 : }
260 280176 : continue; /* drop to loop bottom */
261 : }
262 : }
263 :
264 : /* Not the right form, so treat it generically. */
265 162484 : s1 = s1 * s2;
266 : }
267 :
268 : /*
269 : * Now scan the rangequery pair list.
270 : */
271 692034 : while (rqlist != NULL)
272 : {
273 : RangeQueryClause *rqnext;
274 :
275 50150 : if (rqlist->have_lobound && rqlist->have_hibound)
276 5652 : {
277 : /* Successfully matched a pair of range clauses */
278 : Selectivity s2;
279 :
280 : /*
281 : * Exact equality to the default value probably means the
282 : * selectivity function punted. This is not airtight but should
283 : * be good enough.
284 : */
285 5652 : if (rqlist->hibound == DEFAULT_INEQ_SEL ||
286 3614 : rqlist->lobound == DEFAULT_INEQ_SEL)
287 : {
288 2038 : s2 = DEFAULT_RANGE_INEQ_SEL;
289 : }
290 : else
291 : {
292 3614 : s2 = rqlist->hibound + rqlist->lobound - 1.0;
293 :
294 : /* Adjust for double-exclusion of NULLs */
295 3614 : s2 += nulltestsel(root, IS_NULL, rqlist->var,
296 : varRelid, jointype, sjinfo);
297 :
298 : /*
299 : * A zero or slightly negative s2 should be converted into a
300 : * small positive value; we probably are dealing with a very
301 : * tight range and got a bogus result due to roundoff errors.
302 : * However, if s2 is very negative, then we probably have
303 : * default selectivity estimates on one or both sides of the
304 : * range that we failed to recognize above for some reason.
305 : */
306 3614 : if (s2 <= 0.0)
307 : {
308 468 : if (s2 < -0.01)
309 : {
310 : /*
311 : * No data available --- use a default estimate that
312 : * is small, but not real small.
313 : */
314 0 : s2 = DEFAULT_RANGE_INEQ_SEL;
315 : }
316 : else
317 : {
318 : /*
319 : * It's just roundoff error; use a small positive
320 : * value
321 : */
322 468 : s2 = 1.0e-10;
323 : }
324 : }
325 : }
326 : /* Merge in the selectivity of the pair of clauses */
327 5652 : s1 *= s2;
328 : }
329 : else
330 : {
331 : /* Only found one of a pair, merge it in generically */
332 44498 : if (rqlist->have_lobound)
333 41404 : s1 *= rqlist->lobound;
334 : else
335 3094 : s1 *= rqlist->hibound;
336 : }
337 : /* release storage and advance */
338 50150 : rqnext = rqlist->next;
339 50150 : pfree(rqlist);
340 50150 : rqlist = rqnext;
341 : }
342 :
343 641884 : return s1;
344 : }
345 :
346 : /*
347 : * clauselist_selectivity_or -
348 : * Compute the selectivity of an implicitly-ORed list of boolean
349 : * expression clauses. The list can be empty, in which case 0.0
350 : * must be returned. List elements may be either RestrictInfos
351 : * or bare expression clauses --- the former is preferred since
352 : * it allows caching of results.
353 : *
354 : * See clause_selectivity() for the meaning of the additional parameters.
355 : *
356 : * The basic approach is to apply extended statistics first, on as many
357 : * clauses as possible, in order to capture cross-column dependencies etc.
358 : * The remaining clauses are then estimated as if they were independent.
359 : */
360 : static Selectivity
361 9536 : clauselist_selectivity_or(PlannerInfo *root,
362 : List *clauses,
363 : int varRelid,
364 : JoinType jointype,
365 : SpecialJoinInfo *sjinfo,
366 : bool use_extended_stats)
367 : {
368 9536 : Selectivity s1 = 0.0;
369 : RelOptInfo *rel;
370 9536 : Bitmapset *estimatedclauses = NULL;
371 : ListCell *lc;
372 : int listidx;
373 :
374 : /*
375 : * Determine if these clauses reference a single relation. If so, and if
376 : * it has extended statistics, try to apply those.
377 : */
378 9536 : rel = find_single_rel_for_clauses(root, clauses);
379 9536 : if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
380 : {
381 : /*
382 : * Estimate as many clauses as possible using extended statistics.
383 : *
384 : * 'estimatedclauses' is populated with the 0-based list position
385 : * index of clauses estimated here, and that should be ignored below.
386 : */
387 52 : s1 = statext_clauselist_selectivity(root, clauses, varRelid,
388 : jointype, sjinfo, rel,
389 : &estimatedclauses, true);
390 : }
391 :
392 : /*
393 : * Estimate the remaining clauses as if they were independent.
394 : *
395 : * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
396 : * for the probable overlap of selected tuple sets.
397 : *
398 : * XXX is this too conservative?
399 : */
400 9536 : listidx = -1;
401 31104 : foreach(lc, clauses)
402 : {
403 : Selectivity s2;
404 :
405 21568 : listidx++;
406 :
407 : /*
408 : * Skip this clause if it's already been estimated by some other
409 : * statistics above.
410 : */
411 21568 : if (bms_is_member(listidx, estimatedclauses))
412 128 : continue;
413 :
414 21440 : s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid,
415 : jointype, sjinfo, use_extended_stats);
416 :
417 21440 : s1 = s1 + s2 - s1 * s2;
418 : }
419 :
420 9536 : return s1;
421 : }
422 :
423 : /*
424 : * addRangeClause --- add a new range clause for clauselist_selectivity
425 : *
426 : * Here is where we try to match up pairs of range-query clauses
427 : */
428 : static void
429 56014 : addRangeClause(RangeQueryClause **rqlist, Node *clause,
430 : bool varonleft, bool isLTsel, Selectivity s2)
431 : {
432 : RangeQueryClause *rqelem;
433 : Node *var;
434 : bool is_lobound;
435 :
436 56014 : if (varonleft)
437 : {
438 55930 : var = get_leftop((Expr *) clause);
439 55930 : is_lobound = !isLTsel; /* x < something is high bound */
440 : }
441 : else
442 : {
443 84 : var = get_rightop((Expr *) clause);
444 84 : is_lobound = isLTsel; /* something < x is low bound */
445 : }
446 :
447 56922 : for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
448 : {
449 : /*
450 : * We use full equal() here because the "var" might be a function of
451 : * one or more attributes of the same relation...
452 : */
453 6772 : if (!equal(var, rqelem->var))
454 908 : continue;
455 : /* Found the right group to put this clause in */
456 5864 : if (is_lobound)
457 : {
458 188 : if (!rqelem->have_lobound)
459 : {
460 68 : rqelem->have_lobound = true;
461 68 : rqelem->lobound = s2;
462 : }
463 : else
464 : {
465 :
466 : /*------
467 : * We have found two similar clauses, such as
468 : * x < y AND x <= z.
469 : * Keep only the more restrictive one.
470 : *------
471 : */
472 120 : if (rqelem->lobound > s2)
473 0 : rqelem->lobound = s2;
474 : }
475 : }
476 : else
477 : {
478 5676 : if (!rqelem->have_hibound)
479 : {
480 5584 : rqelem->have_hibound = true;
481 5584 : rqelem->hibound = s2;
482 : }
483 : else
484 : {
485 :
486 : /*------
487 : * We have found two similar clauses, such as
488 : * x > y AND x >= z.
489 : * Keep only the more restrictive one.
490 : *------
491 : */
492 92 : if (rqelem->hibound > s2)
493 24 : rqelem->hibound = s2;
494 : }
495 : }
496 5864 : return;
497 : }
498 :
499 : /* No matching var found, so make a new clause-pair data structure */
500 50150 : rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
501 50150 : rqelem->var = var;
502 50150 : if (is_lobound)
503 : {
504 46988 : rqelem->have_lobound = true;
505 46988 : rqelem->have_hibound = false;
506 46988 : rqelem->lobound = s2;
507 : }
508 : else
509 : {
510 3162 : rqelem->have_lobound = false;
511 3162 : rqelem->have_hibound = true;
512 3162 : rqelem->hibound = s2;
513 : }
514 50150 : rqelem->next = *rqlist;
515 50150 : *rqlist = rqelem;
516 : }
517 :
518 : /*
519 : * find_single_rel_for_clauses
520 : * Examine each clause in 'clauses' and determine if all clauses
521 : * reference only a single relation. If so return that relation,
522 : * otherwise return NULL.
523 : */
524 : static RelOptInfo *
525 652630 : find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
526 : {
527 652630 : int lastrelid = 0;
528 : ListCell *l;
529 :
530 930268 : foreach(l, clauses)
531 : {
532 370286 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
533 : int relid;
534 :
535 : /*
536 : * If we have a list of bare clauses rather than RestrictInfos, we
537 : * could pull out their relids the hard way with pull_varnos().
538 : * However, currently the extended-stats machinery won't do anything
539 : * with non-RestrictInfo clauses anyway, so there's no point in
540 : * spending extra cycles; just fail if that's what we have.
541 : *
542 : * An exception to that rule is if we have a bare BoolExpr AND clause.
543 : * We treat this as a special case because the restrictinfo machinery
544 : * doesn't build RestrictInfos on top of AND clauses.
545 : */
546 370286 : if (is_andclause(rinfo))
547 : {
548 : RelOptInfo *rel;
549 :
550 1210 : rel = find_single_rel_for_clauses(root,
551 : ((BoolExpr *) rinfo)->args);
552 :
553 1210 : if (rel == NULL)
554 92648 : return NULL;
555 976 : if (lastrelid == 0)
556 394 : lastrelid = rel->relid;
557 582 : else if (rel->relid != lastrelid)
558 0 : return NULL;
559 :
560 3976 : continue;
561 : }
562 :
563 369076 : if (!IsA(rinfo, RestrictInfo))
564 764 : return NULL;
565 :
566 368312 : if (bms_is_empty(rinfo->clause_relids))
567 3000 : continue; /* we can ignore variable-free clauses */
568 365312 : if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
569 87418 : return NULL; /* multiple relations in this clause */
570 277894 : if (lastrelid == 0)
571 132992 : lastrelid = relid; /* first clause referencing a relation */
572 144902 : else if (relid != lastrelid)
573 4232 : return NULL; /* relation not same as last one */
574 : }
575 :
576 559982 : if (lastrelid != 0)
577 112060 : return find_base_rel(root, lastrelid);
578 :
579 447922 : return NULL; /* no clauses */
580 : }
581 :
582 : /*
583 : * bms_is_subset_singleton
584 : *
585 : * Same result as bms_is_subset(s, bms_make_singleton(x)),
586 : * but a little faster and doesn't leak memory.
587 : *
588 : * Is this of use anywhere else? If so move to bitmapset.c ...
589 : */
590 : static bool
591 648182 : bms_is_subset_singleton(const Bitmapset *s, int x)
592 : {
593 648182 : switch (bms_membership(s))
594 : {
595 0 : case BMS_EMPTY_SET:
596 0 : return true;
597 499036 : case BMS_SINGLETON:
598 499036 : return bms_is_member(x, s);
599 149146 : case BMS_MULTIPLE:
600 149146 : return false;
601 : }
602 : /* can't get here... */
603 0 : return false;
604 : }
605 :
606 : /*
607 : * treat_as_join_clause -
608 : * Decide whether an operator clause is to be handled by the
609 : * restriction or join estimator. Subroutine for clause_selectivity().
610 : */
611 : static inline bool
612 501422 : treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
613 : int varRelid, SpecialJoinInfo *sjinfo)
614 : {
615 501422 : if (varRelid != 0)
616 : {
617 : /*
618 : * Caller is forcing restriction mode (eg, because we are examining an
619 : * inner indexscan qual).
620 : */
621 155554 : return false;
622 : }
623 345868 : else if (sjinfo == NULL)
624 : {
625 : /*
626 : * It must be a restriction clause, since it's being evaluated at a
627 : * scan node.
628 : */
629 212230 : return false;
630 : }
631 : else
632 : {
633 : /*
634 : * Otherwise, it's a join if there's more than one relation used. We
635 : * can optimize this calculation if an rinfo was passed.
636 : *
637 : * XXX Since we know the clause is being evaluated at a join, the
638 : * only way it could be single-relation is if it was delayed by outer
639 : * joins. Although we can make use of the restriction qual estimators
640 : * anyway, it seems likely that we ought to account for the
641 : * probability of injected nulls somehow.
642 : */
643 133638 : if (rinfo)
644 133338 : return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
645 : else
646 300 : return (NumRelids(root, clause) > 1);
647 : }
648 : }
649 :
650 :
651 : /*
652 : * clause_selectivity -
653 : * Compute the selectivity of a general boolean expression clause.
654 : *
655 : * The clause can be either a RestrictInfo or a plain expression. If it's
656 : * a RestrictInfo, we try to cache the selectivity for possible re-use,
657 : * so passing RestrictInfos is preferred.
658 : *
659 : * varRelid is either 0 or a rangetable index.
660 : *
661 : * When varRelid is not 0, only variables belonging to that relation are
662 : * considered in computing selectivity; other vars are treated as constants
663 : * of unknown values. This is appropriate for estimating the selectivity of
664 : * a join clause that is being used as a restriction clause in a scan of a
665 : * nestloop join's inner relation --- varRelid should then be the ID of the
666 : * inner relation.
667 : *
668 : * When varRelid is 0, all variables are treated as variables. This
669 : * is appropriate for ordinary join clauses and restriction clauses.
670 : *
671 : * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
672 : * if the clause isn't a join clause.
673 : *
674 : * sjinfo is NULL for a non-join clause, otherwise it provides additional
675 : * context information about the join being performed. There are some
676 : * special cases:
677 : * 1. For a special (not INNER) join, sjinfo is always a member of
678 : * root->join_info_list.
679 : * 2. For an INNER join, sjinfo is just a transient struct, and only the
680 : * relids and jointype fields in it can be trusted.
681 : * It is possible for jointype to be different from sjinfo->jointype.
682 : * This indicates we are considering a variant join: either with
683 : * the LHS and RHS switched, or with one input unique-ified.
684 : *
685 : * Note: when passing nonzero varRelid, it's normally appropriate to set
686 : * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
687 : * join clause; because we aren't treating it as a join clause.
688 : */
689 : Selectivity
690 291482 : clause_selectivity(PlannerInfo *root,
691 : Node *clause,
692 : int varRelid,
693 : JoinType jointype,
694 : SpecialJoinInfo *sjinfo)
695 : {
696 291482 : return clause_selectivity_ext(root, clause, varRelid,
697 : jointype, sjinfo, true);
698 : }
699 :
700 : /*
701 : * clause_selectivity_ext -
702 : * Extended version of clause_selectivity(). If "use_extended_stats" is
703 : * false, all extended statistics will be ignored, and only per-column
704 : * statistics will be used.
705 : */
706 : Selectivity
707 1563004 : clause_selectivity_ext(PlannerInfo *root,
708 : Node *clause,
709 : int varRelid,
710 : JoinType jointype,
711 : SpecialJoinInfo *sjinfo,
712 : bool use_extended_stats)
713 : {
714 1563004 : Selectivity s1 = 0.5; /* default for any unhandled clause type */
715 1563004 : RestrictInfo *rinfo = NULL;
716 1563004 : bool cacheable = false;
717 :
718 1563004 : if (clause == NULL) /* can this still happen? */
719 0 : return s1;
720 :
721 1563004 : if (IsA(clause, RestrictInfo))
722 : {
723 1551772 : rinfo = (RestrictInfo *) clause;
724 :
725 : /*
726 : * If the clause is marked pseudoconstant, then it will be used as a
727 : * gating qual and should not affect selectivity estimates; hence
728 : * return 1.0. The only exception is that a constant FALSE may be
729 : * taken as having selectivity 0.0, since it will surely mean no rows
730 : * out of the plan. This case is simple enough that we need not
731 : * bother caching the result.
732 : */
733 1551772 : if (rinfo->pseudoconstant)
734 : {
735 3076 : if (!IsA(rinfo->clause, Const))
736 2984 : return (Selectivity) 1.0;
737 : }
738 :
739 : /*
740 : * If the clause is marked redundant, always return 1.0.
741 : */
742 1548788 : if (rinfo->norm_selec > 1)
743 48662 : return (Selectivity) 1.0;
744 :
745 : /*
746 : * If possible, cache the result of the selectivity calculation for
747 : * the clause. We can cache if varRelid is zero or the clause
748 : * contains only vars of that relid --- otherwise varRelid will affect
749 : * the result, so mustn't cache. Outer join quals might be examined
750 : * with either their join's actual jointype or JOIN_INNER, so we need
751 : * two cache variables to remember both cases. Note: we assume the
752 : * result won't change if we are switching the input relations or
753 : * considering a unique-ified case, so we only need one cache variable
754 : * for all non-JOIN_INNER cases.
755 : */
756 2148308 : if (varRelid == 0 ||
757 648182 : bms_is_subset_singleton(rinfo->clause_relids, varRelid))
758 : {
759 : /* Cacheable --- do we already have the result? */
760 1350140 : if (jointype == JOIN_INNER)
761 : {
762 1163248 : if (rinfo->norm_selec >= 0)
763 848288 : return rinfo->norm_selec;
764 : }
765 : else
766 : {
767 186892 : if (rinfo->outer_selec >= 0)
768 115558 : return rinfo->outer_selec;
769 : }
770 386294 : cacheable = true;
771 : }
772 :
773 : /*
774 : * Proceed with examination of contained clause. If the clause is an
775 : * OR-clause, we want to look at the variant with sub-RestrictInfos,
776 : * so that per-subclause selectivities can be cached.
777 : */
778 536280 : if (rinfo->orclause)
779 9500 : clause = (Node *) rinfo->orclause;
780 : else
781 526780 : clause = (Node *) rinfo->clause;
782 : }
783 :
784 547512 : if (IsA(clause, Var))
785 : {
786 13858 : Var *var = (Var *) clause;
787 :
788 : /*
789 : * We probably shouldn't ever see an uplevel Var here, but if we do,
790 : * return the default selectivity...
791 : */
792 13858 : if (var->varlevelsup == 0 &&
793 24 : (varRelid == 0 || varRelid == (int) var->varno))
794 : {
795 : /* Use the restriction selectivity function for a bool Var */
796 13854 : s1 = boolvarsel(root, (Node *) var, varRelid);
797 : }
798 : }
799 533654 : else if (IsA(clause, Const))
800 : {
801 : /* bool constant is pretty easy... */
802 132 : Const *con = (Const *) clause;
803 :
804 264 : s1 = con->constisnull ? 0.0 :
805 132 : DatumGetBool(con->constvalue) ? 1.0 : 0.0;
806 : }
807 533522 : else if (IsA(clause, Param))
808 : {
809 : /* see if we can replace the Param */
810 16 : Node *subst = estimate_expression_value(root, clause);
811 :
812 16 : if (IsA(subst, Const))
813 : {
814 : /* bool constant is pretty easy... */
815 0 : Const *con = (Const *) subst;
816 :
817 0 : s1 = con->constisnull ? 0.0 :
818 0 : DatumGetBool(con->constvalue) ? 1.0 : 0.0;
819 : }
820 : else
821 : {
822 : /* XXX any way to do better than default? */
823 : }
824 : }
825 533506 : else if (is_notclause(clause))
826 : {
827 : /* inverse of the selectivity of the underlying clause */
828 5624 : s1 = 1.0 - clause_selectivity_ext(root,
829 5624 : (Node *) get_notclausearg((Expr *) clause),
830 : varRelid,
831 : jointype,
832 : sjinfo,
833 : use_extended_stats);
834 : }
835 527882 : else if (is_andclause(clause))
836 : {
837 : /* share code with clauselist_selectivity() */
838 1506 : s1 = clauselist_selectivity_ext(root,
839 : ((BoolExpr *) clause)->args,
840 : varRelid,
841 : jointype,
842 : sjinfo,
843 : use_extended_stats);
844 : }
845 526376 : else if (is_orclause(clause))
846 : {
847 : /*
848 : * Almost the same thing as clauselist_selectivity, but with the
849 : * clauses connected by OR.
850 : */
851 9536 : s1 = clauselist_selectivity_or(root,
852 : ((BoolExpr *) clause)->args,
853 : varRelid,
854 : jointype,
855 : sjinfo,
856 : use_extended_stats);
857 : }
858 516840 : else if (is_opclause(clause) || IsA(clause, DistinctExpr))
859 480624 : {
860 480624 : OpExpr *opclause = (OpExpr *) clause;
861 480624 : Oid opno = opclause->opno;
862 :
863 480624 : if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
864 : {
865 : /* Estimate selectivity for a join clause. */
866 126586 : s1 = join_selectivity(root, opno,
867 : opclause->args,
868 : opclause->inputcollid,
869 : jointype,
870 : sjinfo);
871 : }
872 : else
873 : {
874 : /* Estimate selectivity for a restriction clause. */
875 354038 : s1 = restriction_selectivity(root, opno,
876 : opclause->args,
877 : opclause->inputcollid,
878 : varRelid);
879 : }
880 :
881 : /*
882 : * DistinctExpr has the same representation as OpExpr, but the
883 : * contained operator is "=" not "<>", so we must negate the result.
884 : * This estimation method doesn't give the right behavior for nulls,
885 : * but it's better than doing nothing.
886 : */
887 480624 : if (IsA(clause, DistinctExpr))
888 426 : s1 = 1.0 - s1;
889 : }
890 36216 : else if (is_funcclause(clause))
891 : {
892 5866 : FuncExpr *funcclause = (FuncExpr *) clause;
893 :
894 : /* Try to get an estimate from the support function, if any */
895 5866 : s1 = function_selectivity(root,
896 : funcclause->funcid,
897 : funcclause->args,
898 : funcclause->inputcollid,
899 5866 : treat_as_join_clause(root, clause, rinfo,
900 : varRelid, sjinfo),
901 : varRelid,
902 : jointype,
903 : sjinfo);
904 : }
905 30350 : else if (IsA(clause, ScalarArrayOpExpr))
906 : {
907 : /* Use node specific selectivity calculation function */
908 14932 : s1 = scalararraysel(root,
909 : (ScalarArrayOpExpr *) clause,
910 14932 : treat_as_join_clause(root, clause, rinfo,
911 : varRelid, sjinfo),
912 : varRelid,
913 : jointype,
914 : sjinfo);
915 : }
916 15418 : else if (IsA(clause, RowCompareExpr))
917 : {
918 : /* Use node specific selectivity calculation function */
919 104 : s1 = rowcomparesel(root,
920 : (RowCompareExpr *) clause,
921 : varRelid,
922 : jointype,
923 : sjinfo);
924 : }
925 15314 : else if (IsA(clause, NullTest))
926 : {
927 : /* Use node specific selectivity calculation function */
928 13450 : s1 = nulltestsel(root,
929 : ((NullTest *) clause)->nulltesttype,
930 13450 : (Node *) ((NullTest *) clause)->arg,
931 : varRelid,
932 : jointype,
933 : sjinfo);
934 : }
935 1864 : else if (IsA(clause, BooleanTest))
936 : {
937 : /* Use node specific selectivity calculation function */
938 132 : s1 = booltestsel(root,
939 : ((BooleanTest *) clause)->booltesttype,
940 132 : (Node *) ((BooleanTest *) clause)->arg,
941 : varRelid,
942 : jointype,
943 : sjinfo);
944 : }
945 1732 : else if (IsA(clause, CurrentOfExpr))
946 : {
947 : /* CURRENT OF selects at most one row of its table */
948 384 : CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
949 384 : RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
950 :
951 384 : if (crel->tuples > 0)
952 364 : s1 = 1.0 / crel->tuples;
953 : }
954 1348 : else if (IsA(clause, RelabelType))
955 : {
956 : /* Not sure this case is needed, but it can't hurt */
957 0 : s1 = clause_selectivity_ext(root,
958 0 : (Node *) ((RelabelType *) clause)->arg,
959 : varRelid,
960 : jointype,
961 : sjinfo,
962 : use_extended_stats);
963 : }
964 1348 : else if (IsA(clause, CoerceToDomain))
965 : {
966 : /* Not sure this case is needed, but it can't hurt */
967 0 : s1 = clause_selectivity_ext(root,
968 0 : (Node *) ((CoerceToDomain *) clause)->arg,
969 : varRelid,
970 : jointype,
971 : sjinfo,
972 : use_extended_stats);
973 : }
974 : else
975 : {
976 : /*
977 : * For anything else, see if we can consider it as a boolean variable.
978 : * This only works if it's an immutable expression in Vars of a single
979 : * relation; but there's no point in us checking that here because
980 : * boolvarsel() will do it internally, and return a suitable default
981 : * selectivity if not.
982 : */
983 1348 : s1 = boolvarsel(root, clause, varRelid);
984 : }
985 :
986 : /* Cache the result if possible */
987 547512 : if (cacheable)
988 : {
989 386294 : if (jointype == JOIN_INNER)
990 314960 : rinfo->norm_selec = s1;
991 : else
992 71334 : rinfo->outer_selec = s1;
993 : }
994 :
995 : #ifdef SELECTIVITY_DEBUG
996 : elog(DEBUG4, "clause_selectivity: s1 %f", s1);
997 : #endif /* SELECTIVITY_DEBUG */
998 :
999 547512 : return s1;
1000 : }
|