Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * clausesel.c
4 : * Routines to compute clause selectivities
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/path/clausesel.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "nodes/makefuncs.h"
18 : #include "nodes/nodeFuncs.h"
19 : #include "optimizer/clauses.h"
20 : #include "optimizer/cost.h"
21 : #include "optimizer/optimizer.h"
22 : #include "optimizer/pathnode.h"
23 : #include "optimizer/plancat.h"
24 : #include "statistics/statistics.h"
25 : #include "utils/fmgroids.h"
26 : #include "utils/lsyscache.h"
27 : #include "utils/selfuncs.h"
28 :
29 : /*
30 : * Data structure for accumulating info about possible range-query
31 : * clause pairs in clauselist_selectivity.
32 : */
33 : typedef struct RangeQueryClause
34 : {
35 : struct RangeQueryClause *next; /* next in linked list */
36 : Node *var; /* The common variable of the clauses */
37 : bool have_lobound; /* found a low-bound clause yet? */
38 : bool have_hibound; /* found a high-bound clause yet? */
39 : Selectivity lobound; /* Selectivity of a var > something clause */
40 : Selectivity hibound; /* Selectivity of a var < something clause */
41 : } RangeQueryClause;
42 :
43 : static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
44 : bool varonleft, bool isLTsel, Selectivity s2);
45 : static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
46 : List *clauses);
47 : static Selectivity clauselist_selectivity_or(PlannerInfo *root,
48 : List *clauses,
49 : int varRelid,
50 : JoinType jointype,
51 : SpecialJoinInfo *sjinfo,
52 : bool use_extended_stats);
53 :
54 : /****************************************************************************
55 : * ROUTINES TO COMPUTE SELECTIVITIES
56 : ****************************************************************************/
57 :
58 : /*
59 : * clauselist_selectivity -
60 : * Compute the selectivity of an implicitly-ANDed list of boolean
61 : * expression clauses. The list can be empty, in which case 1.0
62 : * must be returned. List elements may be either RestrictInfos
63 : * or bare expression clauses --- the former is preferred since
64 : * it allows caching of results.
65 : *
66 : * See clause_selectivity() for the meaning of the additional parameters.
67 : *
68 : * The basic approach is to apply extended statistics first, on as many
69 : * clauses as possible, in order to capture cross-column dependencies etc.
70 : * The remaining clauses are then estimated by taking the product of their
71 : * selectivities, but that's only right if they have independent
72 : * probabilities, and in reality they are often NOT independent even if they
73 : * only refer to a single column. So, we want to be smarter where we can.
74 : *
75 : * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
76 : * are recognized as possible range query components if they are restriction
77 : * opclauses whose operators have scalarltsel or a related function as their
78 : * restriction selectivity estimator. We pair up clauses of this form that
79 : * refer to the same variable. An unpairable clause of this kind is simply
80 : * multiplied into the selectivity product in the normal way. But when we
81 : * find a pair, we know that the selectivities represent the relative
82 : * positions of the low and high bounds within the column's range, so instead
83 : * of figuring the selectivity as hisel * losel, we can figure it as hisel +
84 : * losel - 1. (To visualize this, see that hisel is the fraction of the range
85 : * below the high bound, while losel is the fraction above the low bound; so
86 : * hisel can be interpreted directly as a 0..1 value but we need to convert
87 : * losel to 1-losel before interpreting it as a value. Then the available
88 : * range is 1-losel to hisel. However, this calculation double-excludes
89 : * nulls, so really we need hisel + losel + null_frac - 1.)
90 : *
91 : * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
92 : * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
93 : * yields an impossible (negative) result.
94 : *
95 : * A free side-effect is that we can recognize redundant inequalities such
96 : * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
97 : *
98 : * Of course this is all very dependent on the behavior of the inequality
99 : * selectivity functions; perhaps some day we can generalize the approach.
100 : */
101 : Selectivity
102 2035404 : clauselist_selectivity(PlannerInfo *root,
103 : List *clauses,
104 : int varRelid,
105 : JoinType jointype,
106 : SpecialJoinInfo *sjinfo)
107 : {
108 2035404 : return clauselist_selectivity_ext(root, clauses, varRelid,
109 : jointype, sjinfo, true);
110 : }
111 :
112 : /*
113 : * clauselist_selectivity_ext -
114 : * Extended version of clauselist_selectivity(). If "use_extended_stats"
115 : * is false, all extended statistics will be ignored, and only per-column
116 : * statistics will be used.
117 : */
118 : Selectivity
119 2041292 : clauselist_selectivity_ext(PlannerInfo *root,
120 : List *clauses,
121 : int varRelid,
122 : JoinType jointype,
123 : SpecialJoinInfo *sjinfo,
124 : bool use_extended_stats)
125 : {
126 2041292 : Selectivity s1 = 1.0;
127 : RelOptInfo *rel;
128 2041292 : Bitmapset *estimatedclauses = NULL;
129 2041292 : RangeQueryClause *rqlist = NULL;
130 : ListCell *l;
131 : int listidx;
132 :
133 : /*
134 : * If there's exactly one clause, just go directly to
135 : * clause_selectivity_ext(). None of what we might do below is relevant.
136 : */
137 2041292 : if (list_length(clauses) == 1)
138 1068078 : return clause_selectivity_ext(root, (Node *) linitial(clauses),
139 : varRelid, jointype, sjinfo,
140 : use_extended_stats);
141 :
142 : /*
143 : * Determine if these clauses reference a single relation. If so, and if
144 : * it has extended statistics, try to apply those.
145 : */
146 973214 : rel = find_single_rel_for_clauses(root, clauses);
147 973214 : if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
148 : {
149 : /*
150 : * Estimate as many clauses as possible using extended statistics.
151 : *
152 : * 'estimatedclauses' is populated with the 0-based list position
153 : * index of clauses estimated here, and that should be ignored below.
154 : */
155 1770 : s1 = statext_clauselist_selectivity(root, clauses, varRelid,
156 : jointype, sjinfo, rel,
157 : &estimatedclauses, false);
158 : }
159 :
160 : /*
161 : * Apply normal selectivity estimates for remaining clauses. We'll be
162 : * careful to skip any clauses which were already estimated above.
163 : *
164 : * Anything that doesn't look like a potential rangequery clause gets
165 : * multiplied into s1 and forgotten. Anything that does gets inserted into
166 : * an rqlist entry.
167 : */
168 973214 : listidx = -1;
169 1586280 : foreach(l, clauses)
170 : {
171 613066 : Node *clause = (Node *) lfirst(l);
172 : RestrictInfo *rinfo;
173 : Selectivity s2;
174 :
175 613066 : listidx++;
176 :
177 : /*
178 : * Skip this clause if it's already been estimated by some other
179 : * statistics above.
180 : */
181 613066 : if (bms_is_member(listidx, estimatedclauses))
182 3474 : continue;
183 :
184 : /* Compute the selectivity of this clause in isolation */
185 609592 : s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
186 : use_extended_stats);
187 :
188 : /*
189 : * Check for being passed a RestrictInfo.
190 : *
191 : * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
192 : * 0.0; just use that rather than looking for range pairs.
193 : */
194 609592 : if (IsA(clause, RestrictInfo))
195 : {
196 608412 : rinfo = (RestrictInfo *) clause;
197 608412 : if (rinfo->pseudoconstant)
198 : {
199 22016 : s1 = s1 * s2;
200 22016 : continue;
201 : }
202 586396 : clause = (Node *) rinfo->clause;
203 : }
204 : else
205 1180 : rinfo = NULL;
206 :
207 : /*
208 : * See if it looks like a restriction clause with a pseudoconstant on
209 : * one side. (Anything more complicated than that might not behave in
210 : * the simple way we are expecting.) Most of the tests here can be
211 : * done more efficiently with rinfo than without.
212 : */
213 587576 : if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
214 : {
215 498002 : OpExpr *expr = (OpExpr *) clause;
216 498002 : bool varonleft = true;
217 : bool ok;
218 :
219 498002 : if (rinfo)
220 : {
221 818888 : ok = (rinfo->num_base_rels == 1) &&
222 317382 : (is_pseudo_constant_clause_relids(lsecond(expr->args),
223 4668 : rinfo->right_relids) ||
224 4668 : (varonleft = false,
225 4668 : is_pseudo_constant_clause_relids(linitial(expr->args),
226 : rinfo->left_relids)));
227 : }
228 : else
229 : {
230 2328 : ok = (NumRelids(root, clause) == 1) &&
231 1164 : (is_pseudo_constant_clause(lsecond(expr->args)) ||
232 0 : (varonleft = false,
233 0 : is_pseudo_constant_clause(linitial(expr->args))));
234 : }
235 :
236 498002 : if (ok)
237 : {
238 : /*
239 : * If it's not a "<"/"<="/">"/">=" operator, just merge the
240 : * selectivity in generically. But if it's the right oprrest,
241 : * add the clause to rqlist for later processing.
242 : */
243 318176 : switch (get_oprrest(expr->opno))
244 : {
245 12358 : case F_SCALARLTSEL:
246 : case F_SCALARLESEL:
247 12358 : addRangeClause(&rqlist, clause,
248 : varonleft, true, s2);
249 12358 : break;
250 31542 : case F_SCALARGTSEL:
251 : case F_SCALARGESEL:
252 31542 : addRangeClause(&rqlist, clause,
253 : varonleft, false, s2);
254 31542 : break;
255 274276 : default:
256 : /* Just merge the selectivity in generically */
257 274276 : s1 = s1 * s2;
258 274276 : break;
259 : }
260 318176 : continue; /* drop to loop bottom */
261 : }
262 : }
263 :
264 : /* Not the right form, so treat it generically. */
265 269400 : s1 = s1 * s2;
266 : }
267 :
268 : /*
269 : * Now scan the rangequery pair list.
270 : */
271 1008754 : while (rqlist != NULL)
272 : {
273 : RangeQueryClause *rqnext;
274 :
275 35540 : if (rqlist->have_lobound && rqlist->have_hibound)
276 8042 : {
277 : /* Successfully matched a pair of range clauses */
278 : Selectivity s2;
279 :
280 : /*
281 : * Exact equality to the default value probably means the
282 : * selectivity function punted. This is not airtight but should
283 : * be good enough.
284 : */
285 8042 : if (rqlist->hibound == DEFAULT_INEQ_SEL ||
286 6020 : rqlist->lobound == DEFAULT_INEQ_SEL)
287 : {
288 2022 : s2 = DEFAULT_RANGE_INEQ_SEL;
289 : }
290 : else
291 : {
292 6020 : s2 = rqlist->hibound + rqlist->lobound - 1.0;
293 :
294 : /* Adjust for double-exclusion of NULLs */
295 6020 : s2 += nulltestsel(root, IS_NULL, rqlist->var,
296 : varRelid, jointype, sjinfo);
297 :
298 : /*
299 : * A zero or slightly negative s2 should be converted into a
300 : * small positive value; we probably are dealing with a very
301 : * tight range and got a bogus result due to roundoff errors.
302 : * However, if s2 is very negative, then we probably have
303 : * default selectivity estimates on one or both sides of the
304 : * range that we failed to recognize above for some reason.
305 : */
306 6020 : if (s2 <= 0.0)
307 : {
308 878 : if (s2 < -0.01)
309 : {
310 : /*
311 : * No data available --- use a default estimate that
312 : * is small, but not real small.
313 : */
314 12 : s2 = DEFAULT_RANGE_INEQ_SEL;
315 : }
316 : else
317 : {
318 : /*
319 : * It's just roundoff error; use a small positive
320 : * value
321 : */
322 866 : s2 = 1.0e-10;
323 : }
324 : }
325 : }
326 : /* Merge in the selectivity of the pair of clauses */
327 8042 : s1 *= s2;
328 : }
329 : else
330 : {
331 : /* Only found one of a pair, merge it in generically */
332 27498 : if (rqlist->have_lobound)
333 23152 : s1 *= rqlist->lobound;
334 : else
335 4346 : s1 *= rqlist->hibound;
336 : }
337 : /* release storage and advance */
338 35540 : rqnext = rqlist->next;
339 35540 : pfree(rqlist);
340 35540 : rqlist = rqnext;
341 : }
342 :
343 973214 : return s1;
344 : }
345 :
346 : /*
347 : * clauselist_selectivity_or -
348 : * Compute the selectivity of an implicitly-ORed list of boolean
349 : * expression clauses. The list can be empty, in which case 0.0
350 : * must be returned. List elements may be either RestrictInfos
351 : * or bare expression clauses --- the former is preferred since
352 : * it allows caching of results.
353 : *
354 : * See clause_selectivity() for the meaning of the additional parameters.
355 : *
356 : * The basic approach is to apply extended statistics first, on as many
357 : * clauses as possible, in order to capture cross-column dependencies etc.
358 : * The remaining clauses are then estimated as if they were independent.
359 : */
360 : static Selectivity
361 10820 : clauselist_selectivity_or(PlannerInfo *root,
362 : List *clauses,
363 : int varRelid,
364 : JoinType jointype,
365 : SpecialJoinInfo *sjinfo,
366 : bool use_extended_stats)
367 : {
368 10820 : Selectivity s1 = 0.0;
369 : RelOptInfo *rel;
370 10820 : Bitmapset *estimatedclauses = NULL;
371 : ListCell *lc;
372 : int listidx;
373 :
374 : /*
375 : * Determine if these clauses reference a single relation. If so, and if
376 : * it has extended statistics, try to apply those.
377 : */
378 10820 : rel = find_single_rel_for_clauses(root, clauses);
379 10820 : if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
380 : {
381 : /*
382 : * Estimate as many clauses as possible using extended statistics.
383 : *
384 : * 'estimatedclauses' is populated with the 0-based list position
385 : * index of clauses estimated here, and that should be ignored below.
386 : */
387 108 : s1 = statext_clauselist_selectivity(root, clauses, varRelid,
388 : jointype, sjinfo, rel,
389 : &estimatedclauses, true);
390 : }
391 :
392 : /*
393 : * Estimate the remaining clauses as if they were independent.
394 : *
395 : * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
396 : * for the probable overlap of selected tuple sets.
397 : *
398 : * XXX is this too conservative?
399 : */
400 10820 : listidx = -1;
401 35826 : foreach(lc, clauses)
402 : {
403 : Selectivity s2;
404 :
405 25006 : listidx++;
406 :
407 : /*
408 : * Skip this clause if it's already been estimated by some other
409 : * statistics above.
410 : */
411 25006 : if (bms_is_member(listidx, estimatedclauses))
412 240 : continue;
413 :
414 24766 : s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid,
415 : jointype, sjinfo, use_extended_stats);
416 :
417 24766 : s1 = s1 + s2 - s1 * s2;
418 : }
419 :
420 10820 : return s1;
421 : }
422 :
423 : /*
424 : * addRangeClause --- add a new range clause for clauselist_selectivity
425 : *
426 : * Here is where we try to match up pairs of range-query clauses
427 : */
428 : static void
429 43900 : addRangeClause(RangeQueryClause **rqlist, Node *clause,
430 : bool varonleft, bool isLTsel, Selectivity s2)
431 : {
432 : RangeQueryClause *rqelem;
433 : Node *var;
434 : bool is_lobound;
435 :
436 43900 : if (varonleft)
437 : {
438 43684 : var = get_leftop((Expr *) clause);
439 43684 : is_lobound = !isLTsel; /* x < something is high bound */
440 : }
441 : else
442 : {
443 216 : var = get_rightop((Expr *) clause);
444 216 : is_lobound = isLTsel; /* something < x is low bound */
445 : }
446 :
447 45130 : for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
448 : {
449 : /*
450 : * We use full equal() here because the "var" might be a function of
451 : * one or more attributes of the same relation...
452 : */
453 9590 : if (!equal(var, rqelem->var))
454 1230 : continue;
455 : /* Found the right group to put this clause in */
456 8360 : if (is_lobound)
457 : {
458 312 : if (!rqelem->have_lobound)
459 : {
460 132 : rqelem->have_lobound = true;
461 132 : rqelem->lobound = s2;
462 : }
463 : else
464 : {
465 :
466 : /*------
467 : * We have found two similar clauses, such as
468 : * x < y AND x <= z.
469 : * Keep only the more restrictive one.
470 : *------
471 : */
472 180 : if (rqelem->lobound > s2)
473 0 : rqelem->lobound = s2;
474 : }
475 : }
476 : else
477 : {
478 8048 : if (!rqelem->have_hibound)
479 : {
480 7910 : rqelem->have_hibound = true;
481 7910 : rqelem->hibound = s2;
482 : }
483 : else
484 : {
485 :
486 : /*------
487 : * We have found two similar clauses, such as
488 : * x > y AND x >= z.
489 : * Keep only the more restrictive one.
490 : *------
491 : */
492 138 : if (rqelem->hibound > s2)
493 36 : rqelem->hibound = s2;
494 : }
495 : }
496 8360 : return;
497 : }
498 :
499 : /* No matching var found, so make a new clause-pair data structure */
500 35540 : rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
501 35540 : rqelem->var = var;
502 35540 : if (is_lobound)
503 : {
504 31062 : rqelem->have_lobound = true;
505 31062 : rqelem->have_hibound = false;
506 31062 : rqelem->lobound = s2;
507 : }
508 : else
509 : {
510 4478 : rqelem->have_lobound = false;
511 4478 : rqelem->have_hibound = true;
512 4478 : rqelem->hibound = s2;
513 : }
514 35540 : rqelem->next = *rqlist;
515 35540 : *rqlist = rqelem;
516 : }
517 :
518 : /*
519 : * find_single_rel_for_clauses
520 : * Examine each clause in 'clauses' and determine if all clauses
521 : * reference only a single relation. If so return that relation,
522 : * otherwise return NULL.
523 : */
524 : static RelOptInfo *
525 986332 : find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
526 : {
527 986332 : int lastrelid = 0;
528 : ListCell *l;
529 :
530 1354768 : foreach(l, clauses)
531 : {
532 519458 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
533 : int relid;
534 :
535 : /*
536 : * If we have a list of bare clauses rather than RestrictInfos, we
537 : * could pull out their relids the hard way with pull_varnos().
538 : * However, currently the extended-stats machinery won't do anything
539 : * with non-RestrictInfo clauses anyway, so there's no point in
540 : * spending extra cycles; just fail if that's what we have.
541 : *
542 : * An exception to that rule is if we have a bare BoolExpr AND clause.
543 : * We treat this as a special case because the restrictinfo machinery
544 : * doesn't build RestrictInfos on top of AND clauses.
545 : */
546 519458 : if (is_andclause(rinfo))
547 : {
548 : RelOptInfo *rel;
549 :
550 2298 : rel = find_single_rel_for_clauses(root,
551 : ((BoolExpr *) rinfo)->args);
552 :
553 2298 : if (rel == NULL)
554 151022 : return NULL;
555 1838 : if (lastrelid == 0)
556 912 : lastrelid = rel->relid;
557 926 : else if (rel->relid != lastrelid)
558 0 : return NULL;
559 :
560 30786 : continue;
561 : }
562 :
563 517160 : if (!IsA(rinfo, RestrictInfo))
564 1022 : return NULL;
565 :
566 516138 : if (bms_is_empty(rinfo->clause_relids))
567 28948 : continue; /* we can ignore variable-free clauses */
568 487190 : if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
569 146722 : return NULL; /* multiple relations in this clause */
570 340468 : if (lastrelid == 0)
571 165506 : lastrelid = relid; /* first clause referencing a relation */
572 174962 : else if (relid != lastrelid)
573 2818 : return NULL; /* relation not same as last one */
574 : }
575 :
576 835310 : if (lastrelid != 0)
577 133814 : return find_base_rel(root, lastrelid);
578 :
579 701496 : return NULL; /* no clauses */
580 : }
581 :
582 : /*
583 : * treat_as_join_clause -
584 : * Decide whether an operator clause is to be handled by the
585 : * restriction or join estimator. Subroutine for clause_selectivity().
586 : */
587 : static inline bool
588 732488 : treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
589 : int varRelid, SpecialJoinInfo *sjinfo)
590 : {
591 732488 : if (varRelid != 0)
592 : {
593 : /*
594 : * Caller is forcing restriction mode (eg, because we are examining an
595 : * inner indexscan qual).
596 : */
597 264528 : return false;
598 : }
599 467960 : else if (sjinfo == NULL)
600 : {
601 : /*
602 : * It must be a restriction clause, since it's being evaluated at a
603 : * scan node.
604 : */
605 284516 : return false;
606 : }
607 : else
608 : {
609 : /*
610 : * Otherwise, it's a join if there's more than one base relation used.
611 : * We can optimize this calculation if an rinfo was passed.
612 : *
613 : * XXX Since we know the clause is being evaluated at a join, the
614 : * only way it could be single-relation is if it was delayed by outer
615 : * joins. We intentionally count only baserels here, not OJs that
616 : * might be present in rinfo->clause_relids, so that we direct such
617 : * cases to the restriction qual estimators not join estimators.
618 : * Eventually some notice should be taken of the possibility of
619 : * injected nulls, but we'll likely want to do that in the restriction
620 : * estimators rather than starting to treat such cases as join quals.
621 : */
622 183444 : if (rinfo)
623 182988 : return (rinfo->num_base_rels > 1);
624 : else
625 456 : return (NumRelids(root, clause) > 1);
626 : }
627 : }
628 :
629 :
630 : /*
631 : * clause_selectivity -
632 : * Compute the selectivity of a general boolean expression clause.
633 : *
634 : * The clause can be either a RestrictInfo or a plain expression. If it's
635 : * a RestrictInfo, we try to cache the selectivity for possible re-use,
636 : * so passing RestrictInfos is preferred.
637 : *
638 : * varRelid is either 0 or a rangetable index.
639 : *
640 : * When varRelid is not 0, only variables belonging to that relation are
641 : * considered in computing selectivity; other vars are treated as constants
642 : * of unknown values. This is appropriate for estimating the selectivity of
643 : * a join clause that is being used as a restriction clause in a scan of a
644 : * nestloop join's inner relation --- varRelid should then be the ID of the
645 : * inner relation.
646 : *
647 : * When varRelid is 0, all variables are treated as variables. This
648 : * is appropriate for ordinary join clauses and restriction clauses.
649 : *
650 : * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
651 : * if the clause isn't a join clause.
652 : *
653 : * sjinfo is NULL for a non-join clause, otherwise it provides additional
654 : * context information about the join being performed. There are some
655 : * special cases:
656 : * 1. For a special (not INNER) join, sjinfo is always a member of
657 : * root->join_info_list.
658 : * 2. For an INNER join, sjinfo is just a transient struct, and only the
659 : * relids and jointype fields in it can be trusted.
660 : * It is possible for jointype to be different from sjinfo->jointype.
661 : * This indicates we are considering a variant join: either with
662 : * the LHS and RHS switched, or with one input unique-ified.
663 : *
664 : * Note: when passing nonzero varRelid, it's normally appropriate to set
665 : * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
666 : * join clause; because we aren't treating it as a join clause.
667 : */
668 : Selectivity
669 388330 : clause_selectivity(PlannerInfo *root,
670 : Node *clause,
671 : int varRelid,
672 : JoinType jointype,
673 : SpecialJoinInfo *sjinfo)
674 : {
675 388330 : return clause_selectivity_ext(root, clause, varRelid,
676 : jointype, sjinfo, true);
677 : }
678 :
679 : /*
680 : * clause_selectivity_ext -
681 : * Extended version of clause_selectivity(). If "use_extended_stats" is
682 : * false, all extended statistics will be ignored, and only per-column
683 : * statistics will be used.
684 : */
685 : Selectivity
686 2097686 : clause_selectivity_ext(PlannerInfo *root,
687 : Node *clause,
688 : int varRelid,
689 : JoinType jointype,
690 : SpecialJoinInfo *sjinfo,
691 : bool use_extended_stats)
692 : {
693 2097686 : Selectivity s1 = 0.5; /* default for any unhandled clause type */
694 2097686 : RestrictInfo *rinfo = NULL;
695 2097686 : bool cacheable = false;
696 :
697 2097686 : if (clause == NULL) /* can this still happen? */
698 0 : return s1;
699 :
700 2097686 : if (IsA(clause, RestrictInfo))
701 : {
702 2081886 : rinfo = (RestrictInfo *) clause;
703 :
704 : /*
705 : * If the clause is marked pseudoconstant, then it will be used as a
706 : * gating qual and should not affect selectivity estimates; hence
707 : * return 1.0. The only exception is that a constant FALSE may be
708 : * taken as having selectivity 0.0, since it will surely mean no rows
709 : * out of the plan. This case is simple enough that we need not
710 : * bother caching the result.
711 : */
712 2081886 : if (rinfo->pseudoconstant)
713 : {
714 22334 : if (!IsA(rinfo->clause, Const))
715 22146 : return (Selectivity) 1.0;
716 : }
717 :
718 : /*
719 : * If possible, cache the result of the selectivity calculation for
720 : * the clause. We can cache if varRelid is zero or the clause
721 : * contains only vars of that relid --- otherwise varRelid will affect
722 : * the result, so mustn't cache. Outer join quals might be examined
723 : * with either their join's actual jointype or JOIN_INNER, so we need
724 : * two cache variables to remember both cases. Note: we assume the
725 : * result won't change if we are switching the input relations or
726 : * considering a unique-ified case, so we only need one cache variable
727 : * for all non-JOIN_INNER cases.
728 : */
729 2059740 : if (varRelid == 0 ||
730 821416 : rinfo->num_base_rels == 0 ||
731 1387528 : (rinfo->num_base_rels == 1 &&
732 566112 : bms_is_member(varRelid, rinfo->clause_relids)))
733 : {
734 : /* Cacheable --- do we already have the result? */
735 1802448 : if (jointype == JOIN_INNER)
736 : {
737 1535086 : if (rinfo->norm_selec >= 0)
738 1095258 : return rinfo->norm_selec;
739 : }
740 : else
741 : {
742 267362 : if (rinfo->outer_selec >= 0)
743 171210 : return rinfo->outer_selec;
744 : }
745 535980 : cacheable = true;
746 : }
747 :
748 : /*
749 : * Proceed with examination of contained clause. If the clause is an
750 : * OR-clause, we want to look at the variant with sub-RestrictInfos,
751 : * so that per-subclause selectivities can be cached.
752 : */
753 793272 : if (rinfo->orclause)
754 10780 : clause = (Node *) rinfo->orclause;
755 : else
756 782492 : clause = (Node *) rinfo->clause;
757 : }
758 :
759 809072 : if (IsA(clause, Var))
760 : {
761 38950 : Var *var = (Var *) clause;
762 :
763 : /*
764 : * We probably shouldn't ever see an uplevel Var here, but if we do,
765 : * return the default selectivity...
766 : */
767 38950 : if (var->varlevelsup == 0 &&
768 544 : (varRelid == 0 || varRelid == (int) var->varno))
769 : {
770 : /* Use the restriction selectivity function for a bool Var */
771 38702 : s1 = boolvarsel(root, (Node *) var, varRelid);
772 : }
773 : }
774 770122 : else if (IsA(clause, Const))
775 : {
776 : /* bool constant is pretty easy... */
777 3064 : Const *con = (Const *) clause;
778 :
779 6012 : s1 = con->constisnull ? 0.0 :
780 2948 : DatumGetBool(con->constvalue) ? 1.0 : 0.0;
781 : }
782 767058 : else if (IsA(clause, Param))
783 : {
784 : /* see if we can replace the Param */
785 24 : Node *subst = estimate_expression_value(root, clause);
786 :
787 24 : if (IsA(subst, Const))
788 : {
789 : /* bool constant is pretty easy... */
790 0 : Const *con = (Const *) subst;
791 :
792 0 : s1 = con->constisnull ? 0.0 :
793 0 : DatumGetBool(con->constvalue) ? 1.0 : 0.0;
794 : }
795 : else
796 : {
797 : /* XXX any way to do better than default? */
798 : }
799 : }
800 767034 : else if (is_notclause(clause))
801 : {
802 : /* inverse of the selectivity of the underlying clause */
803 6680 : s1 = 1.0 - clause_selectivity_ext(root,
804 6680 : (Node *) get_notclausearg((Expr *) clause),
805 : varRelid,
806 : jointype,
807 : sjinfo,
808 : use_extended_stats);
809 : }
810 760354 : else if (is_andclause(clause))
811 : {
812 : /* share code with clauselist_selectivity() */
813 2942 : s1 = clauselist_selectivity_ext(root,
814 : ((BoolExpr *) clause)->args,
815 : varRelid,
816 : jointype,
817 : sjinfo,
818 : use_extended_stats);
819 : }
820 757412 : else if (is_orclause(clause))
821 : {
822 : /*
823 : * Almost the same thing as clauselist_selectivity, but with the
824 : * clauses connected by OR.
825 : */
826 10820 : s1 = clauselist_selectivity_or(root,
827 : ((BoolExpr *) clause)->args,
828 : varRelid,
829 : jointype,
830 : sjinfo,
831 : use_extended_stats);
832 : }
833 746592 : else if (is_opclause(clause) || IsA(clause, DistinctExpr))
834 708396 : {
835 708404 : OpExpr *opclause = (OpExpr *) clause;
836 708404 : Oid opno = opclause->opno;
837 :
838 708404 : if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
839 : {
840 : /* Estimate selectivity for a join clause. */
841 175946 : s1 = join_selectivity(root, opno,
842 : opclause->args,
843 : opclause->inputcollid,
844 : jointype,
845 : sjinfo);
846 : }
847 : else
848 : {
849 : /* Estimate selectivity for a restriction clause. */
850 532458 : s1 = restriction_selectivity(root, opno,
851 : opclause->args,
852 : opclause->inputcollid,
853 : varRelid);
854 : }
855 :
856 : /*
857 : * DistinctExpr has the same representation as OpExpr, but the
858 : * contained operator is "=" not "<>", so we must negate the result.
859 : * This estimation method doesn't give the right behavior for nulls,
860 : * but it's better than doing nothing.
861 : */
862 708396 : if (IsA(clause, DistinctExpr))
863 654 : s1 = 1.0 - s1;
864 : }
865 38188 : else if (is_funcclause(clause))
866 : {
867 9420 : FuncExpr *funcclause = (FuncExpr *) clause;
868 :
869 : /* Try to get an estimate from the support function, if any */
870 9420 : s1 = function_selectivity(root,
871 : funcclause->funcid,
872 : funcclause->args,
873 : funcclause->inputcollid,
874 9420 : treat_as_join_clause(root, clause, rinfo,
875 : varRelid, sjinfo),
876 : varRelid,
877 : jointype,
878 : sjinfo);
879 : }
880 28768 : else if (IsA(clause, ScalarArrayOpExpr))
881 : {
882 : /* Use node specific selectivity calculation function */
883 14664 : s1 = scalararraysel(root,
884 : (ScalarArrayOpExpr *) clause,
885 14664 : treat_as_join_clause(root, clause, rinfo,
886 : varRelid, sjinfo),
887 : varRelid,
888 : jointype,
889 : sjinfo);
890 : }
891 14104 : else if (IsA(clause, RowCompareExpr))
892 : {
893 : /* Use node specific selectivity calculation function */
894 156 : s1 = rowcomparesel(root,
895 : (RowCompareExpr *) clause,
896 : varRelid,
897 : jointype,
898 : sjinfo);
899 : }
900 13948 : else if (IsA(clause, NullTest))
901 : {
902 : /* Use node specific selectivity calculation function */
903 10880 : s1 = nulltestsel(root,
904 : ((NullTest *) clause)->nulltesttype,
905 10880 : (Node *) ((NullTest *) clause)->arg,
906 : varRelid,
907 : jointype,
908 : sjinfo);
909 : }
910 3068 : else if (IsA(clause, BooleanTest))
911 : {
912 : /* Use node specific selectivity calculation function */
913 624 : s1 = booltestsel(root,
914 : ((BooleanTest *) clause)->booltesttype,
915 624 : (Node *) ((BooleanTest *) clause)->arg,
916 : varRelid,
917 : jointype,
918 : sjinfo);
919 : }
920 2444 : else if (IsA(clause, CurrentOfExpr))
921 : {
922 : /* CURRENT OF selects at most one row of its table */
923 394 : CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
924 394 : RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
925 :
926 394 : if (crel->tuples > 0)
927 380 : s1 = 1.0 / crel->tuples;
928 : }
929 2050 : else if (IsA(clause, RelabelType))
930 : {
931 : /* Not sure this case is needed, but it can't hurt */
932 0 : s1 = clause_selectivity_ext(root,
933 0 : (Node *) ((RelabelType *) clause)->arg,
934 : varRelid,
935 : jointype,
936 : sjinfo,
937 : use_extended_stats);
938 : }
939 2050 : else if (IsA(clause, CoerceToDomain))
940 : {
941 : /* Not sure this case is needed, but it can't hurt */
942 0 : s1 = clause_selectivity_ext(root,
943 0 : (Node *) ((CoerceToDomain *) clause)->arg,
944 : varRelid,
945 : jointype,
946 : sjinfo,
947 : use_extended_stats);
948 : }
949 : else
950 : {
951 : /*
952 : * For anything else, see if we can consider it as a boolean variable.
953 : * This only works if it's an immutable expression in Vars of a single
954 : * relation; but there's no point in us checking that here because
955 : * boolvarsel() will do it internally, and return a suitable default
956 : * selectivity if not.
957 : */
958 2050 : s1 = boolvarsel(root, clause, varRelid);
959 : }
960 :
961 : /* Cache the result if possible */
962 809064 : if (cacheable)
963 : {
964 535972 : if (jointype == JOIN_INNER)
965 439820 : rinfo->norm_selec = s1;
966 : else
967 96152 : rinfo->outer_selec = s1;
968 : }
969 :
970 : #ifdef SELECTIVITY_DEBUG
971 : elog(DEBUG4, "clause_selectivity: s1 %f", s1);
972 : #endif /* SELECTIVITY_DEBUG */
973 :
974 809064 : return s1;
975 : }
|