Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * clausesel.c
4 : * Routines to compute clause selectivities
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/path/clausesel.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "nodes/nodeFuncs.h"
18 : #include "optimizer/clauses.h"
19 : #include "optimizer/optimizer.h"
20 : #include "optimizer/pathnode.h"
21 : #include "optimizer/plancat.h"
22 : #include "statistics/statistics.h"
23 : #include "utils/fmgroids.h"
24 : #include "utils/lsyscache.h"
25 : #include "utils/selfuncs.h"
26 :
27 : /*
28 : * Data structure for accumulating info about possible range-query
29 : * clause pairs in clauselist_selectivity.
30 : */
31 : typedef struct RangeQueryClause
32 : {
33 : struct RangeQueryClause *next; /* next in linked list */
34 : Node *var; /* The common variable of the clauses */
35 : bool have_lobound; /* found a low-bound clause yet? */
36 : bool have_hibound; /* found a high-bound clause yet? */
37 : Selectivity lobound; /* Selectivity of a var > something clause */
38 : Selectivity hibound; /* Selectivity of a var < something clause */
39 : } RangeQueryClause;
40 :
41 : static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
42 : bool varonleft, bool isLTsel, Selectivity s2);
43 : static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
44 : List *clauses);
45 : static Selectivity clauselist_selectivity_or(PlannerInfo *root,
46 : List *clauses,
47 : int varRelid,
48 : JoinType jointype,
49 : SpecialJoinInfo *sjinfo,
50 : bool use_extended_stats);
51 :
52 : /****************************************************************************
53 : * ROUTINES TO COMPUTE SELECTIVITIES
54 : ****************************************************************************/
55 :
56 : /*
57 : * clauselist_selectivity -
58 : * Compute the selectivity of an implicitly-ANDed list of boolean
59 : * expression clauses. The list can be empty, in which case 1.0
60 : * must be returned. List elements may be either RestrictInfos
61 : * or bare expression clauses --- the former is preferred since
62 : * it allows caching of results.
63 : *
64 : * See clause_selectivity() for the meaning of the additional parameters.
65 : *
66 : * The basic approach is to apply extended statistics first, on as many
67 : * clauses as possible, in order to capture cross-column dependencies etc.
68 : * The remaining clauses are then estimated by taking the product of their
69 : * selectivities, but that's only right if they have independent
70 : * probabilities, and in reality they are often NOT independent even if they
71 : * only refer to a single column. So, we want to be smarter where we can.
72 : *
73 : * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
74 : * are recognized as possible range query components if they are restriction
75 : * opclauses whose operators have scalarltsel or a related function as their
76 : * restriction selectivity estimator. We pair up clauses of this form that
77 : * refer to the same variable. An unpairable clause of this kind is simply
78 : * multiplied into the selectivity product in the normal way. But when we
79 : * find a pair, we know that the selectivities represent the relative
80 : * positions of the low and high bounds within the column's range, so instead
81 : * of figuring the selectivity as hisel * losel, we can figure it as hisel +
82 : * losel - 1. (To visualize this, see that hisel is the fraction of the range
83 : * below the high bound, while losel is the fraction above the low bound; so
84 : * hisel can be interpreted directly as a 0..1 value but we need to convert
85 : * losel to 1-losel before interpreting it as a value. Then the available
86 : * range is 1-losel to hisel. However, this calculation double-excludes
87 : * nulls, so really we need hisel + losel + null_frac - 1.)
88 : *
89 : * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
90 : * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
91 : * yields an impossible (negative) result.
92 : *
93 : * A free side-effect is that we can recognize redundant inequalities such
94 : * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
95 : *
96 : * Of course this is all very dependent on the behavior of the inequality
97 : * selectivity functions; perhaps some day we can generalize the approach.
98 : */
99 : Selectivity
100 2364742 : clauselist_selectivity(PlannerInfo *root,
101 : List *clauses,
102 : int varRelid,
103 : JoinType jointype,
104 : SpecialJoinInfo *sjinfo)
105 : {
106 2364742 : return clauselist_selectivity_ext(root, clauses, varRelid,
107 : jointype, sjinfo, true);
108 : }
109 :
110 : /*
111 : * clauselist_selectivity_ext -
112 : * Extended version of clauselist_selectivity(). If "use_extended_stats"
113 : * is false, all extended statistics will be ignored, and only per-column
114 : * statistics will be used.
115 : */
116 : Selectivity
117 2370842 : clauselist_selectivity_ext(PlannerInfo *root,
118 : List *clauses,
119 : int varRelid,
120 : JoinType jointype,
121 : SpecialJoinInfo *sjinfo,
122 : bool use_extended_stats)
123 : {
124 2370842 : Selectivity s1 = 1.0;
125 : RelOptInfo *rel;
126 2370842 : Bitmapset *estimatedclauses = NULL;
127 2370842 : RangeQueryClause *rqlist = NULL;
128 : ListCell *l;
129 : int listidx;
130 :
131 : /*
132 : * If there's exactly one clause, just go directly to
133 : * clause_selectivity_ext(). None of what we might do below is relevant.
134 : */
135 2370842 : if (list_length(clauses) == 1)
136 1205042 : return clause_selectivity_ext(root, (Node *) linitial(clauses),
137 : varRelid, jointype, sjinfo,
138 : use_extended_stats);
139 :
140 : /*
141 : * Determine if these clauses reference a single relation. If so, and if
142 : * it has extended statistics, try to apply those.
143 : */
144 1165800 : rel = find_single_rel_for_clauses(root, clauses);
145 1165800 : if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
146 : {
147 : /*
148 : * Estimate as many clauses as possible using extended statistics.
149 : *
150 : * 'estimatedclauses' is populated with the 0-based list position
151 : * index of clauses estimated here, and that should be ignored below.
152 : */
153 1734 : s1 = statext_clauselist_selectivity(root, clauses, varRelid,
154 : jointype, sjinfo, rel,
155 : &estimatedclauses, false);
156 : }
157 :
158 : /*
159 : * Apply normal selectivity estimates for remaining clauses. We'll be
160 : * careful to skip any clauses which were already estimated above.
161 : *
162 : * Anything that doesn't look like a potential rangequery clause gets
163 : * multiplied into s1 and forgotten. Anything that does gets inserted into
164 : * an rqlist entry.
165 : */
166 1165800 : listidx = -1;
167 1918096 : foreach(l, clauses)
168 : {
169 752296 : Node *clause = (Node *) lfirst(l);
170 : RestrictInfo *rinfo;
171 : Selectivity s2;
172 :
173 752296 : listidx++;
174 :
175 : /*
176 : * Skip this clause if it's already been estimated by some other
177 : * statistics above.
178 : */
179 752296 : if (bms_is_member(listidx, estimatedclauses))
180 3402 : continue;
181 :
182 : /* Compute the selectivity of this clause in isolation */
183 748894 : s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
184 : use_extended_stats);
185 :
186 : /*
187 : * Check for being passed a RestrictInfo.
188 : *
189 : * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
190 : * 0.0; just use that rather than looking for range pairs.
191 : */
192 748894 : if (IsA(clause, RestrictInfo))
193 : {
194 747594 : rinfo = (RestrictInfo *) clause;
195 747594 : if (rinfo->pseudoconstant)
196 : {
197 25562 : s1 = s1 * s2;
198 25562 : continue;
199 : }
200 722032 : clause = (Node *) rinfo->clause;
201 : }
202 : else
203 1300 : rinfo = NULL;
204 :
205 : /*
206 : * See if it looks like a restriction clause with a pseudoconstant on
207 : * one side. (Anything more complicated than that might not behave in
208 : * the simple way we are expecting.) Most of the tests here can be
209 : * done more efficiently with rinfo than without.
210 : */
211 723332 : if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
212 : {
213 620946 : OpExpr *expr = (OpExpr *) clause;
214 620946 : bool varonleft = true;
215 : bool ok;
216 :
217 620946 : if (rinfo)
218 : {
219 1025306 : ok = (rinfo->num_base_rels == 1) &&
220 399188 : (is_pseudo_constant_clause_relids(lsecond(expr->args),
221 6456 : rinfo->right_relids) ||
222 6456 : (varonleft = false,
223 6456 : is_pseudo_constant_clause_relids(linitial(expr->args),
224 : rinfo->left_relids)));
225 : }
226 : else
227 : {
228 2496 : ok = (NumRelids(root, clause) == 1) &&
229 1212 : (is_pseudo_constant_clause(lsecond(expr->args)) ||
230 0 : (varonleft = false,
231 0 : is_pseudo_constant_clause(linitial(expr->args))));
232 : }
233 :
234 620946 : if (ok)
235 : {
236 : /*
237 : * If it's not a "<"/"<="/">"/">=" operator, just merge the
238 : * selectivity in generically. But if it's the right oprrest,
239 : * add the clause to rqlist for later processing.
240 : */
241 399808 : switch (get_oprrest(expr->opno))
242 : {
243 13242 : case F_SCALARLTSEL:
244 : case F_SCALARLESEL:
245 13242 : addRangeClause(&rqlist, clause,
246 : varonleft, true, s2);
247 13242 : break;
248 35326 : case F_SCALARGTSEL:
249 : case F_SCALARGESEL:
250 35326 : addRangeClause(&rqlist, clause,
251 : varonleft, false, s2);
252 35326 : break;
253 351240 : default:
254 : /* Just merge the selectivity in generically */
255 351240 : s1 = s1 * s2;
256 351240 : break;
257 : }
258 399808 : continue; /* drop to loop bottom */
259 : }
260 : }
261 :
262 : /* Not the right form, so treat it generically. */
263 323524 : s1 = s1 * s2;
264 : }
265 :
266 : /*
267 : * Now scan the rangequery pair list.
268 : */
269 1205298 : while (rqlist != NULL)
270 : {
271 : RangeQueryClause *rqnext;
272 :
273 39498 : if (rqlist->have_lobound && rqlist->have_hibound)
274 8572 : {
275 : /* Successfully matched a pair of range clauses */
276 : Selectivity s2;
277 :
278 : /*
279 : * Exact equality to the default value probably means the
280 : * selectivity function punted. This is not airtight but should
281 : * be good enough.
282 : */
283 8572 : if (rqlist->hibound == DEFAULT_INEQ_SEL ||
284 6426 : rqlist->lobound == DEFAULT_INEQ_SEL)
285 : {
286 2146 : s2 = DEFAULT_RANGE_INEQ_SEL;
287 : }
288 : else
289 : {
290 6426 : s2 = rqlist->hibound + rqlist->lobound - 1.0;
291 :
292 : /* Adjust for double-exclusion of NULLs */
293 6426 : s2 += nulltestsel(root, IS_NULL, rqlist->var,
294 : varRelid, jointype, sjinfo);
295 :
296 : /*
297 : * A zero or slightly negative s2 should be converted into a
298 : * small positive value; we probably are dealing with a very
299 : * tight range and got a bogus result due to roundoff errors.
300 : * However, if s2 is very negative, then we probably have
301 : * default selectivity estimates on one or both sides of the
302 : * range that we failed to recognize above for some reason.
303 : */
304 6426 : if (s2 <= 0.0)
305 : {
306 950 : if (s2 < -0.01)
307 : {
308 : /*
309 : * No data available --- use a default estimate that
310 : * is small, but not real small.
311 : */
312 12 : s2 = DEFAULT_RANGE_INEQ_SEL;
313 : }
314 : else
315 : {
316 : /*
317 : * It's just roundoff error; use a small positive
318 : * value
319 : */
320 938 : s2 = 1.0e-10;
321 : }
322 : }
323 : }
324 : /* Merge in the selectivity of the pair of clauses */
325 8572 : s1 *= s2;
326 : }
327 : else
328 : {
329 : /* Only found one of a pair, merge it in generically */
330 30926 : if (rqlist->have_lobound)
331 26352 : s1 *= rqlist->lobound;
332 : else
333 4574 : s1 *= rqlist->hibound;
334 : }
335 : /* release storage and advance */
336 39498 : rqnext = rqlist->next;
337 39498 : pfree(rqlist);
338 39498 : rqlist = rqnext;
339 : }
340 :
341 1165800 : return s1;
342 : }
343 :
344 : /*
345 : * clauselist_selectivity_or -
346 : * Compute the selectivity of an implicitly-ORed list of boolean
347 : * expression clauses. The list can be empty, in which case 0.0
348 : * must be returned. List elements may be either RestrictInfos
349 : * or bare expression clauses --- the former is preferred since
350 : * it allows caching of results.
351 : *
352 : * See clause_selectivity() for the meaning of the additional parameters.
353 : *
354 : * The basic approach is to apply extended statistics first, on as many
355 : * clauses as possible, in order to capture cross-column dependencies etc.
356 : * The remaining clauses are then estimated as if they were independent.
357 : */
358 : static Selectivity
359 12344 : clauselist_selectivity_or(PlannerInfo *root,
360 : List *clauses,
361 : int varRelid,
362 : JoinType jointype,
363 : SpecialJoinInfo *sjinfo,
364 : bool use_extended_stats)
365 : {
366 12344 : Selectivity s1 = 0.0;
367 : RelOptInfo *rel;
368 12344 : Bitmapset *estimatedclauses = NULL;
369 : ListCell *lc;
370 : int listidx;
371 :
372 : /*
373 : * Determine if these clauses reference a single relation. If so, and if
374 : * it has extended statistics, try to apply those.
375 : */
376 12344 : rel = find_single_rel_for_clauses(root, clauses);
377 12344 : if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
378 : {
379 : /*
380 : * Estimate as many clauses as possible using extended statistics.
381 : *
382 : * 'estimatedclauses' is populated with the 0-based list position
383 : * index of clauses estimated here, and that should be ignored below.
384 : */
385 108 : s1 = statext_clauselist_selectivity(root, clauses, varRelid,
386 : jointype, sjinfo, rel,
387 : &estimatedclauses, true);
388 : }
389 :
390 : /*
391 : * Estimate the remaining clauses as if they were independent.
392 : *
393 : * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
394 : * for the probable overlap of selected tuple sets.
395 : *
396 : * XXX is this too conservative?
397 : */
398 12344 : listidx = -1;
399 40542 : foreach(lc, clauses)
400 : {
401 : Selectivity s2;
402 :
403 28198 : listidx++;
404 :
405 : /*
406 : * Skip this clause if it's already been estimated by some other
407 : * statistics above.
408 : */
409 28198 : if (bms_is_member(listidx, estimatedclauses))
410 240 : continue;
411 :
412 27958 : s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid,
413 : jointype, sjinfo, use_extended_stats);
414 :
415 27958 : s1 = s1 + s2 - s1 * s2;
416 : }
417 :
418 12344 : return s1;
419 : }
420 :
421 : /*
422 : * addRangeClause --- add a new range clause for clauselist_selectivity
423 : *
424 : * Here is where we try to match up pairs of range-query clauses
425 : */
426 : static void
427 48568 : addRangeClause(RangeQueryClause **rqlist, Node *clause,
428 : bool varonleft, bool isLTsel, Selectivity s2)
429 : {
430 : RangeQueryClause *rqelem;
431 : Node *var;
432 : bool is_lobound;
433 :
434 48568 : if (varonleft)
435 : {
436 48346 : var = get_leftop((Expr *) clause);
437 48346 : is_lobound = !isLTsel; /* x < something is high bound */
438 : }
439 : else
440 : {
441 222 : var = get_rightop((Expr *) clause);
442 222 : is_lobound = isLTsel; /* something < x is low bound */
443 : }
444 :
445 49802 : for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
446 : {
447 : /*
448 : * We use full equal() here because the "var" might be a function of
449 : * one or more attributes of the same relation...
450 : */
451 10304 : if (!equal(var, rqelem->var))
452 1234 : continue;
453 : /* Found the right group to put this clause in */
454 9070 : if (is_lobound)
455 : {
456 420 : if (!rqelem->have_lobound)
457 : {
458 180 : rqelem->have_lobound = true;
459 180 : rqelem->lobound = s2;
460 : }
461 : else
462 : {
463 :
464 : /*------
465 : * We have found two similar clauses, such as
466 : * x < y AND x <= z.
467 : * Keep only the more restrictive one.
468 : *------
469 : */
470 240 : if (rqelem->lobound > s2)
471 60 : rqelem->lobound = s2;
472 : }
473 : }
474 : else
475 : {
476 8650 : if (!rqelem->have_hibound)
477 : {
478 8392 : rqelem->have_hibound = true;
479 8392 : rqelem->hibound = s2;
480 : }
481 : else
482 : {
483 :
484 : /*------
485 : * We have found two similar clauses, such as
486 : * x > y AND x >= z.
487 : * Keep only the more restrictive one.
488 : *------
489 : */
490 258 : if (rqelem->hibound > s2)
491 96 : rqelem->hibound = s2;
492 : }
493 : }
494 9070 : return;
495 : }
496 :
497 : /* No matching var found, so make a new clause-pair data structure */
498 39498 : rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
499 39498 : rqelem->var = var;
500 39498 : if (is_lobound)
501 : {
502 34744 : rqelem->have_lobound = true;
503 34744 : rqelem->have_hibound = false;
504 34744 : rqelem->lobound = s2;
505 : }
506 : else
507 : {
508 4754 : rqelem->have_lobound = false;
509 4754 : rqelem->have_hibound = true;
510 4754 : rqelem->hibound = s2;
511 : }
512 39498 : rqelem->next = *rqlist;
513 39498 : *rqlist = rqelem;
514 : }
515 :
516 : /*
517 : * find_single_rel_for_clauses
518 : * Examine each clause in 'clauses' and determine if all clauses
519 : * reference only a single relation. If so return that relation,
520 : * otherwise return NULL.
521 : */
522 : static RelOptInfo *
523 1180642 : find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
524 : {
525 1180642 : int lastrelid = 0;
526 : ListCell *l;
527 :
528 1625406 : foreach(l, clauses)
529 : {
530 631074 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
531 : int relid;
532 :
533 : /*
534 : * If we have a list of bare clauses rather than RestrictInfos, we
535 : * could pull out their relids the hard way with pull_varnos().
536 : * However, currently the extended-stats machinery won't do anything
537 : * with non-RestrictInfo clauses anyway, so there's no point in
538 : * spending extra cycles; just fail if that's what we have.
539 : *
540 : * An exception to that rule is if we have a bare BoolExpr AND clause.
541 : * We treat this as a special case because the restrictinfo machinery
542 : * doesn't build RestrictInfos on top of AND clauses.
543 : */
544 631074 : if (is_andclause(rinfo))
545 : {
546 : RelOptInfo *rel;
547 :
548 2498 : rel = find_single_rel_for_clauses(root,
549 : ((BoolExpr *) rinfo)->args);
550 :
551 2498 : if (rel == NULL)
552 186310 : return NULL;
553 1990 : if (lastrelid == 0)
554 990 : lastrelid = rel->relid;
555 1000 : else if (rel->relid != lastrelid)
556 0 : return NULL;
557 :
558 35880 : continue;
559 : }
560 :
561 628576 : if (!IsA(rinfo, RestrictInfo))
562 1106 : return NULL;
563 :
564 627470 : if (bms_is_empty(rinfo->clause_relids))
565 33890 : continue; /* we can ignore variable-free clauses */
566 593580 : if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
567 181464 : return NULL; /* multiple relations in this clause */
568 412116 : if (lastrelid == 0)
569 204448 : lastrelid = relid; /* first clause referencing a relation */
570 207668 : else if (relid != lastrelid)
571 3232 : return NULL; /* relation not same as last one */
572 : }
573 :
574 994332 : if (lastrelid != 0)
575 161684 : return find_base_rel(root, lastrelid);
576 :
577 832648 : return NULL; /* no clauses */
578 : }
579 :
580 : /*
581 : * treat_as_join_clause -
582 : * Decide whether an operator clause is to be handled by the
583 : * restriction or join estimator. Subroutine for clause_selectivity().
584 : */
585 : static inline bool
586 846568 : treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
587 : int varRelid, SpecialJoinInfo *sjinfo)
588 : {
589 846568 : if (varRelid != 0)
590 : {
591 : /*
592 : * Caller is forcing restriction mode (eg, because we are examining an
593 : * inner indexscan qual).
594 : */
595 310624 : return false;
596 : }
597 535944 : else if (sjinfo == NULL)
598 : {
599 : /*
600 : * It must be a restriction clause, since it's being evaluated at a
601 : * scan node.
602 : */
603 328186 : return false;
604 : }
605 : else
606 : {
607 : /*
608 : * Otherwise, it's a join if there's more than one base relation used.
609 : * We can optimize this calculation if an rinfo was passed.
610 : *
611 : * XXX Since we know the clause is being evaluated at a join, the
612 : * only way it could be single-relation is if it was delayed by outer
613 : * joins. We intentionally count only baserels here, not OJs that
614 : * might be present in rinfo->clause_relids, so that we direct such
615 : * cases to the restriction qual estimators not join estimators.
616 : * Eventually some notice should be taken of the possibility of
617 : * injected nulls, but we'll likely want to do that in the restriction
618 : * estimators rather than starting to treat such cases as join quals.
619 : */
620 207758 : if (rinfo)
621 207296 : return (rinfo->num_base_rels > 1);
622 : else
623 462 : return (NumRelids(root, clause) > 1);
624 : }
625 : }
626 :
627 :
628 : /*
629 : * clause_selectivity -
630 : * Compute the selectivity of a general boolean expression clause.
631 : *
632 : * The clause can be either a RestrictInfo or a plain expression. If it's
633 : * a RestrictInfo, we try to cache the selectivity for possible re-use,
634 : * so passing RestrictInfos is preferred.
635 : *
636 : * varRelid is either 0 or a rangetable index.
637 : *
638 : * When varRelid is not 0, only variables belonging to that relation are
639 : * considered in computing selectivity; other vars are treated as constants
640 : * of unknown values. This is appropriate for estimating the selectivity of
641 : * a join clause that is being used as a restriction clause in a scan of a
642 : * nestloop join's inner relation --- varRelid should then be the ID of the
643 : * inner relation.
644 : *
645 : * When varRelid is 0, all variables are treated as variables. This
646 : * is appropriate for ordinary join clauses and restriction clauses.
647 : *
648 : * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
649 : * if the clause isn't a join clause.
650 : *
651 : * sjinfo is NULL for a non-join clause, otherwise it provides additional
652 : * context information about the join being performed. There are some
653 : * special cases:
654 : * 1. For a special (not INNER) join, sjinfo is always a member of
655 : * root->join_info_list.
656 : * 2. For an INNER join, sjinfo is just a transient struct, and only the
657 : * relids and jointype fields in it can be trusted.
658 : * It is possible for jointype to be different from sjinfo->jointype.
659 : * This indicates we are considering a variant join: either with
660 : * the LHS and RHS switched, or with one input unique-ified.
661 : *
662 : * Note: when passing nonzero varRelid, it's normally appropriate to set
663 : * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
664 : * join clause; because we aren't treating it as a join clause.
665 : */
666 : Selectivity
667 460066 : clause_selectivity(PlannerInfo *root,
668 : Node *clause,
669 : int varRelid,
670 : JoinType jointype,
671 : SpecialJoinInfo *sjinfo)
672 : {
673 460066 : return clause_selectivity_ext(root, clause, varRelid,
674 : jointype, sjinfo, true);
675 : }
676 :
677 : /*
678 : * clause_selectivity_ext -
679 : * Extended version of clause_selectivity(). If "use_extended_stats" is
680 : * false, all extended statistics will be ignored, and only per-column
681 : * statistics will be used.
682 : */
683 : Selectivity
684 2450336 : clause_selectivity_ext(PlannerInfo *root,
685 : Node *clause,
686 : int varRelid,
687 : JoinType jointype,
688 : SpecialJoinInfo *sjinfo,
689 : bool use_extended_stats)
690 : {
691 2450336 : Selectivity s1 = 0.5; /* default for any unhandled clause type */
692 2450336 : RestrictInfo *rinfo = NULL;
693 2450336 : bool cacheable = false;
694 :
695 2450336 : if (clause == NULL) /* can this still happen? */
696 0 : return s1;
697 :
698 2450336 : if (IsA(clause, RestrictInfo))
699 : {
700 2431678 : rinfo = (RestrictInfo *) clause;
701 :
702 : /*
703 : * If the clause is marked pseudoconstant, then it will be used as a
704 : * gating qual and should not affect selectivity estimates; hence
705 : * return 1.0. The only exception is that a constant FALSE may be
706 : * taken as having selectivity 0.0, since it will surely mean no rows
707 : * out of the plan. This case is simple enough that we need not
708 : * bother caching the result.
709 : */
710 2431678 : if (rinfo->pseudoconstant)
711 : {
712 25904 : if (!IsA(rinfo->clause, Const))
713 25698 : return (Selectivity) 1.0;
714 : }
715 :
716 : /*
717 : * If possible, cache the result of the selectivity calculation for
718 : * the clause. We can cache if varRelid is zero or the clause
719 : * contains only vars of that relid --- otherwise varRelid will affect
720 : * the result, so mustn't cache. Outer join quals might be examined
721 : * with either their join's actual jointype or JOIN_INNER, so we need
722 : * two cache variables to remember both cases. Note: we assume the
723 : * result won't change if we are switching the input relations or
724 : * considering a unique-ified case, so we only need one cache variable
725 : * for all non-JOIN_INNER cases.
726 : */
727 2405980 : if (varRelid == 0 ||
728 987700 : rinfo->num_base_rels == 0 ||
729 1676648 : (rinfo->num_base_rels == 1 &&
730 688948 : bms_is_member(varRelid, rinfo->clause_relids)))
731 : {
732 : /* Cacheable --- do we already have the result? */
733 2104952 : if (jointype == JOIN_INNER)
734 : {
735 1810572 : if (rinfo->norm_selec >= 0)
736 1311716 : return rinfo->norm_selec;
737 : }
738 : else
739 : {
740 294380 : if (rinfo->outer_selec >= 0)
741 185594 : return rinfo->outer_selec;
742 : }
743 607642 : cacheable = true;
744 : }
745 :
746 : /*
747 : * Proceed with examination of contained clause. If the clause is an
748 : * OR-clause, we want to look at the variant with sub-RestrictInfos,
749 : * so that per-subclause selectivities can be cached.
750 : */
751 908670 : if (rinfo->orclause)
752 12304 : clause = (Node *) rinfo->orclause;
753 : else
754 896366 : clause = (Node *) rinfo->clause;
755 : }
756 :
757 927328 : if (IsA(clause, Var))
758 : {
759 40472 : Var *var = (Var *) clause;
760 :
761 : /*
762 : * We probably shouldn't ever see an uplevel Var here, but if we do,
763 : * return the default selectivity...
764 : */
765 40472 : if (var->varlevelsup == 0 &&
766 676 : (varRelid == 0 || varRelid == (int) var->varno))
767 : {
768 : /* Use the restriction selectivity function for a bool Var */
769 40158 : s1 = boolvarsel(root, (Node *) var, varRelid);
770 : }
771 : }
772 886856 : else if (IsA(clause, Const))
773 : {
774 : /* bool constant is pretty easy... */
775 3588 : Const *con = (Const *) clause;
776 :
777 7060 : s1 = con->constisnull ? 0.0 :
778 3472 : DatumGetBool(con->constvalue) ? 1.0 : 0.0;
779 : }
780 883268 : else if (IsA(clause, Param))
781 : {
782 : /* see if we can replace the Param */
783 24 : Node *subst = estimate_expression_value(root, clause);
784 :
785 24 : if (IsA(subst, Const))
786 : {
787 : /* bool constant is pretty easy... */
788 0 : Const *con = (Const *) subst;
789 :
790 0 : s1 = con->constisnull ? 0.0 :
791 0 : DatumGetBool(con->constvalue) ? 1.0 : 0.0;
792 : }
793 : else
794 : {
795 : /* XXX any way to do better than default? */
796 : }
797 : }
798 883244 : else if (is_notclause(clause))
799 : {
800 : /* inverse of the selectivity of the underlying clause */
801 8136 : s1 = 1.0 - clause_selectivity_ext(root,
802 8136 : (Node *) get_notclausearg((Expr *) clause),
803 : varRelid,
804 : jointype,
805 : sjinfo,
806 : use_extended_stats);
807 : }
808 875108 : else if (is_andclause(clause))
809 : {
810 : /* share code with clauselist_selectivity() */
811 3226 : s1 = clauselist_selectivity_ext(root,
812 : ((BoolExpr *) clause)->args,
813 : varRelid,
814 : jointype,
815 : sjinfo,
816 : use_extended_stats);
817 : }
818 871882 : else if (is_orclause(clause))
819 : {
820 : /*
821 : * Almost the same thing as clauselist_selectivity, but with the
822 : * clauses connected by OR.
823 : */
824 12344 : s1 = clauselist_selectivity_or(root,
825 : ((BoolExpr *) clause)->args,
826 : varRelid,
827 : jointype,
828 : sjinfo,
829 : use_extended_stats);
830 : }
831 859538 : else if (is_opclause(clause) || IsA(clause, DistinctExpr))
832 815894 : {
833 815918 : OpExpr *opclause = (OpExpr *) clause;
834 815918 : Oid opno = opclause->opno;
835 :
836 815918 : if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
837 : {
838 : /* Estimate selectivity for a join clause. */
839 198990 : s1 = join_selectivity(root, opno,
840 : opclause->args,
841 : opclause->inputcollid,
842 : jointype,
843 : sjinfo);
844 : }
845 : else
846 : {
847 : /* Estimate selectivity for a restriction clause. */
848 616928 : s1 = restriction_selectivity(root, opno,
849 : opclause->args,
850 : opclause->inputcollid,
851 : varRelid);
852 : }
853 :
854 : /*
855 : * DistinctExpr has the same representation as OpExpr, but the
856 : * contained operator is "=" not "<>", so we must negate the result.
857 : * This estimation method doesn't give the right behavior for nulls,
858 : * but it's better than doing nothing.
859 : */
860 815894 : if (IsA(clause, DistinctExpr))
861 792 : s1 = 1.0 - s1;
862 : }
863 43620 : else if (is_funcclause(clause))
864 : {
865 10868 : FuncExpr *funcclause = (FuncExpr *) clause;
866 :
867 : /* Try to get an estimate from the support function, if any */
868 10868 : s1 = function_selectivity(root,
869 : funcclause->funcid,
870 : funcclause->args,
871 : funcclause->inputcollid,
872 10868 : treat_as_join_clause(root, clause, rinfo,
873 : varRelid, sjinfo),
874 : varRelid,
875 : jointype,
876 : sjinfo);
877 : }
878 32752 : else if (IsA(clause, ScalarArrayOpExpr))
879 : {
880 : /* Use node specific selectivity calculation function */
881 19782 : s1 = scalararraysel(root,
882 : (ScalarArrayOpExpr *) clause,
883 19782 : treat_as_join_clause(root, clause, rinfo,
884 : varRelid, sjinfo),
885 : varRelid,
886 : jointype,
887 : sjinfo);
888 : }
889 12970 : else if (IsA(clause, RowCompareExpr))
890 : {
891 : /* Use node specific selectivity calculation function */
892 156 : s1 = rowcomparesel(root,
893 : (RowCompareExpr *) clause,
894 : varRelid,
895 : jointype,
896 : sjinfo);
897 : }
898 12814 : else if (IsA(clause, NullTest))
899 : {
900 : /* Use node specific selectivity calculation function */
901 9340 : s1 = nulltestsel(root,
902 : ((NullTest *) clause)->nulltesttype,
903 9340 : (Node *) ((NullTest *) clause)->arg,
904 : varRelid,
905 : jointype,
906 : sjinfo);
907 : }
908 3474 : else if (IsA(clause, BooleanTest))
909 : {
910 : /* Use node specific selectivity calculation function */
911 838 : s1 = booltestsel(root,
912 : ((BooleanTest *) clause)->booltesttype,
913 838 : (Node *) ((BooleanTest *) clause)->arg,
914 : varRelid,
915 : jointype,
916 : sjinfo);
917 : }
918 2636 : else if (IsA(clause, CurrentOfExpr))
919 : {
920 : /* CURRENT OF selects at most one row of its table */
921 406 : CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
922 406 : RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
923 :
924 406 : if (crel->tuples > 0)
925 392 : s1 = 1.0 / crel->tuples;
926 : }
927 2230 : else if (IsA(clause, RelabelType))
928 : {
929 : /* Not sure this case is needed, but it can't hurt */
930 0 : s1 = clause_selectivity_ext(root,
931 0 : (Node *) ((RelabelType *) clause)->arg,
932 : varRelid,
933 : jointype,
934 : sjinfo,
935 : use_extended_stats);
936 : }
937 2230 : else if (IsA(clause, CoerceToDomain))
938 : {
939 : /* Not sure this case is needed, but it can't hurt */
940 0 : s1 = clause_selectivity_ext(root,
941 0 : (Node *) ((CoerceToDomain *) clause)->arg,
942 : varRelid,
943 : jointype,
944 : sjinfo,
945 : use_extended_stats);
946 : }
947 : else
948 : {
949 : /*
950 : * For anything else, see if we can consider it as a boolean variable.
951 : * This only works if it's an immutable expression in Vars of a single
952 : * relation; but there's no point in us checking that here because
953 : * boolvarsel() will do it internally, and return a suitable default
954 : * selectivity if not.
955 : */
956 2230 : s1 = boolvarsel(root, clause, varRelid);
957 : }
958 :
959 : /* Cache the result if possible */
960 927304 : if (cacheable)
961 : {
962 607618 : if (jointype == JOIN_INNER)
963 498832 : rinfo->norm_selec = s1;
964 : else
965 108786 : rinfo->outer_selec = s1;
966 : }
967 :
968 : #ifdef SELECTIVITY_DEBUG
969 : elog(DEBUG4, "clause_selectivity: s1 %f", s1);
970 : #endif /* SELECTIVITY_DEBUG */
971 :
972 927304 : return s1;
973 : }
|