Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * orclauses.c
4 : * Routines to extract restriction OR clauses from join OR clauses
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/util/orclauses.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include "nodes/makefuncs.h"
19 : #include "nodes/nodeFuncs.h"
20 : #include "optimizer/optimizer.h"
21 : #include "optimizer/orclauses.h"
22 : #include "optimizer/paths.h"
23 : #include "optimizer/restrictinfo.h"
24 :
25 :
26 : static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel);
27 : static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel);
28 : static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
29 : Expr *orclause, RestrictInfo *join_or_rinfo);
30 :
31 :
32 : /*
33 : * extract_restriction_or_clauses
34 : * Examine join OR-of-AND clauses to see if any useful restriction OR
35 : * clauses can be extracted. If so, add them to the query.
36 : *
37 : * Although a join clause must reference multiple relations overall,
38 : * an OR of ANDs clause might contain sub-clauses that reference just one
39 : * relation and can be used to build a restriction clause for that rel.
40 : * For example consider
41 : * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
42 : * We can transform this into
43 : * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
44 : * AND (a.x = 42 OR a.x = 44)
45 : * AND (b.y = 43 OR b.z = 45);
46 : * which allows the latter clauses to be applied during the scans of a and b,
47 : * perhaps as index qualifications, and in any case reducing the number of
48 : * rows arriving at the join. In essence this is a partial transformation to
49 : * CNF (AND of ORs format). It is not complete, however, because we do not
50 : * unravel the original OR --- doing so would usually bloat the qualification
51 : * expression to little gain.
52 : *
53 : * The added quals are partially redundant with the original OR, and therefore
54 : * would cause the size of the joinrel to be underestimated when it is finally
55 : * formed. (This would be true of a full transformation to CNF as well; the
56 : * fault is not really in the transformation, but in clauselist_selectivity's
57 : * inability to recognize redundant conditions.) We can compensate for this
58 : * redundancy by changing the cached selectivity of the original OR clause,
59 : * canceling out the (valid) reduction in the estimated sizes of the base
60 : * relations so that the estimated joinrel size remains the same. This is
61 : * a MAJOR HACK: it depends on the fact that clause selectivities are cached
62 : * and on the fact that the same RestrictInfo node will appear in every
63 : * joininfo list that might be used when the joinrel is formed.
64 : * And it doesn't work in cases where the size estimation is nonlinear
65 : * (i.e., outer and IN joins). But it beats not doing anything.
66 : *
67 : * We examine each base relation to see if join clauses associated with it
68 : * contain extractable restriction conditions. If so, add those conditions
69 : * to the rel's baserestrictinfo and update the cached selectivities of the
70 : * join clauses. Note that the same join clause will be examined afresh
71 : * from the point of view of each baserel that participates in it, so its
72 : * cached selectivity may get updated multiple times.
73 : */
74 : void
75 289296 : extract_restriction_or_clauses(PlannerInfo *root)
76 : {
77 : Index rti;
78 :
79 : /* Examine each baserel for potential join OR clauses */
80 835948 : for (rti = 1; rti < root->simple_rel_array_size; rti++)
81 : {
82 546652 : RelOptInfo *rel = root->simple_rel_array[rti];
83 : ListCell *lc;
84 :
85 : /* there may be empty slots corresponding to non-baserel RTEs */
86 546652 : if (rel == NULL)
87 140000 : continue;
88 :
89 : Assert(rel->relid == rti); /* sanity check on array */
90 :
91 : /* ignore RTEs that are "other rels" */
92 406652 : if (rel->reloptkind != RELOPT_BASEREL)
93 0 : continue;
94 :
95 : /*
96 : * Find potentially interesting OR joinclauses. We can use any
97 : * joinclause that is considered safe to move to this rel by the
98 : * parameterized-path machinery, even though what we are going to do
99 : * with it is not exactly a parameterized path.
100 : */
101 515342 : foreach(lc, rel->joininfo)
102 : {
103 108690 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
104 :
105 116202 : if (restriction_is_or_clause(rinfo) &&
106 7512 : join_clause_is_movable_to(rinfo, rel))
107 : {
108 : /* Try to extract a qual for this rel only */
109 5040 : Expr *orclause = extract_or_clause(rinfo, rel);
110 :
111 : /*
112 : * If successful, decide whether we want to use the clause,
113 : * and insert it into the rel's restrictinfo list if so.
114 : */
115 5040 : if (orclause)
116 244 : consider_new_or_clause(root, rel, orclause, rinfo);
117 : }
118 : }
119 : }
120 289296 : }
121 :
122 : /*
123 : * Is the given primitive (non-OR) RestrictInfo safe to move to the rel?
124 : */
125 : static bool
126 10072 : is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel)
127 : {
128 : /*
129 : * We want clauses that mention the rel, and only the rel. So in
130 : * particular pseudoconstant clauses can be rejected quickly. Then check
131 : * the clause's Var membership.
132 : */
133 10072 : if (rinfo->pseudoconstant)
134 0 : return false;
135 10072 : if (!bms_equal(rinfo->clause_relids, rel->relids))
136 5498 : return false;
137 :
138 : /* We don't want extra evaluations of any volatile functions */
139 4574 : if (contain_volatile_functions((Node *) rinfo->clause))
140 0 : return false;
141 :
142 4574 : return true;
143 : }
144 :
145 : /*
146 : * Try to extract a restriction clause mentioning only "rel" from the given
147 : * join OR-clause.
148 : *
149 : * We must be able to extract at least one qual for this rel from each of
150 : * the arms of the OR, else we can't use it.
151 : *
152 : * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL
153 : * if no OR clause could be extracted.
154 : */
155 : static Expr *
156 5094 : extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel)
157 : {
158 5094 : List *clauselist = NIL;
159 : ListCell *lc;
160 :
161 : /*
162 : * Scan each arm of the input OR clause. Notice we descend into
163 : * or_rinfo->orclause, which has RestrictInfo nodes embedded below the
164 : * toplevel OR/AND structure. This is useful because we can use the info
165 : * in those nodes to make is_safe_restriction_clause_for()'s checks
166 : * cheaper. We'll strip those nodes from the returned tree, though,
167 : * meaning that fresh ones will be built if the clause is accepted as a
168 : * restriction clause. This might seem wasteful --- couldn't we re-use
169 : * the existing RestrictInfos? But that'd require assuming that
170 : * selectivity and other cached data is computed exactly the same way for
171 : * a restriction clause as for a join clause, which seems undesirable.
172 : */
173 : Assert(is_orclause(or_rinfo->orclause));
174 9196 : foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args)
175 : {
176 8934 : Node *orarg = (Node *) lfirst(lc);
177 8934 : List *subclauses = NIL;
178 : Node *subclause;
179 :
180 : /* OR arguments should be ANDs or sub-RestrictInfos */
181 8934 : if (is_andclause(orarg))
182 : {
183 824 : List *andargs = ((BoolExpr *) orarg)->args;
184 : ListCell *lc2;
185 :
186 2840 : foreach(lc2, andargs)
187 : {
188 2016 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
189 :
190 2016 : if (restriction_is_or_clause(rinfo))
191 : {
192 : /*
193 : * Recurse to deal with nested OR. Note we *must* recurse
194 : * here, this isn't just overly-tense optimization: we
195 : * have to descend far enough to find and strip all
196 : * RestrictInfos in the expression.
197 : */
198 : Expr *suborclause;
199 :
200 54 : suborclause = extract_or_clause(rinfo, rel);
201 54 : if (suborclause)
202 18 : subclauses = lappend(subclauses, suborclause);
203 : }
204 1962 : else if (is_safe_restriction_clause_for(rinfo, rel))
205 1266 : subclauses = lappend(subclauses, rinfo->clause);
206 : }
207 : }
208 : else
209 : {
210 8110 : RestrictInfo *rinfo = castNode(RestrictInfo, orarg);
211 :
212 : Assert(!restriction_is_or_clause(rinfo));
213 8110 : if (is_safe_restriction_clause_for(rinfo, rel))
214 3308 : subclauses = lappend(subclauses, rinfo->clause);
215 : }
216 :
217 : /*
218 : * If nothing could be extracted from this arm, we can't do anything
219 : * with this OR clause.
220 : */
221 8934 : if (subclauses == NIL)
222 4832 : return NULL;
223 :
224 : /*
225 : * OK, add subclause(s) to the result OR. If we found more than one,
226 : * we need an AND node. But if we found only one, and it is itself an
227 : * OR node, add its subclauses to the result instead; this is needed
228 : * to preserve AND/OR flatness (ie, no OR directly underneath OR).
229 : */
230 4102 : subclause = (Node *) make_ands_explicit(subclauses);
231 4102 : if (is_orclause(subclause))
232 18 : clauselist = list_concat(clauselist,
233 18 : ((BoolExpr *) subclause)->args);
234 : else
235 4084 : clauselist = lappend(clauselist, subclause);
236 : }
237 :
238 : /*
239 : * If we got a restriction clause from every arm, wrap them up in an OR
240 : * node. (In theory the OR node might be unnecessary, if there was only
241 : * one arm --- but then the input OR node was also redundant.)
242 : */
243 262 : if (clauselist != NIL)
244 262 : return make_orclause(clauselist);
245 0 : return NULL;
246 : }
247 :
248 : /*
249 : * Consider whether a successfully-extracted restriction OR clause is
250 : * actually worth using. If so, add it to the planner's data structures,
251 : * and adjust the original join clause (join_or_rinfo) to compensate.
252 : */
253 : static void
254 244 : consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
255 : Expr *orclause, RestrictInfo *join_or_rinfo)
256 : {
257 : RestrictInfo *or_rinfo;
258 : Selectivity or_selec,
259 : orig_selec;
260 :
261 : /*
262 : * Build a RestrictInfo from the new OR clause. We can assume it's valid
263 : * as a base restriction clause.
264 : */
265 244 : or_rinfo = make_restrictinfo(root,
266 : orclause,
267 : true,
268 : false,
269 : false,
270 : false,
271 : join_or_rinfo->security_level,
272 : NULL,
273 : NULL,
274 : NULL);
275 :
276 : /*
277 : * Estimate its selectivity. (We could have done this earlier, but doing
278 : * it on the RestrictInfo representation allows the result to get cached,
279 : * saving work later.)
280 : */
281 244 : or_selec = clause_selectivity(root, (Node *) or_rinfo,
282 : 0, JOIN_INNER, NULL);
283 :
284 : /*
285 : * The clause is only worth adding to the query if it rejects a useful
286 : * fraction of the base relation's rows; otherwise, it's just going to
287 : * cause duplicate computation (since we will still have to check the
288 : * original OR clause when the join is formed). Somewhat arbitrarily, we
289 : * set the selectivity threshold at 0.9.
290 : */
291 244 : if (or_selec > 0.9)
292 4 : return; /* forget it */
293 :
294 : /*
295 : * OK, add it to the rel's restriction-clause list.
296 : */
297 240 : rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo);
298 240 : rel->baserestrict_min_security = Min(rel->baserestrict_min_security,
299 : or_rinfo->security_level);
300 :
301 : /*
302 : * Adjust the original join OR clause's cached selectivity to compensate
303 : * for the selectivity of the added (but redundant) lower-level qual. This
304 : * should result in the join rel getting approximately the same rows
305 : * estimate as it would have gotten without all these shenanigans.
306 : *
307 : * XXX major hack alert: this depends on the assumption that the
308 : * selectivity will stay cached.
309 : *
310 : * XXX another major hack: we adjust only norm_selec, the cached
311 : * selectivity for JOIN_INNER semantics, even though the join clause
312 : * might've been an outer-join clause. This is partly because we can't
313 : * easily identify the relevant SpecialJoinInfo here, and partly because
314 : * the linearity assumption we're making would fail anyway. (If it is an
315 : * outer-join clause, "rel" must be on the nullable side, else we'd not
316 : * have gotten here. So the computation of the join size is going to be
317 : * quite nonlinear with respect to the size of "rel", so it's not clear
318 : * how we ought to adjust outer_selec even if we could compute its
319 : * original value correctly.)
320 : */
321 240 : if (or_selec > 0)
322 : {
323 : SpecialJoinInfo sjinfo;
324 :
325 : /*
326 : * Make up a SpecialJoinInfo for JOIN_INNER semantics. (Compare
327 : * approx_tuple_count() in costsize.c.)
328 : */
329 240 : init_dummy_sjinfo(&sjinfo,
330 240 : bms_difference(join_or_rinfo->clause_relids,
331 240 : rel->relids),
332 : rel->relids);
333 :
334 : /* Compute inner-join size */
335 240 : orig_selec = clause_selectivity(root, (Node *) join_or_rinfo,
336 : 0, JOIN_INNER, &sjinfo);
337 :
338 : /* And hack cached selectivity so join size remains the same */
339 240 : join_or_rinfo->norm_selec = orig_selec / or_selec;
340 : /* ensure result stays in sane range */
341 240 : if (join_or_rinfo->norm_selec > 1)
342 0 : join_or_rinfo->norm_selec = 1;
343 : /* as explained above, we don't touch outer_selec */
344 : }
345 : }
|