Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * prepqual.c
4 : * Routines for preprocessing qualification expressions
5 : *
6 : *
7 : * While the parser will produce flattened (N-argument) AND/OR trees from
8 : * simple sequences of AND'ed or OR'ed clauses, there might be an AND clause
9 : * directly underneath another AND, or OR underneath OR, if the input was
10 : * oddly parenthesized. Also, rule expansion and subquery flattening could
11 : * produce such parsetrees. The planner wants to flatten all such cases
12 : * to ensure consistent optimization behavior.
13 : *
14 : * Formerly, this module was responsible for doing the initial flattening,
15 : * but now we leave it to eval_const_expressions to do that since it has to
16 : * make a complete pass over the expression tree anyway. Instead, we just
17 : * have to ensure that our manipulations preserve AND/OR flatness.
18 : * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
19 : * tree after local transformations that might introduce nested AND/ORs.
20 : *
21 : *
22 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
23 : * Portions Copyright (c) 1994, Regents of the University of California
24 : *
25 : *
26 : * IDENTIFICATION
27 : * src/backend/optimizer/prep/prepqual.c
28 : *
29 : *-------------------------------------------------------------------------
30 : */
31 :
32 : #include "postgres.h"
33 :
34 : #include "nodes/makefuncs.h"
35 : #include "nodes/nodeFuncs.h"
36 : #include "optimizer/optimizer.h"
37 : #include "utils/lsyscache.h"
38 :
39 :
40 : static List *pull_ands(List *andlist);
41 : static List *pull_ors(List *orlist);
42 : static Expr *find_duplicate_ors(Expr *qual, bool is_check);
43 : static Expr *process_duplicate_ors(List *orlist);
44 :
45 :
46 : /*
47 : * negate_clause
48 : * Negate a Boolean expression.
49 : *
50 : * Input is a clause to be negated (e.g., the argument of a NOT clause).
51 : * Returns a new clause equivalent to the negation of the given clause.
52 : *
53 : * Although this can be invoked on its own, it's mainly intended as a helper
54 : * for eval_const_expressions(), and that context drives several design
55 : * decisions. In particular, if the input is already AND/OR flat, we must
56 : * preserve that property. We also don't bother to recurse in situations
57 : * where we can assume that lower-level executions of eval_const_expressions
58 : * would already have simplified sub-clauses of the input.
59 : *
60 : * The difference between this and a simple make_notclause() is that this
61 : * tries to get rid of the NOT node by logical simplification. It's clearly
62 : * always a win if the NOT node can be eliminated altogether. However, our
63 : * use of DeMorgan's laws could result in having more NOT nodes rather than
64 : * fewer. We do that unconditionally anyway, because in WHERE clauses it's
65 : * important to expose as much top-level AND/OR structure as possible.
66 : * Also, eliminating an intermediate NOT may allow us to flatten two levels
67 : * of AND or OR together that we couldn't have otherwise. Finally, one of
68 : * the motivations for doing this is to ensure that logically equivalent
69 : * expressions will be seen as physically equal(), so we should always apply
70 : * the same transformations.
71 : */
72 : Node *
73 23508 : negate_clause(Node *node)
74 : {
75 23508 : if (node == NULL) /* should not happen */
76 0 : elog(ERROR, "can't negate an empty subexpression");
77 23508 : switch (nodeTag(node))
78 : {
79 90 : case T_Const:
80 : {
81 90 : Const *c = (Const *) node;
82 :
83 : /* NOT NULL is still NULL */
84 90 : if (c->constisnull)
85 0 : return makeBoolConst(false, true);
86 : /* otherwise pretty easy */
87 90 : return makeBoolConst(!DatumGetBool(c->constvalue), false);
88 : }
89 : break;
90 6554 : case T_OpExpr:
91 : {
92 : /*
93 : * Negate operator if possible: (NOT (< A B)) => (>= A B)
94 : */
95 6554 : OpExpr *opexpr = (OpExpr *) node;
96 6554 : Oid negator = get_negator(opexpr->opno);
97 :
98 6554 : if (negator)
99 : {
100 6336 : OpExpr *newopexpr = makeNode(OpExpr);
101 :
102 6336 : newopexpr->opno = negator;
103 6336 : newopexpr->opfuncid = InvalidOid;
104 6336 : newopexpr->opresulttype = opexpr->opresulttype;
105 6336 : newopexpr->opretset = opexpr->opretset;
106 6336 : newopexpr->opcollid = opexpr->opcollid;
107 6336 : newopexpr->inputcollid = opexpr->inputcollid;
108 6336 : newopexpr->args = opexpr->args;
109 6336 : newopexpr->location = opexpr->location;
110 6336 : return (Node *) newopexpr;
111 : }
112 : }
113 218 : break;
114 480 : case T_ScalarArrayOpExpr:
115 : {
116 : /*
117 : * Negate a ScalarArrayOpExpr if its operator has a negator;
118 : * for example x = ANY (list) becomes x <> ALL (list)
119 : */
120 480 : ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node;
121 480 : Oid negator = get_negator(saopexpr->opno);
122 :
123 480 : if (negator)
124 : {
125 480 : ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
126 :
127 480 : newopexpr->opno = negator;
128 480 : newopexpr->opfuncid = InvalidOid;
129 480 : newopexpr->hashfuncid = InvalidOid;
130 480 : newopexpr->negfuncid = InvalidOid;
131 480 : newopexpr->useOr = !saopexpr->useOr;
132 480 : newopexpr->inputcollid = saopexpr->inputcollid;
133 480 : newopexpr->args = saopexpr->args;
134 480 : newopexpr->location = saopexpr->location;
135 480 : return (Node *) newopexpr;
136 : }
137 : }
138 0 : break;
139 4012 : case T_BoolExpr:
140 : {
141 4012 : BoolExpr *expr = (BoolExpr *) node;
142 :
143 4012 : switch (expr->boolop)
144 : {
145 : /*--------------------
146 : * Apply DeMorgan's Laws:
147 : * (NOT (AND A B)) => (OR (NOT A) (NOT B))
148 : * (NOT (OR A B)) => (AND (NOT A) (NOT B))
149 : * i.e., swap AND for OR and negate each subclause.
150 : *
151 : * If the input is already AND/OR flat and has no NOT
152 : * directly above AND or OR, this transformation preserves
153 : * those properties. For example, if no direct child of
154 : * the given AND clause is an AND or a NOT-above-OR, then
155 : * the recursive calls of negate_clause() can't return any
156 : * OR clauses. So we needn't call pull_ors() before
157 : * building a new OR clause. Similarly for the OR case.
158 : *--------------------
159 : */
160 3304 : case AND_EXPR:
161 : {
162 3304 : List *nargs = NIL;
163 : ListCell *lc;
164 :
165 11516 : foreach(lc, expr->args)
166 : {
167 8212 : nargs = lappend(nargs,
168 8212 : negate_clause(lfirst(lc)));
169 : }
170 3304 : return (Node *) make_orclause(nargs);
171 : }
172 : break;
173 612 : case OR_EXPR:
174 : {
175 612 : List *nargs = NIL;
176 : ListCell *lc;
177 :
178 2596 : foreach(lc, expr->args)
179 : {
180 1984 : nargs = lappend(nargs,
181 1984 : negate_clause(lfirst(lc)));
182 : }
183 612 : return (Node *) make_andclause(nargs);
184 : }
185 : break;
186 96 : case NOT_EXPR:
187 :
188 : /*
189 : * NOT underneath NOT: they cancel. We assume the
190 : * input is already simplified, so no need to recurse.
191 : */
192 96 : return (Node *) linitial(expr->args);
193 0 : default:
194 0 : elog(ERROR, "unrecognized boolop: %d",
195 : (int) expr->boolop);
196 : break;
197 : }
198 : }
199 : break;
200 1828 : case T_NullTest:
201 : {
202 1828 : NullTest *expr = (NullTest *) node;
203 :
204 : /*
205 : * In the rowtype case, the two flavors of NullTest are *not*
206 : * logical inverses, so we can't simplify. But it does work
207 : * for scalar datatypes.
208 : */
209 1828 : if (!expr->argisrow)
210 : {
211 1828 : NullTest *newexpr = makeNode(NullTest);
212 :
213 1828 : newexpr->arg = expr->arg;
214 1828 : newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
215 1828 : IS_NOT_NULL : IS_NULL);
216 1828 : newexpr->argisrow = expr->argisrow;
217 1828 : newexpr->location = expr->location;
218 1828 : return (Node *) newexpr;
219 : }
220 : }
221 0 : break;
222 0 : case T_BooleanTest:
223 : {
224 0 : BooleanTest *expr = (BooleanTest *) node;
225 0 : BooleanTest *newexpr = makeNode(BooleanTest);
226 :
227 0 : newexpr->arg = expr->arg;
228 0 : switch (expr->booltesttype)
229 : {
230 0 : case IS_TRUE:
231 0 : newexpr->booltesttype = IS_NOT_TRUE;
232 0 : break;
233 0 : case IS_NOT_TRUE:
234 0 : newexpr->booltesttype = IS_TRUE;
235 0 : break;
236 0 : case IS_FALSE:
237 0 : newexpr->booltesttype = IS_NOT_FALSE;
238 0 : break;
239 0 : case IS_NOT_FALSE:
240 0 : newexpr->booltesttype = IS_FALSE;
241 0 : break;
242 0 : case IS_UNKNOWN:
243 0 : newexpr->booltesttype = IS_NOT_UNKNOWN;
244 0 : break;
245 0 : case IS_NOT_UNKNOWN:
246 0 : newexpr->booltesttype = IS_UNKNOWN;
247 0 : break;
248 0 : default:
249 0 : elog(ERROR, "unrecognized booltesttype: %d",
250 : (int) expr->booltesttype);
251 : break;
252 : }
253 0 : newexpr->location = expr->location;
254 0 : return (Node *) newexpr;
255 : }
256 : break;
257 10544 : default:
258 : /* else fall through */
259 10544 : break;
260 : }
261 :
262 : /*
263 : * Otherwise we don't know how to simplify this, so just tack on an
264 : * explicit NOT node.
265 : */
266 10762 : return (Node *) make_notclause((Expr *) node);
267 : }
268 :
269 :
270 : /*
271 : * canonicalize_qual
272 : * Convert a qualification expression to the most useful form.
273 : *
274 : * This is primarily intended to be used on top-level WHERE (or JOIN/ON)
275 : * clauses. It can also be used on top-level CHECK constraints, for which
276 : * pass is_check = true. DO NOT call it on any expression that is not known
277 : * to be one or the other, as it might apply inappropriate simplifications.
278 : *
279 : * The name of this routine is a holdover from a time when it would try to
280 : * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
281 : * Eventually, we recognized that that had more theoretical purity than
282 : * actual usefulness, and so now the transformation doesn't involve any
283 : * notion of reaching a canonical form.
284 : *
285 : * NOTE: we assume the input has already been through eval_const_expressions
286 : * and therefore possesses AND/OR flatness. Formerly this function included
287 : * its own flattening logic, but that requires a useless extra pass over the
288 : * tree.
289 : *
290 : * Returns the modified qualification.
291 : */
292 : Expr *
293 292134 : canonicalize_qual(Expr *qual, bool is_check)
294 : {
295 : Expr *newqual;
296 :
297 : /* Quick exit for empty qual */
298 292134 : if (qual == NULL)
299 0 : return NULL;
300 :
301 : /* This should not be invoked on quals in implicit-AND format */
302 : Assert(!IsA(qual, List));
303 :
304 : /*
305 : * Pull up redundant subclauses in OR-of-AND trees. We do this only
306 : * within the top-level AND/OR structure; there's no point in looking
307 : * deeper. Also remove any NULL constants in the top-level structure.
308 : */
309 292134 : newqual = find_duplicate_ors(qual, is_check);
310 :
311 292134 : return newqual;
312 : }
313 :
314 :
315 : /*
316 : * pull_ands
317 : * Recursively flatten nested AND clauses into a single and-clause list.
318 : *
319 : * Input is the arglist of an AND clause.
320 : * Returns the rebuilt arglist (note original list structure is not touched).
321 : */
322 : static List *
323 107808 : pull_ands(List *andlist)
324 : {
325 107808 : List *out_list = NIL;
326 : ListCell *arg;
327 :
328 406620 : foreach(arg, andlist)
329 : {
330 298812 : Node *subexpr = (Node *) lfirst(arg);
331 :
332 298812 : if (is_andclause(subexpr))
333 0 : out_list = list_concat(out_list,
334 0 : pull_ands(((BoolExpr *) subexpr)->args));
335 : else
336 298812 : out_list = lappend(out_list, subexpr);
337 : }
338 107808 : return out_list;
339 : }
340 :
341 : /*
342 : * pull_ors
343 : * Recursively flatten nested OR clauses into a single or-clause list.
344 : *
345 : * Input is the arglist of an OR clause.
346 : * Returns the rebuilt arglist (note original list structure is not touched).
347 : */
348 : static List *
349 8038 : pull_ors(List *orlist)
350 : {
351 8038 : List *out_list = NIL;
352 : ListCell *arg;
353 :
354 27104 : foreach(arg, orlist)
355 : {
356 19066 : Node *subexpr = (Node *) lfirst(arg);
357 :
358 19066 : if (is_orclause(subexpr))
359 0 : out_list = list_concat(out_list,
360 0 : pull_ors(((BoolExpr *) subexpr)->args));
361 : else
362 19066 : out_list = lappend(out_list, subexpr);
363 : }
364 8038 : return out_list;
365 : }
366 :
367 :
368 : /*--------------------
369 : * The following code attempts to apply the inverse OR distributive law:
370 : * ((A AND B) OR (A AND C)) => (A AND (B OR C))
371 : * That is, locate OR clauses in which every subclause contains an
372 : * identical term, and pull out the duplicated terms.
373 : *
374 : * This may seem like a fairly useless activity, but it turns out to be
375 : * applicable to many machine-generated queries, and there are also queries
376 : * in some of the TPC benchmarks that need it. This was in fact almost the
377 : * sole useful side-effect of the old prepqual code that tried to force
378 : * the query into canonical AND-of-ORs form: the canonical equivalent of
379 : * ((A AND B) OR (A AND C))
380 : * is
381 : * ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
382 : * which the code was able to simplify to
383 : * (A AND (A OR C) AND (B OR A) AND (B OR C))
384 : * thus successfully extracting the common condition A --- but at the cost
385 : * of cluttering the qual with many redundant clauses.
386 : *--------------------
387 : */
388 :
389 : /*
390 : * find_duplicate_ors
391 : * Given a qualification tree with the NOTs pushed down, search for
392 : * OR clauses to which the inverse OR distributive law might apply.
393 : * Only the top-level AND/OR structure is searched.
394 : *
395 : * While at it, we remove any NULL constants within the top-level AND/OR
396 : * structure, eg in a WHERE clause, "x OR NULL::boolean" is reduced to "x".
397 : * In general that would change the result, so eval_const_expressions can't
398 : * do it; but at top level of WHERE, we don't need to distinguish between
399 : * FALSE and NULL results, so it's valid to treat NULL::boolean the same
400 : * as FALSE and then simplify AND/OR accordingly. Conversely, in a top-level
401 : * CHECK constraint, we may treat a NULL the same as TRUE.
402 : *
403 : * Returns the modified qualification. AND/OR flatness is preserved.
404 : */
405 : static Expr *
406 610030 : find_duplicate_ors(Expr *qual, bool is_check)
407 : {
408 610030 : if (is_orclause(qual))
409 : {
410 8044 : List *orlist = NIL;
411 : ListCell *temp;
412 :
413 : /* Recurse */
414 27122 : foreach(temp, ((BoolExpr *) qual)->args)
415 : {
416 19084 : Expr *arg = (Expr *) lfirst(temp);
417 :
418 19084 : arg = find_duplicate_ors(arg, is_check);
419 :
420 : /* Get rid of any constant inputs */
421 19084 : if (arg && IsA(arg, Const))
422 : {
423 12 : Const *carg = (Const *) arg;
424 :
425 12 : if (is_check)
426 : {
427 : /* Within OR in CHECK, drop constant FALSE */
428 6 : if (!carg->constisnull && !DatumGetBool(carg->constvalue))
429 0 : continue;
430 : /* Constant TRUE or NULL, so OR reduces to TRUE */
431 6 : return (Expr *) makeBoolConst(true, false);
432 : }
433 : else
434 : {
435 : /* Within OR in WHERE, drop constant FALSE or NULL */
436 6 : if (carg->constisnull || !DatumGetBool(carg->constvalue))
437 6 : continue;
438 : /* Constant TRUE, so OR reduces to TRUE */
439 0 : return arg;
440 : }
441 : }
442 :
443 19072 : orlist = lappend(orlist, arg);
444 : }
445 :
446 : /* Flatten any ORs pulled up to just below here */
447 8038 : orlist = pull_ors(orlist);
448 :
449 : /* Now we can look for duplicate ORs */
450 8038 : return process_duplicate_ors(orlist);
451 : }
452 601986 : else if (is_andclause(qual))
453 : {
454 107808 : List *andlist = NIL;
455 : ListCell *temp;
456 :
457 : /* Recurse */
458 406620 : foreach(temp, ((BoolExpr *) qual)->args)
459 : {
460 298812 : Expr *arg = (Expr *) lfirst(temp);
461 :
462 298812 : arg = find_duplicate_ors(arg, is_check);
463 :
464 : /* Get rid of any constant inputs */
465 298812 : if (arg && IsA(arg, Const))
466 : {
467 0 : Const *carg = (Const *) arg;
468 :
469 0 : if (is_check)
470 : {
471 : /* Within AND in CHECK, drop constant TRUE or NULL */
472 0 : if (carg->constisnull || DatumGetBool(carg->constvalue))
473 0 : continue;
474 : /* Constant FALSE, so AND reduces to FALSE */
475 0 : return arg;
476 : }
477 : else
478 : {
479 : /* Within AND in WHERE, drop constant TRUE */
480 0 : if (!carg->constisnull && DatumGetBool(carg->constvalue))
481 0 : continue;
482 : /* Constant FALSE or NULL, so AND reduces to FALSE */
483 0 : return (Expr *) makeBoolConst(false, false);
484 : }
485 : }
486 :
487 298812 : andlist = lappend(andlist, arg);
488 : }
489 :
490 : /* Flatten any ANDs introduced just below here */
491 107808 : andlist = pull_ands(andlist);
492 :
493 : /* AND of no inputs reduces to TRUE */
494 107808 : if (andlist == NIL)
495 0 : return (Expr *) makeBoolConst(true, false);
496 :
497 : /* Single-expression AND just reduces to that expression */
498 107808 : if (list_length(andlist) == 1)
499 0 : return (Expr *) linitial(andlist);
500 :
501 : /* Else we still need an AND node */
502 107808 : return make_andclause(andlist);
503 : }
504 : else
505 494178 : return qual;
506 : }
507 :
508 : /*
509 : * process_duplicate_ors
510 : * Given a list of exprs which are ORed together, try to apply
511 : * the inverse OR distributive law.
512 : *
513 : * Returns the resulting expression (could be an AND clause, an OR
514 : * clause, or maybe even a single subexpression).
515 : */
516 : static Expr *
517 8038 : process_duplicate_ors(List *orlist)
518 : {
519 8038 : List *reference = NIL;
520 8038 : int num_subclauses = 0;
521 : List *winners;
522 : List *neworlist;
523 : ListCell *temp;
524 :
525 : /* OR of no inputs reduces to FALSE */
526 8038 : if (orlist == NIL)
527 0 : return (Expr *) makeBoolConst(false, false);
528 :
529 : /* Single-expression OR just reduces to that expression */
530 8038 : if (list_length(orlist) == 1)
531 6 : return (Expr *) linitial(orlist);
532 :
533 : /*
534 : * Choose the shortest AND clause as the reference list --- obviously, any
535 : * subclause not in this clause isn't in all the clauses. If we find a
536 : * clause that's not an AND, we can treat it as a one-element AND clause,
537 : * which necessarily wins as shortest.
538 : */
539 9132 : foreach(temp, orlist)
540 : {
541 8798 : Expr *clause = (Expr *) lfirst(temp);
542 :
543 8798 : if (is_andclause(clause))
544 : {
545 1100 : List *subclauses = ((BoolExpr *) clause)->args;
546 1100 : int nclauses = list_length(subclauses);
547 :
548 1100 : if (reference == NIL || nclauses < num_subclauses)
549 : {
550 810 : reference = subclauses;
551 810 : num_subclauses = nclauses;
552 : }
553 : }
554 : else
555 : {
556 7698 : reference = list_make1(clause);
557 7698 : break;
558 : }
559 : }
560 :
561 : /*
562 : * Just in case, eliminate any duplicates in the reference list.
563 : */
564 8032 : reference = list_union(NIL, reference);
565 :
566 : /*
567 : * Check each element of the reference list to see if it's in all the OR
568 : * clauses. Build a new list of winning clauses.
569 : */
570 8032 : winners = NIL;
571 16410 : foreach(temp, reference)
572 : {
573 8378 : Expr *refclause = (Expr *) lfirst(temp);
574 8378 : bool win = true;
575 : ListCell *temp2;
576 :
577 16206 : foreach(temp2, orlist)
578 : {
579 16206 : Expr *clause = (Expr *) lfirst(temp2);
580 :
581 16206 : if (is_andclause(clause))
582 : {
583 1922 : if (!list_member(((BoolExpr *) clause)->args, refclause))
584 : {
585 1426 : win = false;
586 1426 : break;
587 : }
588 : }
589 : else
590 : {
591 14284 : if (!equal(refclause, clause))
592 : {
593 6952 : win = false;
594 6952 : break;
595 : }
596 : }
597 : }
598 :
599 8378 : if (win)
600 0 : winners = lappend(winners, refclause);
601 : }
602 :
603 : /*
604 : * If no winners, we can't transform the OR
605 : */
606 8032 : if (winners == NIL)
607 8032 : return make_orclause(orlist);
608 :
609 : /*
610 : * Generate new OR list consisting of the remaining sub-clauses.
611 : *
612 : * If any clause degenerates to empty, then we have a situation like (A
613 : * AND B) OR (A), which can be reduced to just A --- that is, the
614 : * additional conditions in other arms of the OR are irrelevant.
615 : *
616 : * Note that because we use list_difference, any multiple occurrences of a
617 : * winning clause in an AND sub-clause will be removed automatically.
618 : */
619 0 : neworlist = NIL;
620 0 : foreach(temp, orlist)
621 : {
622 0 : Expr *clause = (Expr *) lfirst(temp);
623 :
624 0 : if (is_andclause(clause))
625 : {
626 0 : List *subclauses = ((BoolExpr *) clause)->args;
627 :
628 0 : subclauses = list_difference(subclauses, winners);
629 0 : if (subclauses != NIL)
630 : {
631 0 : if (list_length(subclauses) == 1)
632 0 : neworlist = lappend(neworlist, linitial(subclauses));
633 : else
634 0 : neworlist = lappend(neworlist, make_andclause(subclauses));
635 : }
636 : else
637 : {
638 0 : neworlist = NIL; /* degenerate case, see above */
639 0 : break;
640 : }
641 : }
642 : else
643 : {
644 0 : if (!list_member(winners, clause))
645 0 : neworlist = lappend(neworlist, clause);
646 : else
647 : {
648 0 : neworlist = NIL; /* degenerate case, see above */
649 0 : break;
650 : }
651 : }
652 : }
653 :
654 : /*
655 : * Append reduced OR to the winners list, if it's not degenerate, handling
656 : * the special case of one element correctly (can that really happen?).
657 : * Also be careful to maintain AND/OR flatness in case we pulled up a
658 : * sub-sub-OR-clause.
659 : */
660 0 : if (neworlist != NIL)
661 : {
662 0 : if (list_length(neworlist) == 1)
663 0 : winners = lappend(winners, linitial(neworlist));
664 : else
665 0 : winners = lappend(winners, make_orclause(pull_ors(neworlist)));
666 : }
667 :
668 : /*
669 : * And return the constructed AND clause, again being wary of a single
670 : * element and AND/OR flatness.
671 : */
672 0 : if (list_length(winners) == 1)
673 0 : return (Expr *) linitial(winners);
674 : else
675 0 : return make_andclause(pull_ands(winners));
676 : }
|