LCOV - code coverage report
Current view: top level - src/backend/optimizer/util - orclauses.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 92.4 % 66 61
Test Date: 2026-02-17 17:20:33 Functions: 100.0 % 4 4
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * orclauses.c
       4              :  *    Routines to extract restriction OR clauses from join OR clauses
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/optimizer/util/orclauses.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : 
      16              : #include "postgres.h"
      17              : 
      18              : #include "nodes/makefuncs.h"
      19              : #include "nodes/nodeFuncs.h"
      20              : #include "optimizer/optimizer.h"
      21              : #include "optimizer/orclauses.h"
      22              : #include "optimizer/paths.h"
      23              : #include "optimizer/restrictinfo.h"
      24              : 
      25              : 
      26              : static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel);
      27              : static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel);
      28              : static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
      29              :                                    Expr *orclause, RestrictInfo *join_or_rinfo);
      30              : 
      31              : 
      32              : /*
      33              :  * extract_restriction_or_clauses
      34              :  *    Examine join OR-of-AND clauses to see if any useful restriction OR
      35              :  *    clauses can be extracted.  If so, add them to the query.
      36              :  *
      37              :  * Although a join clause must reference multiple relations overall,
      38              :  * an OR of ANDs clause might contain sub-clauses that reference just one
      39              :  * relation and can be used to build a restriction clause for that rel.
      40              :  * For example consider
      41              :  *      WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
      42              :  * We can transform this into
      43              :  *      WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
      44              :  *          AND (a.x = 42 OR a.x = 44)
      45              :  *          AND (b.y = 43 OR b.z = 45);
      46              :  * which allows the latter clauses to be applied during the scans of a and b,
      47              :  * perhaps as index qualifications, and in any case reducing the number of
      48              :  * rows arriving at the join.  In essence this is a partial transformation to
      49              :  * CNF (AND of ORs format).  It is not complete, however, because we do not
      50              :  * unravel the original OR --- doing so would usually bloat the qualification
      51              :  * expression to little gain.
      52              :  *
      53              :  * The added quals are partially redundant with the original OR, and therefore
      54              :  * would cause the size of the joinrel to be underestimated when it is finally
      55              :  * formed.  (This would be true of a full transformation to CNF as well; the
      56              :  * fault is not really in the transformation, but in clauselist_selectivity's
      57              :  * inability to recognize redundant conditions.)  We can compensate for this
      58              :  * redundancy by changing the cached selectivity of the original OR clause,
      59              :  * canceling out the (valid) reduction in the estimated sizes of the base
      60              :  * relations so that the estimated joinrel size remains the same.  This is
      61              :  * a MAJOR HACK: it depends on the fact that clause selectivities are cached
      62              :  * and on the fact that the same RestrictInfo node will appear in every
      63              :  * joininfo list that might be used when the joinrel is formed.
      64              :  * And it doesn't work in cases where the size estimation is nonlinear
      65              :  * (i.e., outer and IN joins).  But it beats not doing anything.
      66              :  *
      67              :  * We examine each base relation to see if join clauses associated with it
      68              :  * contain extractable restriction conditions.  If so, add those conditions
      69              :  * to the rel's baserestrictinfo and update the cached selectivities of the
      70              :  * join clauses.  Note that the same join clause will be examined afresh
      71              :  * from the point of view of each baserel that participates in it, so its
      72              :  * cached selectivity may get updated multiple times.
      73              :  */
      74              : void
      75       172439 : extract_restriction_or_clauses(PlannerInfo *root)
      76              : {
      77              :     Index       rti;
      78              : 
      79              :     /* Examine each baserel for potential join OR clauses */
      80       508196 :     for (rti = 1; rti < root->simple_rel_array_size; rti++)
      81              :     {
      82       335757 :         RelOptInfo *rel = root->simple_rel_array[rti];
      83              :         ListCell   *lc;
      84              : 
      85              :         /* there may be empty slots corresponding to non-baserel RTEs */
      86       335757 :         if (rel == NULL)
      87        92034 :             continue;
      88              : 
      89              :         Assert(rel->relid == rti);   /* sanity check on array */
      90              : 
      91              :         /* ignore RTEs that are "other rels" */
      92       243723 :         if (rel->reloptkind != RELOPT_BASEREL)
      93            0 :             continue;
      94              : 
      95              :         /*
      96              :          * Find potentially interesting OR joinclauses.  We can use any
      97              :          * joinclause that is considered safe to move to this rel by the
      98              :          * parameterized-path machinery, even though what we are going to do
      99              :          * with it is not exactly a parameterized path.
     100              :          */
     101       306382 :         foreach(lc, rel->joininfo)
     102              :         {
     103        62659 :             RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
     104              : 
     105        67176 :             if (restriction_is_or_clause(rinfo) &&
     106         4517 :                 join_clause_is_movable_to(rinfo, rel))
     107              :             {
     108              :                 /* Try to extract a qual for this rel only */
     109         3122 :                 Expr       *orclause = extract_or_clause(rinfo, rel);
     110              : 
     111              :                 /*
     112              :                  * If successful, decide whether we want to use the clause,
     113              :                  * and insert it into the rel's restrictinfo list if so.
     114              :                  */
     115         3122 :                 if (orclause)
     116          246 :                     consider_new_or_clause(root, rel, orclause, rinfo);
     117              :             }
     118              :         }
     119              :     }
     120       172439 : }
     121              : 
     122              : /*
     123              :  * Is the given primitive (non-OR) RestrictInfo safe to move to the rel?
     124              :  */
     125              : static bool
     126         6670 : is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel)
     127              : {
     128              :     /*
     129              :      * We want clauses that mention the rel, and only the rel.  So in
     130              :      * particular pseudoconstant clauses can be rejected quickly.  Then check
     131              :      * the clause's Var membership.
     132              :      */
     133         6670 :     if (rinfo->pseudoconstant)
     134            0 :         return false;
     135         6670 :     if (!bms_equal(rinfo->clause_relids, rel->relids))
     136         3599 :         return false;
     137              : 
     138              :     /* We don't want extra evaluations of any volatile functions */
     139         3071 :     if (contain_volatile_functions((Node *) rinfo->clause))
     140            0 :         return false;
     141              : 
     142         3071 :     return true;
     143              : }
     144              : 
     145              : /*
     146              :  * Try to extract a restriction clause mentioning only "rel" from the given
     147              :  * join OR-clause.
     148              :  *
     149              :  * We must be able to extract at least one qual for this rel from each of
     150              :  * the arms of the OR, else we can't use it.
     151              :  *
     152              :  * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL
     153              :  * if no OR clause could be extracted.
     154              :  */
     155              : static Expr *
     156         3149 : extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel)
     157              : {
     158         3149 :     List       *clauselist = NIL;
     159              :     ListCell   *lc;
     160              : 
     161              :     /*
     162              :      * Scan each arm of the input OR clause.  Notice we descend into
     163              :      * or_rinfo->orclause, which has RestrictInfo nodes embedded below the
     164              :      * toplevel OR/AND structure.  This is useful because we can use the info
     165              :      * in those nodes to make is_safe_restriction_clause_for()'s checks
     166              :      * cheaper.  We'll strip those nodes from the returned tree, though,
     167              :      * meaning that fresh ones will be built if the clause is accepted as a
     168              :      * restriction clause.  This might seem wasteful --- couldn't we re-use
     169              :      * the existing RestrictInfos?  But that'd require assuming that
     170              :      * selectivity and other cached data is computed exactly the same way for
     171              :      * a restriction clause as for a join clause, which seems undesirable.
     172              :      */
     173              :     Assert(is_orclause(or_rinfo->orclause));
     174         5826 :     foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args)
     175              :     {
     176         5571 :         Node       *orarg = (Node *) lfirst(lc);
     177         5571 :         List       *subclauses = NIL;
     178              :         Node       *subclause;
     179              : 
     180              :         /* OR arguments should be ANDs or sub-RestrictInfos */
     181         5571 :         if (is_andclause(orarg))
     182              :         {
     183          694 :             List       *andargs = ((BoolExpr *) orarg)->args;
     184              :             ListCell   *lc2;
     185              : 
     186         2514 :             foreach(lc2, andargs)
     187              :             {
     188         1820 :                 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
     189              : 
     190         1820 :                 if (restriction_is_or_clause(rinfo))
     191              :                 {
     192              :                     /*
     193              :                      * Recurse to deal with nested OR.  Note we *must* recurse
     194              :                      * here, this isn't just overly-tense optimization: we
     195              :                      * have to descend far enough to find and strip all
     196              :                      * RestrictInfos in the expression.
     197              :                      */
     198              :                     Expr       *suborclause;
     199              : 
     200           27 :                     suborclause = extract_or_clause(rinfo, rel);
     201           27 :                     if (suborclause)
     202            9 :                         subclauses = lappend(subclauses, suborclause);
     203              :                 }
     204         1793 :                 else if (is_safe_restriction_clause_for(rinfo, rel))
     205         1073 :                     subclauses = lappend(subclauses, rinfo->clause);
     206              :             }
     207              :         }
     208              :         else
     209              :         {
     210         4877 :             RestrictInfo *rinfo = castNode(RestrictInfo, orarg);
     211              : 
     212              :             Assert(!restriction_is_or_clause(rinfo));
     213         4877 :             if (is_safe_restriction_clause_for(rinfo, rel))
     214         1998 :                 subclauses = lappend(subclauses, rinfo->clause);
     215              :         }
     216              : 
     217              :         /*
     218              :          * If nothing could be extracted from this arm, we can't do anything
     219              :          * with this OR clause.
     220              :          */
     221         5571 :         if (subclauses == NIL)
     222         2894 :             return NULL;
     223              : 
     224              :         /*
     225              :          * OK, add subclause(s) to the result OR.  If we found more than one,
     226              :          * we need an AND node.  But if we found only one, and it is itself an
     227              :          * OR node, add its subclauses to the result instead; this is needed
     228              :          * to preserve AND/OR flatness (ie, no OR directly underneath OR).
     229              :          */
     230         2677 :         subclause = (Node *) make_ands_explicit(subclauses);
     231         2677 :         if (is_orclause(subclause))
     232            9 :             clauselist = list_concat(clauselist,
     233            9 :                                      ((BoolExpr *) subclause)->args);
     234              :         else
     235         2668 :             clauselist = lappend(clauselist, subclause);
     236              :     }
     237              : 
     238              :     /*
     239              :      * If we got a restriction clause from every arm, wrap them up in an OR
     240              :      * node.  (In theory the OR node might be unnecessary, if there was only
     241              :      * one arm --- but then the input OR node was also redundant.)
     242              :      */
     243          255 :     if (clauselist != NIL)
     244          255 :         return make_orclause(clauselist);
     245            0 :     return NULL;
     246              : }
     247              : 
     248              : /*
     249              :  * Consider whether a successfully-extracted restriction OR clause is
     250              :  * actually worth using.  If so, add it to the planner's data structures,
     251              :  * and adjust the original join clause (join_or_rinfo) to compensate.
     252              :  */
     253              : static void
     254          246 : consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
     255              :                        Expr *orclause, RestrictInfo *join_or_rinfo)
     256              : {
     257              :     RestrictInfo *or_rinfo;
     258              :     Selectivity or_selec,
     259              :                 orig_selec;
     260              : 
     261              :     /*
     262              :      * Build a RestrictInfo from the new OR clause.  We can assume it's valid
     263              :      * as a base restriction clause.
     264              :      */
     265          246 :     or_rinfo = make_restrictinfo(root,
     266              :                                  orclause,
     267              :                                  true,
     268              :                                  false,
     269              :                                  false,
     270              :                                  false,
     271              :                                  join_or_rinfo->security_level,
     272              :                                  NULL,
     273              :                                  NULL,
     274              :                                  NULL);
     275              : 
     276              :     /*
     277              :      * Estimate its selectivity.  (We could have done this earlier, but doing
     278              :      * it on the RestrictInfo representation allows the result to get cached,
     279              :      * saving work later.)
     280              :      */
     281          246 :     or_selec = clause_selectivity(root, (Node *) or_rinfo,
     282              :                                   0, JOIN_INNER, NULL);
     283              : 
     284              :     /*
     285              :      * The clause is only worth adding to the query if it rejects a useful
     286              :      * fraction of the base relation's rows; otherwise, it's just going to
     287              :      * cause duplicate computation (since we will still have to check the
     288              :      * original OR clause when the join is formed).  Somewhat arbitrarily, we
     289              :      * set the selectivity threshold at 0.9.
     290              :      */
     291          246 :     if (or_selec > 0.9)
     292            2 :         return;                 /* forget it */
     293              : 
     294              :     /*
     295              :      * OK, add it to the rel's restriction-clause list.
     296              :      */
     297          244 :     rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo);
     298          244 :     rel->baserestrict_min_security = Min(rel->baserestrict_min_security,
     299              :                                          or_rinfo->security_level);
     300              : 
     301              :     /*
     302              :      * Adjust the original join OR clause's cached selectivity to compensate
     303              :      * for the selectivity of the added (but redundant) lower-level qual. This
     304              :      * should result in the join rel getting approximately the same rows
     305              :      * estimate as it would have gotten without all these shenanigans.
     306              :      *
     307              :      * XXX major hack alert: this depends on the assumption that the
     308              :      * selectivity will stay cached.
     309              :      *
     310              :      * XXX another major hack: we adjust only norm_selec, the cached
     311              :      * selectivity for JOIN_INNER semantics, even though the join clause
     312              :      * might've been an outer-join clause.  This is partly because we can't
     313              :      * easily identify the relevant SpecialJoinInfo here, and partly because
     314              :      * the linearity assumption we're making would fail anyway.  (If it is an
     315              :      * outer-join clause, "rel" must be on the nullable side, else we'd not
     316              :      * have gotten here.  So the computation of the join size is going to be
     317              :      * quite nonlinear with respect to the size of "rel", so it's not clear
     318              :      * how we ought to adjust outer_selec even if we could compute its
     319              :      * original value correctly.)
     320              :      */
     321          244 :     if (or_selec > 0)
     322              :     {
     323              :         SpecialJoinInfo sjinfo;
     324              : 
     325              :         /*
     326              :          * Make up a SpecialJoinInfo for JOIN_INNER semantics.  (Compare
     327              :          * approx_tuple_count() in costsize.c.)
     328              :          */
     329          244 :         init_dummy_sjinfo(&sjinfo,
     330          244 :                           bms_difference(join_or_rinfo->clause_relids,
     331          244 :                                          rel->relids),
     332              :                           rel->relids);
     333              : 
     334              :         /* Compute inner-join size */
     335          244 :         orig_selec = clause_selectivity(root, (Node *) join_or_rinfo,
     336              :                                         0, JOIN_INNER, &sjinfo);
     337              : 
     338              :         /* And hack cached selectivity so join size remains the same */
     339          244 :         join_or_rinfo->norm_selec = orig_selec / or_selec;
     340              :         /* ensure result stays in sane range */
     341          244 :         if (join_or_rinfo->norm_selec > 1)
     342            0 :             join_or_rinfo->norm_selec = 1;
     343              :         /* as explained above, we don't touch outer_selec */
     344              :     }
     345              : }
        

Generated by: LCOV version 2.0-1