LCOV - code coverage report
Current view: top level - src/backend/optimizer/util - orclauses.c (source / functions) Hit Total Coverage
Test: PostgreSQL 12beta2 Lines: 72 77 93.5 %
Date: 2019-06-19 16:07:09 Functions: 4 4 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * orclauses.c
       4             :  *    Routines to extract restriction OR clauses from join OR clauses
       5             :  *
       6             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/optimizer/util/orclauses.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : 
      16             : #include "postgres.h"
      17             : 
      18             : #include "nodes/makefuncs.h"
      19             : #include "nodes/nodeFuncs.h"
      20             : #include "optimizer/clauses.h"
      21             : #include "optimizer/cost.h"
      22             : #include "optimizer/optimizer.h"
      23             : #include "optimizer/orclauses.h"
      24             : #include "optimizer/restrictinfo.h"
      25             : 
      26             : 
      27             : static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel);
      28             : static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel);
      29             : static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
      30             :                                    Expr *orclause, RestrictInfo *join_or_rinfo);
      31             : 
      32             : 
      33             : /*
      34             :  * extract_restriction_or_clauses
      35             :  *    Examine join OR-of-AND clauses to see if any useful restriction OR
      36             :  *    clauses can be extracted.  If so, add them to the query.
      37             :  *
      38             :  * Although a join clause must reference multiple relations overall,
      39             :  * an OR of ANDs clause might contain sub-clauses that reference just one
      40             :  * relation and can be used to build a restriction clause for that rel.
      41             :  * For example consider
      42             :  *      WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
      43             :  * We can transform this into
      44             :  *      WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
      45             :  *          AND (a.x = 42 OR a.x = 44)
      46             :  *          AND (b.y = 43 OR b.z = 45);
      47             :  * which allows the latter clauses to be applied during the scans of a and b,
      48             :  * perhaps as index qualifications, and in any case reducing the number of
      49             :  * rows arriving at the join.  In essence this is a partial transformation to
      50             :  * CNF (AND of ORs format).  It is not complete, however, because we do not
      51             :  * unravel the original OR --- doing so would usually bloat the qualification
      52             :  * expression to little gain.
      53             :  *
      54             :  * The added quals are partially redundant with the original OR, and therefore
      55             :  * would cause the size of the joinrel to be underestimated when it is finally
      56             :  * formed.  (This would be true of a full transformation to CNF as well; the
      57             :  * fault is not really in the transformation, but in clauselist_selectivity's
      58             :  * inability to recognize redundant conditions.)  We can compensate for this
      59             :  * redundancy by changing the cached selectivity of the original OR clause,
      60             :  * canceling out the (valid) reduction in the estimated sizes of the base
      61             :  * relations so that the estimated joinrel size remains the same.  This is
      62             :  * a MAJOR HACK: it depends on the fact that clause selectivities are cached
      63             :  * and on the fact that the same RestrictInfo node will appear in every
      64             :  * joininfo list that might be used when the joinrel is formed.
      65             :  * And it doesn't work in cases where the size estimation is nonlinear
      66             :  * (i.e., outer and IN joins).  But it beats not doing anything.
      67             :  *
      68             :  * We examine each base relation to see if join clauses associated with it
      69             :  * contain extractable restriction conditions.  If so, add those conditions
      70             :  * to the rel's baserestrictinfo and update the cached selectivities of the
      71             :  * join clauses.  Note that the same join clause will be examined afresh
      72             :  * from the point of view of each baserel that participates in it, so its
      73             :  * cached selectivity may get updated multiple times.
      74             :  */
      75             : void
      76      186784 : extract_restriction_or_clauses(PlannerInfo *root)
      77             : {
      78             :     Index       rti;
      79             : 
      80             :     /* Examine each baserel for potential join OR clauses */
      81      590940 :     for (rti = 1; rti < root->simple_rel_array_size; rti++)
      82             :     {
      83      404156 :         RelOptInfo *rel = root->simple_rel_array[rti];
      84             :         ListCell   *lc;
      85             : 
      86             :         /* there may be empty slots corresponding to non-baserel RTEs */
      87      404156 :         if (rel == NULL)
      88      145732 :             continue;
      89             : 
      90             :         Assert(rel->relid == rti);   /* sanity check on array */
      91             : 
      92             :         /* ignore RTEs that are "other rels" */
      93      258424 :         if (rel->reloptkind != RELOPT_BASEREL)
      94       11970 :             continue;
      95             : 
      96             :         /*
      97             :          * Find potentially interesting OR joinclauses.  We can use any
      98             :          * joinclause that is considered safe to move to this rel by the
      99             :          * parameterized-path machinery, even though what we are going to do
     100             :          * with it is not exactly a parameterized path.
     101             :          *
     102             :          * However, it seems best to ignore clauses that have been marked
     103             :          * redundant (by setting norm_selec > 1).  That likely can't happen
     104             :          * for OR clauses, but let's be safe.
     105             :          */
     106      327404 :         foreach(lc, rel->joininfo)
     107             :         {
     108       80950 :             RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
     109             : 
     110       87292 :             if (restriction_is_or_clause(rinfo) &&
     111       11378 :                 join_clause_is_movable_to(rinfo, rel) &&
     112        5036 :                 rinfo->norm_selec <= 1)
     113             :             {
     114             :                 /* Try to extract a qual for this rel only */
     115        5036 :                 Expr       *orclause = extract_or_clause(rinfo, rel);
     116             : 
     117             :                 /*
     118             :                  * If successful, decide whether we want to use the clause,
     119             :                  * and insert it into the rel's restrictinfo list if so.
     120             :                  */
     121        5036 :                 if (orclause)
     122          96 :                     consider_new_or_clause(root, rel, orclause, rinfo);
     123             :             }
     124             :         }
     125             :     }
     126      186784 : }
     127             : 
     128             : /*
     129             :  * Is the given primitive (non-OR) RestrictInfo safe to move to the rel?
     130             :  */
     131             : static bool
     132        7850 : is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel)
     133             : {
     134             :     /*
     135             :      * We want clauses that mention the rel, and only the rel.  So in
     136             :      * particular pseudoconstant clauses can be rejected quickly.  Then check
     137             :      * the clause's Var membership.
     138             :      */
     139        7850 :     if (rinfo->pseudoconstant)
     140           0 :         return false;
     141        7850 :     if (!bms_equal(rinfo->clause_relids, rel->relids))
     142        5208 :         return false;
     143             : 
     144             :     /* We don't want extra evaluations of any volatile functions */
     145        2642 :     if (contain_volatile_functions((Node *) rinfo->clause))
     146           0 :         return false;
     147             : 
     148        2642 :     return true;
     149             : }
     150             : 
     151             : /*
     152             :  * Try to extract a restriction clause mentioning only "rel" from the given
     153             :  * join OR-clause.
     154             :  *
     155             :  * We must be able to extract at least one qual for this rel from each of
     156             :  * the arms of the OR, else we can't use it.
     157             :  *
     158             :  * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL
     159             :  * if no OR clause could be extracted.
     160             :  */
     161             : static Expr *
     162        5060 : extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel)
     163             : {
     164        5060 :     List       *clauselist = NIL;
     165             :     ListCell   *lc;
     166             : 
     167             :     /*
     168             :      * Scan each arm of the input OR clause.  Notice we descend into
     169             :      * or_rinfo->orclause, which has RestrictInfo nodes embedded below the
     170             :      * toplevel OR/AND structure.  This is useful because we can use the info
     171             :      * in those nodes to make is_safe_restriction_clause_for()'s checks
     172             :      * cheaper.  We'll strip those nodes from the returned tree, though,
     173             :      * meaning that fresh ones will be built if the clause is accepted as a
     174             :      * restriction clause.  This might seem wasteful --- couldn't we re-use
     175             :      * the existing RestrictInfos?  But that'd require assuming that
     176             :      * selectivity and other cached data is computed exactly the same way for
     177             :      * a restriction clause as for a join clause, which seems undesirable.
     178             :      */
     179             :     Assert(is_orclause(or_rinfo->orclause));
     180        7650 :     foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args)
     181             :     {
     182        7550 :         Node       *orarg = (Node *) lfirst(lc);
     183        7550 :         List       *subclauses = NIL;
     184             :         Node       *subclause;
     185             : 
     186             :         /* OR arguments should be ANDs or sub-RestrictInfos */
     187        7550 :         if (is_andclause(orarg))
     188             :         {
     189         212 :             List       *andargs = ((BoolExpr *) orarg)->args;
     190             :             ListCell   *lc2;
     191             : 
     192         748 :             foreach(lc2, andargs)
     193             :             {
     194         536 :                 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
     195             : 
     196         536 :                 if (restriction_is_or_clause(rinfo))
     197             :                 {
     198             :                     /*
     199             :                      * Recurse to deal with nested OR.  Note we *must* recurse
     200             :                      * here, this isn't just overly-tense optimization: we
     201             :                      * have to descend far enough to find and strip all
     202             :                      * RestrictInfos in the expression.
     203             :                      */
     204             :                     Expr       *suborclause;
     205             : 
     206          24 :                     suborclause = extract_or_clause(rinfo, rel);
     207          24 :                     if (suborclause)
     208           4 :                         subclauses = lappend(subclauses, suborclause);
     209             :                 }
     210         512 :                 else if (is_safe_restriction_clause_for(rinfo, rel))
     211         232 :                     subclauses = lappend(subclauses, rinfo->clause);
     212             :             }
     213             :         }
     214             :         else
     215             :         {
     216        7338 :             RestrictInfo *rinfo = castNode(RestrictInfo, orarg);
     217             : 
     218             :             Assert(!restriction_is_or_clause(rinfo));
     219        7338 :             if (is_safe_restriction_clause_for(rinfo, rel))
     220        2410 :                 subclauses = lappend(subclauses, rinfo->clause);
     221             :         }
     222             : 
     223             :         /*
     224             :          * If nothing could be extracted from this arm, we can't do anything
     225             :          * with this OR clause.
     226             :          */
     227        7550 :         if (subclauses == NIL)
     228        4960 :             return NULL;
     229             : 
     230             :         /*
     231             :          * OK, add subclause(s) to the result OR.  If we found more than one,
     232             :          * we need an AND node.  But if we found only one, and it is itself an
     233             :          * OR node, add its subclauses to the result instead; this is needed
     234             :          * to preserve AND/OR flatness (ie, no OR directly underneath OR).
     235             :          */
     236        2590 :         subclause = (Node *) make_ands_explicit(subclauses);
     237        2590 :         if (is_orclause(subclause))
     238           4 :             clauselist = list_concat(clauselist,
     239           4 :                                      list_copy(((BoolExpr *) subclause)->args));
     240             :         else
     241        2586 :             clauselist = lappend(clauselist, subclause);
     242             :     }
     243             : 
     244             :     /*
     245             :      * If we got a restriction clause from every arm, wrap them up in an OR
     246             :      * node.  (In theory the OR node might be unnecessary, if there was only
     247             :      * one arm --- but then the input OR node was also redundant.)
     248             :      */
     249         100 :     if (clauselist != NIL)
     250         100 :         return make_orclause(clauselist);
     251           0 :     return NULL;
     252             : }
     253             : 
     254             : /*
     255             :  * Consider whether a successfully-extracted restriction OR clause is
     256             :  * actually worth using.  If so, add it to the planner's data structures,
     257             :  * and adjust the original join clause (join_or_rinfo) to compensate.
     258             :  */
     259             : static void
     260          96 : consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
     261             :                        Expr *orclause, RestrictInfo *join_or_rinfo)
     262             : {
     263             :     RestrictInfo *or_rinfo;
     264             :     Selectivity or_selec,
     265             :                 orig_selec;
     266             : 
     267             :     /*
     268             :      * Build a RestrictInfo from the new OR clause.  We can assume it's valid
     269             :      * as a base restriction clause.
     270             :      */
     271          96 :     or_rinfo = make_restrictinfo(orclause,
     272             :                                  true,
     273             :                                  false,
     274             :                                  false,
     275             :                                  join_or_rinfo->security_level,
     276             :                                  NULL,
     277             :                                  NULL,
     278             :                                  NULL);
     279             : 
     280             :     /*
     281             :      * Estimate its selectivity.  (We could have done this earlier, but doing
     282             :      * it on the RestrictInfo representation allows the result to get cached,
     283             :      * saving work later.)
     284             :      */
     285          96 :     or_selec = clause_selectivity(root, (Node *) or_rinfo,
     286             :                                   0, JOIN_INNER, NULL);
     287             : 
     288             :     /*
     289             :      * The clause is only worth adding to the query if it rejects a useful
     290             :      * fraction of the base relation's rows; otherwise, it's just going to
     291             :      * cause duplicate computation (since we will still have to check the
     292             :      * original OR clause when the join is formed).  Somewhat arbitrarily, we
     293             :      * set the selectivity threshold at 0.9.
     294             :      */
     295          96 :     if (or_selec > 0.9)
     296           0 :         return;                 /* forget it */
     297             : 
     298             :     /*
     299             :      * OK, add it to the rel's restriction-clause list.
     300             :      */
     301          96 :     rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo);
     302          96 :     rel->baserestrict_min_security = Min(rel->baserestrict_min_security,
     303             :                                          or_rinfo->security_level);
     304             : 
     305             :     /*
     306             :      * Adjust the original join OR clause's cached selectivity to compensate
     307             :      * for the selectivity of the added (but redundant) lower-level qual. This
     308             :      * should result in the join rel getting approximately the same rows
     309             :      * estimate as it would have gotten without all these shenanigans.
     310             :      *
     311             :      * XXX major hack alert: this depends on the assumption that the
     312             :      * selectivity will stay cached.
     313             :      *
     314             :      * XXX another major hack: we adjust only norm_selec, the cached
     315             :      * selectivity for JOIN_INNER semantics, even though the join clause
     316             :      * might've been an outer-join clause.  This is partly because we can't
     317             :      * easily identify the relevant SpecialJoinInfo here, and partly because
     318             :      * the linearity assumption we're making would fail anyway.  (If it is an
     319             :      * outer-join clause, "rel" must be on the nullable side, else we'd not
     320             :      * have gotten here.  So the computation of the join size is going to be
     321             :      * quite nonlinear with respect to the size of "rel", so it's not clear
     322             :      * how we ought to adjust outer_selec even if we could compute its
     323             :      * original value correctly.)
     324             :      */
     325          96 :     if (or_selec > 0)
     326             :     {
     327             :         SpecialJoinInfo sjinfo;
     328             : 
     329             :         /*
     330             :          * Make up a SpecialJoinInfo for JOIN_INNER semantics.  (Compare
     331             :          * approx_tuple_count() in costsize.c.)
     332             :          */
     333          96 :         sjinfo.type = T_SpecialJoinInfo;
     334          96 :         sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids,
     335          96 :                                              rel->relids);
     336          96 :         sjinfo.min_righthand = rel->relids;
     337          96 :         sjinfo.syn_lefthand = sjinfo.min_lefthand;
     338          96 :         sjinfo.syn_righthand = sjinfo.min_righthand;
     339          96 :         sjinfo.jointype = JOIN_INNER;
     340             :         /* we don't bother trying to make the remaining fields valid */
     341          96 :         sjinfo.lhs_strict = false;
     342          96 :         sjinfo.delay_upper_joins = false;
     343          96 :         sjinfo.semi_can_btree = false;
     344          96 :         sjinfo.semi_can_hash = false;
     345          96 :         sjinfo.semi_operators = NIL;
     346          96 :         sjinfo.semi_rhs_exprs = NIL;
     347             : 
     348             :         /* Compute inner-join size */
     349          96 :         orig_selec = clause_selectivity(root, (Node *) join_or_rinfo,
     350             :                                         0, JOIN_INNER, &sjinfo);
     351             : 
     352             :         /* And hack cached selectivity so join size remains the same */
     353          96 :         join_or_rinfo->norm_selec = orig_selec / or_selec;
     354             :         /* ensure result stays in sane range, in particular not "redundant" */
     355          96 :         if (join_or_rinfo->norm_selec > 1)
     356           0 :             join_or_rinfo->norm_selec = 1;
     357             :         /* as explained above, we don't touch outer_selec */
     358             :     }
     359             : }

Generated by: LCOV version 1.13