LCOV - code coverage report
Current view: top level - src/backend/optimizer/plan - planagg.c (source / functions) Hit Total Coverage
Test: PostgreSQL 14devel Lines: 142 147 96.6 %
Date: 2021-05-13 09:07:15 Functions: 5 5 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * planagg.c
       4             :  *    Special planning for aggregate queries.
       5             :  *
       6             :  * This module tries to replace MIN/MAX aggregate functions by subqueries
       7             :  * of the form
       8             :  *      (SELECT col FROM tab
       9             :  *       WHERE col IS NOT NULL AND existing-quals
      10             :  *       ORDER BY col ASC/DESC
      11             :  *       LIMIT 1)
      12             :  * Given a suitable index on tab.col, this can be much faster than the
      13             :  * generic scan-all-the-rows aggregation plan.  We can handle multiple
      14             :  * MIN/MAX aggregates by generating multiple subqueries, and their
      15             :  * orderings can be different.  However, if the query contains any
      16             :  * non-optimizable aggregates, there's no point since we'll have to
      17             :  * scan all the rows anyway.
      18             :  *
      19             :  *
      20             :  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
      21             :  * Portions Copyright (c) 1994, Regents of the University of California
      22             :  *
      23             :  *
      24             :  * IDENTIFICATION
      25             :  *    src/backend/optimizer/plan/planagg.c
      26             :  *
      27             :  *-------------------------------------------------------------------------
      28             :  */
      29             : #include "postgres.h"
      30             : 
      31             : #include "access/htup_details.h"
      32             : #include "catalog/pg_aggregate.h"
      33             : #include "catalog/pg_type.h"
      34             : #include "nodes/makefuncs.h"
      35             : #include "nodes/nodeFuncs.h"
      36             : #include "optimizer/clauses.h"
      37             : #include "optimizer/cost.h"
      38             : #include "optimizer/optimizer.h"
      39             : #include "optimizer/pathnode.h"
      40             : #include "optimizer/paths.h"
      41             : #include "optimizer/planmain.h"
      42             : #include "optimizer/subselect.h"
      43             : #include "optimizer/tlist.h"
      44             : #include "parser/parse_clause.h"
      45             : #include "parser/parsetree.h"
      46             : #include "rewrite/rewriteManip.h"
      47             : #include "utils/lsyscache.h"
      48             : #include "utils/syscache.h"
      49             : 
      50             : static bool can_minmax_aggs(PlannerInfo *root, List **context);
      51             : static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
      52             :                               Oid eqop, Oid sortop, bool nulls_first);
      53             : static void minmax_qp_callback(PlannerInfo *root, void *extra);
      54             : static Oid  fetch_agg_sort_op(Oid aggfnoid);
      55             : 
      56             : 
      57             : /*
      58             :  * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
      59             :  *
      60             :  * Check to see whether the query contains MIN/MAX aggregate functions that
      61             :  * might be optimizable via indexscans.  If it does, and all the aggregates
      62             :  * are potentially optimizable, then create a MinMaxAggPath and add it to
      63             :  * the (UPPERREL_GROUP_AGG, NULL) upperrel.
      64             :  *
      65             :  * This should be called by grouping_planner() just before it's ready to call
      66             :  * query_planner(), because we generate indexscan paths by cloning the
      67             :  * planner's state and invoking query_planner() on a modified version of
      68             :  * the query parsetree.  Thus, all preprocessing needed before query_planner()
      69             :  * must already be done.  This relies on the list of aggregates in
      70             :  * root->agginfos, so preprocess_aggrefs() must have been called already, too.
      71             :  */
      72             : void
      73       28520 : preprocess_minmax_aggregates(PlannerInfo *root)
      74             : {
      75       28520 :     Query      *parse = root->parse;
      76             :     FromExpr   *jtnode;
      77             :     RangeTblRef *rtr;
      78             :     RangeTblEntry *rte;
      79             :     List       *aggs_list;
      80             :     RelOptInfo *grouped_rel;
      81             :     ListCell   *lc;
      82             : 
      83             :     /* minmax_aggs list should be empty at this point */
      84             :     Assert(root->minmax_aggs == NIL);
      85             : 
      86             :     /* Nothing to do if query has no aggregates */
      87       28520 :     if (!parse->hasAggs)
      88       28122 :         return;
      89             : 
      90             :     Assert(!parse->setOperations);   /* shouldn't get here if a setop */
      91             :     Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */
      92             : 
      93             :     /*
      94             :      * Reject unoptimizable cases.
      95             :      *
      96             :      * We don't handle GROUP BY or windowing, because our current
      97             :      * implementations of grouping require looking at all the rows anyway, and
      98             :      * so there's not much point in optimizing MIN/MAX.
      99             :      */
     100       28520 :     if (parse->groupClause || list_length(parse->groupingSets) > 1 ||
     101       26232 :         parse->hasWindowFuncs)
     102        2292 :         return;
     103             : 
     104             :     /*
     105             :      * Reject if query contains any CTEs; there's no way to build an indexscan
     106             :      * on one so we couldn't succeed here.  (If the CTEs are unreferenced,
     107             :      * that's not true, but it doesn't seem worth expending cycles to check.)
     108             :      */
     109       26228 :     if (parse->cteList)
     110           8 :         return;
     111             : 
     112             :     /*
     113             :      * We also restrict the query to reference exactly one table, since join
     114             :      * conditions can't be handled reasonably.  (We could perhaps handle a
     115             :      * query containing cartesian-product joins, but it hardly seems worth the
     116             :      * trouble.)  However, the single table could be buried in several levels
     117             :      * of FromExpr due to subqueries.  Note the "single" table could be an
     118             :      * inheritance parent, too, including the case of a UNION ALL subquery
     119             :      * that's been flattened to an appendrel.
     120             :      */
     121       26220 :     jtnode = parse->jointree;
     122       52320 :     while (IsA(jtnode, FromExpr))
     123             :     {
     124       26246 :         if (list_length(jtnode->fromlist) != 1)
     125         146 :             return;
     126       26100 :         jtnode = linitial(jtnode->fromlist);
     127             :     }
     128       26074 :     if (!IsA(jtnode, RangeTblRef))
     129        7112 :         return;
     130       18962 :     rtr = (RangeTblRef *) jtnode;
     131       18962 :     rte = planner_rt_fetch(rtr->rtindex, root);
     132       18962 :     if (rte->rtekind == RTE_RELATION)
     133             :          /* ordinary relation, ok */ ;
     134        1566 :     else if (rte->rtekind == RTE_SUBQUERY && rte->inh)
     135             :          /* flattened UNION ALL subquery, ok */ ;
     136             :     else
     137        1530 :         return;
     138             : 
     139             :     /*
     140             :      * Examine the aggregates recorded in root->agginfos and verify that
     141             :      * all are MIN/MAX aggregates.  Stop as soon as we find one that isn't.
     142             :      */
     143       17432 :     aggs_list = NIL;
     144       17432 :     if (!can_minmax_aggs(root, &aggs_list))
     145       16852 :         return;
     146             : 
     147             :     /*
     148             :      * OK, there is at least the possibility of performing the optimization.
     149             :      * Build an access path for each aggregate.  If any of the aggregates
     150             :      * prove to be non-indexable, give up; there is no point in optimizing
     151             :      * just some of them.
     152             :      */
     153        1002 :     foreach(lc, aggs_list)
     154             :     {
     155         604 :         MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
     156             :         Oid         eqop;
     157             :         bool        reverse;
     158             : 
     159             :         /*
     160             :          * We'll need the equality operator that goes with the aggregate's
     161             :          * ordering operator.
     162             :          */
     163         604 :         eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse);
     164         604 :         if (!OidIsValid(eqop))  /* shouldn't happen */
     165           0 :             elog(ERROR, "could not find equality operator for ordering operator %u",
     166             :                  mminfo->aggsortop);
     167             : 
     168             :         /*
     169             :          * We can use either an ordering that gives NULLS FIRST or one that
     170             :          * gives NULLS LAST; furthermore there's unlikely to be much
     171             :          * performance difference between them, so it doesn't seem worth
     172             :          * costing out both ways if we get a hit on the first one.  NULLS
     173             :          * FIRST is more likely to be available if the operator is a
     174             :          * reverse-sort operator, so try that first if reverse.
     175             :          */
     176         604 :         if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse))
     177         422 :             continue;
     178         182 :         if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse))
     179           0 :             continue;
     180             : 
     181             :         /* No indexable path for this aggregate, so fail */
     182         182 :         return;
     183             :     }
     184             : 
     185             :     /*
     186             :      * OK, we can do the query this way.  Prepare to create a MinMaxAggPath
     187             :      * node.
     188             :      *
     189             :      * First, create an output Param node for each agg.  (If we end up not
     190             :      * using the MinMaxAggPath, we'll waste a PARAM_EXEC slot for each agg,
     191             :      * which is not worth worrying about.  We can't wait till create_plan time
     192             :      * to decide whether to make the Param, unfortunately.)
     193             :      */
     194         820 :     foreach(lc, aggs_list)
     195             :     {
     196         422 :         MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
     197             : 
     198         422 :         mminfo->param =
     199         422 :             SS_make_initplan_output_param(root,
     200         422 :                                           exprType((Node *) mminfo->target),
     201             :                                           -1,
     202         422 :                                           exprCollation((Node *) mminfo->target));
     203             :     }
     204             : 
     205             :     /*
     206             :      * Create a MinMaxAggPath node with the appropriate estimated costs and
     207             :      * other needed data, and add it to the UPPERREL_GROUP_AGG upperrel, where
     208             :      * it will compete against the standard aggregate implementation.  (It
     209             :      * will likely always win, but we need not assume that here.)
     210             :      *
     211             :      * Note: grouping_planner won't have created this upperrel yet, but it's
     212             :      * fine for us to create it first.  We will not have inserted the correct
     213             :      * consider_parallel value in it, but MinMaxAggPath paths are currently
     214             :      * never parallel-safe anyway, so that doesn't matter.  Likewise, it
     215             :      * doesn't matter that we haven't filled FDW-related fields in the rel.
     216             :      * Also, because there are no rowmarks, we know that the processed_tlist
     217             :      * doesn't need to change anymore, so making the pathtarget now is safe.
     218             :      */
     219         398 :     grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
     220         398 :     add_path(grouped_rel, (Path *)
     221         398 :              create_minmaxagg_path(root, grouped_rel,
     222             :                                    create_pathtarget(root,
     223             :                                                      root->processed_tlist),
     224             :                                    aggs_list,
     225         398 :                                    (List *) parse->havingQual));
     226             : }
     227             : 
     228             : /*
     229             :  * can_minmax_aggs
     230             :  *      Examine all the aggregates listed in root->agginfos, and check
     231             :  *      if they are all MIN/MAX aggregates.  For each one that qualifies,
     232             :  *      append a MinMaxAggInfo node describing it to the *context list.
     233             :  *
     234             :  * Returns false if a non-MIN/MAX aggregate is found, true otherwise.
     235             :  *
     236             :  * This does not descend into subqueries, and so should be used only after
     237             :  * reduction of sublinks to subplans.  There mustn't be outer-aggregate
     238             :  * references either.
     239             :  */
     240             : static bool
     241       17432 : can_minmax_aggs(PlannerInfo *root, List **context)
     242             : {
     243             :     ListCell   *lc;
     244             : 
     245       18236 :     foreach(lc, root->agginfos)
     246             :     {
     247       17656 :         AggInfo    *agginfo = (AggInfo *) lfirst(lc);
     248       17656 :         Aggref     *aggref = agginfo->representative_aggref;
     249             :         Oid         aggsortop;
     250             :         TargetEntry *curTarget;
     251             :         MinMaxAggInfo *mminfo;
     252             : 
     253             :         Assert(aggref->agglevelsup == 0);
     254       17656 :         if (list_length(aggref->args) != 1)
     255       16852 :             return false;       /* it couldn't be MIN/MAX */
     256             : 
     257             :         /*
     258             :          * ORDER BY is usually irrelevant for MIN/MAX, but it can change the
     259             :          * outcome if the aggsortop's operator class recognizes non-identical
     260             :          * values as equal.  For example, 4.0 and 4.00 are equal according to
     261             :          * numeric_ops, yet distinguishable.  If MIN() receives more than one
     262             :          * value equal to 4.0 and no value less than 4.0, it is unspecified
     263             :          * which of those equal values MIN() returns.  An ORDER BY expression
     264             :          * that differs for each of those equal values of the argument
     265             :          * expression makes the result predictable once again.  This is a
     266             :          * niche requirement, and we do not implement it with subquery paths.
     267             :          * In any case, this test lets us reject ordered-set aggregates
     268             :          * quickly.
     269             :          */
     270       10760 :         if (aggref->aggorder != NIL)
     271        1618 :             return false;
     272             :         /* note: we do not care if DISTINCT is mentioned ... */
     273             : 
     274             :         /*
     275             :          * We might implement the optimization when a FILTER clause is present
     276             :          * by adding the filter to the quals of the generated subquery.  For
     277             :          * now, just punt.
     278             :          */
     279        9142 :         if (aggref->aggfilter != NULL)
     280         422 :             return false;
     281             : 
     282        8720 :         aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
     283        8720 :         if (!OidIsValid(aggsortop))
     284        7916 :             return false;       /* not a MIN/MAX aggregate */
     285             : 
     286         804 :         curTarget = (TargetEntry *) linitial(aggref->args);
     287             : 
     288         804 :         if (contain_mutable_functions((Node *) curTarget->expr))
     289           0 :             return false;       /* not potentially indexable */
     290             : 
     291         804 :         if (type_is_rowtype(exprType((Node *) curTarget->expr)))
     292           0 :             return false;       /* IS NOT NULL would have weird semantics */
     293             : 
     294         804 :         mminfo = makeNode(MinMaxAggInfo);
     295         804 :         mminfo->aggfnoid = aggref->aggfnoid;
     296         804 :         mminfo->aggsortop = aggsortop;
     297         804 :         mminfo->target = curTarget->expr;
     298         804 :         mminfo->subroot = NULL; /* don't compute path yet */
     299         804 :         mminfo->path = NULL;
     300         804 :         mminfo->pathcost = 0;
     301         804 :         mminfo->param = NULL;
     302             : 
     303         804 :         *context = lappend(*context, mminfo);
     304             :     }
     305         580 :     return true;
     306             : }
     307             : 
     308             : /*
     309             :  * build_minmax_path
     310             :  *      Given a MIN/MAX aggregate, try to build an indexscan Path it can be
     311             :  *      optimized with.
     312             :  *
     313             :  * If successful, stash the best path in *mminfo and return true.
     314             :  * Otherwise, return false.
     315             :  */
     316             : static bool
     317         786 : build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
     318             :                   Oid eqop, Oid sortop, bool nulls_first)
     319             : {
     320             :     PlannerInfo *subroot;
     321             :     Query      *parse;
     322             :     TargetEntry *tle;
     323             :     List       *tlist;
     324             :     NullTest   *ntest;
     325             :     SortGroupClause *sortcl;
     326             :     RelOptInfo *final_rel;
     327             :     Path       *sorted_path;
     328             :     Cost        path_cost;
     329             :     double      path_fraction;
     330             : 
     331             :     /*
     332             :      * We are going to construct what is effectively a sub-SELECT query, so
     333             :      * clone the current query level's state and adjust it to make it look
     334             :      * like a subquery.  Any outer references will now be one level higher
     335             :      * than before.  (This means that when we are done, there will be no Vars
     336             :      * of level 1, which is why the subquery can become an initplan.)
     337             :      */
     338         786 :     subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo));
     339         786 :     memcpy(subroot, root, sizeof(PlannerInfo));
     340         786 :     subroot->query_level++;
     341         786 :     subroot->parent_root = root;
     342             :     /* reset subplan-related stuff */
     343         786 :     subroot->plan_params = NIL;
     344         786 :     subroot->outer_params = NULL;
     345         786 :     subroot->init_plans = NIL;
     346         786 :     subroot->agginfos = NIL;
     347         786 :     subroot->aggtransinfos = NIL;
     348             : 
     349         786 :     subroot->parse = parse = copyObject(root->parse);
     350         786 :     IncrementVarSublevelsUp((Node *) parse, 1, 1);
     351             : 
     352             :     /* append_rel_list might contain outer Vars? */
     353         786 :     subroot->append_rel_list = copyObject(root->append_rel_list);
     354         786 :     IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1);
     355             :     /* There shouldn't be any OJ info to translate, as yet */
     356             :     Assert(subroot->join_info_list == NIL);
     357             :     /* and we haven't made equivalence classes, either */
     358             :     Assert(subroot->eq_classes == NIL);
     359             :     /* and we haven't created PlaceHolderInfos, either */
     360             :     Assert(subroot->placeholder_list == NIL);
     361             : 
     362             :     /*----------
     363             :      * Generate modified query of the form
     364             :      *      (SELECT col FROM tab
     365             :      *       WHERE col IS NOT NULL AND existing-quals
     366             :      *       ORDER BY col ASC/DESC
     367             :      *       LIMIT 1)
     368             :      *----------
     369             :      */
     370             :     /* single tlist entry that is the aggregate target */
     371         786 :     tle = makeTargetEntry(copyObject(mminfo->target),
     372             :                           (AttrNumber) 1,
     373             :                           pstrdup("agg_target"),
     374             :                           false);
     375         786 :     tlist = list_make1(tle);
     376         786 :     subroot->processed_tlist = parse->targetList = tlist;
     377             : 
     378             :     /* No HAVING, no DISTINCT, no aggregates anymore */
     379         786 :     parse->havingQual = NULL;
     380         786 :     subroot->hasHavingQual = false;
     381         786 :     parse->distinctClause = NIL;
     382         786 :     parse->hasDistinctOn = false;
     383         786 :     parse->hasAggs = false;
     384             : 
     385             :     /* Build "target IS NOT NULL" expression */
     386         786 :     ntest = makeNode(NullTest);
     387         786 :     ntest->nulltesttype = IS_NOT_NULL;
     388         786 :     ntest->arg = copyObject(mminfo->target);
     389             :     /* we checked it wasn't a rowtype in can_minmax_aggs */
     390         786 :     ntest->argisrow = false;
     391         786 :     ntest->location = -1;
     392             : 
     393             :     /* User might have had that in WHERE already */
     394         786 :     if (!list_member((List *) parse->jointree->quals, ntest))
     395         786 :         parse->jointree->quals = (Node *)
     396         786 :             lcons(ntest, (List *) parse->jointree->quals);
     397             : 
     398             :     /* Build suitable ORDER BY clause */
     399         786 :     sortcl = makeNode(SortGroupClause);
     400         786 :     sortcl->tleSortGroupRef = assignSortGroupRef(tle, subroot->processed_tlist);
     401         786 :     sortcl->eqop = eqop;
     402         786 :     sortcl->sortop = sortop;
     403         786 :     sortcl->nulls_first = nulls_first;
     404         786 :     sortcl->hashable = false;    /* no need to make this accurate */
     405         786 :     parse->sortClause = list_make1(sortcl);
     406             : 
     407             :     /* set up expressions for LIMIT 1 */
     408         786 :     parse->limitOffset = NULL;
     409         786 :     parse->limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
     410             :                                            sizeof(int64),
     411             :                                            Int64GetDatum(1), false,
     412             :                                            FLOAT8PASSBYVAL);
     413             : 
     414             :     /*
     415             :      * Generate the best paths for this query, telling query_planner that we
     416             :      * have LIMIT 1.
     417             :      */
     418         786 :     subroot->tuple_fraction = 1.0;
     419         786 :     subroot->limit_tuples = 1.0;
     420             : 
     421         786 :     final_rel = query_planner(subroot, minmax_qp_callback, NULL);
     422             : 
     423             :     /*
     424             :      * Since we didn't go through subquery_planner() to handle the subquery,
     425             :      * we have to do some of the same cleanup it would do, in particular cope
     426             :      * with params and initplans used within this subquery.  (This won't
     427             :      * matter if we end up not using the subplan.)
     428             :      */
     429         786 :     SS_identify_outer_params(subroot);
     430         786 :     SS_charge_for_initplans(subroot, final_rel);
     431             : 
     432             :     /*
     433             :      * Get the best presorted path, that being the one that's cheapest for
     434             :      * fetching just one row.  If there's no such path, fail.
     435             :      */
     436         786 :     if (final_rel->rows > 1.0)
     437         722 :         path_fraction = 1.0 / final_rel->rows;
     438             :     else
     439          64 :         path_fraction = 1.0;
     440             : 
     441             :     sorted_path =
     442         786 :         get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
     443             :                                                   subroot->query_pathkeys,
     444             :                                                   NULL,
     445             :                                                   path_fraction);
     446         786 :     if (!sorted_path)
     447         364 :         return false;
     448             : 
     449             :     /*
     450             :      * The path might not return exactly what we want, so fix that.  (We
     451             :      * assume that this won't change any conclusions about which was the
     452             :      * cheapest path.)
     453             :      */
     454         422 :     sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path,
     455             :                                            create_pathtarget(subroot,
     456             :                                                              subroot->processed_tlist));
     457             : 
     458             :     /*
     459             :      * Determine cost to get just the first row of the presorted path.
     460             :      *
     461             :      * Note: cost calculation here should match
     462             :      * compare_fractional_path_costs().
     463             :      */
     464         844 :     path_cost = sorted_path->startup_cost +
     465         422 :         path_fraction * (sorted_path->total_cost - sorted_path->startup_cost);
     466             : 
     467             :     /* Save state for further processing */
     468         422 :     mminfo->subroot = subroot;
     469         422 :     mminfo->path = sorted_path;
     470         422 :     mminfo->pathcost = path_cost;
     471             : 
     472         422 :     return true;
     473             : }
     474             : 
     475             : /*
     476             :  * Compute query_pathkeys and other pathkeys during query_planner()
     477             :  */
     478             : static void
     479         786 : minmax_qp_callback(PlannerInfo *root, void *extra)
     480             : {
     481         786 :     root->group_pathkeys = NIL;
     482         786 :     root->window_pathkeys = NIL;
     483         786 :     root->distinct_pathkeys = NIL;
     484             : 
     485         786 :     root->sort_pathkeys =
     486        1572 :         make_pathkeys_for_sortclauses(root,
     487         786 :                                       root->parse->sortClause,
     488         786 :                                       root->parse->targetList);
     489             : 
     490         786 :     root->query_pathkeys = root->sort_pathkeys;
     491         786 : }
     492             : 
     493             : /*
     494             :  * Get the OID of the sort operator, if any, associated with an aggregate.
     495             :  * Returns InvalidOid if there is no such operator.
     496             :  */
     497             : static Oid
     498        8720 : fetch_agg_sort_op(Oid aggfnoid)
     499             : {
     500             :     HeapTuple   aggTuple;
     501             :     Form_pg_aggregate aggform;
     502             :     Oid         aggsortop;
     503             : 
     504             :     /* fetch aggregate entry from pg_aggregate */
     505        8720 :     aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid));
     506        8720 :     if (!HeapTupleIsValid(aggTuple))
     507           0 :         return InvalidOid;
     508        8720 :     aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
     509        8720 :     aggsortop = aggform->aggsortop;
     510        8720 :     ReleaseSysCache(aggTuple);
     511             : 
     512        8720 :     return aggsortop;
     513             : }

Generated by: LCOV version 1.13