LCOV - code coverage report
Current view: top level - src/backend/optimizer/plan - planagg.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 143 147 97.3 %
Date: 2024-04-26 11:11:00 Functions: 5 5 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * planagg.c
       4             :  *    Special planning for aggregate queries.
       5             :  *
       6             :  * This module tries to replace MIN/MAX aggregate functions by subqueries
       7             :  * of the form
       8             :  *      (SELECT col FROM tab
       9             :  *       WHERE col IS NOT NULL AND existing-quals
      10             :  *       ORDER BY col ASC/DESC
      11             :  *       LIMIT 1)
      12             :  * Given a suitable index on tab.col, this can be much faster than the
      13             :  * generic scan-all-the-rows aggregation plan.  We can handle multiple
      14             :  * MIN/MAX aggregates by generating multiple subqueries, and their
      15             :  * orderings can be different.  However, if the query contains any
      16             :  * non-optimizable aggregates, there's no point since we'll have to
      17             :  * scan all the rows anyway.
      18             :  *
      19             :  *
      20             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      21             :  * Portions Copyright (c) 1994, Regents of the University of California
      22             :  *
      23             :  *
      24             :  * IDENTIFICATION
      25             :  *    src/backend/optimizer/plan/planagg.c
      26             :  *
      27             :  *-------------------------------------------------------------------------
      28             :  */
      29             : #include "postgres.h"
      30             : 
      31             : #include "access/htup_details.h"
      32             : #include "catalog/pg_aggregate.h"
      33             : #include "catalog/pg_type.h"
      34             : #include "nodes/makefuncs.h"
      35             : #include "nodes/nodeFuncs.h"
      36             : #include "optimizer/cost.h"
      37             : #include "optimizer/optimizer.h"
      38             : #include "optimizer/pathnode.h"
      39             : #include "optimizer/paths.h"
      40             : #include "optimizer/planmain.h"
      41             : #include "optimizer/subselect.h"
      42             : #include "optimizer/tlist.h"
      43             : #include "parser/parse_clause.h"
      44             : #include "parser/parsetree.h"
      45             : #include "rewrite/rewriteManip.h"
      46             : #include "utils/lsyscache.h"
      47             : #include "utils/syscache.h"
      48             : 
      49             : static bool can_minmax_aggs(PlannerInfo *root, List **context);
      50             : static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
      51             :                               Oid eqop, Oid sortop, bool nulls_first);
      52             : static void minmax_qp_callback(PlannerInfo *root, void *extra);
      53             : static Oid  fetch_agg_sort_op(Oid aggfnoid);
      54             : 
      55             : 
      56             : /*
      57             :  * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
      58             :  *
      59             :  * Check to see whether the query contains MIN/MAX aggregate functions that
      60             :  * might be optimizable via indexscans.  If it does, and all the aggregates
      61             :  * are potentially optimizable, then create a MinMaxAggPath and add it to
      62             :  * the (UPPERREL_GROUP_AGG, NULL) upperrel.
      63             :  *
      64             :  * This should be called by grouping_planner() just before it's ready to call
      65             :  * query_planner(), because we generate indexscan paths by cloning the
      66             :  * planner's state and invoking query_planner() on a modified version of
      67             :  * the query parsetree.  Thus, all preprocessing needed before query_planner()
      68             :  * must already be done.  This relies on the list of aggregates in
      69             :  * root->agginfos, so preprocess_aggrefs() must have been called already, too.
      70             :  */
      71             : void
      72       33648 : preprocess_minmax_aggregates(PlannerInfo *root)
      73             : {
      74       33648 :     Query      *parse = root->parse;
      75             :     FromExpr   *jtnode;
      76             :     RangeTblRef *rtr;
      77             :     RangeTblEntry *rte;
      78             :     List       *aggs_list;
      79             :     RelOptInfo *grouped_rel;
      80             :     ListCell   *lc;
      81             : 
      82             :     /* minmax_aggs list should be empty at this point */
      83             :     Assert(root->minmax_aggs == NIL);
      84             : 
      85             :     /* Nothing to do if query has no aggregates */
      86       33648 :     if (!parse->hasAggs)
      87       33262 :         return;
      88             : 
      89             :     Assert(!parse->setOperations);   /* shouldn't get here if a setop */
      90             :     Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */
      91             : 
      92             :     /*
      93             :      * Reject unoptimizable cases.
      94             :      *
      95             :      * We don't handle GROUP BY, multiple grouping sets, or windowing, because
      96             :      * our current implementations of grouping require looking at all the rows
      97             :      * anyway, and so there's not much point in optimizing MIN/MAX.
      98             :      */
      99       33648 :     if (parse->groupClause || list_length(parse->groupingSets) > 1 ||
     100       30038 :         parse->hasWindowFuncs)
     101        3616 :         return;
     102             : 
     103             :     /*
     104             :      * Reject if query contains any CTEs; there's no way to build an indexscan
     105             :      * on one so we couldn't succeed here.  (If the CTEs are unreferenced,
     106             :      * that's not true, but it doesn't seem worth expending cycles to check.)
     107             :      */
     108       30032 :     if (parse->cteList)
     109          62 :         return;
     110             : 
     111             :     /*
     112             :      * We also restrict the query to reference exactly one table, since join
     113             :      * conditions can't be handled reasonably.  (We could perhaps handle a
     114             :      * query containing cartesian-product joins, but it hardly seems worth the
     115             :      * trouble.)  However, the single table could be buried in several levels
     116             :      * of FromExpr due to subqueries.  Note the "single" table could be an
     117             :      * inheritance parent, too, including the case of a UNION ALL subquery
     118             :      * that's been flattened to an appendrel.
     119             :      */
     120       29970 :     jtnode = parse->jointree;
     121       56762 :     while (IsA(jtnode, FromExpr))
     122             :     {
     123       30004 :         if (list_length(jtnode->fromlist) != 1)
     124        3212 :             return;
     125       26792 :         jtnode = linitial(jtnode->fromlist);
     126             :     }
     127       26758 :     if (!IsA(jtnode, RangeTblRef))
     128        1106 :         return;
     129       25652 :     rtr = (RangeTblRef *) jtnode;
     130       25652 :     rte = planner_rt_fetch(rtr->rtindex, root);
     131       25652 :     if (rte->rtekind == RTE_RELATION)
     132             :          /* ordinary relation, ok */ ;
     133        3164 :     else if (rte->rtekind == RTE_SUBQUERY && rte->inh)
     134             :          /* flattened UNION ALL subquery, ok */ ;
     135             :     else
     136        3110 :         return;
     137             : 
     138             :     /*
     139             :      * Examine all the aggregates and verify all are MIN/MAX aggregates.  Stop
     140             :      * as soon as we find one that isn't.
     141             :      */
     142       22542 :     aggs_list = NIL;
     143       22542 :     if (!can_minmax_aggs(root, &aggs_list))
     144       21868 :         return;
     145             : 
     146             :     /*
     147             :      * OK, there is at least the possibility of performing the optimization.
     148             :      * Build an access path for each aggregate.  If any of the aggregates
     149             :      * prove to be non-indexable, give up; there is no point in optimizing
     150             :      * just some of them.
     151             :      */
     152        1096 :     foreach(lc, aggs_list)
     153             :     {
     154         710 :         MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
     155             :         Oid         eqop;
     156             :         bool        reverse;    /* set below: is it a reverse-sort op? */
     157             : 
     158             :         /*
     159             :          * We'll need the equality operator that goes with the aggregate's
     160             :          * ordering operator.
     161             :          */
     162         710 :         eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse);
     163         710 :         if (!OidIsValid(eqop))  /* shouldn't happen */
     164           0 :             elog(ERROR, "could not find equality operator for ordering operator %u",
     165             :                  mminfo->aggsortop);
     166             : 
     167             :         /*
     168             :          * We can use either an ordering that gives NULLS FIRST or one that
     169             :          * gives NULLS LAST; furthermore there's unlikely to be much
     170             :          * performance difference between them, so it doesn't seem worth
     171             :          * costing out both ways if we get a hit on the first one.  NULLS
     172             :          * FIRST is more likely to be available if the operator is a
     173             :          * reverse-sort operator, so try that first if reverse.
     174             :          */
     175         710 :         if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse))
     176         422 :             continue;
     177         288 :         if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse))
     178           0 :             continue;
     179             : 
     180             :         /* Neither NULLS ordering gave an indexable path, so give up */
     181         288 :         return;
     182             :     }
     183             : 
     184             :     /*
     185             :      * OK, we can do the query this way.  Prepare to create a MinMaxAggPath
     186             :      * node.
     187             :      *
     188             :      * First, create an output Param node for each agg.  (If we end up not
     189             :      * using the MinMaxAggPath, we'll waste a PARAM_EXEC slot for each agg,
     190             :      * which is not worth worrying about.  We can't wait till create_plan time
     191             :      * to decide whether to make the Param, unfortunately.)
     192             :      */
     193         808 :     foreach(lc, aggs_list)
     194             :     {
     195         422 :         MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
     196             : 
     197         422 :         mminfo->param =
     198         422 :             SS_make_initplan_output_param(root,
     199         422 :                                           exprType((Node *) mminfo->target),
     200             :                                           -1,
     201         422 :                                           exprCollation((Node *) mminfo->target));
     202             :     }
     203             : 
     204             :     /*
     205             :      * Create a MinMaxAggPath node with the appropriate estimated costs and
     206             :      * other needed data, and add it to the UPPERREL_GROUP_AGG upperrel, where
     207             :      * it will compete against the standard aggregate implementation.  (It
     208             :      * will likely always win, but we need not assume that here.)
     209             :      *
     210             :      * Note: grouping_planner won't have created this upperrel yet, but it's
     211             :      * fine for us to create it first.  We will not have inserted the correct
     212             :      * consider_parallel value in it, but MinMaxAggPath paths are currently
     213             :      * never parallel-safe anyway, so that doesn't matter.  Likewise, it
     214             :      * doesn't matter that we haven't filled FDW-related fields in the rel.
     215             :      * Also, because there are no rowmarks, we know that the processed_tlist
     216             :      * doesn't need to change anymore, so making the pathtarget now is safe.
     217             :      */
     218         386 :     grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
     219         386 :     add_path(grouped_rel, (Path *)
     220         386 :              create_minmaxagg_path(root, grouped_rel,
     221             :                                    create_pathtarget(root,
     222             :                                                      root->processed_tlist),
     223             :                                    aggs_list,
     224         386 :                                    (List *) parse->havingQual));
     225             : }
     226             : 
     227             : /*
     228             :  * can_minmax_aggs
     229             :  *      Examine all the aggregates in the query, and check if they are
     230             :  *      all optimizable MIN/MAX aggregates.  If so, build a list of
     231             :  *      MinMaxAggInfo nodes in *context (left partial if we fail).
     232             :  *
     233             :  * Returns false at the first unsuitable aggregate, true otherwise.
     234             :  */
     235             : static bool
     236       22542 : can_minmax_aggs(PlannerInfo *root, List **context)
     237             : {
     238             :     ListCell   *lc;
     239             : 
     240             :     /*
     241             :      * This function used to have to scan the query for itself, but now we can
     242             :      * just thumb through the AggInfo list made by preprocess_aggrefs.
     243             :      */
     244       23576 :     foreach(lc, root->agginfos)
     245             :     {
     246       22902 :         AggInfo    *agginfo = lfirst_node(AggInfo, lc);
     247       22902 :         Aggref     *aggref = linitial_node(Aggref, agginfo->aggrefs);
     248             :         Oid         aggsortop;
     249             :         TargetEntry *curTarget;
     250             :         MinMaxAggInfo *mminfo;
     251             : 
     252             :         Assert(aggref->agglevelsup == 0);
     253       22902 :         if (list_length(aggref->args) != 1)
     254       21868 :             return false;       /* it couldn't be MIN/MAX */
     255             : 
     256             :         /*
     257             :          * ORDER BY is usually irrelevant for MIN/MAX, but it can change the
     258             :          * outcome if the aggsortop's operator class recognizes non-identical
     259             :          * values as equal.  For example, 4.0 and 4.00 are equal according to
     260             :          * numeric_ops, yet distinguishable.  If MIN() receives more than one
     261             :          * value equal to 4.0 and no value less than 4.0, it is unspecified
     262             :          * which of those equal values MIN() returns.  An ORDER BY expression
     263             :          * that differs for each of those equal values of the argument
     264             :          * expression makes the result predictable once again.  This is a
     265             :          * niche requirement, and we do not implement it with subquery paths.
     266             :          * In any case, this test lets us reject ordered-set aggregates
     267             :          * quickly.
     268             :          */
     269       14236 :         if (aggref->aggorder != NIL)
     270         452 :             return false;
     271             :         /* note: we do not care if DISTINCT is mentioned ... */
     272             : 
     273             :         /*
     274             :          * We might implement the optimization when a FILTER clause is present
     275             :          * by adding the filter to the quals of the generated subquery.  For
     276             :          * now, just punt.
     277             :          */
     278       13784 :         if (aggref->aggfilter != NULL)
     279         442 :             return false;
     280             : 
     281       13342 :         aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
     282       13342 :         if (!OidIsValid(aggsortop))
     283       12304 :             return false;       /* not a MIN/MAX aggregate */
     284             : 
     285        1038 :         curTarget = (TargetEntry *) linitial(aggref->args);
     286             : 
     287        1038 :         if (contain_mutable_functions((Node *) curTarget->expr))
     288           4 :             return false;       /* not potentially indexable */
     289             : 
     290        1034 :         if (type_is_rowtype(exprType((Node *) curTarget->expr)))
     291           0 :             return false;       /* IS NOT NULL would have weird semantics */
     292             : 
     293        1034 :         mminfo = makeNode(MinMaxAggInfo);
     294        1034 :         mminfo->aggfnoid = aggref->aggfnoid;
     295        1034 :         mminfo->aggsortop = aggsortop;
     296        1034 :         mminfo->target = curTarget->expr;
     297        1034 :         mminfo->subroot = NULL; /* don't compute path yet */
     298        1034 :         mminfo->path = NULL;
     299        1034 :         mminfo->pathcost = 0;
     300        1034 :         mminfo->param = NULL;
     301             : 
     302        1034 :         *context = lappend(*context, mminfo);
     303             :     }
     304         674 :     return true;
     305             : }
     306             : 
     307             : /*
     308             :  * build_minmax_path
     309             :  *      Given a MIN/MAX aggregate, try to build an indexscan Path it can be
     310             :  *      optimized with.  eqop, sortop and nulls_first define the ORDER BY.
     311             :  *
     312             :  * If successful, stash the subroot, best path and its first-row cost in
     313             :  * *mminfo and return true.  Otherwise, return false.
     314             :  */
     315             : static bool
     316         998 : build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
     317             :                   Oid eqop, Oid sortop, bool nulls_first)
     318             : {
     319             :     PlannerInfo *subroot;
     320             :     Query      *parse;
     321             :     TargetEntry *tle;
     322             :     List       *tlist;
     323             :     NullTest   *ntest;
     324             :     SortGroupClause *sortcl;
     325             :     RelOptInfo *final_rel;
     326             :     Path       *sorted_path;
     327             :     Cost        path_cost;
     328             :     double      path_fraction;
     329             : 
     330             :     /*
     331             :      * We are going to construct what is effectively a sub-SELECT query, so
     332             :      * clone the current query level's state and adjust it to make it look
     333             :      * like a subquery.  Any outer references will now be one level higher
     334             :      * than before.  (This means that when we are done, there will be no Vars
     335             :      * of level 1, which is why the subquery can become an initplan.)
     336             :      */
     337         998 :     subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo));
     338         998 :     memcpy(subroot, root, sizeof(PlannerInfo));
     339         998 :     subroot->query_level++;
     340         998 :     subroot->parent_root = root;
     341             :     /* reset subplan- and aggregate-related stuff */
     342         998 :     subroot->plan_params = NIL;
     343         998 :     subroot->outer_params = NULL;
     344         998 :     subroot->init_plans = NIL;
     345         998 :     subroot->agginfos = NIL;
     346         998 :     subroot->aggtransinfos = NIL;
     347             : 
     348         998 :     subroot->parse = parse = copyObject(root->parse);
     349         998 :     IncrementVarSublevelsUp((Node *) parse, 1, 1);
     350             : 
     351             :     /* append_rel_list might contain outer Vars? */
     352         998 :     subroot->append_rel_list = copyObject(root->append_rel_list);
     353         998 :     IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1);
     354             :     /* There shouldn't be any OJ info to translate, as yet */
     355             :     Assert(subroot->join_info_list == NIL);
     356             :     /* and we haven't made equivalence classes, either */
     357             :     Assert(subroot->eq_classes == NIL);
     358             :     /* and we haven't created PlaceHolderInfos, either */
     359             :     Assert(subroot->placeholder_list == NIL);
     360             : 
     361             :     /*----------
     362             :      * Generate modified query of the form
     363             :      *      (SELECT col FROM tab
     364             :      *       WHERE col IS NOT NULL AND existing-quals
     365             :      *       ORDER BY col ASC/DESC
     366             :      *       LIMIT 1)
     367             :      *----------
     368             :      */
     369             :     /* single tlist entry that is the aggregate target */
     370         998 :     tle = makeTargetEntry(copyObject(mminfo->target),
     371             :                           (AttrNumber) 1,
     372             :                           pstrdup("agg_target"),
     373             :                           false);
     374         998 :     tlist = list_make1(tle);
     375         998 :     subroot->processed_tlist = parse->targetList = tlist;
     376             : 
     377             :     /* No HAVING, no DISTINCT, no aggregates anymore */
     378         998 :     parse->havingQual = NULL;
     379         998 :     subroot->hasHavingQual = false;
     380         998 :     parse->distinctClause = NIL;
     381         998 :     parse->hasDistinctOn = false;
     382         998 :     parse->hasAggs = false;
     383             : 
     384             :     /* Build "target IS NOT NULL" expression */
     385         998 :     ntest = makeNode(NullTest);
     386         998 :     ntest->nulltesttype = IS_NOT_NULL;
     387         998 :     ntest->arg = copyObject(mminfo->target);
     388             :     /* we checked it wasn't a rowtype in can_minmax_aggs */
     389         998 :     ntest->argisrow = false;
     390         998 :     ntest->location = -1;
     391             : 
     392             :     /* User might have had that in WHERE already */
     393         998 :     if (!list_member((List *) parse->jointree->quals, ntest))
     394         998 :         parse->jointree->quals = (Node *)
     395         998 :             lcons(ntest, (List *) parse->jointree->quals);
     396             : 
     397             :     /* Build suitable ORDER BY clause */
     398         998 :     sortcl = makeNode(SortGroupClause);
     399         998 :     sortcl->tleSortGroupRef = assignSortGroupRef(tle, subroot->processed_tlist);
     400         998 :     sortcl->eqop = eqop;
     401         998 :     sortcl->sortop = sortop;
     402         998 :     sortcl->nulls_first = nulls_first;
     403         998 :     sortcl->hashable = false;    /* no need to make this accurate */
     404         998 :     parse->sortClause = list_make1(sortcl);
     405             : 
     406             :     /* set up expressions for LIMIT 1 */
     407         998 :     parse->limitOffset = NULL;
     408         998 :     parse->limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
     409             :                                            sizeof(int64),
     410             :                                            Int64GetDatum(1), false,
     411             :                                            FLOAT8PASSBYVAL); /* NOTE(review): presumably int8 shares float8's byval-ness; confirm */
     412             : 
     413             :     /*
     414             :      * Generate the best paths for this query, telling query_planner that we
     415             :      * have LIMIT 1.
     416             :      */
     417         998 :     subroot->tuple_fraction = 1.0;
     418         998 :     subroot->limit_tuples = 1.0;
     419             : 
     420         998 :     final_rel = query_planner(subroot, minmax_qp_callback, NULL);
     421             : 
     422             :     /*
     423             :      * Since we didn't go through subquery_planner() to handle the subquery,
     424             :      * we have to do some of the same cleanup it would do, in particular cope
     425             :      * with params and initplans used within this subquery.  (This won't
     426             :      * matter if we end up not using the subplan.)
     427             :      */
     428         998 :     SS_identify_outer_params(subroot);
     429         998 :     SS_charge_for_initplans(subroot, final_rel);
     430             : 
     431             :     /*
     432             :      * Get the best presorted path, that being the one that's cheapest for
     433             :      * fetching just one row.  If there's no such path, fail.
     434             :      */
     435         998 :     if (final_rel->rows > 1.0)
     436         986 :         path_fraction = 1.0 / final_rel->rows;
     437             :     else
     438          12 :         path_fraction = 1.0;
     439             : 
     440             :     sorted_path =
     441         998 :         get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
     442             :                                                   subroot->query_pathkeys,
     443             :                                                   NULL,
     444             :                                                   path_fraction);
     445         998 :     if (!sorted_path)
     446         576 :         return false;
     447             : 
     448             :     /*
     449             :      * The path might not return exactly what we want, so fix that.  (We
     450             :      * assume that this won't change any conclusions about which was the
     451             :      * cheapest path.)
     452             :      */
     453         422 :     sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path,
     454             :                                            create_pathtarget(subroot,
     455             :                                                              subroot->processed_tlist));
     456             : 
     457             :     /*
     458             :      * Determine cost to get just the first row of the presorted path.
     459             :      *
     460             :      * Note: cost calculation here should match
     461             :      * compare_fractional_path_costs().
     462             :      */
     463         422 :     path_cost = sorted_path->startup_cost +
     464         422 :         path_fraction * (sorted_path->total_cost - sorted_path->startup_cost);
     465             : 
     466             :     /* Save state for further processing */
     467         422 :     mminfo->subroot = subroot;
     468         422 :     mminfo->path = sorted_path;
     469         422 :     mminfo->pathcost = path_cost;
     470             : 
     471         422 :     return true;
     472             : }
     473             : 
     474             : /*
     475             :  * query_planner() callback: set sort/query pathkeys, others NIL; extra unused
     476             :  */
     477             : static void
     478         998 : minmax_qp_callback(PlannerInfo *root, void *extra)
     479             : {
     480         998 :     root->group_pathkeys = NIL;
     481         998 :     root->window_pathkeys = NIL;
     482         998 :     root->distinct_pathkeys = NIL;
     483             : 
     484         998 :     root->sort_pathkeys =
     485         998 :         make_pathkeys_for_sortclauses(root,
     486         998 :                                       root->parse->sortClause,
     487         998 :                                       root->parse->targetList);
     488             : 
     489         998 :     root->query_pathkeys = root->sort_pathkeys;
     490         998 : }
     491             : 
     492             : /*
     493             :  * Get the OID of the sort operator, if any, associated with an aggregate.
     494             :  * Returns InvalidOid if there is none, or if the aggregate is not found.
     495             :  */
     496             : static Oid
     497       13342 : fetch_agg_sort_op(Oid aggfnoid)
     498             : {
     499             :     HeapTuple   aggTuple;
     500             :     Form_pg_aggregate aggform;
     501             :     Oid         aggsortop;
     502             : 
     503             :     /* fetch aggregate entry from pg_aggregate */
     504       13342 :     aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid));
     505       13342 :     if (!HeapTupleIsValid(aggTuple))
     506           0 :         return InvalidOid;
     507       13342 :     aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
     508       13342 :     aggsortop = aggform->aggsortop;
     509       13342 :     ReleaseSysCache(aggTuple);
     510             : 
     511       13342 :     return aggsortop;
     512             : }

Generated by: LCOV version 1.14