Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * planagg.c
4 : * Special planning for aggregate queries.
5 : *
6 : * This module tries to replace MIN/MAX aggregate functions by subqueries
7 : * of the form
8 : * (SELECT col FROM tab
9 : * WHERE col IS NOT NULL AND existing-quals
10 : * ORDER BY col ASC/DESC
11 : * LIMIT 1)
12 : * Given a suitable index on tab.col, this can be much faster than the
13 : * generic scan-all-the-rows aggregation plan. We can handle multiple
14 : * MIN/MAX aggregates by generating multiple subqueries, and their
15 : * orderings can be different. However, if the query contains any
16 : * non-optimizable aggregates, there's no point since we'll have to
17 : * scan all the rows anyway.
18 : *
19 : *
20 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
21 : * Portions Copyright (c) 1994, Regents of the University of California
22 : *
23 : *
24 : * IDENTIFICATION
25 : * src/backend/optimizer/plan/planagg.c
26 : *
27 : *-------------------------------------------------------------------------
28 : */
29 : #include "postgres.h"
30 :
31 : #include "access/htup_details.h"
32 : #include "catalog/pg_aggregate.h"
33 : #include "catalog/pg_type.h"
34 : #include "nodes/makefuncs.h"
35 : #include "nodes/nodeFuncs.h"
36 : #include "optimizer/cost.h"
37 : #include "optimizer/optimizer.h"
38 : #include "optimizer/pathnode.h"
39 : #include "optimizer/paths.h"
40 : #include "optimizer/planmain.h"
41 : #include "optimizer/planner.h"
42 : #include "optimizer/subselect.h"
43 : #include "optimizer/tlist.h"
44 : #include "parser/parse_clause.h"
45 : #include "parser/parsetree.h"
46 : #include "rewrite/rewriteManip.h"
47 : #include "utils/lsyscache.h"
48 : #include "utils/syscache.h"
49 :
50 : static bool can_minmax_aggs(PlannerInfo *root, List **context);
51 : static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
52 : Oid eqop, Oid sortop, bool reverse_sort,
53 : bool nulls_first);
54 : static void minmax_qp_callback(PlannerInfo *root, void *extra);
55 : static Oid fetch_agg_sort_op(Oid aggfnoid);
56 :
57 :
58 : /*
59 : * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
60 : *
61 : * Check to see whether the query contains MIN/MAX aggregate functions that
62 : * might be optimizable via indexscans. If it does, and all the aggregates
63 : * are potentially optimizable, then create a MinMaxAggPath and add it to
64 : * the (UPPERREL_GROUP_AGG, NULL) upperrel.
65 : *
66 : * This should be called by grouping_planner() just before it's ready to call
67 : * query_planner(), because we generate indexscan paths by cloning the
68 : * planner's state and invoking query_planner() on a modified version of
69 : * the query parsetree. Thus, all preprocessing needed before query_planner()
70 : * must already be done. This relies on the list of aggregates in
71 : * root->agginfos, so preprocess_aggrefs() must have been called already, too.
72 : */
73 : void
74 38586 : preprocess_minmax_aggregates(PlannerInfo *root)
75 : {
76 38586 : Query *parse = root->parse;
77 : FromExpr *jtnode;
78 : RangeTblRef *rtr;
79 : RangeTblEntry *rte;
80 : List *aggs_list;
81 : RelOptInfo *grouped_rel;
82 : ListCell *lc;
83 :
84 : /* minmax_aggs list should be empty at this point */
85 : Assert(root->minmax_aggs == NIL);
86 :
87 : /* Nothing to do if query has no aggregates */
88 38586 : if (!parse->hasAggs)
89 38176 : return;
90 :
91 : Assert(!parse->setOperations); /* shouldn't get here if a setop */
92 : Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */
93 :
94 : /*
95 : * Reject unoptimizable cases.
96 : *
97 : * We don't handle GROUP BY or windowing, because our current
98 : * implementations of grouping require looking at all the rows anyway, and
99 : * so there's not much point in optimizing MIN/MAX.
100 : */
101 38586 : if (parse->groupClause || list_length(parse->groupingSets) > 1 ||
102 34486 : parse->hasWindowFuncs)
103 4106 : return;
104 :
105 : /*
106 : * Reject if query contains any CTEs; there's no way to build an indexscan
107 : * on one so we couldn't succeed here. (If the CTEs are unreferenced,
108 : * that's not true, but it doesn't seem worth expending cycles to check.)
109 : */
110 34480 : if (parse->cteList)
111 86 : return;
112 :
113 : /*
114 : * We also restrict the query to reference exactly one table, since join
115 : * conditions can't be handled reasonably. (We could perhaps handle a
116 : * query containing cartesian-product joins, but it hardly seems worth the
117 : * trouble.) However, the single table could be buried in several levels
118 : * of FromExpr due to subqueries. Note the "single" table could be an
119 : * inheritance parent, too, including the case of a UNION ALL subquery
120 : * that's been flattened to an appendrel.
121 : */
122 34394 : jtnode = parse->jointree;
123 65158 : while (IsA(jtnode, FromExpr))
124 : {
125 34428 : if (list_length(jtnode->fromlist) != 1)
126 3664 : return;
127 30764 : jtnode = linitial(jtnode->fromlist);
128 : }
129 30730 : if (!IsA(jtnode, RangeTblRef))
130 1468 : return;
131 29262 : rtr = (RangeTblRef *) jtnode;
132 29262 : rte = planner_rt_fetch(rtr->rtindex, root);
133 29262 : if (rte->rtekind == RTE_RELATION)
134 : /* ordinary relation, ok */ ;
135 5620 : else if (rte->rtekind == RTE_SUBQUERY && rte->inh)
136 : /* flattened UNION ALL subquery, ok */ ;
137 : else
138 5566 : return;
139 :
140 : /*
141 : * Examine all the aggregates and verify all are MIN/MAX aggregates. Stop
142 : * as soon as we find one that isn't.
143 : */
144 23696 : aggs_list = NIL;
145 23696 : if (!can_minmax_aggs(root, &aggs_list))
146 22974 : return;
147 :
148 : /*
149 : * OK, there is at least the possibility of performing the optimization.
150 : * Build an access path for each aggregate. If any of the aggregates
151 : * prove to be non-indexable, give up; there is no point in optimizing
152 : * just some of them.
153 : */
154 1168 : foreach(lc, aggs_list)
155 : {
156 758 : MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
157 : Oid eqop;
158 : bool reverse;
159 :
160 : /*
161 : * We'll need the equality operator that goes with the aggregate's
162 : * ordering operator.
163 : */
164 758 : eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse);
165 758 : if (!OidIsValid(eqop)) /* shouldn't happen */
166 0 : elog(ERROR, "could not find equality operator for ordering operator %u",
167 : mminfo->aggsortop);
168 :
169 : /*
170 : * We can use either an ordering that gives NULLS FIRST or one that
171 : * gives NULLS LAST; furthermore there's unlikely to be much
172 : * performance difference between them, so it doesn't seem worth
173 : * costing out both ways if we get a hit on the first one. NULLS
174 : * FIRST is more likely to be available if the operator is a
175 : * reverse-sort operator, so try that first if reverse.
176 : */
177 758 : if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse, reverse))
178 446 : continue;
179 312 : if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse, !reverse))
180 0 : continue;
181 :
182 : /* No indexable path for this aggregate, so fail */
183 312 : return;
184 : }
185 :
186 : /*
187 : * OK, we can do the query this way. Prepare to create a MinMaxAggPath
188 : * node.
189 : *
190 : * First, create an output Param node for each agg. (If we end up not
191 : * using the MinMaxAggPath, we'll waste a PARAM_EXEC slot for each agg,
192 : * which is not worth worrying about. We can't wait till create_plan time
193 : * to decide whether to make the Param, unfortunately.)
194 : */
195 856 : foreach(lc, aggs_list)
196 : {
197 446 : MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
198 :
199 446 : mminfo->param =
200 446 : SS_make_initplan_output_param(root,
201 446 : exprType((Node *) mminfo->target),
202 : -1,
203 446 : exprCollation((Node *) mminfo->target));
204 : }
205 :
206 : /*
207 : * Create a MinMaxAggPath node with the appropriate estimated costs and
208 : * other needed data, and add it to the UPPERREL_GROUP_AGG upperrel, where
209 : * it will compete against the standard aggregate implementation. (It
210 : * will likely always win, but we need not assume that here.)
211 : *
212 : * Note: grouping_planner won't have created this upperrel yet, but it's
213 : * fine for us to create it first. We will not have inserted the correct
214 : * consider_parallel value in it, but MinMaxAggPath paths are currently
215 : * never parallel-safe anyway, so that doesn't matter. Likewise, it
216 : * doesn't matter that we haven't filled FDW-related fields in the rel.
217 : * Also, because there are no rowmarks, we know that the processed_tlist
218 : * doesn't need to change anymore, so making the pathtarget now is safe.
219 : */
220 410 : grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
221 410 : add_path(grouped_rel, (Path *)
222 410 : create_minmaxagg_path(root, grouped_rel,
223 : create_pathtarget(root,
224 : root->processed_tlist),
225 : aggs_list,
226 410 : (List *) parse->havingQual));
227 : }
228 :
229 : /*
230 : * can_minmax_aggs
231 : * Examine all the aggregates in the query, and check if they are
232 : * all MIN/MAX aggregates. If so, build a list of MinMaxAggInfo
233 : * nodes for them.
234 : *
235 : * Returns false if a non-MIN/MAX aggregate is found, true otherwise.
236 : */
237 : static bool
238 23696 : can_minmax_aggs(PlannerInfo *root, List **context)
239 : {
240 : ListCell *lc;
241 :
242 : /*
243 : * This function used to have to scan the query for itself, but now we can
244 : * just thumb through the AggInfo list made by preprocess_aggrefs.
245 : */
246 24778 : foreach(lc, root->agginfos)
247 : {
248 24056 : AggInfo *agginfo = lfirst_node(AggInfo, lc);
249 24056 : Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
250 : Oid aggsortop;
251 : TargetEntry *curTarget;
252 : MinMaxAggInfo *mminfo;
253 :
254 : Assert(aggref->agglevelsup == 0);
255 24056 : if (list_length(aggref->args) != 1)
256 22974 : return false; /* it couldn't be MIN/MAX */
257 :
258 : /*
259 : * ORDER BY is usually irrelevant for MIN/MAX, but it can change the
260 : * outcome if the aggsortop's operator class recognizes non-identical
261 : * values as equal. For example, 4.0 and 4.00 are equal according to
262 : * numeric_ops, yet distinguishable. If MIN() receives more than one
263 : * value equal to 4.0 and no value less than 4.0, it is unspecified
264 : * which of those equal values MIN() returns. An ORDER BY expression
265 : * that differs for each of those equal values of the argument
266 : * expression makes the result predictable once again. This is a
267 : * niche requirement, and we do not implement it with subquery paths.
268 : * In any case, this test lets us reject ordered-set aggregates
269 : * quickly.
270 : */
271 14572 : if (aggref->aggorder != NIL)
272 524 : return false;
273 : /* note: we do not care if DISTINCT is mentioned ... */
274 :
275 : /*
276 : * We might implement the optimization when a FILTER clause is present
277 : * by adding the filter to the quals of the generated subquery. For
278 : * now, just punt.
279 : */
280 14048 : if (aggref->aggfilter != NULL)
281 442 : return false;
282 :
283 13606 : aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
284 13606 : if (!OidIsValid(aggsortop))
285 12494 : return false; /* not a MIN/MAX aggregate */
286 :
287 1112 : curTarget = (TargetEntry *) linitial(aggref->args);
288 :
289 1112 : if (contain_mutable_functions((Node *) curTarget->expr))
290 6 : return false; /* not potentially indexable */
291 :
292 1106 : if (type_is_rowtype(exprType((Node *) curTarget->expr)))
293 24 : return false; /* IS NOT NULL would have weird semantics */
294 :
295 1082 : mminfo = makeNode(MinMaxAggInfo);
296 1082 : mminfo->aggfnoid = aggref->aggfnoid;
297 1082 : mminfo->aggsortop = aggsortop;
298 1082 : mminfo->target = curTarget->expr;
299 1082 : mminfo->subroot = NULL; /* don't compute path yet */
300 1082 : mminfo->path = NULL;
301 1082 : mminfo->pathcost = 0;
302 1082 : mminfo->param = NULL;
303 :
304 1082 : *context = lappend(*context, mminfo);
305 : }
306 722 : return true;
307 : }
308 :
309 : /*
310 : * build_minmax_path
311 : * Given a MIN/MAX aggregate, try to build an indexscan Path it can be
312 : * optimized with.
313 : *
314 : * If successful, stash the best path in *mminfo and return true.
315 : * Otherwise, return false.
316 : */
317 : static bool
318 1070 : build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
319 : Oid eqop, Oid sortop, bool reverse_sort, bool nulls_first)
320 : {
321 : PlannerInfo *subroot;
322 : Query *parse;
323 : TargetEntry *tle;
324 : List *tlist;
325 : NullTest *ntest;
326 : SortGroupClause *sortcl;
327 : RelOptInfo *final_rel;
328 : Path *sorted_path;
329 : Cost path_cost;
330 : double path_fraction;
331 :
332 : /*
333 : * We are going to construct what is effectively a sub-SELECT query, so
334 : * clone the current query level's state and adjust it to make it look
335 : * like a subquery. Any outer references will now be one level higher
336 : * than before. (This means that when we are done, there will be no Vars
337 : * of level 1, which is why the subquery can become an initplan.)
338 : */
339 1070 : subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo));
340 1070 : memcpy(subroot, root, sizeof(PlannerInfo));
341 1070 : subroot->query_level++;
342 1070 : subroot->parent_root = root;
343 1070 : subroot->plan_name = choose_plan_name(root->glob, "minmax", true);
344 :
345 : /* reset subplan-related stuff */
346 1070 : subroot->plan_params = NIL;
347 1070 : subroot->outer_params = NULL;
348 1070 : subroot->init_plans = NIL;
349 1070 : subroot->agginfos = NIL;
350 1070 : subroot->aggtransinfos = NIL;
351 :
352 1070 : subroot->parse = parse = copyObject(root->parse);
353 1070 : IncrementVarSublevelsUp((Node *) parse, 1, 1);
354 :
355 : /* append_rel_list might contain outer Vars? */
356 1070 : subroot->append_rel_list = copyObject(root->append_rel_list);
357 1070 : IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1);
358 : /* There shouldn't be any OJ info to translate, as yet */
359 : Assert(subroot->join_info_list == NIL);
360 : /* and we haven't made equivalence classes, either */
361 : Assert(subroot->eq_classes == NIL);
362 : /* and we haven't created PlaceHolderInfos, either */
363 : Assert(subroot->placeholder_list == NIL);
364 :
365 : /*----------
366 : * Generate modified query of the form
367 : * (SELECT col FROM tab
368 : * WHERE col IS NOT NULL AND existing-quals
369 : * ORDER BY col ASC/DESC
370 : * LIMIT 1)
371 : *----------
372 : */
373 : /* single tlist entry that is the aggregate target */
374 1070 : tle = makeTargetEntry(copyObject(mminfo->target),
375 : (AttrNumber) 1,
376 : pstrdup("agg_target"),
377 : false);
378 1070 : tlist = list_make1(tle);
379 1070 : subroot->processed_tlist = parse->targetList = tlist;
380 :
381 : /* No HAVING, no DISTINCT, no aggregates anymore */
382 1070 : parse->havingQual = NULL;
383 1070 : subroot->hasHavingQual = false;
384 1070 : parse->distinctClause = NIL;
385 1070 : parse->hasDistinctOn = false;
386 1070 : parse->hasAggs = false;
387 :
388 : /* Build "target IS NOT NULL" expression */
389 1070 : ntest = makeNode(NullTest);
390 1070 : ntest->nulltesttype = IS_NOT_NULL;
391 1070 : ntest->arg = copyObject(mminfo->target);
392 : /* we checked it wasn't a rowtype in can_minmax_aggs */
393 1070 : ntest->argisrow = false;
394 1070 : ntest->location = -1;
395 :
396 : /* User might have had that in WHERE already */
397 1070 : if (!list_member((List *) parse->jointree->quals, ntest))
398 1070 : parse->jointree->quals = (Node *)
399 1070 : lcons(ntest, (List *) parse->jointree->quals);
400 :
401 : /* Build suitable ORDER BY clause */
402 1070 : sortcl = makeNode(SortGroupClause);
403 1070 : sortcl->tleSortGroupRef = assignSortGroupRef(tle, subroot->processed_tlist);
404 1070 : sortcl->eqop = eqop;
405 1070 : sortcl->sortop = sortop;
406 1070 : sortcl->reverse_sort = reverse_sort;
407 1070 : sortcl->nulls_first = nulls_first;
408 1070 : sortcl->hashable = false; /* no need to make this accurate */
409 1070 : parse->sortClause = list_make1(sortcl);
410 :
411 : /* set up expressions for LIMIT 1 */
412 1070 : parse->limitOffset = NULL;
413 1070 : parse->limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
414 : sizeof(int64),
415 : Int64GetDatum(1), false,
416 : true);
417 :
418 : /*
419 : * Generate the best paths for this query, telling query_planner that we
420 : * have LIMIT 1.
421 : */
422 1070 : subroot->tuple_fraction = 1.0;
423 1070 : subroot->limit_tuples = 1.0;
424 :
425 1070 : final_rel = query_planner(subroot, minmax_qp_callback, NULL);
426 :
427 : /*
428 : * Since we didn't go through subquery_planner() to handle the subquery,
429 : * we have to do some of the same cleanup it would do, in particular cope
430 : * with params and initplans used within this subquery. (This won't
431 : * matter if we end up not using the subplan.)
432 : */
433 1070 : SS_identify_outer_params(subroot);
434 1070 : SS_charge_for_initplans(subroot, final_rel);
435 :
436 : /*
437 : * Get the best presorted path, that being the one that's cheapest for
438 : * fetching just one row. If there's no such path, fail.
439 : */
440 1070 : if (final_rel->rows > 1.0)
441 1034 : path_fraction = 1.0 / final_rel->rows;
442 : else
443 36 : path_fraction = 1.0;
444 :
445 : sorted_path =
446 1070 : get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
447 : subroot->query_pathkeys,
448 : NULL,
449 : path_fraction);
450 1070 : if (!sorted_path)
451 624 : return false;
452 :
453 : /*
454 : * The path might not return exactly what we want, so fix that. (We
455 : * assume that this won't change any conclusions about which was the
456 : * cheapest path.)
457 : */
458 446 : sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path,
459 : create_pathtarget(subroot,
460 : subroot->processed_tlist));
461 :
462 : /*
463 : * Determine cost to get just the first row of the presorted path.
464 : *
465 : * Note: cost calculation here should match
466 : * compare_fractional_path_costs().
467 : */
468 446 : path_cost = sorted_path->startup_cost +
469 446 : path_fraction * (sorted_path->total_cost - sorted_path->startup_cost);
470 :
471 : /* Save state for further processing */
472 446 : mminfo->subroot = subroot;
473 446 : mminfo->path = sorted_path;
474 446 : mminfo->pathcost = path_cost;
475 :
476 446 : return true;
477 : }
478 :
479 : /*
480 : * Compute query_pathkeys and other pathkeys during query_planner()
481 : */
482 : static void
483 1070 : minmax_qp_callback(PlannerInfo *root, void *extra)
484 : {
485 1070 : root->group_pathkeys = NIL;
486 1070 : root->window_pathkeys = NIL;
487 1070 : root->distinct_pathkeys = NIL;
488 :
489 1070 : root->sort_pathkeys =
490 1070 : make_pathkeys_for_sortclauses(root,
491 1070 : root->parse->sortClause,
492 1070 : root->parse->targetList);
493 :
494 1070 : root->query_pathkeys = root->sort_pathkeys;
495 1070 : }
496 :
497 : /*
498 : * Get the OID of the sort operator, if any, associated with an aggregate.
499 : * Returns InvalidOid if there is no such operator.
500 : */
501 : static Oid
502 13606 : fetch_agg_sort_op(Oid aggfnoid)
503 : {
504 : HeapTuple aggTuple;
505 : Form_pg_aggregate aggform;
506 : Oid aggsortop;
507 :
508 : /* fetch aggregate entry from pg_aggregate */
509 13606 : aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid));
510 13606 : if (!HeapTupleIsValid(aggTuple))
511 0 : return InvalidOid;
512 13606 : aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
513 13606 : aggsortop = aggform->aggsortop;
514 13606 : ReleaseSysCache(aggTuple);
515 :
516 13606 : return aggsortop;
517 : }
|