Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * planner.c
4 : * The query optimizer external interface.
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/plan/planner.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include <limits.h>
19 : #include <math.h>
20 :
21 : #include "access/genam.h"
22 : #include "access/parallel.h"
23 : #include "access/sysattr.h"
24 : #include "access/table.h"
25 : #include "catalog/pg_aggregate.h"
26 : #include "catalog/pg_inherits.h"
27 : #include "catalog/pg_proc.h"
28 : #include "catalog/pg_type.h"
29 : #include "executor/executor.h"
30 : #include "foreign/fdwapi.h"
31 : #include "jit/jit.h"
32 : #include "lib/bipartite_match.h"
33 : #include "lib/knapsack.h"
34 : #include "miscadmin.h"
35 : #include "nodes/makefuncs.h"
36 : #include "nodes/nodeFuncs.h"
37 : #ifdef OPTIMIZER_DEBUG
38 : #include "nodes/print.h"
39 : #endif
40 : #include "nodes/supportnodes.h"
41 : #include "optimizer/appendinfo.h"
42 : #include "optimizer/clauses.h"
43 : #include "optimizer/cost.h"
44 : #include "optimizer/optimizer.h"
45 : #include "optimizer/paramassign.h"
46 : #include "optimizer/pathnode.h"
47 : #include "optimizer/paths.h"
48 : #include "optimizer/plancat.h"
49 : #include "optimizer/planmain.h"
50 : #include "optimizer/planner.h"
51 : #include "optimizer/prep.h"
52 : #include "optimizer/subselect.h"
53 : #include "optimizer/tlist.h"
54 : #include "parser/analyze.h"
55 : #include "parser/parse_agg.h"
56 : #include "parser/parse_clause.h"
57 : #include "parser/parse_relation.h"
58 : #include "parser/parsetree.h"
59 : #include "partitioning/partdesc.h"
60 : #include "rewrite/rewriteManip.h"
61 : #include "utils/backend_status.h"
62 : #include "utils/lsyscache.h"
63 : #include "utils/rel.h"
64 : #include "utils/selfuncs.h"
65 :
66 : /* GUC parameters */
67 : double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
68 : int debug_parallel_query = DEBUG_PARALLEL_OFF;
69 : bool parallel_leader_participation = true;
70 : bool enable_distinct_reordering = true;
71 :
72 : /* Hook for plugins to get control in planner() */
73 : planner_hook_type planner_hook = NULL;
74 :
75 : /* Hook for plugins to get control when grouping_planner() plans upper rels */
76 : create_upper_paths_hook_type create_upper_paths_hook = NULL;
77 :
78 :
79 : /* Expression kind codes for preprocess_expression */
80 : #define EXPRKIND_QUAL 0
81 : #define EXPRKIND_TARGET 1
82 : #define EXPRKIND_RTFUNC 2
83 : #define EXPRKIND_RTFUNC_LATERAL 3
84 : #define EXPRKIND_VALUES 4
85 : #define EXPRKIND_VALUES_LATERAL 5
86 : #define EXPRKIND_LIMIT 6
87 : #define EXPRKIND_APPINFO 7
88 : #define EXPRKIND_PHV 8
89 : #define EXPRKIND_TABLESAMPLE 9
90 : #define EXPRKIND_ARBITER_ELEM 10
91 : #define EXPRKIND_TABLEFUNC 11
92 : #define EXPRKIND_TABLEFUNC_LATERAL 12
93 : #define EXPRKIND_GROUPEXPR 13
94 :
95 : /*
96 : * Data specific to grouping sets
97 : */
98 : typedef struct
99 : {
100 : List *rollups;
101 : List *hash_sets_idx;
102 : double dNumHashGroups;
103 : bool any_hashable;
104 : Bitmapset *unsortable_refs;
105 : Bitmapset *unhashable_refs;
106 : List *unsortable_sets;
107 : int *tleref_to_colnum_map;
108 : } grouping_sets_data;
109 :
110 : /*
111 : * Temporary structure used during WindowClause reordering, so that
112 : * WindowClauses can be sorted on their partitioning/ordering prefix.
113 : */
114 : typedef struct
115 : {
116 : WindowClause *wc;
117 : List *uniqueOrder; /* A List of unique ordering/partitioning
118 : * clauses per Window */
119 : } WindowClauseSortData;
120 :
121 : /* Passthrough data for standard_qp_callback */
122 : typedef struct
123 : {
124 : List *activeWindows; /* active windows, if any */
125 : grouping_sets_data *gset_data; /* grouping sets data, if any */
126 : SetOperationStmt *setop; /* parent set operation or NULL if not a
127 : * subquery belonging to a set operation */
128 : } standard_qp_extra;
129 :
130 : /* Local functions */
131 : static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
132 : static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
133 : static void grouping_planner(PlannerInfo *root, double tuple_fraction,
134 : SetOperationStmt *setops);
135 : static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
136 : static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
137 : int *tleref_to_colnum_map);
138 : static void preprocess_rowmarks(PlannerInfo *root);
139 : static double preprocess_limit(PlannerInfo *root,
140 : double tuple_fraction,
141 : int64 *offset_est, int64 *count_est);
142 : static List *preprocess_groupclause(PlannerInfo *root, List *force);
143 : static List *extract_rollup_sets(List *groupingSets);
144 : static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
145 : static void standard_qp_callback(PlannerInfo *root, void *extra);
146 : static double get_number_of_groups(PlannerInfo *root,
147 : double path_rows,
148 : grouping_sets_data *gd,
149 : List *target_list);
150 : static RelOptInfo *create_grouping_paths(PlannerInfo *root,
151 : RelOptInfo *input_rel,
152 : PathTarget *target,
153 : bool target_parallel_safe,
154 : grouping_sets_data *gd);
155 : static bool is_degenerate_grouping(PlannerInfo *root);
156 : static void create_degenerate_grouping_paths(PlannerInfo *root,
157 : RelOptInfo *input_rel,
158 : RelOptInfo *grouped_rel);
159 : static RelOptInfo *make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
160 : PathTarget *target, bool target_parallel_safe,
161 : Node *havingQual);
162 : static void create_ordinary_grouping_paths(PlannerInfo *root,
163 : RelOptInfo *input_rel,
164 : RelOptInfo *grouped_rel,
165 : const AggClauseCosts *agg_costs,
166 : grouping_sets_data *gd,
167 : GroupPathExtraData *extra,
168 : RelOptInfo **partially_grouped_rel_p);
169 : static void consider_groupingsets_paths(PlannerInfo *root,
170 : RelOptInfo *grouped_rel,
171 : Path *path,
172 : bool is_sorted,
173 : bool can_hash,
174 : grouping_sets_data *gd,
175 : const AggClauseCosts *agg_costs,
176 : double dNumGroups);
177 : static RelOptInfo *create_window_paths(PlannerInfo *root,
178 : RelOptInfo *input_rel,
179 : PathTarget *input_target,
180 : PathTarget *output_target,
181 : bool output_target_parallel_safe,
182 : WindowFuncLists *wflists,
183 : List *activeWindows);
184 : static void create_one_window_path(PlannerInfo *root,
185 : RelOptInfo *window_rel,
186 : Path *path,
187 : PathTarget *input_target,
188 : PathTarget *output_target,
189 : WindowFuncLists *wflists,
190 : List *activeWindows);
191 : static RelOptInfo *create_distinct_paths(PlannerInfo *root,
192 : RelOptInfo *input_rel,
193 : PathTarget *target);
194 : static void create_partial_distinct_paths(PlannerInfo *root,
195 : RelOptInfo *input_rel,
196 : RelOptInfo *final_distinct_rel,
197 : PathTarget *target);
198 : static RelOptInfo *create_final_distinct_paths(PlannerInfo *root,
199 : RelOptInfo *input_rel,
200 : RelOptInfo *distinct_rel);
201 : static List *get_useful_pathkeys_for_distinct(PlannerInfo *root,
202 : List *needed_pathkeys,
203 : List *path_pathkeys);
204 : static RelOptInfo *create_ordered_paths(PlannerInfo *root,
205 : RelOptInfo *input_rel,
206 : PathTarget *target,
207 : bool target_parallel_safe,
208 : double limit_tuples);
209 : static PathTarget *make_group_input_target(PlannerInfo *root,
210 : PathTarget *final_target);
211 : static PathTarget *make_partial_grouping_target(PlannerInfo *root,
212 : PathTarget *grouping_target,
213 : Node *havingQual);
214 : static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
215 : static void optimize_window_clauses(PlannerInfo *root,
216 : WindowFuncLists *wflists);
217 : static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
218 : static void name_active_windows(List *activeWindows);
219 : static PathTarget *make_window_input_target(PlannerInfo *root,
220 : PathTarget *final_target,
221 : List *activeWindows);
222 : static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
223 : List *tlist);
224 : static PathTarget *make_sort_input_target(PlannerInfo *root,
225 : PathTarget *final_target,
226 : bool *have_postponed_srfs);
227 : static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
228 : List *targets, List *targets_contain_srfs);
229 : static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
230 : RelOptInfo *grouped_rel,
231 : RelOptInfo *partially_grouped_rel,
232 : const AggClauseCosts *agg_costs,
233 : grouping_sets_data *gd,
234 : double dNumGroups,
235 : GroupPathExtraData *extra);
236 : static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root,
237 : RelOptInfo *grouped_rel,
238 : RelOptInfo *input_rel,
239 : grouping_sets_data *gd,
240 : GroupPathExtraData *extra,
241 : bool force_rel_creation);
242 : static Path *make_ordered_path(PlannerInfo *root,
243 : RelOptInfo *rel,
244 : Path *path,
245 : Path *cheapest_path,
246 : List *pathkeys,
247 : double limit_tuples);
248 : static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel);
249 : static bool can_partial_agg(PlannerInfo *root);
250 : static void apply_scanjoin_target_to_paths(PlannerInfo *root,
251 : RelOptInfo *rel,
252 : List *scanjoin_targets,
253 : List *scanjoin_targets_contain_srfs,
254 : bool scanjoin_target_parallel_safe,
255 : bool tlist_same_exprs);
256 : static void create_partitionwise_grouping_paths(PlannerInfo *root,
257 : RelOptInfo *input_rel,
258 : RelOptInfo *grouped_rel,
259 : RelOptInfo *partially_grouped_rel,
260 : const AggClauseCosts *agg_costs,
261 : grouping_sets_data *gd,
262 : PartitionwiseAggregateType patype,
263 : GroupPathExtraData *extra);
264 : static bool group_by_has_partkey(RelOptInfo *input_rel,
265 : List *targetList,
266 : List *groupClause);
267 : static int common_prefix_cmp(const void *a, const void *b);
268 : static List *generate_setop_child_grouplist(SetOperationStmt *op,
269 : List *targetlist);
270 :
271 :
272 : /*****************************************************************************
273 : *
274 : * Query optimizer entry point
275 : *
276 : * To support loadable plugins that monitor or modify planner behavior,
277 : * we provide a hook variable that lets a plugin get control before and
278 : * after the standard planning process. The plugin would normally call
279 : * standard_planner().
280 : *
281 : * Note to plugin authors: standard_planner() scribbles on its Query input,
282 : * so you'd better copy that data structure if you want to plan more than once.
283 : *
284 : *****************************************************************************/
285 : PlannedStmt *
286 454880 : planner(Query *parse, const char *query_string, int cursorOptions,
287 : ParamListInfo boundParams)
288 : {
289 : PlannedStmt *result;
290 :
291 454880 : if (planner_hook)
292 93878 : result = (*planner_hook) (parse, query_string, cursorOptions, boundParams);
293 : else
294 361002 : result = standard_planner(parse, query_string, cursorOptions, boundParams);
295 :
296 450546 : pgstat_report_plan_id(result->planId, false);
297 :
298 450546 : return result;
299 : }
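/*
 * Editor's sketch (not part of planner.c): the conventional way an extension
 * installs planner_hook from its _PG_init(), chaining to any previously
 * installed hook and otherwise falling back to standard_planner().  The
 * names prev_planner_hook and my_planner are hypothetical, and the usual
 * loadable-module boilerplate (#include "fmgr.h", PG_MODULE_MAGIC) is
 * assumed.
 */
static planner_hook_type prev_planner_hook = NULL;

static PlannedStmt *
my_planner(Query *parse, const char *query_string, int cursorOptions,
		   ParamListInfo boundParams)
{
	/* inspect the Query here; copy it first if planning it more than once */
	if (prev_planner_hook)
		return prev_planner_hook(parse, query_string, cursorOptions,
								 boundParams);
	return standard_planner(parse, query_string, cursorOptions, boundParams);
}

void
_PG_init(void)
{
	prev_planner_hook = planner_hook;
	planner_hook = my_planner;
}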
300 :
301 : PlannedStmt *
302 454880 : standard_planner(Query *parse, const char *query_string, int cursorOptions,
303 : ParamListInfo boundParams)
304 : {
305 : PlannedStmt *result;
306 : PlannerGlobal *glob;
307 : double tuple_fraction;
308 : PlannerInfo *root;
309 : RelOptInfo *final_rel;
310 : Path *best_path;
311 : Plan *top_plan;
312 : ListCell *lp,
313 : *lr;
314 :
315 : /*
316 : * Set up global state for this planner invocation. This data is needed
317 : * across all levels of sub-Query that might exist in the given command,
318 : * so we keep it in a separate struct that's linked to by each per-Query
319 : * PlannerInfo.
320 : */
321 454880 : glob = makeNode(PlannerGlobal);
322 :
323 454880 : glob->boundParams = boundParams;
324 454880 : glob->subplans = NIL;
325 454880 : glob->subpaths = NIL;
326 454880 : glob->subroots = NIL;
327 454880 : glob->rewindPlanIDs = NULL;
328 454880 : glob->finalrtable = NIL;
329 454880 : glob->allRelids = NULL;
330 454880 : glob->prunableRelids = NULL;
331 454880 : glob->finalrteperminfos = NIL;
332 454880 : glob->finalrowmarks = NIL;
333 454880 : glob->resultRelations = NIL;
334 454880 : glob->appendRelations = NIL;
335 454880 : glob->partPruneInfos = NIL;
336 454880 : glob->relationOids = NIL;
337 454880 : glob->invalItems = NIL;
338 454880 : glob->paramExecTypes = NIL;
339 454880 : glob->lastPHId = 0;
340 454880 : glob->lastRowMarkId = 0;
341 454880 : glob->lastPlanNodeId = 0;
342 454880 : glob->transientPlan = false;
343 454880 : glob->dependsOnRole = false;
344 454880 : glob->partition_directory = NULL;
345 :
346 : /*
347 : * Assess whether it's feasible to use parallel mode for this query. We
348 : * can't do this in a standalone backend, or if the command will try to
349 : * modify any data, or if this is a cursor operation, or if GUCs are set
350 : * to values that don't permit parallelism, or if parallel-unsafe
351 : * functions are present in the query tree.
352 : *
353 : * (Note that we do allow CREATE TABLE AS, SELECT INTO, and CREATE
354 : * MATERIALIZED VIEW to use parallel plans, but this is safe only because
355 : * the command is writing into a completely new table which workers won't
356 : * be able to see. If the workers could see the table, the fact that
357 : * group locking would cause them to ignore the leader's heavyweight GIN
358 : * page locks would make this unsafe. We'll have to fix that somehow if
359 : * we want to allow parallel inserts in general; updates and deletes have
360 : * additional problems especially around combo CIDs.)
361 : *
362 : * For now, we don't try to use parallel mode if we're running inside a
363 : * parallel worker. We might eventually be able to relax this
364 : * restriction, but for now it seems best not to have parallel workers
365 : * trying to create their own parallel workers.
366 : */
367 454880 : if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
368 426802 : IsUnderPostmaster &&
369 426802 : parse->commandType == CMD_SELECT &&
370 343194 : !parse->hasModifyingCTE &&
371 343054 : max_parallel_workers_per_gather > 0 &&
372 342450 : !IsParallelWorker())
373 : {
374 : /* all the cheap tests pass, so scan the query tree */
375 342402 : glob->maxParallelHazard = max_parallel_hazard(parse);
376 342402 : glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE);
377 : }
378 : else
379 : {
380 : /* skip the query tree scan, just assume it's unsafe */
381 112478 : glob->maxParallelHazard = PROPARALLEL_UNSAFE;
382 112478 : glob->parallelModeOK = false;
383 : }
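	/*
	 * Worked example (editor's note): a query such as
	 * "SELECT setval('s', 42) FROM t" calls a parallel-unsafe function, so
	 * max_parallel_hazard() returns PROPARALLEL_UNSAFE ('u') and
	 * parallelModeOK comes out false; a query built only from parallel-safe
	 * constructs yields PROPARALLEL_SAFE ('s') and parallelModeOK is true.
	 */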
384 :
385 : /*
386 : * glob->parallelModeNeeded is normally set to false here and changed to
387 : * true during plan creation if a Gather or Gather Merge plan is actually
388 : * created (cf. create_gather_plan, create_gather_merge_plan).
389 : *
390 : * However, if debug_parallel_query = on or debug_parallel_query =
391 : * regress, then we impose parallel mode whenever it's safe to do so, even
392 : * if the final plan doesn't use parallelism. It's not safe to do so if
393 : * the query contains anything parallel-unsafe; parallelModeOK will be
394 : * false in that case. Note that parallelModeOK can't change after this
395 : * point. Otherwise, everything in the query is either parallel-safe or
396 : * parallel-restricted, and in either case it should be OK to impose
397 : * parallel-mode restrictions. If that ends up breaking something, then
398 : * either some function the user included in the query is incorrectly
399 : * labeled as parallel-safe or parallel-restricted when in reality it's
400 : * parallel-unsafe, or else the query planner itself has a bug.
401 : */
402 748984 : glob->parallelModeNeeded = glob->parallelModeOK &&
403 294104 : (debug_parallel_query != DEBUG_PARALLEL_OFF);
404 :
405 : /* Determine what fraction of the plan is likely to be scanned */
406 454880 : if (cursorOptions & CURSOR_OPT_FAST_PLAN)
407 : {
408 : /*
409 : * We have no real idea how many tuples the user will ultimately FETCH
410 : * from a cursor, but it is often the case that they don't want them
411 : * all, or would prefer a fast-start plan anyway so that they can
412 : * process some of the tuples sooner. Use a GUC parameter to decide
413 : * what fraction to optimize for.
414 : */
415 4700 : tuple_fraction = cursor_tuple_fraction;
416 :
417 : /*
418 : * We document cursor_tuple_fraction as simply being a fraction, which
419 : * means the edge cases 0 and 1 have to be treated specially here. We
420 : * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
421 : */
422 4700 : if (tuple_fraction >= 1.0)
423 0 : tuple_fraction = 0.0;
424 4700 : else if (tuple_fraction <= 0.0)
425 0 : tuple_fraction = 1e-10;
426 : }
427 : else
428 : {
429 : /* Default assumption is we need all the tuples */
430 450180 : tuple_fraction = 0.0;
431 : }
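	/*
	 * Worked example (editor's note): with the default cursor_tuple_fraction
	 * of 0.1, a DECLARE CURSOR plan is optimized on the assumption that
	 * about 10% of the result will be fetched; a setting of 1.0 maps to 0.0
	 * here ("fetch everything"), while 0.0 maps to 1e-10 (strongly favor
	 * fast-start plans).
	 */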
432 :
433 : /* primary planning entry point (may recurse for subqueries) */
434 454880 : root = subquery_planner(glob, parse, NULL, false, tuple_fraction, NULL);
435 :
436 : /* Select best Path and turn it into a Plan */
437 450942 : final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
438 450942 : best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
439 :
440 450942 : top_plan = create_plan(root, best_path);
441 :
442 : /*
443 : * If creating a plan for a scrollable cursor, make sure it can run
444 : * backwards on demand. Add a Material node at the top if needed.
445 : */
446 450546 : if (cursorOptions & CURSOR_OPT_SCROLL)
447 : {
448 266 : if (!ExecSupportsBackwardScan(top_plan))
449 32 : top_plan = materialize_finished_plan(top_plan);
450 : }
451 :
452 : /*
453 : * Optionally add a Gather node for testing purposes, provided this is
454 : * actually a safe thing to do.
455 : *
456 : * We can add Gather even when top_plan has parallel-safe initPlans, but
457 : * then we have to move the initPlans to the Gather node because of
458 : * SS_finalize_plan's limitations. That would cause cosmetic breakage of
459 : * regression tests when debug_parallel_query = regress, because initPlans
460 : * that would normally appear on the top_plan move to the Gather, causing
461 : * them to disappear from EXPLAIN output. That doesn't seem worth kluging
462 : * EXPLAIN to hide, so skip it when debug_parallel_query = regress.
463 : */
464 450546 : if (debug_parallel_query != DEBUG_PARALLEL_OFF &&
465 194 : top_plan->parallel_safe &&
466 128 : (top_plan->initPlan == NIL ||
467 0 : debug_parallel_query != DEBUG_PARALLEL_REGRESS))
468 : {
469 128 : Gather *gather = makeNode(Gather);
470 : Cost initplan_cost;
471 : bool unsafe_initplans;
472 :
473 128 : gather->plan.targetlist = top_plan->targetlist;
474 128 : gather->plan.qual = NIL;
475 128 : gather->plan.lefttree = top_plan;
476 128 : gather->plan.righttree = NULL;
477 128 : gather->num_workers = 1;
478 128 : gather->single_copy = true;
479 128 : gather->invisible = (debug_parallel_query == DEBUG_PARALLEL_REGRESS);
480 :
481 : /* Transfer any initPlans to the new top node */
482 128 : gather->plan.initPlan = top_plan->initPlan;
483 128 : top_plan->initPlan = NIL;
484 :
485 : /*
486 : * Since this Gather has no parallel-aware descendants to signal to,
487 : * we don't need a rescan Param.
488 : */
489 128 : gather->rescan_param = -1;
490 :
491 : /*
492 : * Ideally we'd use cost_gather here, but setting up dummy path data
493 : * to satisfy it doesn't seem much cleaner than knowing what it does.
494 : */
495 128 : gather->plan.startup_cost = top_plan->startup_cost +
496 : parallel_setup_cost;
497 128 : gather->plan.total_cost = top_plan->total_cost +
498 128 : parallel_setup_cost + parallel_tuple_cost * top_plan->plan_rows;
499 128 : gather->plan.plan_rows = top_plan->plan_rows;
500 128 : gather->plan.plan_width = top_plan->plan_width;
501 128 : gather->plan.parallel_aware = false;
502 128 : gather->plan.parallel_safe = false;
503 :
504 : /*
505 : * Delete the initplans' cost from top_plan. We needn't add it to the
506 : * Gather node, since the above coding already included it there.
507 : */
508 128 : SS_compute_initplan_cost(gather->plan.initPlan,
509 : &initplan_cost, &unsafe_initplans);
510 128 : top_plan->startup_cost -= initplan_cost;
511 128 : top_plan->total_cost -= initplan_cost;
512 :
513 : /* use parallel mode for parallel plans. */
514 128 : root->glob->parallelModeNeeded = true;
515 :
516 128 : top_plan = &gather->plan;
517 : }
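	/*
	 * Worked example (editor's note): with the default parallel_setup_cost
	 * of 1000 and parallel_tuple_cost of 0.1, a top_plan with
	 * total_cost = 500 and plan_rows = 1000 gives the Gather
	 * total_cost = 500 + 1000 + 0.1 * 1000 = 1600.
	 */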
518 :
519 : /*
520 : * If any Params were generated, run through the plan tree and compute
521 : * each plan node's extParam/allParam sets. Ideally we'd merge this into
522 : * set_plan_references' tree traversal, but for now it has to be separate
523 : * because we need to visit subplans before, not after, the main plan.
524 : */
525 450546 : if (glob->paramExecTypes != NIL)
526 : {
527 : Assert(list_length(glob->subplans) == list_length(glob->subroots));
528 201334 : forboth(lp, glob->subplans, lr, glob->subroots)
529 : {
530 45740 : Plan *subplan = (Plan *) lfirst(lp);
531 45740 : PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
532 :
533 45740 : SS_finalize_plan(subroot, subplan);
534 : }
535 155594 : SS_finalize_plan(root, top_plan);
536 : }
537 :
538 : /* final cleanup of the plan */
539 : Assert(glob->finalrtable == NIL);
540 : Assert(glob->finalrteperminfos == NIL);
541 : Assert(glob->finalrowmarks == NIL);
542 : Assert(glob->resultRelations == NIL);
543 : Assert(glob->appendRelations == NIL);
544 450546 : top_plan = set_plan_references(root, top_plan);
545 : /* ... and the subplans (both regular subplans and initplans) */
546 : Assert(list_length(glob->subplans) == list_length(glob->subroots));
547 496286 : forboth(lp, glob->subplans, lr, glob->subroots)
548 : {
549 45740 : Plan *subplan = (Plan *) lfirst(lp);
550 45740 : PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
551 :
552 45740 : lfirst(lp) = set_plan_references(subroot, subplan);
553 : }
554 :
555 : /* build the PlannedStmt result */
556 450546 : result = makeNode(PlannedStmt);
557 :
558 450546 : result->commandType = parse->commandType;
559 450546 : result->queryId = parse->queryId;
560 450546 : result->hasReturning = (parse->returningList != NIL);
561 450546 : result->hasModifyingCTE = parse->hasModifyingCTE;
562 450546 : result->canSetTag = parse->canSetTag;
563 450546 : result->transientPlan = glob->transientPlan;
564 450546 : result->dependsOnRole = glob->dependsOnRole;
565 450546 : result->parallelModeNeeded = glob->parallelModeNeeded;
566 450546 : result->planTree = top_plan;
567 450546 : result->partPruneInfos = glob->partPruneInfos;
568 450546 : result->rtable = glob->finalrtable;
569 901092 : result->unprunableRelids = bms_difference(glob->allRelids,
570 450546 : glob->prunableRelids);
571 450546 : result->permInfos = glob->finalrteperminfos;
572 450546 : result->resultRelations = glob->resultRelations;
573 450546 : result->appendRelations = glob->appendRelations;
574 450546 : result->subplans = glob->subplans;
575 450546 : result->rewindPlanIDs = glob->rewindPlanIDs;
576 450546 : result->rowMarks = glob->finalrowmarks;
577 450546 : result->relationOids = glob->relationOids;
578 450546 : result->invalItems = glob->invalItems;
579 450546 : result->paramExecTypes = glob->paramExecTypes;
580 : /* utilityStmt should be null, but we might as well copy it */
581 450546 : result->utilityStmt = parse->utilityStmt;
582 450546 : result->stmt_location = parse->stmt_location;
583 450546 : result->stmt_len = parse->stmt_len;
584 :
585 450546 : result->jitFlags = PGJIT_NONE;
586 450546 : if (jit_enabled && jit_above_cost >= 0 &&
587 449788 : top_plan->total_cost > jit_above_cost)
588 : {
589 940 : result->jitFlags |= PGJIT_PERFORM;
590 :
591 : /*
592 : * Decide how much effort should be put into generating better code.
593 : */
594 940 : if (jit_optimize_above_cost >= 0 &&
595 940 : top_plan->total_cost > jit_optimize_above_cost)
596 432 : result->jitFlags |= PGJIT_OPT3;
597 940 : if (jit_inline_above_cost >= 0 &&
598 940 : top_plan->total_cost > jit_inline_above_cost)
599 432 : result->jitFlags |= PGJIT_INLINE;
600 :
601 : /*
602 : * Decide which operations should be JITed.
603 : */
604 940 : if (jit_expressions)
605 940 : result->jitFlags |= PGJIT_EXPR;
606 940 : if (jit_tuple_deforming)
607 940 : result->jitFlags |= PGJIT_DEFORM;
608 : }
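	/*
	 * Worked example (editor's note): with the stock defaults
	 * (jit_above_cost = 100000, jit_optimize_above_cost = 500000,
	 * jit_inline_above_cost = 500000, jit_expressions and
	 * jit_tuple_deforming on), a plan costing 600000 gets PGJIT_PERFORM |
	 * PGJIT_OPT3 | PGJIT_INLINE | PGJIT_EXPR | PGJIT_DEFORM, while one
	 * costing 200000 gets only PGJIT_PERFORM | PGJIT_EXPR | PGJIT_DEFORM.
	 */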
609 :
610 450546 : if (glob->partition_directory != NULL)
611 11552 : DestroyPartitionDirectory(glob->partition_directory);
612 :
613 450546 : return result;
614 : }
615 :
616 :
617 : /*--------------------
618 : * subquery_planner
619 : * Invokes the planner on a subquery. We recurse to here for each
620 : * sub-SELECT found in the query tree.
621 : *
622 : * glob is the global state for the current planner run.
623 : * parse is the querytree produced by the parser & rewriter.
624 : * parent_root is the immediate parent Query's info (NULL at the top level).
625 : * hasRecursion is true if this is a recursive WITH query.
626 : * tuple_fraction is the fraction of tuples we expect will be retrieved.
627 : * tuple_fraction is interpreted as explained for grouping_planner, below.
628 : * setops is used for set operation subqueries to provide the subquery with
629 : * the context in which it's being used so that Paths correctly sorted for the
630 : * set operation can be generated. NULL when not planning a set operation
631 : * child, or when planning a child of a set op that isn't interested in sorted input.
632 : *
633 : * Basically, this routine does the stuff that should only be done once
634 : * per Query object. It then calls grouping_planner. At one time,
635 : * grouping_planner could be invoked recursively on the same Query object;
636 : * that's not currently true, but we keep the separation between the two
637 : * routines anyway, in case we need it again someday.
638 : *
639 : * subquery_planner will be called recursively to handle sub-Query nodes
640 : * found within the query's expressions and rangetable.
641 : *
642 : * Returns the PlannerInfo struct ("root") that contains all data generated
643 : * while planning the subquery. In particular, the Path(s) attached to
644 : * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
645 : * cheapest way(s) to implement the query. The top level will select the
646 : * best Path and pass it through createplan.c to produce a finished Plan.
647 : *--------------------
648 : */
649 : PlannerInfo *
650 528074 : subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
651 : bool hasRecursion, double tuple_fraction,
652 : SetOperationStmt *setops)
653 : {
654 : PlannerInfo *root;
655 : List *newWithCheckOptions;
656 : List *newHaving;
657 : bool hasOuterJoins;
658 : bool hasResultRTEs;
659 : RelOptInfo *final_rel;
660 : ListCell *l;
661 :
662 : /* Create a PlannerInfo data structure for this subquery */
663 528074 : root = makeNode(PlannerInfo);
664 528074 : root->parse = parse;
665 528074 : root->glob = glob;
666 528074 : root->query_level = parent_root ? parent_root->query_level + 1 : 1;
667 528074 : root->parent_root = parent_root;
668 528074 : root->plan_params = NIL;
669 528074 : root->outer_params = NULL;
670 528074 : root->planner_cxt = CurrentMemoryContext;
671 528074 : root->init_plans = NIL;
672 528074 : root->cte_plan_ids = NIL;
673 528074 : root->multiexpr_params = NIL;
674 528074 : root->join_domains = NIL;
675 528074 : root->eq_classes = NIL;
676 528074 : root->ec_merging_done = false;
677 528074 : root->last_rinfo_serial = 0;
678 528074 : root->all_result_relids =
679 528074 : parse->resultRelation ? bms_make_singleton(parse->resultRelation) : NULL;
680 528074 : root->leaf_result_relids = NULL; /* we'll find out leaf-ness later */
681 528074 : root->append_rel_list = NIL;
682 528074 : root->row_identity_vars = NIL;
683 528074 : root->rowMarks = NIL;
684 528074 : memset(root->upper_rels, 0, sizeof(root->upper_rels));
685 528074 : memset(root->upper_targets, 0, sizeof(root->upper_targets));
686 528074 : root->processed_groupClause = NIL;
687 528074 : root->processed_distinctClause = NIL;
688 528074 : root->processed_tlist = NIL;
689 528074 : root->update_colnos = NIL;
690 528074 : root->grouping_map = NULL;
691 528074 : root->minmax_aggs = NIL;
692 528074 : root->qual_security_level = 0;
693 528074 : root->hasPseudoConstantQuals = false;
694 528074 : root->hasAlternativeSubPlans = false;
695 528074 : root->placeholdersFrozen = false;
696 528074 : root->hasRecursion = hasRecursion;
697 528074 : if (hasRecursion)
698 1010 : root->wt_param_id = assign_special_exec_param(root);
699 : else
700 527064 : root->wt_param_id = -1;
701 528074 : root->non_recursive_path = NULL;
702 528074 : root->partColsUpdated = false;
703 :
704 : /*
705 : * Create the top-level join domain. This won't have valid contents until
706 : * deconstruct_jointree fills it in, but the node needs to exist before
707 : * that so we can build EquivalenceClasses referencing it.
708 : */
709 528074 : root->join_domains = list_make1(makeNode(JoinDomain));
710 :
711 : /*
712 : * If there is a WITH list, process each WITH query and either convert it
713 : * to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it.
714 : */
715 528074 : if (parse->cteList)
716 2880 : SS_process_ctes(root);
717 :
718 : /*
719 : * If it's a MERGE command, transform the joinlist as appropriate.
720 : */
721 528068 : transform_MERGE_to_join(parse);
722 :
723 : /*
724 : * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so
725 : * that we don't need so many special cases to deal with that situation.
726 : */
727 528068 : replace_empty_jointree(parse);
728 :
729 : /*
730 : * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
731 : * to transform them into joins. Note that this step does not descend
732 : * into subqueries; if we pull up any subqueries below, their SubLinks are
733 : * processed just before pulling them up.
734 : */
735 528068 : if (parse->hasSubLinks)
736 37844 : pull_up_sublinks(root);
737 :
738 : /*
739 : * Scan the rangetable for function RTEs, do const-simplification on them,
740 : * and then inline them if possible (producing subqueries that might get
741 : * pulled up next). Recursion issues here are handled in the same way as
742 : * for SubLinks.
743 : */
744 528068 : preprocess_function_rtes(root);
745 :
746 : /*
747 : * Scan the rangetable for relations with virtual generated columns, and
748 : * replace all Var nodes in the query that reference these columns with
749 : * the generation expressions. Recursion issues here are handled in the
750 : * same way as for SubLinks.
751 : */
752 528062 : parse = root->parse = expand_virtual_generated_columns(root);
753 :
754 : /*
755 : * Check to see if any subqueries in the jointree can be merged into this
756 : * query.
757 : */
758 528062 : pull_up_subqueries(root);
759 :
760 : /*
761 : * If this is a simple UNION ALL query, flatten it into an appendrel. We
762 : * do this now because it requires applying pull_up_subqueries to the leaf
763 : * queries of the UNION ALL, which weren't touched above because they
764 : * weren't referenced by the jointree (they will be after we do this).
765 : */
766 528056 : if (parse->setOperations)
767 6600 : flatten_simple_union_all(root);
768 :
769 : /*
770 : * Survey the rangetable to see what kinds of entries are present. We can
771 : * skip some later processing if relevant SQL features are not used; for
772 : * example if there are no JOIN RTEs we can avoid the expense of doing
773 : * flatten_join_alias_vars(). This must be done after we have finished
774 : * adding rangetable entries, of course. (Note: actually, processing of
775 : * inherited or partitioned rels can cause RTEs for their child tables to
776 : * get added later; but those must all be RTE_RELATION entries, so they
777 : * don't invalidate the conclusions drawn here.)
778 : */
779 528056 : root->hasJoinRTEs = false;
780 528056 : root->hasLateralRTEs = false;
781 528056 : root->group_rtindex = 0;
782 528056 : hasOuterJoins = false;
783 528056 : hasResultRTEs = false;
784 1429686 : foreach(l, parse->rtable)
785 : {
786 901630 : RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
787 :
788 901630 : switch (rte->rtekind)
789 : {
790 472068 : case RTE_RELATION:
791 472068 : if (rte->inh)
792 : {
793 : /*
794 : * Check to see if the relation actually has any children;
795 : * if not, clear the inh flag so we can treat it as a
796 : * plain base relation.
797 : *
798 : * Note: this could give a false-positive result, if the
799 : * rel once had children but no longer does. We used to
800 : * be able to clear rte->inh later on when we discovered
801 : * that, but no more; we have to handle such cases as
802 : * full-fledged inheritance.
803 : */
804 387970 : rte->inh = has_subclass(rte->relid);
805 : }
806 472068 : break;
807 88120 : case RTE_JOIN:
808 88120 : root->hasJoinRTEs = true;
809 88120 : if (IS_OUTER_JOIN(rte->jointype))
810 48784 : hasOuterJoins = true;
811 88120 : break;
812 202708 : case RTE_RESULT:
813 202708 : hasResultRTEs = true;
814 202708 : break;
815 4470 : case RTE_GROUP:
816 : Assert(parse->hasGroupRTE);
817 4470 : root->group_rtindex = list_cell_number(parse->rtable, l) + 1;
818 4470 : break;
819 134264 : default:
820 : /* No work here for other RTE types */
821 134264 : break;
822 : }
823 :
824 901630 : if (rte->lateral)
825 10506 : root->hasLateralRTEs = true;
826 :
827 : /*
828 : * We can also determine the maximum security level required for any
829 : * securityQuals now. Addition of inheritance-child RTEs won't affect
830 : * this, because child tables don't have their own securityQuals; see
831 : * expand_single_inheritance_child().
832 : */
833 901630 : if (rte->securityQuals)
834 2508 : root->qual_security_level = Max(root->qual_security_level,
835 : list_length(rte->securityQuals));
836 : }
837 :
838 : /*
839 : * If we have now verified that the query target relation is
840 : * non-inheriting, mark it as a leaf target.
841 : */
842 528056 : if (parse->resultRelation)
843 : {
844 90350 : RangeTblEntry *rte = rt_fetch(parse->resultRelation, parse->rtable);
845 :
846 90350 : if (!rte->inh)
847 87504 : root->leaf_result_relids =
848 87504 : bms_make_singleton(parse->resultRelation);
849 : }
850 :
851 : /*
852 : * Preprocess RowMark information. We need to do this after subquery
853 : * pullup, so that all base relations are present.
854 : */
855 528056 : preprocess_rowmarks(root);
856 :
857 : /*
858 : * Set hasHavingQual to remember if a HAVING clause is present. Needed
859 : * because preprocess_expression will reduce a constant-true condition to
860 : * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
861 : */
862 528056 : root->hasHavingQual = (parse->havingQual != NULL);
863 :
864 : /*
865 : * Do expression preprocessing on targetlist and quals, as well as other
866 : * random expressions in the querytree. Note that we do not need to
867 : * handle sort/group expressions explicitly, because they are actually
868 : * part of the targetlist.
869 : */
870 524202 : parse->targetList = (List *)
871 528056 : preprocess_expression(root, (Node *) parse->targetList,
872 : EXPRKIND_TARGET);
873 :
874 524202 : newWithCheckOptions = NIL;
875 526690 : foreach(l, parse->withCheckOptions)
876 : {
877 2488 : WithCheckOption *wco = lfirst_node(WithCheckOption, l);
878 :
879 2488 : wco->qual = preprocess_expression(root, wco->qual,
880 : EXPRKIND_QUAL);
881 2488 : if (wco->qual != NULL)
882 2088 : newWithCheckOptions = lappend(newWithCheckOptions, wco);
883 : }
884 524202 : parse->withCheckOptions = newWithCheckOptions;
885 :
886 524202 : parse->returningList = (List *)
887 524202 : preprocess_expression(root, (Node *) parse->returningList,
888 : EXPRKIND_TARGET);
889 :
890 524202 : preprocess_qual_conditions(root, (Node *) parse->jointree);
891 :
892 524202 : parse->havingQual = preprocess_expression(root, parse->havingQual,
893 : EXPRKIND_QUAL);
894 :
895 526820 : foreach(l, parse->windowClause)
896 : {
897 2618 : WindowClause *wc = lfirst_node(WindowClause, l);
898 :
899 : /* partitionClause/orderClause are sort/group expressions */
900 2618 : wc->startOffset = preprocess_expression(root, wc->startOffset,
901 : EXPRKIND_LIMIT);
902 2618 : wc->endOffset = preprocess_expression(root, wc->endOffset,
903 : EXPRKIND_LIMIT);
904 : }
905 :
906 524202 : parse->limitOffset = preprocess_expression(root, parse->limitOffset,
907 : EXPRKIND_LIMIT);
908 524202 : parse->limitCount = preprocess_expression(root, parse->limitCount,
909 : EXPRKIND_LIMIT);
910 :
911 524202 : if (parse->onConflict)
912 : {
913 3628 : parse->onConflict->arbiterElems = (List *)
914 1814 : preprocess_expression(root,
915 1814 : (Node *) parse->onConflict->arbiterElems,
916 : EXPRKIND_ARBITER_ELEM);
917 3628 : parse->onConflict->arbiterWhere =
918 1814 : preprocess_expression(root,
919 1814 : parse->onConflict->arbiterWhere,
920 : EXPRKIND_QUAL);
921 3628 : parse->onConflict->onConflictSet = (List *)
922 1814 : preprocess_expression(root,
923 1814 : (Node *) parse->onConflict->onConflictSet,
924 : EXPRKIND_TARGET);
925 1814 : parse->onConflict->onConflictWhere =
926 1814 : preprocess_expression(root,
927 1814 : parse->onConflict->onConflictWhere,
928 : EXPRKIND_QUAL);
929 : /* exclRelTlist contains only Vars, so no preprocessing needed */
930 : }
931 :
932 527046 : foreach(l, parse->mergeActionList)
933 : {
934 2844 : MergeAction *action = (MergeAction *) lfirst(l);
935 :
936 2844 : action->targetList = (List *)
937 2844 : preprocess_expression(root,
938 2844 : (Node *) action->targetList,
939 : EXPRKIND_TARGET);
940 2844 : action->qual =
941 2844 : preprocess_expression(root,
942 : (Node *) action->qual,
943 : EXPRKIND_QUAL);
944 : }
945 :
946 524202 : parse->mergeJoinCondition =
947 524202 : preprocess_expression(root, parse->mergeJoinCondition, EXPRKIND_QUAL);
948 :
949 524202 : root->append_rel_list = (List *)
950 524202 : preprocess_expression(root, (Node *) root->append_rel_list,
951 : EXPRKIND_APPINFO);
952 :
953 : /* Also need to preprocess expressions within RTEs */
954 1421700 : foreach(l, parse->rtable)
955 : {
956 897498 : RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
957 : int kind;
958 : ListCell *lcsq;
959 :
960 897498 : if (rte->rtekind == RTE_RELATION)
961 : {
962 471802 : if (rte->tablesample)
963 228 : rte->tablesample = (TableSampleClause *)
964 228 : preprocess_expression(root,
965 228 : (Node *) rte->tablesample,
966 : EXPRKIND_TABLESAMPLE);
967 : }
968 425696 : else if (rte->rtekind == RTE_SUBQUERY)
969 : {
970 : /*
971 : * We don't want to do all preprocessing yet on the subquery's
972 : * expressions, since that will happen when we plan it. But if it
973 : * contains any join aliases of our level, those have to get
974 : * expanded now, because planning of the subquery won't do it.
975 : * That's only possible if the subquery is LATERAL.
976 : */
977 68334 : if (rte->lateral && root->hasJoinRTEs)
978 1228 : rte->subquery = (Query *)
979 1228 : flatten_join_alias_vars(root, root->parse,
980 1228 : (Node *) rte->subquery);
981 : }
982 357362 : else if (rte->rtekind == RTE_FUNCTION)
983 : {
984 : /* Preprocess the function expression(s) fully */
985 51460 : kind = rte->lateral ? EXPRKIND_RTFUNC_LATERAL : EXPRKIND_RTFUNC;
986 51460 : rte->functions = (List *)
987 51460 : preprocess_expression(root, (Node *) rte->functions, kind);
988 : }
989 305902 : else if (rte->rtekind == RTE_TABLEFUNC)
990 : {
991 : /* Preprocess the function expression(s) fully */
992 626 : kind = rte->lateral ? EXPRKIND_TABLEFUNC_LATERAL : EXPRKIND_TABLEFUNC;
993 626 : rte->tablefunc = (TableFunc *)
994 626 : preprocess_expression(root, (Node *) rte->tablefunc, kind);
995 : }
996 305276 : else if (rte->rtekind == RTE_VALUES)
997 : {
998 : /* Preprocess the values lists fully */
999 8266 : kind = rte->lateral ? EXPRKIND_VALUES_LATERAL : EXPRKIND_VALUES;
1000 8266 : rte->values_lists = (List *)
1001 8266 : preprocess_expression(root, (Node *) rte->values_lists, kind);
1002 : }
1003 297010 : else if (rte->rtekind == RTE_GROUP)
1004 : {
1005 : /* Preprocess the groupexprs list fully */
1006 4470 : rte->groupexprs = (List *)
1007 4470 : preprocess_expression(root, (Node *) rte->groupexprs,
1008 : EXPRKIND_GROUPEXPR);
1009 : }
1010 :
1011 : /*
1012 : * Process each element of the securityQuals list as if it were a
1013 : * separate qual expression (as indeed it is). We need to do it this
1014 : * way to get proper canonicalization of AND/OR structure. Note that
1015 : * this converts each element into an implicit-AND sublist.
1016 : */
1017 900364 : foreach(lcsq, rte->securityQuals)
1018 : {
1019 2866 : lfirst(lcsq) = preprocess_expression(root,
1020 2866 : (Node *) lfirst(lcsq),
1021 : EXPRKIND_QUAL);
1022 : }
1023 : }
1024 :
1025 : /*
1026 : * Now that we are done preprocessing expressions, and in particular done
1027 : * flattening join alias variables, get rid of the joinaliasvars lists.
1028 : * They no longer match what expressions in the rest of the tree look
1029 : * like, because we have not preprocessed expressions in those lists (and
1030 : * do not want to; for example, expanding a SubLink there would result in
1031 : * a useless unreferenced subplan). Leaving them in place simply creates
1032 : * a hazard for later scans of the tree. We could try to prevent that by
1033 : * using QTW_IGNORE_JOINALIASES in every tree scan done after this point,
1034 : * but that doesn't sound very reliable.
1035 : */
1036 524202 : if (root->hasJoinRTEs)
1037 : {
1038 304774 : foreach(l, parse->rtable)
1039 : {
1040 251262 : RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
1041 :
1042 251262 : rte->joinaliasvars = NIL;
1043 : }
1044 : }
1045 :
1046 : /*
1047 : * Replace any Vars in the subquery's targetlist and havingQual that
1048 : * reference GROUP outputs with the underlying grouping expressions.
1049 : *
1050 : * Note that we need to perform this replacement after we've preprocessed
1051 : * the grouping expressions. This is to ensure that there is only one
1052 : * instance of SubPlan for each SubLink contained within the grouping
1053 : * expressions.
1054 : */
1055 524202 : if (parse->hasGroupRTE)
1056 : {
1057 4470 : parse->targetList = (List *)
1058 4470 : flatten_group_exprs(root, root->parse, (Node *) parse->targetList);
1059 4470 : parse->havingQual =
1060 4470 : flatten_group_exprs(root, root->parse, parse->havingQual);
1061 : }
1062 :
1063 : /* Constant-folding might have removed all set-returning functions */
1064 524202 : if (parse->hasTargetSRFs)
1065 12036 : parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
1066 :
1067 : /*
1068 : * In some cases we may want to transfer a HAVING clause into WHERE. We
1069 : * cannot do so if the HAVING clause contains aggregates (obviously) or
1070 : * volatile functions (since a HAVING clause is supposed to be executed
1071 : * only once per group). We also can't do this if there are any nonempty
1072 : * grouping sets and the clause references any columns that are nullable
1073 : * by the grouping sets; moving such a clause into WHERE would potentially
1074 : * change the results. (If there are only empty grouping sets, then the
1075 : * HAVING clause must be degenerate as discussed below.)
1076 : *
1077 : * Also, it may be that the clause is so expensive to execute that we're
1078 : * better off doing it only once per group, despite the loss of
1079 : * selectivity. This is hard to estimate short of doing the entire
1080 : * planning process twice, so we use a heuristic: clauses containing
1081 : * subplans are left in HAVING. Otherwise, we move or copy the HAVING
1082 : * clause into WHERE, in hopes of eliminating tuples before aggregation
1083 : * instead of after.
1084 : *
1085 : * If the query has explicit grouping then we can simply move such a
1086 : * clause into WHERE; any group that fails the clause will not be in the
1087 : * output because none of its tuples will reach the grouping or
1088 : * aggregation stage. Otherwise we must have a degenerate (variable-free)
1089 : * HAVING clause, which we put in WHERE so that query_planner() can use it
1090 : * in a gating Result node, but also keep in HAVING to ensure that we
1091 : * don't emit a bogus aggregated row. (This could be done better, but it
1092 : * seems not worth optimizing.)
1093 : *
1094 : * Note that a HAVING clause may contain expressions that are not fully
1095 : * preprocessed. This can happen if these expressions are part of
1096 : * grouping items. In such cases, they are replaced with GROUP Vars in
1097 : * the parser and then replaced back after we've done with expression
1098 : * preprocessing on havingQual. This is not an issue if the clause
1099 : * remains in HAVING, because these expressions will be matched to lower
1100 : * target items in setrefs.c. However, if the clause is moved or copied
1101 : * into WHERE, we need to ensure that these expressions are fully
1102 : * preprocessed.
1103 : *
1104 : * Note that both havingQual and parse->jointree->quals are in
1105 : * implicitly-ANDed-list form at this point, even though they are declared
1106 : * as Node *.
1107 : */
1108 524202 : newHaving = NIL;
1109 525356 : foreach(l, (List *) parse->havingQual)
1110 : {
1111 1154 : Node *havingclause = (Node *) lfirst(l);
1112 :
1113 1488 : if (contain_agg_clause(havingclause) ||
1114 668 : contain_volatile_functions(havingclause) ||
1115 334 : contain_subplans(havingclause) ||
1116 418 : (parse->groupClause && parse->groupingSets &&
1117 84 : bms_is_member(root->group_rtindex, pull_varnos(root, havingclause))))
1118 : {
1119 : /* keep it in HAVING */
1120 892 : newHaving = lappend(newHaving, havingclause);
1121 : }
1122 262 : else if (parse->groupClause)
1123 : {
1124 : Node *whereclause;
1125 :
1126 : /* Preprocess the HAVING clause fully */
1127 244 : whereclause = preprocess_expression(root, havingclause,
1128 : EXPRKIND_QUAL);
1129 : /* ... and move it to WHERE */
1130 244 : parse->jointree->quals = (Node *)
1131 244 : list_concat((List *) parse->jointree->quals,
1132 : (List *) whereclause);
1133 : }
1134 : else
1135 : {
1136 : Node *whereclause;
1137 :
1138 : /* Preprocess the HAVING clause fully */
1139 18 : whereclause = preprocess_expression(root, copyObject(havingclause),
1140 : EXPRKIND_QUAL);
1141 : /* ... and put a copy in WHERE */
1142 36 : parse->jointree->quals = (Node *)
1143 18 : list_concat((List *) parse->jointree->quals,
1144 : (List *) whereclause);
1145 : /* ... and also keep it in HAVING */
1146 18 : newHaving = lappend(newHaving, havingclause);
1147 : }
1148 : }
1149 524202 : parse->havingQual = (Node *) newHaving;
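	/*
	 * Worked example (editor's note): in
	 *		SELECT x, count(*) FROM t GROUP BY x
	 *		HAVING x > 0 AND count(*) > 1
	 * the clause "x > 0" contains no aggregates, volatile functions, or
	 * subplans, so it is moved into WHERE and filters rows before
	 * aggregation, while "count(*) > 1" stays in HAVING.
	 */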
1150 :
1151 : /*
1152 : * If we have any outer joins, try to reduce them to plain inner joins.
1153 : * This step is most easily done after we've done expression
1154 : * preprocessing.
1155 : */
1156 524202 : if (hasOuterJoins)
1157 34454 : reduce_outer_joins(root);
1158 :
1159 : /*
1160 : * If we have any RTE_RESULT relations, see if they can be deleted from
1161 : * the jointree. We also rely on this processing to flatten single-child
1162 : * FromExprs underneath outer joins. This step is most effectively done
1163 : * after we've done expression preprocessing and outer join reduction.
1164 : */
1165 524202 : if (hasResultRTEs || hasOuterJoins)
1166 231978 : remove_useless_result_rtes(root);
1167 :
1168 : /*
1169 : * Do the main planning.
1170 : */
1171 524202 : grouping_planner(root, tuple_fraction, setops);
1172 :
1173 : /*
1174 : * Capture the set of outer-level param IDs we have access to, for use in
1175 : * extParam/allParam calculations later.
1176 : */
1177 524130 : SS_identify_outer_params(root);
1178 :
1179 : /*
1180 : * If any initPlans were created in this query level, adjust the surviving
1181 : * Paths' costs and parallel-safety flags to account for them. The
1182 : * initPlans won't actually get attached to the plan tree till
1183 : * create_plan() runs, but we must include their effects now.
1184 : */
1185 524130 : final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1186 524130 : SS_charge_for_initplans(root, final_rel);
1187 :
1188 : /*
1189 : * Make sure we've identified the cheapest Path for the final rel. (By
1190 : * doing this here not in grouping_planner, we include initPlan costs in
1191 : * the decision, though it's unlikely that will change anything.)
1192 : */
1193 524130 : set_cheapest(final_rel);
1194 :
1195 524130 : return root;
1196 : }
1197 :
1198 : /*
1199 : * preprocess_expression
1200 : * Do subquery_planner's preprocessing work for an expression,
1201 : * which can be a targetlist, a WHERE clause (including JOIN/ON
1202 : * conditions), a HAVING clause, or a few other things.
1203 : */
1204 : static Node *
1205 4396968 : preprocess_expression(PlannerInfo *root, Node *expr, int kind)
1206 : {
1207 : /*
1208 : * Fall out quickly if expression is empty. This occurs often enough to
1209 : * be worth checking. Note that null->null is the correct conversion for
1210 : * implicit-AND result format, too.
1211 : */
1212 4396968 : if (expr == NULL)
1213 3475650 : return NULL;
1214 :
1215 : /*
1216 : * If the query has any join RTEs, replace join alias variables with
1217 : * base-relation variables. We must do this first, since any expressions
1218 : * we may extract from the joinaliasvars lists have not been preprocessed.
1219 : * For example, if we did this after sublink processing, sublinks expanded
1220 : * out from join aliases would not get processed. But we can skip this in
1221 : * non-lateral RTE functions, VALUES lists, and TABLESAMPLE clauses, since
1222 : * they can't contain any Vars of the current query level.
1223 : */
1224 921318 : if (root->hasJoinRTEs &&
1225 385526 : !(kind == EXPRKIND_RTFUNC ||
1226 192590 : kind == EXPRKIND_VALUES ||
1227 : kind == EXPRKIND_TABLESAMPLE ||
1228 : kind == EXPRKIND_TABLEFUNC))
1229 192572 : expr = flatten_join_alias_vars(root, root->parse, expr);
1230 :
1231 : /*
1232 : * Simplify constant expressions. For function RTEs, this was already
1233 : * done by preprocess_function_rtes. (But note we must do it again for
1234 : * EXPRKIND_RTFUNC_LATERAL, because those might by now contain
1235 : * un-simplified subexpressions inserted by flattening of subqueries or
1236 : * join alias variables.)
1237 : *
1238 : * Note: an essential effect of this is to convert named-argument function
1239 : * calls to positional notation and insert the current actual values of
1240 : * any default arguments for functions. To ensure that happens, we *must*
1241 : * process all expressions here. Previous PG versions sometimes skipped
1242 : * const-simplification if it didn't seem worth the trouble, but we can't
1243 : * do that anymore.
1244 : *
1245 : * Note: this also flattens nested AND and OR expressions into N-argument
1246 : * form. All processing of a qual expression after this point must be
1247 : * careful to maintain AND/OR flatness --- that is, do not generate a tree
1248 : * with AND directly under AND, nor OR directly under OR.
1249 : */
1250 921318 : if (kind != EXPRKIND_RTFUNC)
1251 878224 : expr = eval_const_expressions(root, expr);
1252 :
1253 : /*
1254 : * If it's a qual or havingQual, canonicalize it.
1255 : */
1256 917464 : if (kind == EXPRKIND_QUAL)
1257 : {
1258 328000 : expr = (Node *) canonicalize_qual((Expr *) expr, false);
1259 :
1260 : #ifdef OPTIMIZER_DEBUG
1261 : printf("After canonicalize_qual()\n");
1262 : pprint(expr);
1263 : #endif
1264 : }
1265 :
1266 : /*
1267 : * Check for ANY ScalarArrayOpExpr with Const arrays and set the
1268 : * hashfuncid of any that might execute more quickly by using hash lookups
1269 : * instead of a linear search.
1270 : */
1271 917464 : if (kind == EXPRKIND_QUAL || kind == EXPRKIND_TARGET)
1272 : {
1273 839320 : convert_saop_to_hashed_saop(expr);
1274 : }
1275 :
1276 : /* Expand SubLinks to SubPlans */
1277 917464 : if (root->parse->hasSubLinks)
1278 108460 : expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
1279 :
1280 : /*
1281 : * XXX do not insert anything here unless you have grokked the comments in
1282 : * SS_replace_correlation_vars ...
1283 : */
1284 :
1285 : /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
1286 917464 : if (root->query_level > 1)
1287 160602 : expr = SS_replace_correlation_vars(root, expr);
1288 :
1289 : /*
1290 : * If it's a qual or havingQual, convert it to implicit-AND format. (We
1291 : * don't want to do this before eval_const_expressions, since the latter
1292 : * would be unable to simplify a top-level AND correctly. Also,
1293 : * SS_process_sublinks expects explicit-AND format.)
1294 : */
1295 917464 : if (kind == EXPRKIND_QUAL)
1296 328000 : expr = (Node *) make_ands_implicit((Expr *) expr);
1297 :
1298 917464 : return expr;
1299 : }
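/*
 * Illustration (editor's note): for kind == EXPRKIND_QUAL, a clause written
 * as "x > 0 AND y IS NOT NULL" leaves preprocess_expression as an
 * implicit-AND List of two quals rather than a BoolExpr, and a NULL qual
 * stays NULL (the empty implicit-AND list); non-qual kinds such as
 * EXPRKIND_TARGET keep ordinary expression form.
 */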
1300 :
1301 : /*
1302 : * preprocess_qual_conditions
1303 : * Recursively scan the query's jointree and do subquery_planner's
1304 : * preprocessing work on each qual condition found therein.
1305 : */
1306 : static void
1307 1298936 : preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
1308 : {
1309 1298936 : if (jtnode == NULL)
1310 0 : return;
1311 1298936 : if (IsA(jtnode, RangeTblRef))
1312 : {
1313 : /* nothing to do here */
1314 : }
1315 634782 : else if (IsA(jtnode, FromExpr))
1316 : {
1317 538894 : FromExpr *f = (FromExpr *) jtnode;
1318 : ListCell *l;
1319 :
1320 1121852 : foreach(l, f->fromlist)
1321 582958 : preprocess_qual_conditions(root, lfirst(l));
1322 :
1323 538894 : f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
1324 : }
1325 95888 : else if (IsA(jtnode, JoinExpr))
1326 : {
1327 95888 : JoinExpr *j = (JoinExpr *) jtnode;
1328 :
1329 95888 : preprocess_qual_conditions(root, j->larg);
1330 95888 : preprocess_qual_conditions(root, j->rarg);
1331 :
1332 95888 : j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
1333 : }
1334 : else
1335 0 : elog(ERROR, "unrecognized node type: %d",
1336 : (int) nodeTag(jtnode));
1337 : }
1338 :
1339 : /*
1340 : * preprocess_phv_expression
1341 : * Do preprocessing on a PlaceHolderVar expression that's been pulled up.
1342 : *
1343 : * If a LATERAL subquery references an output of another subquery, and that
1344 : * output must be wrapped in a PlaceHolderVar because of an intermediate outer
1345 : * join, then we'll push the PlaceHolderVar expression down into the subquery
1346 : * and later pull it back up during find_lateral_references, which runs after
1347 : * subquery_planner has preprocessed all the expressions that were in the
1348 : * current query level to start with. So we need to preprocess it then.
1349 : */
1350 : Expr *
1351 72 : preprocess_phv_expression(PlannerInfo *root, Expr *expr)
1352 : {
1353 72 : return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
1354 : }
1355 :
1356 : /*--------------------
1357 : * grouping_planner
1358 : * Perform planning steps related to grouping, aggregation, etc.
1359 : *
1360 : * This function adds all required top-level processing to the scan/join
1361 : * Path(s) produced by query_planner.
1362 : *
1363 : * tuple_fraction is the fraction of tuples we expect will be retrieved.
1364 : * tuple_fraction is interpreted as follows:
1365 : * 0: expect all tuples to be retrieved (normal case)
1366 : * 0 < tuple_fraction < 1: expect the given fraction of tuples available
1367 : * from the plan to be retrieved
1368 : * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1369 : * expected to be retrieved (ie, a LIMIT specification).
1370 : * setops is used for set operation subqueries to provide the subquery with
1371 : * the context in which it's being used so that Paths correctly sorted for the
1372 : * set operation can be generated. NULL when not planning a set operation
1373 : * child, or when planning a child of a set op that isn't interested in sorted input.
1374 : *
1375 : * Returns nothing; the useful output is in the Paths we attach to the
1376 : * (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
1377 : * root->processed_tlist contains the final processed targetlist.
1378 : *
1379 : * Note that we have not done set_cheapest() on the final rel; it's convenient
1380 : * to leave this to the caller.
1381 : *--------------------
1382 : */
1383 : static void
1384 524202 : grouping_planner(PlannerInfo *root, double tuple_fraction,
1385 : SetOperationStmt *setops)
1386 : {
1387 524202 : Query *parse = root->parse;
1388 524202 : int64 offset_est = 0;
1389 524202 : int64 count_est = 0;
1390 524202 : double limit_tuples = -1.0;
1391 524202 : bool have_postponed_srfs = false;
1392 : PathTarget *final_target;
1393 : List *final_targets;
1394 : List *final_targets_contain_srfs;
1395 : bool final_target_parallel_safe;
1396 : RelOptInfo *current_rel;
1397 : RelOptInfo *final_rel;
1398 : FinalPathExtraData extra;
1399 : ListCell *lc;
1400 :
1401 : /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1402 524202 : if (parse->limitCount || parse->limitOffset)
1403 : {
1404 4956 : tuple_fraction = preprocess_limit(root, tuple_fraction,
1405 : &offset_est, &count_est);
1406 :
1407 : /*
1408 : * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1409 : * estimate the effects of using a bounded sort.
1410 : */
1411 4956 : if (count_est > 0 && offset_est >= 0)
1412 4434 : limit_tuples = (double) count_est + (double) offset_est;
1413 : }
1414 :
1415 : /* Make tuple_fraction accessible to lower-level routines */
1416 524202 : root->tuple_fraction = tuple_fraction;
1417 :
1418 524202 : if (parse->setOperations)
1419 : {
1420 : /*
1421 : * Construct Paths for set operations. The results will not need any
1422 : * work except perhaps a top-level sort and/or LIMIT. Note that any
1423 : * special work for recursive unions is the responsibility of
1424 : * plan_set_operations.
1425 : */
1426 6164 : current_rel = plan_set_operations(root);
1427 :
1428 : /*
1429 : * We should not need to call preprocess_targetlist, since we must be
1430 : * in a SELECT query node. Instead, use the processed_tlist returned
1431 : * by plan_set_operations (since this tells whether it returned any
1432 : * resjunk columns!), and transfer any sort key information from the
1433 : * original tlist.
1434 : */
1435 : Assert(parse->commandType == CMD_SELECT);
1436 :
1437 : /* for safety, copy processed_tlist instead of modifying in-place */
1438 6158 : root->processed_tlist =
1439 6158 : postprocess_setop_tlist(copyObject(root->processed_tlist),
1440 : parse->targetList);
1441 :
1442 : /* Also extract the PathTarget form of the setop result tlist */
1443 6158 : final_target = current_rel->cheapest_total_path->pathtarget;
1444 :
1445 : /* And check whether it's parallel safe */
1446 : final_target_parallel_safe =
1447 6158 : is_parallel_safe(root, (Node *) final_target->exprs);
1448 :
1449 : /* The setop result tlist couldn't contain any SRFs */
1450 : Assert(!parse->hasTargetSRFs);
1451 6158 : final_targets = final_targets_contain_srfs = NIL;
1452 :
1453 : /*
1454 : * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1455 : * checked already, but let's make sure).
1456 : */
1457 6158 : if (parse->rowMarks)
1458 0 : ereport(ERROR,
1459 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1460 : /*------
1461 : translator: %s is a SQL row locking clause such as FOR UPDATE */
1462 : errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1463 : LCS_asString(linitial_node(RowMarkClause,
1464 : parse->rowMarks)->strength))));
1465 :
1466 : /*
1467 : * Calculate pathkeys that represent result ordering requirements
1468 : */
1469 : Assert(parse->distinctClause == NIL);
1470 6158 : root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
1471 : parse->sortClause,
1472 : root->processed_tlist);
1473 : }
1474 : else
1475 : {
1476 : /* No set operations, do regular planning */
1477 : PathTarget *sort_input_target;
1478 : List *sort_input_targets;
1479 : List *sort_input_targets_contain_srfs;
1480 : bool sort_input_target_parallel_safe;
1481 : PathTarget *grouping_target;
1482 : List *grouping_targets;
1483 : List *grouping_targets_contain_srfs;
1484 : bool grouping_target_parallel_safe;
1485 : PathTarget *scanjoin_target;
1486 : List *scanjoin_targets;
1487 : List *scanjoin_targets_contain_srfs;
1488 : bool scanjoin_target_parallel_safe;
1489 : bool scanjoin_target_same_exprs;
1490 : bool have_grouping;
1491 518038 : WindowFuncLists *wflists = NULL;
1492 518038 : List *activeWindows = NIL;
1493 518038 : grouping_sets_data *gset_data = NULL;
1494 : standard_qp_extra qp_extra;
1495 :
1496 : /* A recursive query should always have setOperations */
1497 : Assert(!root->hasRecursion);
1498 :
1499 : /* Preprocess grouping sets and GROUP BY clause, if any */
1500 518038 : if (parse->groupingSets)
1501 : {
1502 878 : gset_data = preprocess_grouping_sets(root);
1503 : }
1504 517160 : else if (parse->groupClause)
1505 : {
1506 : /* Preprocess regular GROUP BY clause, if any */
1507 3634 : root->processed_groupClause = preprocess_groupclause(root, NIL);
1508 : }
1509 :
1510 : /*
1511 : * Preprocess targetlist. Note that much of the remaining planning
1512 : * work will be done with the PathTarget representation of tlists, but
1513 : * we must also maintain the full representation of the final tlist so
1514 : * that we can transfer its decoration (resnames etc) to the topmost
1515 : * tlist of the finished Plan. This is kept in processed_tlist.
1516 : */
1517 518032 : preprocess_targetlist(root);
1518 :
1519 : /*
1520 : * Mark all the aggregates with resolved aggtranstypes, and detect
1521 : * aggregates that are duplicates or can share transition state. We
1522 : * must do this before slicing and dicing the tlist into various
1523 : * pathtargets, else some copies of the Aggref nodes might escape
1524 : * being marked.
1525 : */
1526 518032 : if (parse->hasAggs)
1527 : {
1528 38372 : preprocess_aggrefs(root, (Node *) root->processed_tlist);
1529 38372 : preprocess_aggrefs(root, (Node *) parse->havingQual);
1530 : }
1531 :
1532 : /*
1533 : * Locate any window functions in the tlist. (We don't need to look
1534 : * anywhere else, since expressions used in ORDER BY will be in there
1535 : * too.) Note that they could all have been eliminated by constant
1536 : * folding, in which case we don't need to do any more work.
1537 : */
1538 518032 : if (parse->hasWindowFuncs)
1539 : {
1540 2384 : wflists = find_window_functions((Node *) root->processed_tlist,
1541 2384 : list_length(parse->windowClause));
1542 2384 : if (wflists->numWindowFuncs > 0)
1543 : {
1544 : /*
1545 : * See if any modifications can be made to each WindowClause
1546 : * to allow the executor to execute the WindowFuncs more
1547 : * quickly.
1548 : */
1549 2378 : optimize_window_clauses(root, wflists);
1550 :
1551 : /* Extract the list of windows actually in use. */
1552 2378 : activeWindows = select_active_windows(root, wflists);
1553 :
1554 : /* Make sure they all have names, for EXPLAIN's use. */
1555 2378 : name_active_windows(activeWindows);
1556 : }
1557 : else
1558 6 : parse->hasWindowFuncs = false;
1559 : }
1560 :
1561 : /*
1562 : * Preprocess MIN/MAX aggregates, if any. Note: be careful about
1563 : * adding logic between here and the query_planner() call. Anything
1564 : * that is needed in MIN/MAX-optimizable cases will have to be
1565 : * duplicated in planagg.c.
1566 : */
1567 518032 : if (parse->hasAggs)
1568 38372 : preprocess_minmax_aggregates(root);
1569 :
1570 : /*
1571 : * Figure out whether there's a hard limit on the number of rows that
1572 : * query_planner's result subplan needs to return. Even if we know a
1573 : * hard limit overall, it doesn't apply if the query has any
1574 : * grouping/aggregation operations, or SRFs in the tlist.
1575 : */
1576 518032 : if (parse->groupClause ||
1577 513568 : parse->groupingSets ||
1578 513526 : parse->distinctClause ||
1579 510882 : parse->hasAggs ||
1580 476404 : parse->hasWindowFuncs ||
1581 474164 : parse->hasTargetSRFs ||
1582 462590 : root->hasHavingQual)
1583 55460 : root->limit_tuples = -1.0;
1584 : else
1585 462572 : root->limit_tuples = limit_tuples;
1586 :
1587 : /* Set up data needed by standard_qp_callback */
1588 518032 : qp_extra.activeWindows = activeWindows;
1589 518032 : qp_extra.gset_data = gset_data;
1590 :
1591 : /*
1592 : * If we're a subquery for a set operation, store the SetOperationStmt
1593 : * in qp_extra.
1594 : */
1595 518032 : qp_extra.setop = setops;
1596 :
1597 : /*
1598 : * Generate the best unsorted and presorted paths for the scan/join
1599 : * portion of this Query, ie the processing represented by the
1600 : * FROM/WHERE clauses. (Note there may not be any presorted paths.)
1601 : * We also generate (in standard_qp_callback) pathkey representations
1602 : * of the query's sort clause, distinct clause, etc.
1603 : */
1604 518032 : current_rel = query_planner(root, standard_qp_callback, &qp_extra);
1605 :
1606 : /*
1607 : * Convert the query's result tlist into PathTarget format.
1608 : *
1609 : * Note: this cannot be done before query_planner() has performed
1610 : * appendrel expansion, because that might add resjunk entries to
1611 : * root->processed_tlist. Waiting till afterwards is also helpful
1612 : * because the target width estimates can use per-Var width numbers
1613 : * that were obtained within query_planner().
1614 : */
1615 517978 : final_target = create_pathtarget(root, root->processed_tlist);
1616 : final_target_parallel_safe =
1617 517978 : is_parallel_safe(root, (Node *) final_target->exprs);
1618 :
1619 : /*
1620 : * If ORDER BY was given, consider whether we should use a post-sort
1621 : * projection, and compute the adjusted target for preceding steps if
1622 : * so.
1623 : */
1624 517978 : if (parse->sortClause)
1625 : {
1626 71776 : sort_input_target = make_sort_input_target(root,
1627 : final_target,
1628 : &have_postponed_srfs);
1629 : sort_input_target_parallel_safe =
1630 71776 : is_parallel_safe(root, (Node *) sort_input_target->exprs);
1631 : }
1632 : else
1633 : {
1634 446202 : sort_input_target = final_target;
1635 446202 : sort_input_target_parallel_safe = final_target_parallel_safe;
1636 : }
1637 :
1638 : /*
1639 : * If we have window functions to deal with, the output from any
1640 : * grouping step needs to be what the window functions want;
1641 : * otherwise, it should be sort_input_target.
1642 : */
1643 517978 : if (activeWindows)
1644 : {
1645 2378 : grouping_target = make_window_input_target(root,
1646 : final_target,
1647 : activeWindows);
1648 : grouping_target_parallel_safe =
1649 2378 : is_parallel_safe(root, (Node *) grouping_target->exprs);
1650 : }
1651 : else
1652 : {
1653 515600 : grouping_target = sort_input_target;
1654 515600 : grouping_target_parallel_safe = sort_input_target_parallel_safe;
1655 : }
1656 :
1657 : /*
1658 : * If we have grouping or aggregation to do, the topmost scan/join
1659 : * plan node must emit what the grouping step wants; otherwise, it
1660 : * should emit grouping_target.
1661 : */
1662 513514 : have_grouping = (parse->groupClause || parse->groupingSets ||
1663 1031492 : parse->hasAggs || root->hasHavingQual);
1664 517978 : if (have_grouping)
1665 : {
1666 39040 : scanjoin_target = make_group_input_target(root, final_target);
1667 : scanjoin_target_parallel_safe =
1668 39040 : is_parallel_safe(root, (Node *) scanjoin_target->exprs);
1669 : }
1670 : else
1671 : {
1672 478938 : scanjoin_target = grouping_target;
1673 478938 : scanjoin_target_parallel_safe = grouping_target_parallel_safe;
1674 : }
1675 :
1676 : /*
1677 : * If there are any SRFs in the targetlist, we must separate each of
1678 : * these PathTargets into SRF-computing and SRF-free targets. Replace
1679 : * each of the named targets with a SRF-free version, and remember the
1680 : * list of additional projection steps we need to add afterwards.
1681 : */
1682 517978 : if (parse->hasTargetSRFs)
1683 : {
1684 : /* final_target doesn't recompute any SRFs in sort_input_target */
1685 12036 : split_pathtarget_at_srfs(root, final_target, sort_input_target,
1686 : &final_targets,
1687 : &final_targets_contain_srfs);
1688 12036 : final_target = linitial_node(PathTarget, final_targets);
1689 : Assert(!linitial_int(final_targets_contain_srfs));
1690 : /* likewise for sort_input_target vs. grouping_target */
1691 12036 : split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
1692 : &sort_input_targets,
1693 : &sort_input_targets_contain_srfs);
1694 12036 : sort_input_target = linitial_node(PathTarget, sort_input_targets);
1695 : Assert(!linitial_int(sort_input_targets_contain_srfs));
1696 : /* likewise for grouping_target vs. scanjoin_target */
1697 12036 : split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
1698 : &grouping_targets,
1699 : &grouping_targets_contain_srfs);
1700 12036 : grouping_target = linitial_node(PathTarget, grouping_targets);
1701 : Assert(!linitial_int(grouping_targets_contain_srfs));
1702 : /* scanjoin_target will not have any SRFs precomputed for it */
1703 12036 : split_pathtarget_at_srfs(root, scanjoin_target, NULL,
1704 : &scanjoin_targets,
1705 : &scanjoin_targets_contain_srfs);
1706 12036 : scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
1707 : Assert(!linitial_int(scanjoin_targets_contain_srfs));
1708 : }
1709 : else
1710 : {
1711 : /* initialize lists; for most of these, dummy values are OK */
1712 505942 : final_targets = final_targets_contain_srfs = NIL;
1713 505942 : sort_input_targets = sort_input_targets_contain_srfs = NIL;
1714 505942 : grouping_targets = grouping_targets_contain_srfs = NIL;
1715 505942 : scanjoin_targets = list_make1(scanjoin_target);
1716 505942 : scanjoin_targets_contain_srfs = NIL;
1717 : }
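
/*
 * To make the target bookkeeping above concrete, here is a sketch (the
 * query and column names are hypothetical) of the four-level pipeline
 * for
 *     SELECT a, sum(b), generate_series(1, sum(b)::int)
 *     FROM t GROUP BY a ORDER BY 2;
 * the scan/join target computes a and b, the grouping target computes
 * a and sum(b), the sort-input target is the SRF-free part of the
 * result, and the final target adds the set-returning function, which
 * a ProjectSet step evaluates after the sort.
 */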
1718 :
1719 : /* Apply scan/join target. */
1720 517978 : scanjoin_target_same_exprs = list_length(scanjoin_targets) == 1
1721 517978 : && equal(scanjoin_target->exprs, current_rel->reltarget->exprs);
1722 517978 : apply_scanjoin_target_to_paths(root, current_rel, scanjoin_targets,
1723 : scanjoin_targets_contain_srfs,
1724 : scanjoin_target_parallel_safe,
1725 : scanjoin_target_same_exprs);
1726 :
1727 : /*
1728 : * Save the various upper-rel PathTargets we just computed into
1729 : * root->upper_targets[]. The core code doesn't use this, but it
1730 : * provides a convenient place for extensions to get at the info. For
1731 : * consistency, we save all the intermediate targets, even though some
1732 : * of the corresponding upperrels might not be needed for this query.
1733 : */
1734 517978 : root->upper_targets[UPPERREL_FINAL] = final_target;
1735 517978 : root->upper_targets[UPPERREL_ORDERED] = final_target;
1736 517978 : root->upper_targets[UPPERREL_DISTINCT] = sort_input_target;
1737 517978 : root->upper_targets[UPPERREL_PARTIAL_DISTINCT] = sort_input_target;
1738 517978 : root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
1739 517978 : root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
1740 :
1741 : /*
1742 : * If we have grouping and/or aggregation, consider ways to implement
1743 : * that. We build a new upperrel representing the output of this
1744 : * phase.
1745 : */
1746 517978 : if (have_grouping)
1747 : {
1748 39040 : current_rel = create_grouping_paths(root,
1749 : current_rel,
1750 : grouping_target,
1751 : grouping_target_parallel_safe,
1752 : gset_data);
1753 : /* Fix things up if grouping_target contains SRFs */
1754 39034 : if (parse->hasTargetSRFs)
1755 420 : adjust_paths_for_srfs(root, current_rel,
1756 : grouping_targets,
1757 : grouping_targets_contain_srfs);
1758 : }
1759 :
1760 : /*
1761 : * If we have window functions, consider ways to implement those. We
1762 : * build a new upperrel representing the output of this phase.
1763 : */
1764 517972 : if (activeWindows)
1765 : {
1766 2378 : current_rel = create_window_paths(root,
1767 : current_rel,
1768 : grouping_target,
1769 : sort_input_target,
1770 : sort_input_target_parallel_safe,
1771 : wflists,
1772 : activeWindows);
1773 : /* Fix things up if sort_input_target contains SRFs */
1774 2378 : if (parse->hasTargetSRFs)
1775 12 : adjust_paths_for_srfs(root, current_rel,
1776 : sort_input_targets,
1777 : sort_input_targets_contain_srfs);
1778 : }
1779 :
1780 : /*
1781 : * If there is a DISTINCT clause, consider ways to implement that. We
1782 : * build a new upperrel representing the output of this phase.
1783 : */
1784 517972 : if (parse->distinctClause)
1785 : {
1786 2678 : current_rel = create_distinct_paths(root,
1787 : current_rel,
1788 : sort_input_target);
1789 : }
1790 : } /* end of if (setOperations) */
1791 :
1792 : /*
1793 : * If ORDER BY was given, consider ways to implement that, and generate a
1794 : * new upperrel containing only paths that emit the correct ordering and
1795 : * project the correct final_target. We can apply the original
1796 : * limit_tuples limit in sort costing here, but only if there are no
1797 : * postponed SRFs.
1798 : */
1799 524130 : if (parse->sortClause)
1800 : {
1801 75732 : current_rel = create_ordered_paths(root,
1802 : current_rel,
1803 : final_target,
1804 : final_target_parallel_safe,
1805 : have_postponed_srfs ? -1.0 :
1806 : limit_tuples);
1807 : /* Fix things up if final_target contains SRFs */
1808 75732 : if (parse->hasTargetSRFs)
1809 196 : adjust_paths_for_srfs(root, current_rel,
1810 : final_targets,
1811 : final_targets_contain_srfs);
1812 : }
1813 :
1814 : /*
1815 : * Now we are prepared to build the final-output upperrel.
1816 : */
1817 524130 : final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1818 :
1819 : /*
1820 : * If the input rel is marked consider_parallel and there's nothing that's
1821 : * not parallel-safe in the LIMIT clause, then the final_rel can be marked
1822 : * consider_parallel as well. Note that if the query has rowMarks or is
1823 : * not a SELECT, consider_parallel will be false for every relation in the
1824 : * query.
1825 : */
1826 698692 : if (current_rel->consider_parallel &&
1827 349100 : is_parallel_safe(root, parse->limitOffset) &&
1828 174538 : is_parallel_safe(root, parse->limitCount))
1829 174532 : final_rel->consider_parallel = true;
1830 :
1831 : /*
1832 : * If the current_rel belongs to a single FDW, so does the final_rel.
1833 : */
1834 524130 : final_rel->serverid = current_rel->serverid;
1835 524130 : final_rel->userid = current_rel->userid;
1836 524130 : final_rel->useridiscurrent = current_rel->useridiscurrent;
1837 524130 : final_rel->fdwroutine = current_rel->fdwroutine;
1838 :
1839 : /*
1840 : * Generate paths for the final_rel. Insert all surviving paths, with
1841 : * LockRows, Limit, and/or ModifyTable steps added if needed.
1842 : */
1843 1067704 : foreach(lc, current_rel->pathlist)
1844 : {
1845 543574 : Path *path = (Path *) lfirst(lc);
1846 :
1847 : /*
1848 : * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
1849 : * (Note: we intentionally test parse->rowMarks not root->rowMarks
1850 : * here. If there are only non-locking rowmarks, they should be
1851 : * handled by the ModifyTable node instead. However, root->rowMarks
1852 : * is what goes into the LockRows node.)
1853 : */
1854 543574 : if (parse->rowMarks)
1855 : {
1856 8252 : path = (Path *) create_lockrows_path(root, final_rel, path,
1857 : root->rowMarks,
1858 : assign_special_exec_param(root));
1859 : }
1860 :
1861 : /*
1862 : * If there is a LIMIT/OFFSET clause, add the LIMIT node.
1863 : */
1864 543574 : if (limit_needed(parse))
1865 : {
1866 5880 : path = (Path *) create_limit_path(root, final_rel, path,
1867 : parse->limitOffset,
1868 : parse->limitCount,
1869 : parse->limitOption,
1870 : offset_est, count_est);
1871 : }
1872 :
1873 : /*
1874 : * If this is an INSERT/UPDATE/DELETE/MERGE, add the ModifyTable node.
1875 : */
1876 543574 : if (parse->commandType != CMD_SELECT)
1877 : {
1878 : Index rootRelation;
1879 90102 : List *resultRelations = NIL;
1880 90102 : List *updateColnosLists = NIL;
1881 90102 : List *withCheckOptionLists = NIL;
1882 90102 : List *returningLists = NIL;
1883 90102 : List *mergeActionLists = NIL;
1884 90102 : List *mergeJoinConditions = NIL;
1885 : List *rowMarks;
1886 :
1887 90102 : if (bms_membership(root->all_result_relids) == BMS_MULTIPLE)
1888 : {
1889 : /* Inherited UPDATE/DELETE/MERGE */
1890 2816 : RelOptInfo *top_result_rel = find_base_rel(root,
1891 : parse->resultRelation);
1892 2816 : int resultRelation = -1;
1893 :
1894 : /* Pass the root result rel forward to the executor. */
1895 2816 : rootRelation = parse->resultRelation;
1896 :
1897 : /* Add only leaf children to ModifyTable. */
1898 8230 : while ((resultRelation = bms_next_member(root->leaf_result_relids,
1899 8230 : resultRelation)) >= 0)
1900 : {
1901 5414 : RelOptInfo *this_result_rel = find_base_rel(root,
1902 : resultRelation);
1903 :
1904 : /*
1905 : * Also exclude any leaf rels that have turned dummy since
1906 : * being added to the list, for example, by being excluded
1907 : * by constraint exclusion.
1908 : */
1909 5414 : if (IS_DUMMY_REL(this_result_rel))
1910 174 : continue;
1911 :
1912 : /* Build per-target-rel lists needed by ModifyTable */
1913 5240 : resultRelations = lappend_int(resultRelations,
1914 : resultRelation);
1915 5240 : if (parse->commandType == CMD_UPDATE)
1916 : {
1917 3624 : List *update_colnos = root->update_colnos;
1918 :
1919 3624 : if (this_result_rel != top_result_rel)
1920 : update_colnos =
1921 3624 : adjust_inherited_attnums_multilevel(root,
1922 : update_colnos,
1923 : this_result_rel->relid,
1924 : top_result_rel->relid);
1925 3624 : updateColnosLists = lappend(updateColnosLists,
1926 : update_colnos);
1927 : }
1928 5240 : if (parse->withCheckOptions)
1929 : {
1930 504 : List *withCheckOptions = parse->withCheckOptions;
1931 :
1932 504 : if (this_result_rel != top_result_rel)
1933 : withCheckOptions = (List *)
1934 504 : adjust_appendrel_attrs_multilevel(root,
1935 : (Node *) withCheckOptions,
1936 : this_result_rel,
1937 : top_result_rel);
1938 504 : withCheckOptionLists = lappend(withCheckOptionLists,
1939 : withCheckOptions);
1940 : }
1941 5240 : if (parse->returningList)
1942 : {
1943 840 : List *returningList = parse->returningList;
1944 :
1945 840 : if (this_result_rel != top_result_rel)
1946 : returningList = (List *)
1947 840 : adjust_appendrel_attrs_multilevel(root,
1948 : (Node *) returningList,
1949 : this_result_rel,
1950 : top_result_rel);
1951 840 : returningLists = lappend(returningLists,
1952 : returningList);
1953 : }
1954 5240 : if (parse->mergeActionList)
1955 : {
1956 : ListCell *l;
1957 522 : List *mergeActionList = NIL;
1958 :
1959 : /*
1960 : * Copy MergeActions and translate stuff that
1961 : * references attribute numbers.
1962 : */
1963 1614 : foreach(l, parse->mergeActionList)
1964 : {
1965 1092 : MergeAction *action = lfirst(l),
1966 1092 : *leaf_action = copyObject(action);
1967 :
1968 1092 : leaf_action->qual =
1969 1092 : adjust_appendrel_attrs_multilevel(root,
1970 : (Node *) action->qual,
1971 : this_result_rel,
1972 : top_result_rel);
1973 1092 : leaf_action->targetList = (List *)
1974 1092 : adjust_appendrel_attrs_multilevel(root,
1975 1092 : (Node *) action->targetList,
1976 : this_result_rel,
1977 : top_result_rel);
1978 1092 : if (leaf_action->commandType == CMD_UPDATE)
1979 592 : leaf_action->updateColnos =
1980 592 : adjust_inherited_attnums_multilevel(root,
1981 : action->updateColnos,
1982 : this_result_rel->relid,
1983 : top_result_rel->relid);
1984 1092 : mergeActionList = lappend(mergeActionList,
1985 : leaf_action);
1986 : }
1987 :
1988 522 : mergeActionLists = lappend(mergeActionLists,
1989 : mergeActionList);
1990 : }
1991 5240 : if (parse->commandType == CMD_MERGE)
1992 : {
1993 522 : Node *mergeJoinCondition = parse->mergeJoinCondition;
1994 :
1995 522 : if (this_result_rel != top_result_rel)
1996 : mergeJoinCondition =
1997 522 : adjust_appendrel_attrs_multilevel(root,
1998 : mergeJoinCondition,
1999 : this_result_rel,
2000 : top_result_rel);
2001 522 : mergeJoinConditions = lappend(mergeJoinConditions,
2002 : mergeJoinCondition);
2003 : }
2004 : }
2005 :
2006 2816 : if (resultRelations == NIL)
2007 : {
2008 : /*
2009 : * We managed to exclude every child rel, so generate a
2010 : * dummy one-relation plan using info for the top target
2011 : * rel (even though that may not be a leaf target).
2012 : * Although it's clear that no data will be updated or
2013 : * deleted, we still need to have a ModifyTable node so
2014 : * that any statement triggers will be executed. (This
2015 : * could be cleaner if we fixed nodeModifyTable.c to allow
2016 : * zero target relations, but that probably wouldn't be a
2017 : * net win.)
2018 : */
2019 30 : resultRelations = list_make1_int(parse->resultRelation);
2020 30 : if (parse->commandType == CMD_UPDATE)
2021 30 : updateColnosLists = list_make1(root->update_colnos);
2022 30 : if (parse->withCheckOptions)
2023 0 : withCheckOptionLists = list_make1(parse->withCheckOptions);
2024 30 : if (parse->returningList)
2025 18 : returningLists = list_make1(parse->returningList);
2026 30 : if (parse->mergeActionList)
2027 0 : mergeActionLists = list_make1(parse->mergeActionList);
2028 30 : if (parse->commandType == CMD_MERGE)
2029 0 : mergeJoinConditions = list_make1(parse->mergeJoinCondition);
2030 : }
2031 : }
2032 : else
2033 : {
2034 : /* Single-relation INSERT/UPDATE/DELETE/MERGE. */
2035 87286 : rootRelation = 0; /* there's no separate root rel */
2036 87286 : resultRelations = list_make1_int(parse->resultRelation);
2037 87286 : if (parse->commandType == CMD_UPDATE)
2038 11878 : updateColnosLists = list_make1(root->update_colnos);
2039 87286 : if (parse->withCheckOptions)
2040 926 : withCheckOptionLists = list_make1(parse->withCheckOptions);
2041 87286 : if (parse->returningList)
2042 2422 : returningLists = list_make1(parse->returningList);
2043 87286 : if (parse->mergeActionList)
2044 1632 : mergeActionLists = list_make1(parse->mergeActionList);
2045 87286 : if (parse->commandType == CMD_MERGE)
2046 1632 : mergeJoinConditions = list_make1(parse->mergeJoinCondition);
2047 : }
2048 :
2049 : /*
2050 : * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
2051 : * will have dealt with fetching non-locked marked rows, else we
2052 : * need to have ModifyTable do that.
2053 : */
2054 90102 : if (parse->rowMarks)
2055 0 : rowMarks = NIL;
2056 : else
2057 90102 : rowMarks = root->rowMarks;
2058 :
2059 : path = (Path *)
2060 90102 : create_modifytable_path(root, final_rel,
2061 : path,
2062 : parse->commandType,
2063 90102 : parse->canSetTag,
2064 90102 : parse->resultRelation,
2065 : rootRelation,
2066 90102 : root->partColsUpdated,
2067 : resultRelations,
2068 : updateColnosLists,
2069 : withCheckOptionLists,
2070 : returningLists,
2071 : rowMarks,
2072 : parse->onConflict,
2073 : mergeActionLists,
2074 : mergeJoinConditions,
2075 : assign_special_exec_param(root));
2076 : }
2077 :
2078 : /* And shove it into final_rel */
2079 543574 : add_path(final_rel, path);
2080 : }
2081 :
2082 : /*
2083 : * Generate partial paths for final_rel, too, if outer query levels might
2084 : * be able to make use of them.
2085 : */
2086 524130 : if (final_rel->consider_parallel && root->query_level > 1 &&
2087 25294 : !limit_needed(parse))
2088 : {
2089 : Assert(!parse->rowMarks && parse->commandType == CMD_SELECT);
2090 25234 : foreach(lc, current_rel->partial_pathlist)
2091 : {
2092 108 : Path *partial_path = (Path *) lfirst(lc);
2093 :
2094 108 : add_partial_path(final_rel, partial_path);
2095 : }
2096 : }
2097 :
2098 524130 : extra.limit_needed = limit_needed(parse);
2099 524130 : extra.limit_tuples = limit_tuples;
2100 524130 : extra.count_est = count_est;
2101 524130 : extra.offset_est = offset_est;
2102 :
2103 : /*
2104 : * If there is an FDW that's responsible for all baserels of the query,
2105 : * let it consider adding ForeignPaths.
2106 : */
2107 524130 : if (final_rel->fdwroutine &&
2108 1284 : final_rel->fdwroutine->GetForeignUpperPaths)
2109 1216 : final_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_FINAL,
2110 : current_rel, final_rel,
2111 : &extra);
2112 :
2113 : /* Let extensions possibly add some more paths */
2114 524130 : if (create_upper_paths_hook)
2115 0 : (*create_upper_paths_hook) (root, UPPERREL_FINAL,
2116 : current_rel, final_rel, &extra);
2117 :
2118 : /* Note: currently, we leave it to callers to do set_cheapest() */
2119 524130 : }
2120 :
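/*
 * In summary, a sketch of the control flow above: grouping_planner()
 * threads one RelOptInfo through the upper planning phases in a fixed
 * order,
 *
 *     scan/join rel -> UPPERREL_GROUP_AGG -> UPPERREL_WINDOW
 *                   -> UPPERREL_DISTINCT  -> UPPERREL_ORDERED
 *                   -> UPPERREL_FINAL
 *
 * applying each phase only when the query requires it, and adding the
 * LockRows, Limit, and ModifyTable steps while filling UPPERREL_FINAL.
 */
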
2121 : /*
2122 : * Do preprocessing for groupingSets clause and related data. This handles the
2123 : * preliminary steps of expanding the grouping sets, organizing them into lists
2124 : * of rollups, and preparing annotations which will later be filled in with
2125 : * size estimates.
2126 : */
2127 : static grouping_sets_data *
2128 878 : preprocess_grouping_sets(PlannerInfo *root)
2129 : {
2130 878 : Query *parse = root->parse;
2131 : List *sets;
2132 878 : int maxref = 0;
2133 : ListCell *lc_set;
2134 878 : grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
2135 :
2136 878 : parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1);
2137 :
2138 878 : gd->any_hashable = false;
2139 878 : gd->unhashable_refs = NULL;
2140 878 : gd->unsortable_refs = NULL;
2141 878 : gd->unsortable_sets = NIL;
2142 :
2143 : /*
2144 : * We don't currently make any attempt to optimize the groupClause when
2145 : * there are grouping sets, so just duplicate it in processed_groupClause.
2146 : */
2147 878 : root->processed_groupClause = parse->groupClause;
2148 :
2149 878 : if (parse->groupClause)
2150 : {
2151 : ListCell *lc;
2152 :
2153 2672 : foreach(lc, parse->groupClause)
2154 : {
2155 1836 : SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2156 1836 : Index ref = gc->tleSortGroupRef;
2157 :
2158 1836 : if (ref > maxref)
2159 1800 : maxref = ref;
2160 :
2161 1836 : if (!gc->hashable)
2162 30 : gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
2163 :
2164 1836 : if (!OidIsValid(gc->sortop))
2165 42 : gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
2166 : }
2167 : }
2168 :
2169 : /* Allocate workspace array for remapping */
2170 878 : gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
2171 :
2172 : /*
2173 : * If we have any unsortable sets, we must extract them before trying to
2174 : * prepare rollups. Unsortable sets don't go through
2175 : * reorder_grouping_sets, so we must apply the GroupingSetData annotation
2176 : * here.
2177 : */
2178 878 : if (!bms_is_empty(gd->unsortable_refs))
2179 : {
2180 42 : List *sortable_sets = NIL;
2181 : ListCell *lc;
2182 :
2183 126 : foreach(lc, parse->groupingSets)
2184 : {
2185 90 : List *gset = (List *) lfirst(lc);
2186 :
2187 90 : if (bms_overlap_list(gd->unsortable_refs, gset))
2188 : {
2189 48 : GroupingSetData *gs = makeNode(GroupingSetData);
2190 :
2191 48 : gs->set = gset;
2192 48 : gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
2193 :
2194 : /*
2195 : * We must enforce here that an unsortable set is hashable;
2196 : * later code assumes this. Parse analysis only checks that
2197 : * every individual column is either hashable or sortable.
2198 : *
2199 : * Note that passing this test doesn't guarantee we can
2200 : * generate a plan; there might be other showstoppers.
2201 : */
2202 48 : if (bms_overlap_list(gd->unhashable_refs, gset))
2203 6 : ereport(ERROR,
2204 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2205 : errmsg("could not implement GROUP BY"),
2206 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2207 : }
2208 : else
2209 42 : sortable_sets = lappend(sortable_sets, gset);
2210 : }
2211 :
2212 36 : if (sortable_sets)
2213 30 : sets = extract_rollup_sets(sortable_sets);
2214 : else
2215 6 : sets = NIL;
2216 : }
2217 : else
2218 836 : sets = extract_rollup_sets(parse->groupingSets);
2219 :
2220 2298 : foreach(lc_set, sets)
2221 : {
2222 1426 : List *current_sets = (List *) lfirst(lc_set);
2223 1426 : RollupData *rollup = makeNode(RollupData);
2224 : GroupingSetData *gs;
2225 :
2226 : /*
2227 : * Reorder the current list of grouping sets into correct prefix
2228 : * order. If only one aggregation pass is needed, try to make the
2229 : * list match the ORDER BY clause; if more than one pass is needed, we
2230 : * don't bother with that.
2231 : *
2232 : * Note that this reorders the sets from smallest-member-first to
2233 : * largest-member-first, and applies the GroupingSetData annotations,
2234 : * though the data will be filled in later.
2235 : */
2236 1426 : current_sets = reorder_grouping_sets(current_sets,
2237 1426 : (list_length(sets) == 1
2238 : ? parse->sortClause
2239 : : NIL));
2240 :
2241 : /*
2242 : * Get the initial (and therefore largest) grouping set.
2243 : */
2244 1426 : gs = linitial_node(GroupingSetData, current_sets);
2245 :
2246 : /*
2247 : * Order the groupClause appropriately. If the first grouping set is
2248 : * empty, then the groupClause must also be empty; otherwise we have
2249 : * to force the groupClause to match that grouping set's order.
2250 : *
2251 : * (The first grouping set can be empty even though parse->groupClause
2252 : * is not empty only if all non-empty grouping sets are unsortable.
2253 : * The groupClauses for hashed grouping sets are built later on.)
2254 : */
2255 1426 : if (gs->set)
2256 1384 : rollup->groupClause = preprocess_groupclause(root, gs->set);
2257 : else
2258 42 : rollup->groupClause = NIL;
2259 :
2260 : /*
2261 : * Is it hashable? We pretend empty sets are hashable even though we
2262 : * actually force them not to be hashed later. But don't bother if
2263 : * there's nothing but empty sets (since in that case we can't hash
2264 : * anything).
2265 : */
2266 1426 : if (gs->set &&
2267 1384 : !bms_overlap_list(gd->unhashable_refs, gs->set))
2268 : {
2269 1360 : rollup->hashable = true;
2270 1360 : gd->any_hashable = true;
2271 : }
2272 :
2273 : /*
2274 : * Now that we've pinned down an order for the groupClause for this
2275 : * list of grouping sets, we need to remap the entries in the grouping
2276 : * sets from sortgrouprefs to plain indices (0-based) into the
2277 : * groupClause for this collection of grouping sets. We keep the
2278 : * original form for later use, though.
2279 : */
2280 1426 : rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
2281 : current_sets,
2282 : gd->tleref_to_colnum_map);
2283 1426 : rollup->gsets_data = current_sets;
2284 :
2285 1426 : gd->rollups = lappend(gd->rollups, rollup);
2286 : }
2287 :
2288 872 : if (gd->unsortable_sets)
2289 : {
2290 : /*
2291 : * We have not yet pinned down a groupclause for this, but we will
2292 : * need index-based lists for estimation purposes. Construct
2293 : * hash_sets_idx based on the entire original groupclause for now.
2294 : */
2295 36 : gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
2296 : gd->unsortable_sets,
2297 : gd->tleref_to_colnum_map);
2298 36 : gd->any_hashable = true;
2299 : }
2300 :
2301 872 : return gd;
2302 : }
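
/*
 * A worked example of the preprocessing above (hypothetical columns):
 * GROUP BY ROLLUP(a, b) expands to the grouping sets {a,b}, {a}, {}.
 * These form a single inclusion chain, so extract_rollup_sets() returns
 * one list and the loop above builds a single RollupData whose
 * groupClause is forced to the order of the largest set, {a,b}.
 */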
2303 :
2304 : /*
2305 : * Given a groupclause and a list of GroupingSetData, return equivalent sets
2306 : * (without annotation) mapped to indexes into the given groupclause.
2307 : */
2308 : static List *
2309 4164 : remap_to_groupclause_idx(List *groupClause,
2310 : List *gsets,
2311 : int *tleref_to_colnum_map)
2312 : {
2313 4164 : int ref = 0;
2314 4164 : List *result = NIL;
2315 : ListCell *lc;
2316 :
2317 10192 : foreach(lc, groupClause)
2318 : {
2319 6028 : SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2320 :
2321 6028 : tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
2322 : }
2323 :
2324 9630 : foreach(lc, gsets)
2325 : {
2326 5466 : List *set = NIL;
2327 : ListCell *lc2;
2328 5466 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
2329 :
2330 12332 : foreach(lc2, gs->set)
2331 : {
2332 6866 : set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
2333 : }
2334 :
2335 5466 : result = lappend(result, set);
2336 : }
2337 :
2338 4164 : return result;
2339 : }
2340 :
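/*
 * A minimal standalone sketch of the remapping idea (hypothetical refs
 * and sets; compilable on its own, not part of planner.c):
 */
#include <stdio.h>

int
main(void)
{
	int		groupclause_refs[] = {7, 4, 9}; /* groupClause order, by sortgroupref */
	int		map[10] = {0};		/* analogue of tleref_to_colnum_map */
	int		gset[] = {9, 7};	/* one grouping set, as sortgrouprefs */

	for (int i = 0; i < 3; i++)
		map[groupclause_refs[i]] = i;	/* ref -> 0-based column index */

	for (int i = 0; i < 2; i++)
		printf("%d ", map[gset[i]]);	/* prints "2 0" */
	printf("\n");
	return 0;
}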
2341 :
2342 : /*
2343 : * preprocess_rowmarks - set up PlanRowMarks if needed
2344 : */
2345 : static void
2346 528056 : preprocess_rowmarks(PlannerInfo *root)
2347 : {
2348 528056 : Query *parse = root->parse;
2349 : Bitmapset *rels;
2350 : List *prowmarks;
2351 : ListCell *l;
2352 : int i;
2353 :
2354 528056 : if (parse->rowMarks)
2355 : {
2356 : /*
2357 : * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2358 : * grouping, since grouping renders a reference to individual tuple
2359 : * CTIDs invalid. This is also checked at parse time, but that's
2360 : * insufficient because of rule substitution, query pullup, etc.
2361 : */
2362 7764 : CheckSelectLocking(parse, linitial_node(RowMarkClause,
2363 : parse->rowMarks)->strength);
2364 : }
2365 : else
2366 : {
2367 : /*
2368 : * We only need rowmarks for UPDATE, DELETE, MERGE, or FOR [KEY]
2369 : * UPDATE/SHARE.
2370 : */
2371 520292 : if (parse->commandType != CMD_UPDATE &&
2372 506430 : parse->commandType != CMD_DELETE &&
2373 502134 : parse->commandType != CMD_MERGE)
2374 500262 : return;
2375 : }
2376 :
2377 : /*
2378 : * We need to have rowmarks for all base relations except the target. We
2379 : * make a bitmapset of all base rels and then remove the items we don't
2380 : * need or have FOR [KEY] UPDATE/SHARE marks for.
2381 : */
2382 27794 : rels = get_relids_in_jointree((Node *) parse->jointree, false, false);
2383 27794 : if (parse->resultRelation)
2384 20030 : rels = bms_del_member(rels, parse->resultRelation);
2385 :
2386 : /*
2387 : * Convert RowMarkClauses to PlanRowMark representation.
2388 : */
2389 27794 : prowmarks = NIL;
2390 35836 : foreach(l, parse->rowMarks)
2391 : {
2392 8042 : RowMarkClause *rc = lfirst_node(RowMarkClause, l);
2393 8042 : RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2394 : PlanRowMark *newrc;
2395 :
2396 : /*
2397 : * Currently, it is syntactically impossible to have FOR UPDATE et al
2398 : * applied to an update/delete target rel. If that ever becomes
2399 : * possible, we should drop the target from the PlanRowMark list.
2400 : */
2401 : Assert(rc->rti != parse->resultRelation);
2402 :
2403 : /*
2404 : * Ignore RowMarkClauses for subqueries; they aren't real tables and
2405 : * can't support true locking. Subqueries that got flattened into the
2406 : * main query should be ignored completely. Any that didn't will get
2407 : * ROW_MARK_COPY items in the next loop.
2408 : */
2409 8042 : if (rte->rtekind != RTE_RELATION)
2410 108 : continue;
2411 :
2412 7934 : rels = bms_del_member(rels, rc->rti);
2413 :
2414 7934 : newrc = makeNode(PlanRowMark);
2415 7934 : newrc->rti = newrc->prti = rc->rti;
2416 7934 : newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2417 7934 : newrc->markType = select_rowmark_type(rte, rc->strength);
2418 7934 : newrc->allMarkTypes = (1 << newrc->markType);
2419 7934 : newrc->strength = rc->strength;
2420 7934 : newrc->waitPolicy = rc->waitPolicy;
2421 7934 : newrc->isParent = false;
2422 :
2423 7934 : prowmarks = lappend(prowmarks, newrc);
2424 : }
2425 :
2426 : /*
2427 : * Now, add rowmarks for any non-target, non-locked base relations.
2428 : */
2429 27794 : i = 0;
2430 66998 : foreach(l, parse->rtable)
2431 : {
2432 39204 : RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
2433 : PlanRowMark *newrc;
2434 :
2435 39204 : i++;
2436 39204 : if (!bms_is_member(i, rels))
2437 35442 : continue;
2438 :
2439 3762 : newrc = makeNode(PlanRowMark);
2440 3762 : newrc->rti = newrc->prti = i;
2441 3762 : newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2442 3762 : newrc->markType = select_rowmark_type(rte, LCS_NONE);
2443 3762 : newrc->allMarkTypes = (1 << newrc->markType);
2444 3762 : newrc->strength = LCS_NONE;
2445 3762 : newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
2446 3762 : newrc->isParent = false;
2447 :
2448 3762 : prowmarks = lappend(prowmarks, newrc);
2449 : }
2450 :
2451 27794 : root->rowMarks = prowmarks;
2452 : }
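
/*
 * For illustration (a sketch with hypothetical tables): in
 *     UPDATE t SET x = s.x FROM s WHERE t.id = s.id
 * the target rel t gets no PlanRowMark here, while s gets one with
 * strength LCS_NONE, which select_rowmark_type() below maps to
 * ROW_MARK_REFERENCE for a regular table (re-fetchable, not locked).
 */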
2453 :
2454 : /*
2455 : * Select RowMarkType to use for a given table
2456 : */
2457 : RowMarkType
2458 14028 : select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
2459 : {
2460 14028 : if (rte->rtekind != RTE_RELATION)
2461 : {
2462 : /* If it's not a table at all, use ROW_MARK_COPY */
2463 1512 : return ROW_MARK_COPY;
2464 : }
2465 12516 : else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2466 : {
2467 : /* Let the FDW select the rowmark type, if it wants to */
2468 212 : FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2469 :
2470 212 : if (fdwroutine->GetForeignRowMarkType != NULL)
2471 0 : return fdwroutine->GetForeignRowMarkType(rte, strength);
2472 : /* Otherwise, use ROW_MARK_COPY by default */
2473 212 : return ROW_MARK_COPY;
2474 : }
2475 : else
2476 : {
2477 : /* Regular table, apply the appropriate lock type */
2478 12304 : switch (strength)
2479 : {
2480 2480 : case LCS_NONE:
2481 :
2482 : /*
2483 : * We don't need a tuple lock, only the ability to re-fetch
2484 : * the row.
2485 : */
2486 2480 : return ROW_MARK_REFERENCE;
2487 : break;
2488 7928 : case LCS_FORKEYSHARE:
2489 7928 : return ROW_MARK_KEYSHARE;
2490 : break;
2491 300 : case LCS_FORSHARE:
2492 300 : return ROW_MARK_SHARE;
2493 : break;
2494 72 : case LCS_FORNOKEYUPDATE:
2495 72 : return ROW_MARK_NOKEYEXCLUSIVE;
2496 : break;
2497 1524 : case LCS_FORUPDATE:
2498 1524 : return ROW_MARK_EXCLUSIVE;
2499 : break;
2500 : }
2501 0 : elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2502 : return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */
2503 : }
2504 : }
2505 :
2506 : /*
2507 : * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2508 : *
2509 : * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2510 : * results back in *count_est and *offset_est. These variables are set to
2511 : * 0 if the corresponding clause is not present, and -1 if it's present
2512 : * but we couldn't estimate the value for it. (The "0" convention is OK
2513 : * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2514 : * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
2515 : * usual practice of never estimating less than one row.) These values will
2516 : * be passed to create_limit_path, which see if you change this code.
2517 : *
2518 : * The return value is the suitably adjusted tuple_fraction to use for
2519 : * planning the query. This adjustment is not overridable, since it reflects
2520 : * plan actions that grouping_planner() will certainly take, not assumptions
2521 : * about context.
2522 : */
2523 : static double
2524 4956 : preprocess_limit(PlannerInfo *root, double tuple_fraction,
2525 : int64 *offset_est, int64 *count_est)
2526 : {
2527 4956 : Query *parse = root->parse;
2528 : Node *est;
2529 : double limit_fraction;
2530 :
2531 : /* Should not be called unless LIMIT or OFFSET */
2532 : Assert(parse->limitCount || parse->limitOffset);
2533 :
2534 : /*
2535 : * Try to obtain the clause values. We use estimate_expression_value
2536 : * primarily because it can sometimes do something useful with Params.
2537 : */
2538 4956 : if (parse->limitCount)
2539 : {
2540 4458 : est = estimate_expression_value(root, parse->limitCount);
2541 4458 : if (est && IsA(est, Const))
2542 : {
2543 4452 : if (((Const *) est)->constisnull)
2544 : {
2545 : /* NULL indicates LIMIT ALL, ie, no limit */
2546 0 : *count_est = 0; /* treat as not present */
2547 : }
2548 : else
2549 : {
2550 4452 : *count_est = DatumGetInt64(((Const *) est)->constvalue);
2551 4452 : if (*count_est <= 0)
2552 150 : *count_est = 1; /* force to at least 1 */
2553 : }
2554 : }
2555 : else
2556 6 : *count_est = -1; /* can't estimate */
2557 : }
2558 : else
2559 498 : *count_est = 0; /* not present */
2560 :
2561 4956 : if (parse->limitOffset)
2562 : {
2563 870 : est = estimate_expression_value(root, parse->limitOffset);
2564 870 : if (est && IsA(est, Const))
2565 : {
2566 846 : if (((Const *) est)->constisnull)
2567 : {
2568 : /* Treat NULL as no offset; the executor will too */
2569 0 : *offset_est = 0; /* treat as not present */
2570 : }
2571 : else
2572 : {
2573 846 : *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2574 846 : if (*offset_est < 0)
2575 0 : *offset_est = 0; /* treat as not present */
2576 : }
2577 : }
2578 : else
2579 24 : *offset_est = -1; /* can't estimate */
2580 : }
2581 : else
2582 4086 : *offset_est = 0; /* not present */
2583 :
2584 4956 : if (*count_est != 0)
2585 : {
2586 : /*
2587 : * A LIMIT clause limits the absolute number of tuples returned.
2588 : * However, if it's not a constant LIMIT then we have to guess; for
2589 : * lack of a better idea, assume 10% of the plan's result is wanted.
2590 : */
2591 4458 : if (*count_est < 0 || *offset_est < 0)
2592 : {
2593 : /* LIMIT or OFFSET is an expression ... punt ... */
2594 24 : limit_fraction = 0.10;
2595 : }
2596 : else
2597 : {
2598 : /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2599 4434 : limit_fraction = (double) *count_est + (double) *offset_est;
2600 : }
2601 :
2602 : /*
2603 : * If we have absolute limits from both caller and LIMIT, use the
2604 : * smaller value; likewise if they are both fractional. If one is
2605 : * fractional and the other absolute, we can't easily determine which
2606 : * is smaller, but we use the heuristic that the absolute will usually
2607 : * be smaller.
2608 : */
2609 4458 : if (tuple_fraction >= 1.0)
2610 : {
2611 6 : if (limit_fraction >= 1.0)
2612 : {
2613 : /* both absolute */
2614 6 : tuple_fraction = Min(tuple_fraction, limit_fraction);
2615 : }
2616 : else
2617 : {
2618 : /* caller absolute, limit fractional; use caller's value */
2619 : }
2620 : }
2621 4452 : else if (tuple_fraction > 0.0)
2622 : {
2623 148 : if (limit_fraction >= 1.0)
2624 : {
2625 : /* caller fractional, limit absolute; use limit */
2626 148 : tuple_fraction = limit_fraction;
2627 : }
2628 : else
2629 : {
2630 : /* both fractional */
2631 0 : tuple_fraction = Min(tuple_fraction, limit_fraction);
2632 : }
2633 : }
2634 : else
2635 : {
2636 : /* no info from caller, just use limit */
2637 4304 : tuple_fraction = limit_fraction;
2638 : }
2639 : }
2640 498 : else if (*offset_est != 0 && tuple_fraction > 0.0)
2641 : {
2642 : /*
2643 : * We have an OFFSET but no LIMIT. This acts entirely differently
2644 : * from the LIMIT case: here, we need to increase rather than decrease
2645 : * the caller's tuple_fraction, because the OFFSET acts to cause more
2646 : * tuples to be fetched instead of fewer. This only matters if we got
2647 : * a tuple_fraction > 0, however.
2648 : *
2649 : * As above, use 10% if OFFSET is present but unestimatable.
2650 : */
2651 12 : if (*offset_est < 0)
2652 0 : limit_fraction = 0.10;
2653 : else
2654 12 : limit_fraction = (double) *offset_est;
2655 :
2656 : /*
2657 : * If we have absolute counts from both caller and OFFSET, add them
2658 : * together; likewise if they are both fractional. If one is
2659 : * fractional and the other absolute, we want to take the larger, and
2660 : * we heuristically assume that's the fractional one.
2661 : */
2662 12 : if (tuple_fraction >= 1.0)
2663 : {
2664 0 : if (limit_fraction >= 1.0)
2665 : {
2666 : /* both absolute, so add them together */
2667 0 : tuple_fraction += limit_fraction;
2668 : }
2669 : else
2670 : {
2671 : /* caller absolute, limit fractional; use limit */
2672 0 : tuple_fraction = limit_fraction;
2673 : }
2674 : }
2675 : else
2676 : {
2677 12 : if (limit_fraction >= 1.0)
2678 : {
2679 : /* caller fractional, limit absolute; use caller's value */
2680 : }
2681 : else
2682 : {
2683 : /* both fractional, so add them together */
2684 0 : tuple_fraction += limit_fraction;
2685 0 : if (tuple_fraction >= 1.0)
2686 0 : tuple_fraction = 0.0; /* assume fetch all */
2687 : }
2688 : }
2689 : }
2690 :
2691 4956 : return tuple_fraction;
2692 : }
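
/*
 * A standalone sketch (simplified, with both estimates assumed known) of
 * how the rules above combine a caller-supplied tuple_fraction with a
 * LIMIT-derived limit_fraction; compilable on its own, not planner code.
 */
#include <stdio.h>

static double
combine_limit(double tuple_fraction, double limit_fraction)
{
	if (tuple_fraction >= 1.0)
	{
		/* both absolute: take the smaller */
		if (limit_fraction >= 1.0)
			return (tuple_fraction < limit_fraction) ?
				tuple_fraction : limit_fraction;
		return tuple_fraction;	/* caller absolute, limit fractional */
	}
	if (tuple_fraction > 0.0)
	{
		/* caller fractional: an absolute limit is assumed smaller */
		if (limit_fraction >= 1.0)
			return limit_fraction;
		return (tuple_fraction < limit_fraction) ?
			tuple_fraction : limit_fraction;	/* both fractional */
	}
	return limit_fraction;		/* no info from caller, just use limit */
}

int
main(void)
{
	printf("%.2f\n", combine_limit(0.0, 120.0));	/* LIMIT 100 OFFSET 20 -> 120.00 */
	printf("%.2f\n", combine_limit(0.10, 50.0));	/* cursor fetch + LIMIT 50 -> 50.00 */
	return 0;
}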
2693 :
2694 : /*
2695 : * limit_needed - do we actually need a Limit plan node?
2696 : *
2697 : * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2698 : * a Limit node. This is worth checking for because "OFFSET 0" is a common
2699 : * locution for an optimization fence. (Because other places in the planner
2700 : * merely check whether parse->limitOffset isn't NULL, it will still work as
2701 : * an optimization fence --- we're just suppressing unnecessary run-time
2702 : * overhead.)
2703 : *
2704 : * This might look like it could be merged into preprocess_limit, but there's
2705 : * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
2706 : * in preprocess_limit it's good enough to consider estimated values.
2707 : */
2708 : bool
2709 1105672 : limit_needed(Query *parse)
2710 : {
2711 : Node *node;
2712 :
2713 1105672 : node = parse->limitCount;
2714 1105672 : if (node)
2715 : {
2716 10618 : if (IsA(node, Const))
2717 : {
2718 : /* NULL indicates LIMIT ALL, ie, no limit */
2719 10418 : if (!((Const *) node)->constisnull)
2720 10418 : return true; /* LIMIT with a constant value */
2721 : }
2722 : else
2723 200 : return true; /* non-constant LIMIT */
2724 : }
2725 :
2726 1095054 : node = parse->limitOffset;
2727 1095054 : if (node)
2728 : {
2729 1456 : if (IsA(node, Const))
2730 : {
2731 : /* Treat NULL as no offset; the executor would too */
2732 1160 : if (!((Const *) node)->constisnull)
2733 : {
2734 1160 : int64 offset = DatumGetInt64(((Const *) node)->constvalue);
2735 :
2736 1160 : if (offset != 0)
2737 110 : return true; /* OFFSET with a nonzero value */
2738 : }
2739 : }
2740 : else
2741 296 : return true; /* non-constant OFFSET */
2742 : }
2743 :
2744 1094648 : return false; /* don't need a Limit plan node */
2745 : }
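
/*
 * For illustration (hypothetical queries): "SELECT * FROM t OFFSET 0"
 * keeps its effect as an optimization fence but needs no Limit node,
 * since the offset is a hard constant zero; "... OFFSET 1" or a
 * non-constant "... LIMIT $1" must keep the node so the executor can
 * apply (or first evaluate) the bound at run time.
 */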
2746 :
2747 : /*
2748 : * preprocess_groupclause - do preparatory work on GROUP BY clause
2749 : *
2750 : * The idea here is to adjust the ordering of the GROUP BY elements
2751 : * (which in itself is semantically insignificant) to match ORDER BY,
2752 : * thereby allowing a single sort operation to both implement the ORDER BY
2753 : * requirement and set up for a Unique step that implements GROUP BY.
2754 : * We also consider a partial match between GROUP BY and ORDER BY elements,
2755 : * which can allow ORDER BY to be implemented using an incremental sort.
2756 : *
2757 : * We also consider other orderings of the GROUP BY elements, which could
2758 : * match the sort ordering of other possible plans (eg an indexscan) and
2759 : * thereby reduce cost. This is implemented during the generation of grouping
2760 : * paths. See get_useful_group_keys_orderings() for details.
2761 : *
2762 : * Note: we need no comparable processing of the distinctClause because
2763 : * the parser already enforced that that matches ORDER BY.
2764 : *
2765 : * Note: we return a fresh List, but its elements are the same
2766 : * SortGroupClauses appearing in parse->groupClause. This is important
2767 : * because later processing may modify the processed_groupClause list.
2768 : *
2769 : * For grouping sets, the order of items is instead forced to agree with that
2770 : * of the grouping set (and items not in the grouping set are skipped). The
2771 : * work of sorting the order of grouping set elements to match the ORDER BY if
2772 : * possible is done elsewhere.
2773 : */
2774 : static List *
2775 7720 : preprocess_groupclause(PlannerInfo *root, List *force)
2776 : {
2777 7720 : Query *parse = root->parse;
2778 7720 : List *new_groupclause = NIL;
2779 : ListCell *sl;
2780 : ListCell *gl;
2781 :
2782 : /* For grouping sets, we need to force the ordering */
2783 7720 : if (force)
2784 : {
2785 10036 : foreach(sl, force)
2786 : {
2787 5950 : Index ref = lfirst_int(sl);
2788 5950 : SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
2789 :
2790 5950 : new_groupclause = lappend(new_groupclause, cl);
2791 : }
2792 :
2793 4086 : return new_groupclause;
2794 : }
2795 :
2796 : /* If no ORDER BY, nothing useful to do here */
2797 3634 : if (parse->sortClause == NIL)
2798 2058 : return list_copy(parse->groupClause);
2799 :
2800 : /*
2801 : * Scan the ORDER BY clause and construct a list of matching GROUP BY
2802 : * items, but only as far as we can make a matching prefix.
2803 : *
2804 : * This code assumes that the sortClause contains no duplicate items.
2805 : */
2806 3058 : foreach(sl, parse->sortClause)
2807 : {
2808 2128 : SortGroupClause *sc = lfirst_node(SortGroupClause, sl);
2809 :
2810 3224 : foreach(gl, parse->groupClause)
2811 : {
2812 2578 : SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
2813 :
2814 2578 : if (equal(gc, sc))
2815 : {
2816 1482 : new_groupclause = lappend(new_groupclause, gc);
2817 1482 : break;
2818 : }
2819 : }
2820 2128 : if (gl == NULL)
2821 646 : break; /* no match, so stop scanning */
2822 : }
2823 :
2825 : /* If no match at all, no point in reordering GROUP BY */
2826 1576 : if (new_groupclause == NIL)
2827 298 : return list_copy(parse->groupClause);
2828 :
2829 : /*
2830 : * Add any remaining GROUP BY items to the new list. We don't require a
2831 : * complete match, because even a partial match allows ORDER BY to be
2832 : * implemented using an incremental sort. Also, give up if there are any
2833 : * non-sortable GROUP BY items, since then there's no hope anyway.
2834 : */
2835 2926 : foreach(gl, parse->groupClause)
2836 : {
2837 1648 : SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
2838 :
2839 1648 : if (list_member_ptr(new_groupclause, gc))
2840 1482 : continue; /* it matched an ORDER BY item */
2841 166 : if (!OidIsValid(gc->sortop)) /* give up, GROUP BY can't be sorted */
2842 0 : return list_copy(parse->groupClause);
2843 166 : new_groupclause = lappend(new_groupclause, gc);
2844 : }
2845 :
2846 : /* Success --- install the rearranged GROUP BY list */
2847 : Assert(list_length(parse->groupClause) == list_length(new_groupclause));
2848 1278 : return new_groupclause;
2849 : }
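
/*
 * A worked example of the reordering above (hypothetical columns): given
 *     GROUP BY b, a, c ... ORDER BY a, b
 * the scan of the sortClause yields the matching prefix (a, b); the
 * leftover GROUP BY item c is then appended, giving (a, b, c). A single
 * sort on (a, b, c) satisfies ORDER BY via its prefix and also feeds
 * sorted grouping.
 */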
2850 :
2851 : /*
2852 : * Extract lists of grouping sets that can be implemented using a single
2853 : * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
2854 : *
2855 : * Input must be sorted with smallest sets first. Result has each sublist
2856 : * sorted with smallest sets first.
2857 : *
2858 : * We want to produce the absolute minimum possible number of lists here to
2859 : * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
2860 : * of finding the minimal partition of a partially-ordered set into chains
2861 : * (which is what we need, taking the list of grouping sets as a poset ordered
2862 : * by set inclusion) can be mapped to the problem of finding the maximum
2863 : * cardinality matching on a bipartite graph, which is solvable in polynomial
2864 : * time with a worst case of O(n^2.5) and usually much
2865 : * better. Since our N is at most 4096, we don't need to consider fallbacks to
2866 : * heuristic or approximate methods. (Planning time for a 12-d cube is under
2867 : * half a second on my modest system even with optimization off and assertions
2868 : * on.)
2869 : */
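/*
 * For instance (a sketch): the sets {}, {a}, {b}, {a,b} produced by
 * CUBE(a, b), viewed as a poset under inclusion, admit the minimal chain
 * partition
 *     {} < {a} < {a,b}    and    {b}
 * so this function returns two lists, and the planner needs only two
 * sorted aggregation passes rather than one per set.
 */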
2870 : static List *
2871 866 : extract_rollup_sets(List *groupingSets)
2872 : {
2873 866 : int num_sets_raw = list_length(groupingSets);
2874 866 : int num_empty = 0;
2875 866 : int num_sets = 0; /* distinct sets */
2876 866 : int num_chains = 0;
2877 866 : List *result = NIL;
2878 : List **results;
2879 : List **orig_sets;
2880 : Bitmapset **set_masks;
2881 : int *chains;
2882 : short **adjacency;
2883 : short *adjacency_buf;
2884 : BipartiteMatchState *state;
2885 : int i;
2886 : int j;
2887 : int j_size;
2888 866 : ListCell *lc1 = list_head(groupingSets);
2889 : ListCell *lc;
2890 :
2891 : /*
2892 : * Start by stripping out empty sets. The algorithm doesn't require this,
2893 : * but the planner currently needs all empty sets to be returned in the
2894 : * first list, so we strip them here and add them back after.
2895 : */
2896 1476 : while (lc1 && lfirst(lc1) == NIL)
2897 : {
2898 610 : ++num_empty;
2899 610 : lc1 = lnext(groupingSets, lc1);
2900 : }
2901 :
2902 : /* bail out now if it turns out that all we had were empty sets. */
2903 866 : if (!lc1)
2904 42 : return list_make1(groupingSets);
2905 :
2906 : /*----------
2907 : * We don't strictly need to remove duplicate sets here, but if we don't,
2908 : * they tend to become scattered through the result, which is a bit
2909 : * confusing (and irritating if we ever decide to optimize them out).
2910 : * So we remove them here and add them back after.
2911 : *
2912 : * For each non-duplicate set, we fill in the following:
2913 : *
2914 : * orig_sets[i] = list of the original set lists
2915 : * set_masks[i] = bitmapset for testing inclusion
2916 : * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
2917 : *
2918 : * chains[i] will be the result group this set is assigned to.
2919 : *
2920 : * We index all of these from 1 rather than 0 because it is convenient
2921 : * to leave 0 free for the NIL node in the graph algorithm.
2922 : *----------
2923 : */
2924 824 : orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
2925 824 : set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
2926 824 : adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
2927 824 : adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
2928 :
2929 824 : j_size = 0;
2930 824 : j = 0;
2931 824 : i = 1;
2932 :
2933 2936 : for_each_cell(lc, groupingSets, lc1)
2934 : {
2935 2112 : List *candidate = (List *) lfirst(lc);
2936 2112 : Bitmapset *candidate_set = NULL;
2937 : ListCell *lc2;
2938 2112 : int dup_of = 0;
2939 :
2940 5118 : foreach(lc2, candidate)
2941 : {
2942 3006 : candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
2943 : }
2944 :
2945 : /* we can only be a dup if we're the same length as a previous set */
2946 2112 : if (j_size == list_length(candidate))
2947 : {
2948 : int k;
2949 :
2950 1844 : for (k = j; k < i; ++k)
2951 : {
2952 1188 : if (bms_equal(set_masks[k], candidate_set))
2953 : {
2954 158 : dup_of = k;
2955 158 : break;
2956 : }
2957 : }
2958 : }
2959 1298 : else if (j_size < list_length(candidate))
2960 : {
2961 1298 : j_size = list_length(candidate);
2962 1298 : j = i;
2963 : }
2964 :
2965 2112 : if (dup_of > 0)
2966 : {
2967 158 : orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
2968 158 : bms_free(candidate_set);
2969 : }
2970 : else
2971 : {
2972 : int k;
2973 1954 : int n_adj = 0;
2974 :
2975 1954 : orig_sets[i] = list_make1(candidate);
2976 1954 : set_masks[i] = candidate_set;
2977 :
2978 : /* fill in adjacency list; no need to compare equal-size sets */
2979 :
2980 3226 : for (k = j - 1; k > 0; --k)
2981 : {
2982 1272 : if (bms_is_subset(set_masks[k], candidate_set))
2983 1110 : adjacency_buf[++n_adj] = k;
2984 : }
2985 :
2986 1954 : if (n_adj > 0)
2987 : {
2988 598 : adjacency_buf[0] = n_adj;
2989 598 : adjacency[i] = palloc((n_adj + 1) * sizeof(short));
2990 598 : memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
2991 : }
2992 : else
2993 1356 : adjacency[i] = NULL;
2994 :
2995 1954 : ++i;
2996 : }
2997 : }
2998 :
2999 824 : num_sets = i - 1;
3000 :
3001 : /*
3002 : * Apply the graph matching algorithm to do the work.
3003 : */
3004 824 : state = BipartiteMatch(num_sets, num_sets, adjacency);
3005 :
3006 : /*
3007 : * Now, the state->pair* fields have the info we need to assign sets to
3008 : * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
3009 : * pair_vu[v] = u (both will be true, but we check both so that we can do
3010 : * it in one pass)
3011 : */
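/*
 * In the running example, one maximum matching pairs set 3 = (a,b) with
 * set 1 = (a) and set 4 = (a,b,c) with set 3, i.e. pair_uv[3] = 1 and
 * pair_uv[4] = 3. The loop below then yields chains[1] = chains[3] =
 * chains[4] = 1 and chains[2] = 2, recovering the two chains of the
 * example.
 */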
3012 824 : chains = palloc0((num_sets + 1) * sizeof(int));
3013 :
3014 2778 : for (i = 1; i <= num_sets; ++i)
3015 : {
3016 1954 : int u = state->pair_vu[i];
3017 1954 : int v = state->pair_uv[i];
3018 :
3019 1954 : if (u > 0 && u < i)
3020 0 : chains[i] = chains[u];
3021 1954 : else if (v > 0 && v < i)
3022 570 : chains[i] = chains[v];
3023 : else
3024 1384 : chains[i] = ++num_chains;
3025 : }
3026 :
3027 : /* build result lists. */
3028 824 : results = palloc0((num_chains + 1) * sizeof(List *));
3029 :
3030 2778 : for (i = 1; i <= num_sets; ++i)
3031 : {
3032 1954 : int c = chains[i];
3033 :
3034 : Assert(c > 0);
3035 :
3036 1954 : results[c] = list_concat(results[c], orig_sets[i]);
3037 : }
3038 :
3039 : /* push any empty sets back on the first list. */
3040 1344 : while (num_empty-- > 0)
3041 520 : results[1] = lcons(NIL, results[1]);
3042 :
3043 : /* make result list */
3044 2208 : for (i = 1; i <= num_chains; ++i)
3045 1384 : result = lappend(result, results[i]);
3046 :
3047 : /*
3048 : * Free all the things.
3049 : *
3050 : * (This is over-fussy for small sets but for large sets we could have
3051 : * tied up a nontrivial amount of memory.)
3052 : */
3053 824 : BipartiteMatchFree(state);
3054 824 : pfree(results);
3055 824 : pfree(chains);
3056 2778 : for (i = 1; i <= num_sets; ++i)
3057 1954 : if (adjacency[i])
3058 598 : pfree(adjacency[i]);
3059 824 : pfree(adjacency);
3060 824 : pfree(adjacency_buf);
3061 824 : pfree(orig_sets);
3062 2778 : for (i = 1; i <= num_sets; ++i)
3063 1954 : bms_free(set_masks[i]);
3064 824 : pfree(set_masks);
3065 :
3066 824 : return result;
3067 : }
3068 :
3069 : /*
3070 : * Reorder the elements of a list of grouping sets such that they have correct
3071 : * prefix relationships. Also inserts the GroupingSetData annotations.
3072 : *
3073 : * The input must be ordered with smallest sets first; the result is returned
3074 : * with largest sets first. Note that the result shares no list substructure
3075 : * with the input, so it's safe for the caller to modify it later.
3076 : *
3077 : * If we're passed in a sortclause, we follow its order of columns to the
3078 : * extent possible, to minimize the chance that we add unnecessary sorts.
3079 : * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3080 : * gets implemented in one pass.)
3081 : */
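/*
 * For example, the (smallest-first) input (c), (b,c), (a,b,c) with
 * sortclause c, b, a comes out as (c,b,a), (c,b), (c): each set extends
 * the previous one, with columns ordered to follow the sortclause, so a
 * single sort on (c, b, a) serves both the whole rollup and the final
 * ORDER BY.
 */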
3082 : static List *
3083 1426 : reorder_grouping_sets(List *groupingSets, List *sortclause)
3084 : {
3085 : ListCell *lc;
3086 1426 : List *previous = NIL;
3087 1426 : List *result = NIL;
3088 :
3089 4148 : foreach(lc, groupingSets)
3090 : {
3091 2722 : List *candidate = (List *) lfirst(lc);
3092 2722 : List *new_elems = list_difference_int(candidate, previous);
3093 2722 : GroupingSetData *gs = makeNode(GroupingSetData);
3094 :
3095 2886 : while (list_length(sortclause) > list_length(previous) &&
3096 : new_elems != NIL)
3097 : {
3098 272 : SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3099 272 : int ref = sc->tleSortGroupRef;
3100 :
3101 272 : if (list_member_int(new_elems, ref))
3102 : {
3103 164 : previous = lappend_int(previous, ref);
3104 164 : new_elems = list_delete_int(new_elems, ref);
3105 : }
3106 : else
3107 : {
3108 : /* diverged from the sortclause; give up on it */
3109 108 : sortclause = NIL;
3110 108 : break;
3111 : }
3112 : }
3113 :
3114 2722 : previous = list_concat(previous, new_elems);
3115 :
3116 2722 : gs->set = list_copy(previous);
3117 2722 : result = lcons(gs, result);
3118 : }
3119 :
3120 1426 : list_free(previous);
3121 :
3122 1426 : return result;
3123 : }
3124 :
3125 : /*
3126 : * has_volatile_pathkey
3127 : * Returns true if any PathKey in 'keys' has an EquivalenceClass
3128 : * containing a volatile function. Otherwise returns false.
3129 : */
3130 : static bool
3131 3096 : has_volatile_pathkey(List *keys)
3132 : {
3133 : ListCell *lc;
3134 :
3135 6336 : foreach(lc, keys)
3136 : {
3137 3258 : PathKey *pathkey = lfirst_node(PathKey, lc);
3138 :
3139 3258 : if (pathkey->pk_eclass->ec_has_volatile)
3140 18 : return true;
3141 : }
3142 :
3143 3078 : return false;
3144 : }
3145 :
3146 : /*
3147 : * adjust_group_pathkeys_for_groupagg
3148 : * Add pathkeys to root->group_pathkeys to reflect the best set of
3149 : * pre-ordered input for ordered aggregates.
3150 : *
3151 : * We define "best" as the pathkeys that suit the largest number of
3152 : * aggregate functions. We find these by looking at the first ORDER BY /
3153 : * DISTINCT aggregate and take the pathkeys for that before searching for
3154 : * other aggregates that require the same, or a stricter, set of pathkeys.
3155 : * We then repeat that process for any remaining aggregates with different
3156 : * pathkeys; if we find another set of pathkeys that suits a larger number
3157 : * of aggregates, we select those pathkeys instead.
3158 : *
3159 : * When the best pathkeys are found we also mark each Aggref that can use
3160 : * those pathkeys as aggpresorted = true.
3161 : *
3162 : * Note: When an aggregate function's ORDER BY / DISTINCT clause contains any
3163 : * volatile functions, we never make use of these pathkeys. We want to ensure
3164 : * that sorts using volatile functions are done independently in each Aggref
3165 : * rather than once at the query level. If we were to allow this then Aggrefs
3166 : * with compatible sort orders would all transition their rows in the same
3167 : * order if those pathkeys were deemed to be the best pathkeys to sort on.
3168 : * Whereas, if some other Aggref's pathkeys happened to be deemed the
3169 : * better ones to sort on, the volatile-function Aggrefs would be left to
3170 : * perform their sorts individually. To avoid this inconsistent behavior,
3171 : * which could make an Aggref's results depend on what other Aggrefs the
3172 : * query contains, we always force Aggrefs with volatile functions to
3173 : * perform their own sorts.
3174 : */
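/*
 * For example, in the hypothetical query
 *
 *		SELECT sum(a ORDER BY b), sum(a ORDER BY b, c), sum(a ORDER BY d)
 *		FROM tab;
 *
 * the pathkeys (b, c) suit the first two aggregates but not the third, so
 * we set root->group_pathkeys to (b, c) and mark only those two Aggrefs as
 * aggpresorted; the third Aggref still sorts its own input.
 */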
3175 : static void
3176 2700 : adjust_group_pathkeys_for_groupagg(PlannerInfo *root)
3177 : {
3178 2700 : List *grouppathkeys = root->group_pathkeys;
3179 : List *bestpathkeys;
3180 : Bitmapset *bestaggs;
3181 : Bitmapset *unprocessed_aggs;
3182 : ListCell *lc;
3183 : int i;
3184 :
3185 : /* Shouldn't be here if there are grouping sets */
3186 : Assert(root->parse->groupingSets == NIL);
3187 : /* Shouldn't be here unless there are some ordered aggregates */
3188 : Assert(root->numOrderedAggs > 0);
3189 :
3190 : /* Do nothing if disabled */
3191 2700 : if (!enable_presorted_aggregate)
3192 6 : return;
3193 :
3194 : /*
3195 : * Make a first pass over all AggInfos to collect a Bitmapset containing
3196 : * the indexes of all AggInfos to be processed below.
3197 : */
3198 2694 : unprocessed_aggs = NULL;
3199 6072 : foreach(lc, root->agginfos)
3200 : {
3201 3378 : AggInfo *agginfo = lfirst_node(AggInfo, lc);
3202 3378 : Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3203 :
3204 3378 : if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
3205 264 : continue;
3206 :
3207 : /* Skip unless there's a DISTINCT or ORDER BY clause */
3208 3114 : if (aggref->aggdistinct == NIL && aggref->aggorder == NIL)
3209 300 : continue;
3210 :
3211 : /* Additional safety checks are needed if there's a FILTER clause */
3212 2814 : if (aggref->aggfilter != NULL)
3213 : {
3214 : ListCell *lc2;
3215 54 : bool allow_presort = true;
3216 :
3217 : /*
3218 : * When the Aggref has a FILTER clause, it's possible that the
3219 : * filter removes rows that cannot be sorted because the
3220 : * expression to sort by results in an error during its
3221 : * evaluation. This is a problem for presorting as that happens
3222 : * before the FILTER, whereas without presorting, the Aggregate
3223 : * node will apply the FILTER *before* sorting. So that we never
3224 : * try to sort anything that might error, here we aim to skip over
3225 : * any Aggrefs with arguments with expressions which, when
3226 : * evaluated, could cause an ERROR. Vars and Consts are ok. There
3227 : * may be more cases that should be allowed, but more thought
3228 : * needs to be given. Err on the side of caution.
3229 : */
3230 102 : foreach(lc2, aggref->args)
3231 : {
3232 72 : TargetEntry *tle = (TargetEntry *) lfirst(lc2);
3233 72 : Expr *expr = tle->expr;
3234 :
3235 84 : while (IsA(expr, RelabelType))
3236 12 : expr = (Expr *) (castNode(RelabelType, expr))->arg;
3237 :
3238 : /* Common case, Vars and Consts are ok */
3239 72 : if (IsA(expr, Var) || IsA(expr, Const))
3240 48 : continue;
3241 :
3242 : /* Unsupported. Don't try to presort for this Aggref */
3243 24 : allow_presort = false;
3244 24 : break;
3245 : }
3246 :
3247 : /* Skip unsupported Aggrefs */
3248 54 : if (!allow_presort)
3249 24 : continue;
3250 : }
3251 :
3252 2790 : unprocessed_aggs = bms_add_member(unprocessed_aggs,
3253 : foreach_current_index(lc));
3254 : }
3255 :
3256 : /*
3257 : * Now process all the unprocessed_aggs to find the best set of pathkeys
3258 : * for the given set of aggregates.
3259 : *
3260 : * On the first outer loop here 'bestaggs' will be empty. We'll populate
3261 : * this during the first loop using the pathkeys for the very first
3262 : * AggInfo, then taking any stronger pathkeys from any other AggInfos with
3263 : * a more strict set of compatible pathkeys. Once the outer loop is
3264 : * complete, we mark off all the aggregates with compatible pathkeys then
3265 : * remove those from the unprocessed_aggs and repeat the process to try to
3266 : * find another set of pathkeys that are suitable for a larger number of
3267 : * aggregates. The outer loop will stop when there are not enough
3268 : * unprocessed aggregates for it to be possible to find a set of pathkeys
3269 : * to suit a larger number of aggregates.
3270 : */
3271 2694 : bestpathkeys = NIL;
3272 2694 : bestaggs = NULL;
3273 5322 : while (bms_num_members(unprocessed_aggs) > bms_num_members(bestaggs))
3274 : {
3275 2628 : Bitmapset *aggindexes = NULL;
3276 2628 : List *currpathkeys = NIL;
3277 :
3278 2628 : i = -1;
3279 5724 : while ((i = bms_next_member(unprocessed_aggs, i)) >= 0)
3280 : {
3281 3096 : AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3282 3096 : Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3283 : List *sortlist;
3284 : List *pathkeys;
3285 :
3286 3096 : if (aggref->aggdistinct != NIL)
3287 718 : sortlist = aggref->aggdistinct;
3288 : else
3289 2378 : sortlist = aggref->aggorder;
3290 :
3291 3096 : pathkeys = make_pathkeys_for_sortclauses(root, sortlist,
3292 : aggref->args);
3293 :
3294 : /*
3295 : * Ignore Aggrefs which have volatile functions in their ORDER BY
3296 : * or DISTINCT clause.
3297 : */
3298 3096 : if (has_volatile_pathkey(pathkeys))
3299 : {
3300 18 : unprocessed_aggs = bms_del_member(unprocessed_aggs, i);
3301 18 : continue;
3302 : }
3303 :
3304 : /*
3305 : * When not set yet, take the pathkeys from the first unprocessed
3306 : * aggregate.
3307 : */
3308 3078 : if (currpathkeys == NIL)
3309 : {
3310 2622 : currpathkeys = pathkeys;
3311 :
3312 : /* include the GROUP BY pathkeys, if they exist */
3313 2622 : if (grouppathkeys != NIL)
3314 276 : currpathkeys = append_pathkeys(list_copy(grouppathkeys),
3315 : currpathkeys);
3316 :
3317 : /* record that we found pathkeys for this aggregate */
3318 2622 : aggindexes = bms_add_member(aggindexes, i);
3319 : }
3320 : else
3321 : {
3322 : /* now look for a stronger set of matching pathkeys */
3323 :
3324 : /* include the GROUP BY pathkeys, if they exist */
3325 456 : if (grouppathkeys != NIL)
3326 288 : pathkeys = append_pathkeys(list_copy(grouppathkeys),
3327 : pathkeys);
3328 :
3329 : /* are 'pathkeys' compatible or better than 'currpathkeys'? */
3330 456 : switch (compare_pathkeys(currpathkeys, pathkeys))
3331 : {
3332 12 : case PATHKEYS_BETTER2:
3333 : /* 'pathkeys' are stronger, use these ones instead */
3334 12 : currpathkeys = pathkeys;
3335 : /* FALLTHROUGH */
3336 :
3337 66 : case PATHKEYS_BETTER1:
3338 : /* 'pathkeys' are less strict */
3339 : /* FALLTHROUGH */
3340 :
3341 : case PATHKEYS_EQUAL:
3342 : /* mark this aggregate as covered by 'currpathkeys' */
3343 66 : aggindexes = bms_add_member(aggindexes, i);
3344 66 : break;
3345 :
3346 390 : case PATHKEYS_DIFFERENT:
3347 390 : break;
3348 : }
3349 : }
3350 : }
3351 :
3352 : /* remove the aggregates that we've just processed */
3353 2628 : unprocessed_aggs = bms_del_members(unprocessed_aggs, aggindexes);
3354 :
3355 : /*
3356 : * If this pass included more aggregates than the previous best then
3357 : * use these ones as the best set.
3358 : */
3359 2628 : if (bms_num_members(aggindexes) > bms_num_members(bestaggs))
3360 : {
3361 2520 : bestaggs = aggindexes;
3362 2520 : bestpathkeys = currpathkeys;
3363 : }
3364 : }
3365 :
3366 : /*
3367 : * If we found any ordered aggregates, update root->group_pathkeys to add
3368 : * the best set of aggregate pathkeys. Note that bestpathkeys includes
3369 : * the original GROUP BY pathkeys already.
3370 : */
3371 2694 : if (bestpathkeys != NIL)
3372 2460 : root->group_pathkeys = bestpathkeys;
3373 :
3374 : /*
3375 : * Now that we've found the best set of aggregates we can set the
3376 : * presorted flag to indicate to the executor that it needn't bother
3377 : * performing a sort for these Aggrefs. We're able to do this now because
3378 : * there's no chance of a Hash Aggregate plan, since create_grouping_paths
3379 : * will not mark the GROUP BY as GROUPING_CAN_USE_HASH due to the presence
3380 : * of ordered aggregates.
3381 : */
3382 2694 : i = -1;
3383 5250 : while ((i = bms_next_member(bestaggs, i)) >= 0)
3384 : {
3385 2556 : AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3386 :
3387 5130 : foreach(lc, agginfo->aggrefs)
3388 : {
3389 2574 : Aggref *aggref = lfirst_node(Aggref, lc);
3390 :
3391 2574 : aggref->aggpresorted = true;
3392 : }
3393 : }
3394 : }
3395 :
3396 : /*
3397 : * Compute query_pathkeys and other pathkeys during plan generation
3398 : */
3399 : static void
3400 518014 : standard_qp_callback(PlannerInfo *root, void *extra)
3401 : {
3402 518014 : Query *parse = root->parse;
3403 518014 : standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3404 518014 : List *tlist = root->processed_tlist;
3405 518014 : List *activeWindows = qp_extra->activeWindows;
3406 :
3407 : /*
3408 : * Calculate pathkeys that represent grouping/ordering and/or ordered
3409 : * aggregate requirements.
3410 : */
3411 518014 : if (qp_extra->gset_data)
3412 : {
3413 : /*
3414 : * With grouping sets, just use the first RollupData's groupClause. We
3415 : * don't make any effort to optimize grouping clauses when there are
3416 : * grouping sets, nor can we combine aggregate ordering keys with
3417 : * grouping.
3418 : */
3419 872 : List *rollups = qp_extra->gset_data->rollups;
3420 872 : List *groupClause = (rollups ? linitial_node(RollupData, rollups)->groupClause : NIL);
3421 :
3422 872 : if (grouping_is_sortable(groupClause))
3423 : {
3424 : bool sortable;
3425 :
3426 : /*
3427 : * The groupClause is logically below the grouping step. So if
3428 : * there is an RTE entry for the grouping step, we need to remove
3429 : * its RT index from the sort expressions before we make PathKeys
3430 : * for them.
3431 : */
3432 872 : root->group_pathkeys =
3433 872 : make_pathkeys_for_sortclauses_extended(root,
3434 : &groupClause,
3435 : tlist,
3436 : false,
3437 872 : parse->hasGroupRTE,
3438 : &sortable,
3439 : false);
3440 : Assert(sortable);
3441 872 : root->num_groupby_pathkeys = list_length(root->group_pathkeys);
3442 : }
3443 : else
3444 : {
3445 0 : root->group_pathkeys = NIL;
3446 0 : root->num_groupby_pathkeys = 0;
3447 : }
3448 : }
3449 517142 : else if (parse->groupClause || root->numOrderedAggs > 0)
3450 6090 : {
3451 : /*
3452 : * With a plain GROUP BY list, we can remove any grouping items that
3453 : * are proven redundant by EquivalenceClass processing. For example,
3454 : * we can remove y given "WHERE x = y GROUP BY x, y". These aren't
3455 : * especially common cases, but they're nearly free to detect. Note
3456 : * that we remove redundant items from processed_groupClause but not
3457 : * the original parse->groupClause.
3458 : */
3459 : bool sortable;
3460 :
3461 : /*
3462 : * Convert group clauses into pathkeys. Set the ec_sortref field of
3463 : * EquivalenceClass'es if it's not set yet.
3464 : */
3465 6090 : root->group_pathkeys =
3466 6090 : make_pathkeys_for_sortclauses_extended(root,
3467 : &root->processed_groupClause,
3468 : tlist,
3469 : true,
3470 : false,
3471 : &sortable,
3472 : true);
3473 6090 : if (!sortable)
3474 : {
3475 : /* Can't sort; no point in considering aggregate ordering either */
3476 0 : root->group_pathkeys = NIL;
3477 0 : root->num_groupby_pathkeys = 0;
3478 : }
3479 : else
3480 : {
3481 6090 : root->num_groupby_pathkeys = list_length(root->group_pathkeys);
3482 : /* If we have ordered aggs, consider adding onto group_pathkeys */
3483 6090 : if (root->numOrderedAggs > 0)
3484 2700 : adjust_group_pathkeys_for_groupagg(root);
3485 : }
3486 : }
3487 : else
3488 : {
3489 511052 : root->group_pathkeys = NIL;
3490 511052 : root->num_groupby_pathkeys = 0;
3491 : }
3492 :
3493 : /* We consider only the first (bottom) window in pathkeys logic */
3494 518014 : if (activeWindows != NIL)
3495 : {
3496 2378 : WindowClause *wc = linitial_node(WindowClause, activeWindows);
3497 :
3498 2378 : root->window_pathkeys = make_pathkeys_for_window(root,
3499 : wc,
3500 : tlist);
3501 : }
3502 : else
3503 515636 : root->window_pathkeys = NIL;
3504 :
3505 : /*
3506 : * As with GROUP BY, we can discard any DISTINCT items that are proven
3507 : * redundant by EquivalenceClass processing. The non-redundant list is
3508 : * kept in root->processed_distinctClause, leaving the original
3509 : * parse->distinctClause alone.
3510 : */
3511 518014 : if (parse->distinctClause)
3512 : {
3513 : bool sortable;
3514 :
3515 : /* Make a copy since pathkey processing can modify the list */
3516 2678 : root->processed_distinctClause = list_copy(parse->distinctClause);
3517 2678 : root->distinct_pathkeys =
3518 2678 : make_pathkeys_for_sortclauses_extended(root,
3519 : &root->processed_distinctClause,
3520 : tlist,
3521 : true,
3522 : false,
3523 : &sortable,
3524 : false);
3525 2678 : if (!sortable)
3526 6 : root->distinct_pathkeys = NIL;
3527 : }
3528 : else
3529 515336 : root->distinct_pathkeys = NIL;
3530 :
3531 518014 : root->sort_pathkeys =
3532 518014 : make_pathkeys_for_sortclauses(root,
3533 : parse->sortClause,
3534 : tlist);
3535 :
3536 : /* setting setop_pathkeys might be useful to the union planner */
3537 518014 : if (qp_extra->setop != NULL)
3538 : {
3539 : List *groupClauses;
3540 : bool sortable;
3541 :
3542 12268 : groupClauses = generate_setop_child_grouplist(qp_extra->setop, tlist);
3543 :
3544 12268 : root->setop_pathkeys =
3545 12268 : make_pathkeys_for_sortclauses_extended(root,
3546 : &groupClauses,
3547 : tlist,
3548 : false,
3549 : false,
3550 : &sortable,
3551 : false);
3552 12268 : if (!sortable)
3553 200 : root->setop_pathkeys = NIL;
3554 : }
3555 : else
3556 505746 : root->setop_pathkeys = NIL;
3557 :
3558 : /*
3559 : * Figure out whether we want a sorted result from query_planner.
3560 : *
3561 : * If we have a sortable GROUP BY clause, then we want a result sorted
3562 : * properly for grouping. Otherwise, if we have window functions to
3563 : * evaluate, we try to sort for the first window. Otherwise, if there's a
3564 : * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3565 : * we try to produce output that's sufficiently well sorted for the
3566 : * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3567 : * by the ORDER BY clause. Otherwise, if we're a subquery being planned
3568 : * for a set operation which can benefit from presorted results and have a
3569 : * sortable targetlist, we want to sort by the target list.
3570 : *
3571 : * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3572 : * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3573 : * that might just leave us failing to exploit an available sort order at
3574 : * all. Needs more thought. The choice for DISTINCT versus ORDER BY is
3575 : * much easier, since we know that the parser ensured that one is a
3576 : * superset of the other.
3577 : */
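/*
 * For instance, with "SELECT DISTINCT a, b FROM tab ORDER BY a" the
 * distinct_pathkeys (a, b) are longer than the sort_pathkeys (a), so we
 * request (a, b) from query_planner; any path sorted that way satisfies
 * the ORDER BY as well.
 */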
3578 518014 : if (root->group_pathkeys)
3579 6606 : root->query_pathkeys = root->group_pathkeys;
3580 511408 : else if (root->window_pathkeys)
3581 2032 : root->query_pathkeys = root->window_pathkeys;
3582 1018752 : else if (list_length(root->distinct_pathkeys) >
3583 509376 : list_length(root->sort_pathkeys))
3584 2226 : root->query_pathkeys = root->distinct_pathkeys;
3585 507150 : else if (root->sort_pathkeys)
3586 69342 : root->query_pathkeys = root->sort_pathkeys;
3587 437808 : else if (root->setop_pathkeys != NIL)
3588 10876 : root->query_pathkeys = root->setop_pathkeys;
3589 : else
3590 426932 : root->query_pathkeys = NIL;
3591 518014 : }
3592 :
3593 : /*
3594 : * Estimate number of groups produced by grouping clauses (1 if not grouping)
3595 : *
3596 : * path_rows: number of output rows from scan/join step
3597 : * gd: grouping sets data including list of grouping sets and their clauses
3598 : * target_list: target list containing group clause references
3599 : *
3600 : * If doing grouping sets, we also annotate the gsets data with the estimates
3601 : * for each set and each individual rollup list, with a view to later
3602 : * determining whether some combination of them could be hashed instead.
3603 : */
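/*
 * For instance (illustrative numbers only): for GROUP BY ROLLUP(a, b)
 * over an input of 1000 rows, the sets (a,b), (a) and () might be
 * estimated at 100, 10 and 1 groups respectively, making the rollup's
 * numGroups 111; dNumGroups is the sum over all rollups, here also 111.
 */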
3604 : static double
3605 42300 : get_number_of_groups(PlannerInfo *root,
3606 : double path_rows,
3607 : grouping_sets_data *gd,
3608 : List *target_list)
3609 : {
3610 42300 : Query *parse = root->parse;
3611 : double dNumGroups;
3612 :
3613 42300 : if (parse->groupClause)
3614 : {
3615 : List *groupExprs;
3616 :
3617 6944 : if (parse->groupingSets)
3618 : {
3619 : /* Add up the estimates for each grouping set */
3620 : ListCell *lc;
3621 :
3622 : Assert(gd); /* keep Coverity happy */
3623 :
3624 830 : dNumGroups = 0;
3625 :
3626 2214 : foreach(lc, gd->rollups)
3627 : {
3628 1384 : RollupData *rollup = lfirst_node(RollupData, lc);
3629 : ListCell *lc2;
3630 : ListCell *lc3;
3631 :
3632 1384 : groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
3633 : target_list);
3634 :
3635 1384 : rollup->numGroups = 0.0;
3636 :
3637 4016 : forboth(lc2, rollup->gsets, lc3, rollup->gsets_data)
3638 : {
3639 2632 : List *gset = (List *) lfirst(lc2);
3640 2632 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc3);
3641 2632 : double numGroups = estimate_num_groups(root,
3642 : groupExprs,
3643 : path_rows,
3644 : &gset,
3645 : NULL);
3646 :
3647 2632 : gs->numGroups = numGroups;
3648 2632 : rollup->numGroups += numGroups;
3649 : }
3650 :
3651 1384 : dNumGroups += rollup->numGroups;
3652 : }
3653 :
3654 830 : if (gd->hash_sets_idx)
3655 : {
3656 : ListCell *lc2;
3657 :
3658 36 : gd->dNumHashGroups = 0;
3659 :
3660 36 : groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3661 : target_list);
3662 :
3663 78 : forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
3664 : {
3665 42 : List *gset = (List *) lfirst(lc);
3666 42 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc2);
3667 42 : double numGroups = estimate_num_groups(root,
3668 : groupExprs,
3669 : path_rows,
3670 : &gset,
3671 : NULL);
3672 :
3673 42 : gs->numGroups = numGroups;
3674 42 : gd->dNumHashGroups += numGroups;
3675 : }
3676 :
3677 36 : dNumGroups += gd->dNumHashGroups;
3678 : }
3679 : }
3680 : else
3681 : {
3682 : /* Plain GROUP BY -- estimate based on optimized groupClause */
3683 6114 : groupExprs = get_sortgrouplist_exprs(root->processed_groupClause,
3684 : target_list);
3685 :
3686 6114 : dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3687 : NULL, NULL);
3688 : }
3689 : }
3690 35356 : else if (parse->groupingSets)
3691 : {
3692 : /* Empty grouping sets ... one result row for each one */
3693 42 : dNumGroups = list_length(parse->groupingSets);
3694 : }
3695 35314 : else if (parse->hasAggs || root->hasHavingQual)
3696 : {
3697 : /* Plain aggregation, one result row */
3698 35314 : dNumGroups = 1;
3699 : }
3700 : else
3701 : {
3702 : /* Not grouping */
3703 0 : dNumGroups = 1;
3704 : }
3705 :
3706 42300 : return dNumGroups;
3707 : }
3708 :
3709 : /*
3710 : * create_grouping_paths
3711 : *
3712 : * Build a new upperrel containing Paths for grouping and/or aggregation.
3713 : * Along the way, we also build an upperrel for Paths which are partially
3714 : * grouped and/or aggregated. A partially grouped and/or aggregated path
3715 : * needs a FinalizeAggregate node to complete the aggregation. Currently,
3716 : * the only partially grouped paths we build are also partial paths; that
3717 : * is, they need a Gather and then a FinalizeAggregate.
3718 : *
3719 : * input_rel: contains the source-data Paths
3720 : * target: the pathtarget for the result Paths to compute
3721 : * gd: grouping sets data including list of grouping sets and their clauses
3722 : *
3723 : * Note: all Paths in input_rel are expected to return the target computed
3724 : * by make_group_input_target.
3725 : */
3726 : static RelOptInfo *
3727 39040 : create_grouping_paths(PlannerInfo *root,
3728 : RelOptInfo *input_rel,
3729 : PathTarget *target,
3730 : bool target_parallel_safe,
3731 : grouping_sets_data *gd)
3732 : {
3733 39040 : Query *parse = root->parse;
3734 : RelOptInfo *grouped_rel;
3735 : RelOptInfo *partially_grouped_rel;
3736 : AggClauseCosts agg_costs;
3737 :
3738 234240 : MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
3739 39040 : get_agg_clause_costs(root, AGGSPLIT_SIMPLE, &agg_costs);
3740 :
3741 : /*
3742 : * Create a grouping relation to hold fully grouped and/or aggregated
3743 : * paths.
3744 : */
3745 39040 : grouped_rel = make_grouping_rel(root, input_rel, target,
3746 : target_parallel_safe, parse->havingQual);
3747 :
3748 : /*
3749 : * Create either paths for a degenerate grouping or paths for ordinary
3750 : * grouping, as appropriate.
3751 : */
3752 39040 : if (is_degenerate_grouping(root))
3753 18 : create_degenerate_grouping_paths(root, input_rel, grouped_rel);
3754 : else
3755 : {
3756 39022 : int flags = 0;
3757 : GroupPathExtraData extra;
3758 :
3759 : /*
3760 : * Determine whether it's possible to perform sort-based
3761 : * implementations of grouping. (Note that if processed_groupClause
3762 : * is empty, grouping_is_sortable() is trivially true, and all the
3763 : * pathkeys_contained_in() tests will succeed too, so that we'll
3764 : * consider every surviving input path.)
3765 : *
3766 : * If we have grouping sets, we might be able to sort some but not all
3767 : * of them; in this case, we need can_sort to be true as long as we
3768 : * must consider any sorted-input plan.
3769 : */
3770 39022 : if ((gd && gd->rollups != NIL)
3771 38156 : || grouping_is_sortable(root->processed_groupClause))
3772 39016 : flags |= GROUPING_CAN_USE_SORT;
3773 :
3774 : /*
3775 : * Determine whether we should consider hash-based implementations of
3776 : * grouping.
3777 : *
3778 : * Hashed aggregation only applies if we're grouping. If we have
3779 : * grouping sets, some groups might be hashable but others not; in
3780 : * this case we set can_hash true as long as there is nothing globally
3781 : * preventing us from hashing (and we should therefore consider plans
3782 : * with hashes).
3783 : *
3784 : * Executor doesn't support hashed aggregation with DISTINCT or ORDER
3785 : * BY aggregates. (Doing so would imply storing *all* the input
3786 : * values in the hash table, and/or running many sorts in parallel,
3787 : * either of which seems like a certain loser.) We similarly don't
3788 : * support ordered-set aggregates in hashed aggregation, but that case
3789 : * is also included in the numOrderedAggs count.
3790 : *
3791 : * Note: grouping_is_hashable() is much more expensive to check than
3792 : * the other gating conditions, so we want to do it last.
3793 : */
3794 39022 : if ((parse->groupClause != NIL &&
3795 8648 : root->numOrderedAggs == 0 &&
3796 4184 : (gd ? gd->any_hashable : grouping_is_hashable(root->processed_groupClause))))
3797 4180 : flags |= GROUPING_CAN_USE_HASH;
3798 :
3799 : /*
3800 : * Determine whether partial aggregation is possible.
3801 : */
3802 39022 : if (can_partial_agg(root))
3803 34068 : flags |= GROUPING_CAN_PARTIAL_AGG;
3804 :
3805 39022 : extra.flags = flags;
3806 39022 : extra.target_parallel_safe = target_parallel_safe;
3807 39022 : extra.havingQual = parse->havingQual;
3808 39022 : extra.targetList = parse->targetList;
3809 39022 : extra.partial_costs_set = false;
3810 :
3811 : /*
3812 : * Determine whether partitionwise aggregation is in theory possible.
3813 : * It can be disabled by the user, and for now, we don't try to
3814 : * support grouping sets. create_ordinary_grouping_paths() will check
3815 : * additional conditions, such as whether input_rel is partitioned.
3816 : */
3817 39022 : if (enable_partitionwise_aggregate && !parse->groupingSets)
3818 556 : extra.patype = PARTITIONWISE_AGGREGATE_FULL;
3819 : else
3820 38466 : extra.patype = PARTITIONWISE_AGGREGATE_NONE;
3821 :
3822 39022 : create_ordinary_grouping_paths(root, input_rel, grouped_rel,
3823 : &agg_costs, gd, &extra,
3824 : &partially_grouped_rel);
3825 : }
3826 :
3827 39034 : set_cheapest(grouped_rel);
3828 39034 : return grouped_rel;
3829 : }
3830 :
3831 : /*
3832 : * make_grouping_rel
3833 : *
3834 : * Create a new grouping rel and set basic properties.
3835 : *
3836 : * input_rel represents the underlying scan/join relation.
3837 : * target is the output expected from the grouping relation.
3838 : */
3839 : static RelOptInfo *
3840 40534 : make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
3841 : PathTarget *target, bool target_parallel_safe,
3842 : Node *havingQual)
3843 : {
3844 : RelOptInfo *grouped_rel;
3845 :
3846 40534 : if (IS_OTHER_REL(input_rel))
3847 : {
3848 1494 : grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG,
3849 : input_rel->relids);
3850 1494 : grouped_rel->reloptkind = RELOPT_OTHER_UPPER_REL;
3851 : }
3852 : else
3853 : {
3854 : /*
3855 : * By tradition, the relids set for the main grouping relation is
3856 : * NULL. (This could be changed, but might require adjustments
3857 : * elsewhere.)
3858 : */
3859 39040 : grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3860 : }
3861 :
3862 : /* Set target. */
3863 40534 : grouped_rel->reltarget = target;
3864 :
3865 : /*
3866 : * If the input relation is not parallel-safe, then the grouped relation
3867 : * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
3868 : * target list and HAVING quals are parallel-safe.
3869 : */
3870 68608 : if (input_rel->consider_parallel && target_parallel_safe &&
3871 28074 : is_parallel_safe(root, (Node *) havingQual))
3872 28056 : grouped_rel->consider_parallel = true;
3873 :
3874 : /*
3875 : * If the input rel belongs to a single FDW, so does the grouped rel.
3876 : */
3877 40534 : grouped_rel->serverid = input_rel->serverid;
3878 40534 : grouped_rel->userid = input_rel->userid;
3879 40534 : grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3880 40534 : grouped_rel->fdwroutine = input_rel->fdwroutine;
3881 :
3882 40534 : return grouped_rel;
3883 : }
3884 :
3885 : /*
3886 : * is_degenerate_grouping
3887 : *
3888 : * A degenerate grouping is one in which the query has a HAVING qual and/or
3889 : * grouping sets, but no aggregates and no GROUP BY (which implies that the
3890 : * grouping sets are all empty).
3891 : */
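/*
 * For example, "SELECT 1 FROM tab HAVING 1 = 1" is degenerate: it has a
 * HAVING qual but no aggregates and no GROUP BY, so it produces exactly
 * one row (or zero, had the qual been false) no matter what tab contains.
 */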
3892 : static bool
3893 39040 : is_degenerate_grouping(PlannerInfo *root)
3894 : {
3895 39040 : Query *parse = root->parse;
3896 :
3897 38024 : return (root->hasHavingQual || parse->groupingSets) &&
3898 77064 : !parse->hasAggs && parse->groupClause == NIL;
3899 : }
3900 :
3901 : /*
3902 : * create_degenerate_grouping_paths
3903 : *
3904 : * When the grouping is degenerate (see is_degenerate_grouping), we are
3905 : * supposed to emit either zero or one row for each grouping set depending on
3906 : * whether HAVING succeeds. Furthermore, there cannot be any variables in
3907 : * either HAVING or the targetlist, so we actually do not need the FROM table
3908 : * at all! We can just throw away the plan-so-far and generate a Result node.
3909 : * This is a sufficiently unusual corner case that it's not worth contorting
3910 : * the structure of this module to avoid having to generate the earlier paths
3911 : * in the first place.
3912 : */
3913 : static void
3914 18 : create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
3915 : RelOptInfo *grouped_rel)
3916 : {
3917 18 : Query *parse = root->parse;
3918 : int nrows;
3919 : Path *path;
3920 :
3921 18 : nrows = list_length(parse->groupingSets);
3922 18 : if (nrows > 1)
3923 : {
3924 : /*
3925 : * Doesn't seem worthwhile writing code to cons up a generate_series
3926 : * or a values scan to emit multiple rows. Instead just make N clones
3927 : * and append them. (With a volatile HAVING clause, this means you
3928 : * might get between 0 and N output rows. Offhand I think that's
3929 : * desired.)
3930 : */
3931 0 : List *paths = NIL;
3932 :
3933 0 : while (--nrows >= 0)
3934 : {
3935 : path = (Path *)
3936 0 : create_group_result_path(root, grouped_rel,
3937 0 : grouped_rel->reltarget,
3938 0 : (List *) parse->havingQual);
3939 0 : paths = lappend(paths, path);
3940 : }
3941 : path = (Path *)
3942 0 : create_append_path(root,
3943 : grouped_rel,
3944 : paths,
3945 : NIL,
3946 : NIL,
3947 : NULL,
3948 : 0,
3949 : false,
3950 : -1);
3951 : }
3952 : else
3953 : {
3954 : /* No grouping sets, or just one, so one output row */
3955 : path = (Path *)
3956 18 : create_group_result_path(root, grouped_rel,
3957 18 : grouped_rel->reltarget,
3958 18 : (List *) parse->havingQual);
3959 : }
3960 :
3961 18 : add_path(grouped_rel, path);
3962 18 : }
3963 :
3964 : /*
3965 : * create_ordinary_grouping_paths
3966 : *
3967 : * Create grouping paths for the ordinary (that is, non-degenerate) case.
3968 : *
3969 : * We need to consider sorted and hashed aggregation in the same function,
3970 : * because otherwise (1) it would be harder to throw an appropriate error
3971 : * message if neither way works, and (2) we should not allow hashtable size
3972 : * considerations to dissuade us from using hashing if sorting is not possible.
3973 : *
3974 : * *partially_grouped_rel_p will be set to the partially grouped rel which this
3975 : * function creates, or to NULL if it doesn't create one.
3976 : */
3977 : static void
3978 40516 : create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
3979 : RelOptInfo *grouped_rel,
3980 : const AggClauseCosts *agg_costs,
3981 : grouping_sets_data *gd,
3982 : GroupPathExtraData *extra,
3983 : RelOptInfo **partially_grouped_rel_p)
3984 : {
3985 40516 : Path *cheapest_path = input_rel->cheapest_total_path;
3986 40516 : RelOptInfo *partially_grouped_rel = NULL;
3987 : double dNumGroups;
3988 40516 : PartitionwiseAggregateType patype = PARTITIONWISE_AGGREGATE_NONE;
3989 :
3990 : /*
3991 : * If this is the topmost grouping relation or if the parent relation is
3992 : * doing some form of partitionwise aggregation, then we may be able to do
3993 : * it at this level also. However, if the input relation is not
3994 : * partitioned, partitionwise aggregate is impossible.
3995 : */
3996 40516 : if (extra->patype != PARTITIONWISE_AGGREGATE_NONE &&
3997 2050 : IS_PARTITIONED_REL(input_rel))
3998 : {
3999 : /*
4000 : * If this is the topmost relation or if the parent relation is doing
4001 : * full partitionwise aggregation, then we can do full partitionwise
4002 : * aggregation provided that the GROUP BY clause contains all of the
4003 : * partitioning columns at this level and the collation used by GROUP
4004 : * BY matches the partitioning collation. Otherwise, we can do at
4005 : * most partial partitionwise aggregation. But if partial aggregation
4006 : * is not supported in general then we can't use it for partitionwise
4007 : * aggregation either.
4008 : *
4009 : * Check parse->groupClause not processed_groupClause, because it's
4010 : * okay if some of the partitioning columns were proved redundant.
4011 : */
4012 1160 : if (extra->patype == PARTITIONWISE_AGGREGATE_FULL &&
4013 556 : group_by_has_partkey(input_rel, extra->targetList,
4014 556 : root->parse->groupClause))
4015 320 : patype = PARTITIONWISE_AGGREGATE_FULL;
4016 284 : else if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
4017 242 : patype = PARTITIONWISE_AGGREGATE_PARTIAL;
4018 : else
4019 42 : patype = PARTITIONWISE_AGGREGATE_NONE;
4020 : }
4021 :
4022 : /*
4023 : * Before generating paths for grouped_rel, we first generate any possible
4024 : * partially grouped paths; that way, later code can easily consider both
4025 : * parallel and non-parallel approaches to grouping.
4026 : */
4027 40516 : if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
4028 : {
4029 : bool force_rel_creation;
4030 :
4031 : /*
4032 : * If we're doing partitionwise aggregation at this level, force
4033 : * creation of a partially_grouped_rel so we can add partitionwise
4034 : * paths to it.
4035 : */
4036 35490 : force_rel_creation = (patype == PARTITIONWISE_AGGREGATE_PARTIAL);
4037 :
4038 : partially_grouped_rel =
4039 35490 : create_partial_grouping_paths(root,
4040 : grouped_rel,
4041 : input_rel,
4042 : gd,
4043 : extra,
4044 : force_rel_creation);
4045 : }
4046 :
4047 : /* Set out parameter. */
4048 40516 : *partially_grouped_rel_p = partially_grouped_rel;
4049 :
4050 : /* Apply partitionwise aggregation technique, if possible. */
4051 40516 : if (patype != PARTITIONWISE_AGGREGATE_NONE)
4052 562 : create_partitionwise_grouping_paths(root, input_rel, grouped_rel,
4053 : partially_grouped_rel, agg_costs,
4054 : gd, patype, extra);
4055 :
4056 : /* If we are doing partial aggregation only, return. */
4057 40516 : if (extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL)
4058 : {
4059 : Assert(partially_grouped_rel);
4060 :
4061 618 : if (partially_grouped_rel->pathlist)
4062 618 : set_cheapest(partially_grouped_rel);
4063 :
4064 618 : return;
4065 : }
4066 :
4067 : /* Gather any partially grouped partial paths. */
4068 39898 : if (partially_grouped_rel && partially_grouped_rel->partial_pathlist)
4069 : {
4070 1484 : gather_grouping_paths(root, partially_grouped_rel);
4071 1484 : set_cheapest(partially_grouped_rel);
4072 : }
4073 :
4074 : /*
4075 : * Estimate number of groups.
4076 : */
4077 39898 : dNumGroups = get_number_of_groups(root,
4078 : cheapest_path->rows,
4079 : gd,
4080 : extra->targetList);
4081 :
4082 : /* Build final grouping paths */
4083 39898 : add_paths_to_grouping_rel(root, input_rel, grouped_rel,
4084 : partially_grouped_rel, agg_costs, gd,
4085 : dNumGroups, extra);
4086 :
4087 : /* Give a helpful error if we failed to find any implementation */
4088 39898 : if (grouped_rel->pathlist == NIL)
4089 6 : ereport(ERROR,
4090 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4091 : errmsg("could not implement GROUP BY"),
4092 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4093 :
4094 : /*
4095 : * If there is an FDW that's responsible for all baserels of the query,
4096 : * let it consider adding ForeignPaths.
4097 : */
4098 39892 : if (grouped_rel->fdwroutine &&
4099 334 : grouped_rel->fdwroutine->GetForeignUpperPaths)
4100 334 : grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
4101 : input_rel, grouped_rel,
4102 : extra);
4103 :
4104 : /* Let extensions possibly add some more paths */
4105 39892 : if (create_upper_paths_hook)
4106 0 : (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
4107 : input_rel, grouped_rel,
4108 : extra);
4109 : }
4110 :
4111 : /*
4112 : * For a given input path, consider the possible ways of doing grouping sets on
4113 : * it, by combinations of hashing and sorting. This can be called multiple
4114 : * times, so it's important that it not scribble on input. No result is
4115 : * returned, but any generated paths are added to grouped_rel.
4116 : */
4117 : static void
4118 1732 : consider_groupingsets_paths(PlannerInfo *root,
4119 : RelOptInfo *grouped_rel,
4120 : Path *path,
4121 : bool is_sorted,
4122 : bool can_hash,
4123 : grouping_sets_data *gd,
4124 : const AggClauseCosts *agg_costs,
4125 : double dNumGroups)
4126 : {
4127 1732 : Query *parse = root->parse;
4128 1732 : Size hash_mem_limit = get_hash_memory_limit();
4129 :
4130 : /*
4131 : * If we're not being offered sorted input, then only consider plans that
4132 : * can be done entirely by hashing.
4133 : *
4134 : * We can hash everything if it looks like it'll fit in hash_mem. But if
4135 : * the input is actually sorted despite not being advertised as such, we
4136 : * prefer to make use of that in order to use less memory.
4137 : *
4138 : * If none of the grouping sets are sortable, then ignore the hash_mem
4139 : * limit and generate a path anyway, since otherwise we'll just fail.
4140 : */
4141 1732 : if (!is_sorted)
4142 : {
4143 794 : List *new_rollups = NIL;
4144 794 : RollupData *unhashed_rollup = NULL;
4145 : List *sets_data;
4146 794 : List *empty_sets_data = NIL;
4147 794 : List *empty_sets = NIL;
4148 : ListCell *lc;
4149 794 : ListCell *l_start = list_head(gd->rollups);
4150 794 : AggStrategy strat = AGG_HASHED;
4151 : double hashsize;
4152 794 : double exclude_groups = 0.0;
4153 :
4154 : Assert(can_hash);
4155 :
4156 : /*
4157 : * If the input is coincidentally sorted usefully (which can happen
4158 : * even if is_sorted is false, since is_sorted only says whether our
4159 : * caller has set up the sorting for us), then save some hashtable space by
4160 : * making use of that. But we need to watch out for degenerate cases:
4161 : *
4162 : * 1) If there are any empty grouping sets, then group_pathkeys might
4163 : * be NIL if all non-empty grouping sets are unsortable. In this case,
4164 : * there will be a rollup containing only empty groups, and the
4165 : * pathkeys_contained_in test is vacuously true; this is ok.
4166 : *
4167 : * XXX: the above relies on the fact that group_pathkeys is generated
4168 : * from the first rollup. If we add the ability to consider multiple
4169 : * sort orders for grouping input, this assumption might fail.
4170 : *
4171 : * 2) If there are no empty sets and only unsortable sets, then the
4172 : * rollups list will be empty (and thus l_start == NULL), and
4173 : * group_pathkeys will be NIL; we must ensure that the vacuously-true
4174 : * pathkeys_contained_in test doesn't cause us to crash.
4175 : */
4176 1582 : if (l_start != NULL &&
4177 788 : pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
4178 : {
4179 12 : unhashed_rollup = lfirst_node(RollupData, l_start);
4180 12 : exclude_groups = unhashed_rollup->numGroups;
4181 12 : l_start = lnext(gd->rollups, l_start);
4182 : }
4183 :
4184 794 : hashsize = estimate_hashagg_tablesize(root,
4185 : path,
4186 : agg_costs,
4187 : dNumGroups - exclude_groups);
4188 :
4189 : /*
4190 : * gd->rollups is empty if we have only unsortable columns to work
4191 : * with. Override hash_mem in that case; otherwise, we'll rely on the
4192 : * sorted-input case to generate usable mixed paths.
4193 : */
4194 794 : if (hashsize > hash_mem_limit && gd->rollups)
4195 18 : return; /* nope, won't fit */
4196 :
4197 : /*
4198 : * We need to burst the existing rollups list into individual grouping
4199 : * sets and recompute a groupClause for each set.
4200 : */
4201 776 : sets_data = list_copy(gd->unsortable_sets);
4202 :
4203 1968 : for_each_cell(lc, gd->rollups, l_start)
4204 : {
4205 1216 : RollupData *rollup = lfirst_node(RollupData, lc);
4206 :
4207 : /*
4208 : * If we find an unhashable rollup that's not been skipped by the
4209 : * "actually sorted" check above, we can't cope; we'd need sorted
4210 : * input (with a different sort order) but we can't get that here.
4211 : * So bail out; we'll get a valid path from the is_sorted case
4212 : * instead.
4213 : *
4214 : * The mere presence of empty grouping sets doesn't make a rollup
4215 : * unhashable (see preprocess_grouping_sets), we handle those
4216 : * specially below.
4217 : */
4218 1216 : if (!rollup->hashable)
4219 24 : return;
4220 :
4221 1192 : sets_data = list_concat(sets_data, rollup->gsets_data);
4222 : }
4223 3150 : foreach(lc, sets_data)
4224 : {
4225 2398 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
4226 2398 : List *gset = gs->set;
4227 : RollupData *rollup;
4228 :
4229 2398 : if (gset == NIL)
4230 : {
4231 : /* Empty grouping sets can't be hashed. */
4232 484 : empty_sets_data = lappend(empty_sets_data, gs);
4233 484 : empty_sets = lappend(empty_sets, NIL);
4234 : }
4235 : else
4236 : {
4237 1914 : rollup = makeNode(RollupData);
4238 :
4239 1914 : rollup->groupClause = preprocess_groupclause(root, gset);
4240 1914 : rollup->gsets_data = list_make1(gs);
4241 1914 : rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4242 : rollup->gsets_data,
4243 : gd->tleref_to_colnum_map);
4244 1914 : rollup->numGroups = gs->numGroups;
4245 1914 : rollup->hashable = true;
4246 1914 : rollup->is_hashed = true;
4247 1914 : new_rollups = lappend(new_rollups, rollup);
4248 : }
4249 : }
4250 :
4251 : /*
4252 : * If we didn't find anything nonempty to hash, then bail. We'll
4253 : * generate a path from the is_sorted case.
4254 : */
4255 752 : if (new_rollups == NIL)
4256 0 : return;
4257 :
4258 : /*
4259 : * If there were empty grouping sets they should have been in the
4260 : * first rollup.
4261 : */
4262 : Assert(!unhashed_rollup || !empty_sets);
4263 :
4264 752 : if (unhashed_rollup)
4265 : {
4266 12 : new_rollups = lappend(new_rollups, unhashed_rollup);
4267 12 : strat = AGG_MIXED;
4268 : }
4269 740 : else if (empty_sets)
4270 : {
4271 436 : RollupData *rollup = makeNode(RollupData);
4272 :
4273 436 : rollup->groupClause = NIL;
4274 436 : rollup->gsets_data = empty_sets_data;
4275 436 : rollup->gsets = empty_sets;
4276 436 : rollup->numGroups = list_length(empty_sets);
4277 436 : rollup->hashable = false;
4278 436 : rollup->is_hashed = false;
4279 436 : new_rollups = lappend(new_rollups, rollup);
4280 436 : strat = AGG_MIXED;
4281 : }
4282 :
4283 752 : add_path(grouped_rel, (Path *)
4284 752 : create_groupingsets_path(root,
4285 : grouped_rel,
4286 : path,
4287 752 : (List *) parse->havingQual,
4288 : strat,
4289 : new_rollups,
4290 : agg_costs));
4291 752 : return;
4292 : }
4293 :
4294 : /*
4295 : * If we have sorted input but nothing we can do with it, bail.
4296 : */
4297 938 : if (gd->rollups == NIL)
4298 0 : return;
4299 :
4300 : /*
4301 : * Given sorted input, we try to make two paths: one sorted and one mixed
4302 : * sort/hash. (We need to try both because hashagg might be disabled, or
4303 : * some columns might not be sortable.)
4304 : *
4305 : * can_hash is passed in as false if some obstacle elsewhere (such as
4306 : * ordered aggs) means that we shouldn't consider hashing at all.
4307 : */
4308 938 : if (can_hash && gd->any_hashable)
4309 : {
4310 860 : List *rollups = NIL;
4311 860 : List *hash_sets = list_copy(gd->unsortable_sets);
4312 860 : double availspace = hash_mem_limit;
4313 : ListCell *lc;
4314 :
4315 : /*
4316 : * Account first for space needed for groups we can't sort at all.
4317 : */
4318 860 : availspace -= estimate_hashagg_tablesize(root,
4319 : path,
4320 : agg_costs,
4321 : gd->dNumHashGroups);
4322 :
4323 860 : if (availspace > 0 && list_length(gd->rollups) > 1)
4324 : {
4325 : double scale;
4326 444 : int num_rollups = list_length(gd->rollups);
4327 : int k_capacity;
4328 444 : int *k_weights = palloc(num_rollups * sizeof(int));
4329 444 : Bitmapset *hash_items = NULL;
4330 : int i;
4331 :
4332 : /*
4333 : * We treat this as a knapsack problem: the knapsack capacity
4334 : * represents hash_mem, the item weights are the estimated memory
4335 : * usage of the hashtables needed to implement a single rollup,
4336 : * and we really ought to use the cost saving as the item value;
4337 : * however, currently the costs assigned to sort nodes don't
4338 : * reflect the comparison costs well, and so we treat all items as
4339 : * of equal value (each rollup we hash instead saves us one sort).
4340 : *
4341 : * To use the discrete knapsack, we need to scale the values to a
4342 : * reasonably small bounded range. We choose to allow a 5% error
4343 : * margin; with no more than 4096 rollups in the worst possible
4344 : * case, that will require a bit over 42MB
4345 : * of workspace. (Anyone wanting to plan queries that complex had
4346 : * better have the memory for it. In more reasonable cases, with
4347 : * no more than a couple of dozen rollups, the memory usage will
4348 : * be negligible.)
4349 : *
4350 : * k_capacity is naturally bounded, but we clamp the values for
4351 : * scale and weight (below) to avoid overflows or underflows (or
4352 : * uselessly trying to use a scale factor less than 1 byte).
4353 : */
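/*
 * A minimal standalone sketch of the scaling arithmetic above (not
 * planner code: the sizes below are made up, and fmax/fmin stand in for
 * the Max/Min macros). With 20 * num_rollups weight units, each item's
 * weight rounds down by less than one unit, so the total error across
 * num_rollups items stays under 1/20 = 5% of availspace.
 */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	double		availspace = 4.0 * 1024 * 1024; /* pretend hash_mem left */
	int			num_rollups = 3;
	double		sz[3] = {1.5e6, 2.5e6, 6.0e6};	/* est. hashtable sizes */
	double		scale = fmax(availspace / (20.0 * num_rollups), 1.0);
	int			k_capacity = (int) floor(availspace / scale);
	int			i;

	for (i = 0; i < num_rollups; i++)
	{
		int			w = (int) fmin(floor(sz[i] / scale), k_capacity + 1.0);

		printf("rollup %d: weight %d of capacity %d\n", i, w, k_capacity);
	}
	return 0;
}
/*
 * With these inputs the capacity is about 60 units; the first two weights
 * (about 21 and 35) fit together, while the third is clamped to
 * k_capacity + 1 and can never be chosen.
 */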
4354 444 : scale = Max(availspace / (20.0 * num_rollups), 1.0);
4355 444 : k_capacity = (int) floor(availspace / scale);
4356 :
4357 : /*
4358 : * We leave the first rollup out of consideration since it's the
4359 : * one that matches the input sort order. We assign indexes "i"
4360 : * to only those entries considered for hashing; the second loop,
4361 : * below, must use the same condition.
4362 : */
4363 444 : i = 0;
4364 1128 : for_each_from(lc, gd->rollups, 1)
4365 : {
4366 684 : RollupData *rollup = lfirst_node(RollupData, lc);
4367 :
4368 684 : if (rollup->hashable)
4369 : {
4370 684 : double sz = estimate_hashagg_tablesize(root,
4371 : path,
4372 : agg_costs,
4373 : rollup->numGroups);
4374 :
4375 : /*
4376 : * If sz is enormous, but hash_mem (and hence scale) is
4377 : * small, avoid integer overflow here.
4378 : */
4379 684 : k_weights[i] = (int) Min(floor(sz / scale),
4380 : k_capacity + 1.0);
4381 684 : ++i;
4382 : }
4383 : }
4384 :
4385 : /*
4386 : * Apply knapsack algorithm; compute the set of items which
4387 : * maximizes the value stored (in this case the number of sorts
4388 : * saved) while keeping the total size (approximately) within
4389 : * capacity.
4390 : */
4391 444 : if (i > 0)
4392 444 : hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
4393 :
4394 444 : if (!bms_is_empty(hash_items))
4395 : {
4396 444 : rollups = list_make1(linitial(gd->rollups));
4397 :
4398 444 : i = 0;
4399 1128 : for_each_from(lc, gd->rollups, 1)
4400 : {
4401 684 : RollupData *rollup = lfirst_node(RollupData, lc);
4402 :
4403 684 : if (rollup->hashable)
4404 : {
4405 684 : if (bms_is_member(i, hash_items))
4406 648 : hash_sets = list_concat(hash_sets,
4407 648 : rollup->gsets_data);
4408 : else
4409 36 : rollups = lappend(rollups, rollup);
4410 684 : ++i;
4411 : }
4412 : else
4413 0 : rollups = lappend(rollups, rollup);
4414 : }
4415 : }
4416 : }
4417 :
4418 860 : if (!rollups && hash_sets)
4419 24 : rollups = list_copy(gd->rollups);
4420 :
4421 1648 : foreach(lc, hash_sets)
4422 : {
4423 788 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
4424 788 : RollupData *rollup = makeNode(RollupData);
4425 :
4426 : Assert(gs->set != NIL);
4427 :
4428 788 : rollup->groupClause = preprocess_groupclause(root, gs->set);
4429 788 : rollup->gsets_data = list_make1(gs);
4430 788 : rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4431 : rollup->gsets_data,
4432 : gd->tleref_to_colnum_map);
4433 788 : rollup->numGroups = gs->numGroups;
4434 788 : rollup->hashable = true;
4435 788 : rollup->is_hashed = true;
4436 788 : rollups = lcons(rollup, rollups);
4437 : }
4438 :
4439 860 : if (rollups)
4440 : {
4441 468 : add_path(grouped_rel, (Path *)
4442 468 : create_groupingsets_path(root,
4443 : grouped_rel,
4444 : path,
4445 468 : (List *) parse->havingQual,
4446 : AGG_MIXED,
4447 : rollups,
4448 : agg_costs));
4449 : }
4450 : }
4451 :
4452 : /*
4453 : * Now try the simple sorted case.
4454 : */
4455 938 : if (!gd->unsortable_sets)
4456 908 : add_path(grouped_rel, (Path *)
4457 908 : create_groupingsets_path(root,
4458 : grouped_rel,
4459 : path,
4460 908 : (List *) parse->havingQual,
4461 : AGG_SORTED,
4462 : gd->rollups,
4463 : agg_costs));
4464 : }
4465 :
4466 : /*
4467 : * create_window_paths
4468 : *
4469 : * Build a new upperrel containing Paths for window-function evaluation.
4470 : *
4471 : * input_rel: contains the source-data Paths
4472 : * input_target: result of make_window_input_target
4473 : * output_target: what the topmost WindowAggPath should return
4474 : * wflists: result of find_window_functions
4475 : * activeWindows: result of select_active_windows
4476 : *
4477 : * Note: all Paths in input_rel are expected to return input_target.
4478 : */
4479 : static RelOptInfo *
4480 2378 : create_window_paths(PlannerInfo *root,
4481 : RelOptInfo *input_rel,
4482 : PathTarget *input_target,
4483 : PathTarget *output_target,
4484 : bool output_target_parallel_safe,
4485 : WindowFuncLists *wflists,
4486 : List *activeWindows)
4487 : {
4488 : RelOptInfo *window_rel;
4489 : ListCell *lc;
4490 :
4491 : /* For now, do all work in the (WINDOW, NULL) upperrel */
4492 2378 : window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
4493 :
4494 : /*
4495 : * If the input relation is not parallel-safe, then the window relation
4496 : * can't be parallel-safe, either. Otherwise, we need to examine the
4497 : * target list and active windows for non-parallel-safe constructs.
4498 : */
4499 2378 : if (input_rel->consider_parallel && output_target_parallel_safe &&
4500 0 : is_parallel_safe(root, (Node *) activeWindows))
4501 0 : window_rel->consider_parallel = true;
4502 :
4503 : /*
4504 : * If the input rel belongs to a single FDW, so does the window rel.
4505 : */
4506 2378 : window_rel->serverid = input_rel->serverid;
4507 2378 : window_rel->userid = input_rel->userid;
4508 2378 : window_rel->useridiscurrent = input_rel->useridiscurrent;
4509 2378 : window_rel->fdwroutine = input_rel->fdwroutine;
4510 :
4511 : /*
4512 : * Consider computing window functions starting from the existing
4513 : * cheapest-total path (which will likely require a sort) as well as any
4514 : * existing paths that satisfy or partially satisfy root->window_pathkeys.
4515 : */
4516 5078 : foreach(lc, input_rel->pathlist)
4517 : {
4518 2700 : Path *path = (Path *) lfirst(lc);
4519 : int presorted_keys;
4520 :
4521 3022 : if (path == input_rel->cheapest_total_path ||
4522 322 : pathkeys_count_contained_in(root->window_pathkeys, path->pathkeys,
4523 140 : &presorted_keys) ||
4524 140 : presorted_keys > 0)
4525 2586 : create_one_window_path(root,
4526 : window_rel,
4527 : path,
4528 : input_target,
4529 : output_target,
4530 : wflists,
4531 : activeWindows);
4532 : }
4533 :
4534 : /*
4535 : * If there is an FDW that's responsible for all baserels of the query,
4536 : * let it consider adding ForeignPaths.
4537 : */
4538 2378 : if (window_rel->fdwroutine &&
4539 12 : window_rel->fdwroutine->GetForeignUpperPaths)
4540 12 : window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
4541 : input_rel, window_rel,
4542 : NULL);
4543 :
4544 : /* Let extensions possibly add some more paths */
4545 2378 : if (create_upper_paths_hook)
4546 0 : (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
4547 : input_rel, window_rel, NULL);
4548 :
4549 : /* Now choose the best path(s) */
4550 2378 : set_cheapest(window_rel);
4551 :
4552 2378 : return window_rel;
4553 : }
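/*
 * Illustrative sketch (hypothetical table "tab" with a btree index on
 * column a): for
 *
 *   SELECT a, sum(b) OVER (ORDER BY a) FROM tab;
 *
 * an index path already satisfies root->window_pathkeys and can feed the
 * WindowAgg directly, while the cheapest-total seqscan path needs an
 * explicit Sort below the WindowAgg; the loop above generates both
 * alternatives and add_path() keeps whichever costs less.
 */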
4554 :
4555 : /*
4556 : * Stack window-function implementation steps atop the given Path, and
4557 : * add the result to window_rel.
4558 : *
4559 : * window_rel: upperrel to contain result
4560 : * path: input Path to use (must return input_target)
4561 : * input_target: result of make_window_input_target
4562 : * output_target: what the topmost WindowAggPath should return
4563 : * wflists: result of find_window_functions
4564 : * activeWindows: result of select_active_windows
4565 : */
4566 : static void
4567 2586 : create_one_window_path(PlannerInfo *root,
4568 : RelOptInfo *window_rel,
4569 : Path *path,
4570 : PathTarget *input_target,
4571 : PathTarget *output_target,
4572 : WindowFuncLists *wflists,
4573 : List *activeWindows)
4574 : {
4575 : PathTarget *window_target;
4576 : ListCell *l;
4577 2586 : List *topqual = NIL;
4578 :
4579 : /*
4580 : * Since each window clause could require a different sort order, we stack
4581 : * up a WindowAgg node for each clause, with sort steps between them as
4582 : * needed. (We assume that select_active_windows chose a good order for
4583 : * executing the clauses in.)
4584 : *
4585 : * input_target should contain all Vars and Aggs needed for the result.
4586 : * (In some cases we wouldn't need to propagate all of these all the way
4587 : * to the top, since they might only be needed as inputs to WindowFuncs.
4588 : * It's probably not worth trying to optimize that though.) It must also
4589 : * contain all window partitioning and sorting expressions, to ensure
4590 : * they're computed only once at the bottom of the stack (that's critical
4591 : * for volatile functions). As we climb up the stack, we'll add outputs
4592 : * for the WindowFuncs computed at each level.
4593 : */
4594 2586 : window_target = input_target;
4595 :
4596 5340 : foreach(l, activeWindows)
4597 : {
4598 2754 : WindowClause *wc = lfirst_node(WindowClause, l);
4599 : List *window_pathkeys;
4600 2754 : List *runcondition = NIL;
4601 : int presorted_keys;
4602 : bool is_sorted;
4603 : bool topwindow;
4604 : ListCell *lc2;
4605 :
4606 2754 : window_pathkeys = make_pathkeys_for_window(root,
4607 : wc,
4608 : root->processed_tlist);
4609 :
4610 2754 : is_sorted = pathkeys_count_contained_in(window_pathkeys,
4611 : path->pathkeys,
4612 : &presorted_keys);
4613 :
4614 : /* Sort if necessary */
4615 2754 : if (!is_sorted)
4616 : {
4617 : /*
4618 : * No presorted keys or incremental sort disabled, just perform a
4619 : * complete sort.
4620 : */
4621 2106 : if (presorted_keys == 0 || !enable_incremental_sort)
4622 2044 : path = (Path *) create_sort_path(root, window_rel,
4623 : path,
4624 : window_pathkeys,
4625 : -1.0);
4626 : else
4627 : {
4628 : /*
4629 : * Since we have presorted keys and incremental sort is
4630 : * enabled, just use incremental sort.
4631 : */
4632 62 : path = (Path *) create_incremental_sort_path(root,
4633 : window_rel,
4634 : path,
4635 : window_pathkeys,
4636 : presorted_keys,
4637 : -1.0);
4638 : }
4639 : }
4640 :
4641 2754 : if (lnext(activeWindows, l))
4642 : {
4643 : /*
4644 : * Add the current WindowFuncs to the output target for this
4645 : * intermediate WindowAggPath. We must copy window_target to
4646 : * avoid changing the previous path's target.
4647 : *
4648 : * Note: a WindowFunc adds nothing to the target's eval costs; but
4649 : * we do need to account for the increase in tlist width.
4650 : */
4651 168 : int64 tuple_width = window_target->width;
4652 :
4653 168 : window_target = copy_pathtarget(window_target);
4654 384 : foreach(lc2, wflists->windowFuncs[wc->winref])
4655 : {
4656 216 : WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4657 :
4658 216 : add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4659 216 : tuple_width += get_typavgwidth(wfunc->wintype, -1);
4660 : }
4661 168 : window_target->width = clamp_width_est(tuple_width);
4662 : }
4663 : else
4664 : {
4665 : /* Install the goal target in the topmost WindowAgg */
4666 2586 : window_target = output_target;
4667 : }
4668 :
4669 : /* mark the final item in the list as the top-level window */
4670 2754 : topwindow = foreach_current_index(l) == list_length(activeWindows) - 1;
4671 :
4672 : /*
4673 : * Collect the WindowFuncRunConditions from each WindowFunc and
4674 : * convert them into OpExprs
4675 : */
4676 6246 : foreach(lc2, wflists->windowFuncs[wc->winref])
4677 : {
4678 : ListCell *lc3;
4679 3492 : WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4680 :
4681 3672 : foreach(lc3, wfunc->runCondition)
4682 : {
4683 180 : WindowFuncRunCondition *wfuncrc =
4684 : lfirst_node(WindowFuncRunCondition, lc3);
4685 : Expr *opexpr;
4686 : Expr *leftop;
4687 : Expr *rightop;
4688 :
4689 180 : if (wfuncrc->wfunc_left)
4690 : {
4691 162 : leftop = (Expr *) copyObject(wfunc);
4692 162 : rightop = copyObject(wfuncrc->arg);
4693 : }
4694 : else
4695 : {
4696 18 : leftop = copyObject(wfuncrc->arg);
4697 18 : rightop = (Expr *) copyObject(wfunc);
4698 : }
4699 :
4700 180 : opexpr = make_opclause(wfuncrc->opno,
4701 : BOOLOID,
4702 : false,
4703 : leftop,
4704 : rightop,
4705 : InvalidOid,
4706 : wfuncrc->inputcollid);
4707 :
4708 180 : runcondition = lappend(runcondition, opexpr);
4709 :
4710 180 : if (!topwindow)
4711 24 : topqual = lappend(topqual, opexpr);
4712 : }
4713 : }
4714 :
4715 : path = (Path *)
4716 2754 : create_windowagg_path(root, window_rel, path, window_target,
4717 2754 : wflists->windowFuncs[wc->winref],
4718 : runcondition, wc,
4719 : topwindow ? topqual : NIL, topwindow);
4720 : }
4721 :
4722 2586 : add_path(window_rel, path);
4723 2586 : }
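/*
 * Illustrative sketch (hypothetical table "tab"): for a monotonic window
 * function such as row_number(), an outer qual can become a run
 * condition, e.g.
 *
 *   SELECT * FROM (SELECT a, row_number() OVER (ORDER BY a) AS rn
 *                  FROM tab) AS t
 *   WHERE rn <= 10;
 *
 * The WindowFuncRunCondition is converted above into the OpExpr
 * "row_number() OVER (...) <= 10", which lets the WindowAgg stop
 * producing rows once the condition can no longer be satisfied.
 */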
4724 :
4725 : /*
4726 : * create_distinct_paths
4727 : *
4728 : * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4729 : *
4730 : * input_rel: contains the source-data Paths
4731 : * target: the pathtarget for the result Paths to compute
4732 : *
4733 : * Note: input paths should already compute the desired pathtarget, since
4734 : * Sort/Unique won't project anything.
4735 : */
4736 : static RelOptInfo *
4737 2678 : create_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4738 : PathTarget *target)
4739 : {
4740 : RelOptInfo *distinct_rel;
4741 :
4742 : /* For now, do all work in the (DISTINCT, NULL) upperrel */
4743 2678 : distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4744 :
4745 : /*
4746 : * We don't compute anything at this level, so distinct_rel will be
4747 : * parallel-safe if the input rel is parallel-safe. In particular, if
4748 : * there is a DISTINCT ON (...) clause, any path for the input_rel will
4749 : * output those expressions, and will not be parallel-safe unless those
4750 : * expressions are parallel-safe.
4751 : */
4752 2678 : distinct_rel->consider_parallel = input_rel->consider_parallel;
4753 :
4754 : /*
4755 : * If the input rel belongs to a single FDW, so does the distinct_rel.
4756 : */
4757 2678 : distinct_rel->serverid = input_rel->serverid;
4758 2678 : distinct_rel->userid = input_rel->userid;
4759 2678 : distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4760 2678 : distinct_rel->fdwroutine = input_rel->fdwroutine;
4761 :
4762 : /* build distinct paths based on input_rel's pathlist */
4763 2678 : create_final_distinct_paths(root, input_rel, distinct_rel);
4764 :
4765 : /* now build distinct paths based on input_rel's partial_pathlist */
4766 2678 : create_partial_distinct_paths(root, input_rel, distinct_rel, target);
4767 :
4768 : /* Give a helpful error if we failed to create any paths */
4769 2678 : if (distinct_rel->pathlist == NIL)
4770 0 : ereport(ERROR,
4771 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4772 : errmsg("could not implement DISTINCT"),
4773 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4774 :
4775 : /*
4776 : * If there is an FDW that's responsible for all baserels of the query,
4777 : * let it consider adding ForeignPaths.
4778 : */
4779 2678 : if (distinct_rel->fdwroutine &&
4780 16 : distinct_rel->fdwroutine->GetForeignUpperPaths)
4781 16 : distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4782 : UPPERREL_DISTINCT,
4783 : input_rel,
4784 : distinct_rel,
4785 : NULL);
4786 :
4787 : /* Let extensions possibly add some more paths */
4788 2678 : if (create_upper_paths_hook)
4789 0 : (*create_upper_paths_hook) (root, UPPERREL_DISTINCT, input_rel,
4790 : distinct_rel, NULL);
4791 :
4792 : /* Now choose the best path(s) */
4793 2678 : set_cheapest(distinct_rel);
4794 :
4795 2678 : return distinct_rel;
4796 : }
4797 :
4798 : /*
4799 : * create_partial_distinct_paths
4800 : *
4801 : * Process 'input_rel' partial paths and add unique/aggregate paths to the
4802 : * UPPERREL_PARTIAL_DISTINCT rel. For paths created, add Gather/GatherMerge
4803 : * paths on top and add a final unique/aggregate path to remove any duplicate
4804 : * produced from combining rows from parallel workers.
4805 : */
4806 : static void
4807 2678 : create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4808 : RelOptInfo *final_distinct_rel,
4809 : PathTarget *target)
4810 : {
4811 : RelOptInfo *partial_distinct_rel;
4812 : Query *parse;
4813 : List *distinctExprs;
4814 : double numDistinctRows;
4815 : Path *cheapest_partial_path;
4816 : ListCell *lc;
4817 :
4818 : /* nothing to do when there are no partial paths in the input rel */
4819 2678 : if (!input_rel->consider_parallel || input_rel->partial_pathlist == NIL)
4820 2570 : return;
4821 :
4822 108 : parse = root->parse;
4823 :
4824 : /* can't do parallel DISTINCT ON */
4825 108 : if (parse->hasDistinctOn)
4826 0 : return;
4827 :
4828 108 : partial_distinct_rel = fetch_upper_rel(root, UPPERREL_PARTIAL_DISTINCT,
4829 : NULL);
4830 108 : partial_distinct_rel->reltarget = target;
4831 108 : partial_distinct_rel->consider_parallel = input_rel->consider_parallel;
4832 :
4833 : /*
4834 : * If input_rel belongs to a single FDW, so does the partial_distinct_rel.
4835 : */
4836 108 : partial_distinct_rel->serverid = input_rel->serverid;
4837 108 : partial_distinct_rel->userid = input_rel->userid;
4838 108 : partial_distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4839 108 : partial_distinct_rel->fdwroutine = input_rel->fdwroutine;
4840 :
4841 108 : cheapest_partial_path = linitial(input_rel->partial_pathlist);
4842 :
4843 108 : distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
4844 : parse->targetList);
4845 :
4846 : /* estimate how many distinct rows we'll get from each worker */
4847 108 : numDistinctRows = estimate_num_groups(root, distinctExprs,
4848 : cheapest_partial_path->rows,
4849 : NULL, NULL);
4850 :
4851 : /*
4852 : * Try sorting the cheapest path and incrementally sorting any paths with
4853 : * presorted keys and put unique paths atop of those. We'll also
4854 : * attempt to reorder the required pathkeys to match the input path's
4855 : * pathkeys as much as possible, in hopes of avoiding a possible need to
4856 : * re-sort.
4857 : */
4858 108 : if (grouping_is_sortable(root->processed_distinctClause))
4859 : {
4860 234 : foreach(lc, input_rel->partial_pathlist)
4861 : {
4862 126 : Path *input_path = (Path *) lfirst(lc);
4863 : Path *sorted_path;
4864 126 : List *useful_pathkeys_list = NIL;
4865 :
4866 : useful_pathkeys_list =
4867 126 : get_useful_pathkeys_for_distinct(root,
4868 : root->distinct_pathkeys,
4869 : input_path->pathkeys);
4870 : Assert(list_length(useful_pathkeys_list) > 0);
4871 :
4872 390 : foreach_node(List, useful_pathkeys, useful_pathkeys_list)
4873 : {
4874 138 : sorted_path = make_ordered_path(root,
4875 : partial_distinct_rel,
4876 : input_path,
4877 : cheapest_partial_path,
4878 : useful_pathkeys,
4879 : -1.0);
4880 :
4881 138 : if (sorted_path == NULL)
4882 12 : continue;
4883 :
4884 : /*
4885 : * An empty distinct_pathkeys means all tuples have the same
4886 : * value for the DISTINCT clause. See
4887 : * create_final_distinct_paths()
4888 : */
4889 126 : if (root->distinct_pathkeys == NIL)
4890 : {
4891 : Node *limitCount;
4892 :
4893 6 : limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
4894 : sizeof(int64),
4895 : Int64GetDatum(1), false,
4896 : FLOAT8PASSBYVAL);
4897 :
4898 : /*
4899 : * Apply a LimitPath onto the partial path to restrict the
4900 : * tuples from each worker to 1.
4901 : * create_final_distinct_paths will need to apply an
4902 : * additional LimitPath to restrict this to a single row
4903 : * after the Gather node. If the query already has a
4904 : * LIMIT clause, then we could end up with three Limit
4905 : * nodes in the final plan. Consolidating the top two of
4906 : * these could be done, but does not seem worth troubling
4907 : * over.
4908 : */
4909 6 : add_partial_path(partial_distinct_rel, (Path *)
4910 6 : create_limit_path(root, partial_distinct_rel,
4911 : sorted_path,
4912 : NULL,
4913 : limitCount,
4914 : LIMIT_OPTION_COUNT,
4915 : 0, 1));
4916 : }
4917 : else
4918 : {
4919 120 : add_partial_path(partial_distinct_rel, (Path *)
4920 120 : create_upper_unique_path(root, partial_distinct_rel,
4921 : sorted_path,
4922 120 : list_length(root->distinct_pathkeys),
4923 : numDistinctRows));
4924 : }
4925 : }
4926 : }
4927 : }
4928 :
4929 : /*
4930 : * Now try hash aggregate paths, if enabled and hashing is possible. Since
4931 : * we're not on the hook to ensure we do our best to create at least one
4932 : * path here, we treat enable_hashagg as a hard off-switch rather than the
4933 : * slightly softer variant in create_final_distinct_paths.
4934 : */
4935 108 : if (enable_hashagg && grouping_is_hashable(root->processed_distinctClause))
4936 : {
4937 78 : add_partial_path(partial_distinct_rel, (Path *)
4938 78 : create_agg_path(root,
4939 : partial_distinct_rel,
4940 : cheapest_partial_path,
4941 : cheapest_partial_path->pathtarget,
4942 : AGG_HASHED,
4943 : AGGSPLIT_SIMPLE,
4944 : root->processed_distinctClause,
4945 : NIL,
4946 : NULL,
4947 : numDistinctRows));
4948 : }
4949 :
4950 : /*
4951 : * If there is an FDW that's responsible for all baserels of the query,
4952 : * let it consider adding ForeignPaths.
4953 : */
4954 108 : if (partial_distinct_rel->fdwroutine &&
4955 0 : partial_distinct_rel->fdwroutine->GetForeignUpperPaths)
4956 0 : partial_distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4957 : UPPERREL_PARTIAL_DISTINCT,
4958 : input_rel,
4959 : partial_distinct_rel,
4960 : NULL);
4961 :
4962 : /* Let extensions possibly add some more partial paths */
4963 108 : if (create_upper_paths_hook)
4964 0 : (*create_upper_paths_hook) (root, UPPERREL_PARTIAL_DISTINCT,
4965 : input_rel, partial_distinct_rel, NULL);
4966 :
4967 108 : if (partial_distinct_rel->partial_pathlist != NIL)
4968 : {
4969 108 : generate_useful_gather_paths(root, partial_distinct_rel, true);
4970 108 : set_cheapest(partial_distinct_rel);
4971 :
4972 : /*
4973 : * Finally, create paths to distinctify the final result. This step
4974 : * is needed to remove any duplicates due to combining rows from
4975 : * parallel workers.
4976 : */
4977 108 : create_final_distinct_paths(root, partial_distinct_rel,
4978 : final_distinct_rel);
4979 : }
4980 : }
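/*
 * Illustrative sketch (hypothetical table "tab"): with parallel workers
 * available, "SELECT DISTINCT a FROM tab" may be planned roughly as
 *
 *   Unique (or HashAggregate)
 *     -> Gather Merge (or Gather)
 *          -> Unique (or Partial HashAggregate)
 *               -> Sort
 *                    -> Parallel Seq Scan on tab
 *
 * i.e. each worker first removes duplicates among its own rows, and the
 * final step removes duplicates that span workers.
 */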
4981 :
4982 : /*
4983 : * create_final_distinct_paths
4984 : * Create distinct paths in 'distinct_rel' based on 'input_rel' pathlist
4985 : *
4986 : * input_rel: contains the source-data paths
4987 : * distinct_rel: destination relation for storing created paths
4988 : */
4989 : static RelOptInfo *
4990 2786 : create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4991 : RelOptInfo *distinct_rel)
4992 : {
4993 2786 : Query *parse = root->parse;
4994 2786 : Path *cheapest_input_path = input_rel->cheapest_total_path;
4995 : double numDistinctRows;
4996 : bool allow_hash;
4997 :
4998 : /* Estimate number of distinct rows there will be */
4999 2786 : if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
5000 2712 : root->hasHavingQual)
5001 : {
5002 : /*
5003 : * If there was grouping or aggregation, use the number of input rows
5004 : * as the estimated number of DISTINCT rows (ie, assume the input is
5005 : * already mostly unique).
5006 : */
5007 74 : numDistinctRows = cheapest_input_path->rows;
5008 : }
5009 : else
5010 : {
5011 : /*
5012 : * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
5013 : */
5014 : List *distinctExprs;
5015 :
5016 2712 : distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
5017 : parse->targetList);
5018 2712 : numDistinctRows = estimate_num_groups(root, distinctExprs,
5019 : cheapest_input_path->rows,
5020 : NULL, NULL);
5021 : }
5022 :
5023 : /*
5024 : * Consider sort-based implementations of DISTINCT, if possible.
5025 : */
5026 2786 : if (grouping_is_sortable(root->processed_distinctClause))
5027 : {
5028 : /*
5029 : * Firstly, if we have any adequately-presorted paths, just stick a
5030 : * Unique node on those. We also consider doing an explicit sort of
5031 : * the cheapest input path and Unique'ing that. If any paths have
5032 : * presorted keys then we'll create an incremental sort atop of those
5033 : * before adding a unique node on the top. We'll also attempt to
5034 : * reorder the required pathkeys to match the input path's pathkeys as
5035 : * much as possible, in hopes of avoiding a possible need to re-sort.
5036 : *
5037 : * When we have DISTINCT ON, we must sort by the more rigorous of
5038 : * DISTINCT and ORDER BY, else it won't have the desired behavior.
5039 : * Also, if we do have to do an explicit sort, we might as well use
5040 : * the more rigorous ordering to avoid a second sort later. (Note
5041 : * that the parser will have ensured that one clause is a prefix of
5042 : * the other.)
5043 : */
5044 : List *needed_pathkeys;
5045 : ListCell *lc;
5046 2780 : double limittuples = root->distinct_pathkeys == NIL ? 1.0 : -1.0;
5047 :
5048 3016 : if (parse->hasDistinctOn &&
5049 236 : list_length(root->distinct_pathkeys) <
5050 236 : list_length(root->sort_pathkeys))
5051 54 : needed_pathkeys = root->sort_pathkeys;
5052 : else
5053 2726 : needed_pathkeys = root->distinct_pathkeys;
5054 :
5055 7162 : foreach(lc, input_rel->pathlist)
5056 : {
5057 4382 : Path *input_path = (Path *) lfirst(lc);
5058 : Path *sorted_path;
5059 4382 : List *useful_pathkeys_list = NIL;
5060 :
5061 : useful_pathkeys_list =
5062 4382 : get_useful_pathkeys_for_distinct(root,
5063 : needed_pathkeys,
5064 : input_path->pathkeys);
5065 : Assert(list_length(useful_pathkeys_list) > 0);
5066 :
5067 13632 : foreach_node(List, useful_pathkeys, useful_pathkeys_list)
5068 : {
5069 4868 : sorted_path = make_ordered_path(root,
5070 : distinct_rel,
5071 : input_path,
5072 : cheapest_input_path,
5073 : useful_pathkeys,
5074 : limittuples);
5075 :
5076 4868 : if (sorted_path == NULL)
5077 544 : continue;
5078 :
5079 : /*
5080 : * distinct_pathkeys may have become empty if all of the
5081 : * pathkeys were determined to be redundant. If all of the
5082 : * pathkeys are redundant then each DISTINCT target must only
5083 : * allow a single value, therefore all resulting tuples must
5084 : * be identical (or at least indistinguishable by an equality
5085 : * check). We can uniquify these tuples simply by taking
5086 : * the first tuple. All we do here is add a path to do "LIMIT
5087 : * 1" atop of 'sorted_path'. When doing a DISTINCT ON we may
5088 : * still have a non-NIL sort_pathkeys list, so we must still
5089 : * only do this with paths which are correctly sorted by
5090 : * sort_pathkeys.
5091 : */
5092 4324 : if (root->distinct_pathkeys == NIL)
5093 : {
5094 : Node *limitCount;
5095 :
5096 106 : limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
5097 : sizeof(int64),
5098 : Int64GetDatum(1), false,
5099 : FLOAT8PASSBYVAL);
5100 :
5101 : /*
5102 : * If the query already has a LIMIT clause, then we could
5103 : * end up with a duplicate LimitPath in the final plan.
5104 : * That does not seem worth troubling over too much.
5105 : */
5106 106 : add_path(distinct_rel, (Path *)
5107 106 : create_limit_path(root, distinct_rel, sorted_path,
5108 : NULL, limitCount,
5109 : LIMIT_OPTION_COUNT, 0, 1));
5110 : }
5111 : else
5112 : {
5113 4218 : add_path(distinct_rel, (Path *)
5114 4218 : create_upper_unique_path(root, distinct_rel,
5115 : sorted_path,
5116 4218 : list_length(root->distinct_pathkeys),
5117 : numDistinctRows));
5118 : }
5119 : }
5120 : }
5121 : }
5122 :
5123 : /*
5124 : * Consider hash-based implementations of DISTINCT, if possible.
5125 : *
5126 : * If we were not able to make any other types of path, we *must* hash or
5127 : * die trying. If we do have other choices, there are two things that
5128 : * should prevent selection of hashing: if the query uses DISTINCT ON
5129 : * (because it won't really have the expected behavior if we hash), or if
5130 : * enable_hashagg is off.
5131 : *
5132 : * Note: grouping_is_hashable() is much more expensive to check than the
5133 : * other gating conditions, so we want to do it last.
5134 : */
5135 2786 : if (distinct_rel->pathlist == NIL)
5136 6 : allow_hash = true; /* we have no alternatives */
5137 2780 : else if (parse->hasDistinctOn || !enable_hashagg)
5138 386 : allow_hash = false; /* policy-based decision not to hash */
5139 : else
5140 2394 : allow_hash = true; /* default */
5141 :
5142 2786 : if (allow_hash && grouping_is_hashable(root->processed_distinctClause))
5143 : {
5144 : /* Generate hashed aggregate path --- no sort needed */
5145 2400 : add_path(distinct_rel, (Path *)
5146 2400 : create_agg_path(root,
5147 : distinct_rel,
5148 : cheapest_input_path,
5149 : cheapest_input_path->pathtarget,
5150 : AGG_HASHED,
5151 : AGGSPLIT_SIMPLE,
5152 : root->processed_distinctClause,
5153 : NIL,
5154 : NULL,
5155 : numDistinctRows));
5156 : }
5157 :
5158 2786 : return distinct_rel;
5159 : }
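/*
 * Illustrative sketch (hypothetical table "tab"): in
 *
 *   SELECT DISTINCT a FROM tab WHERE a = 1;
 *
 * the only distinct pathkey is redundant because a is constrained to a
 * single value, so root->distinct_pathkeys is NIL and the code above
 * implements DISTINCT as a simple "LIMIT 1" atop the sorted path rather
 * than with a Unique node.
 */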
5160 :
5161 : /*
5162 : * get_useful_pathkeys_for_distinct
5163 : * Get useful orderings of pathkeys for distinctClause by reordering
5164 : * 'needed_pathkeys' to match the given 'path_pathkeys' as much as possible.
5165 : *
5166 : * This returns a list of pathkeys that can be useful for the DISTINCT or
5167 : * DISTINCT ON clause. For convenience, it always includes 'needed_pathkeys'.
5168 : */
5169 : static List *
5170 4508 : get_useful_pathkeys_for_distinct(PlannerInfo *root, List *needed_pathkeys,
5171 : List *path_pathkeys)
5172 : {
5173 4508 : List *useful_pathkeys_list = NIL;
5174 4508 : List *useful_pathkeys = NIL;
5175 :
5176 : /* always include the given 'needed_pathkeys' */
5177 4508 : useful_pathkeys_list = lappend(useful_pathkeys_list,
5178 : needed_pathkeys);
5179 :
5180 4508 : if (!enable_distinct_reordering)
5181 0 : return useful_pathkeys_list;
5182 :
5183 : /*
5184 : * Scan the given 'path_pathkeys' and construct a list of PathKey nodes
5185 : * that match 'needed_pathkeys', but only up to the longest matching
5186 : * prefix.
5187 : *
5188 : * When we have DISTINCT ON, we must ensure that the resulting pathkey
5189 : * list matches initial distinctClause pathkeys; otherwise, it won't have
5190 : * the desired behavior.
5191 : */
5192 11022 : foreach_node(PathKey, pathkey, path_pathkeys)
5193 : {
5194 : /*
5195 : * The PathKey nodes are canonical, so they can be checked for
5196 : * equality by simple pointer comparison.
5197 : */
5198 2034 : if (!list_member_ptr(needed_pathkeys, pathkey))
5199 10 : break;
5200 2024 : if (root->parse->hasDistinctOn &&
5201 200 : !list_member_ptr(root->distinct_pathkeys, pathkey))
5202 18 : break;
5203 :
5204 2006 : useful_pathkeys = lappend(useful_pathkeys, pathkey);
5205 : }
5206 :
5207 : /* If no match at all, no point in reordering needed_pathkeys */
5208 4508 : if (useful_pathkeys == NIL)
5209 2766 : return useful_pathkeys_list;
5210 :
5211 : /*
5212 : * If not full match, the resulting pathkey list is not useful without
5213 : * incremental sort.
5214 : */
5215 1742 : if (list_length(useful_pathkeys) < list_length(needed_pathkeys) &&
5216 892 : !enable_incremental_sort)
5217 60 : return useful_pathkeys_list;
5218 :
5219 : /* Append the remaining PathKey nodes in needed_pathkeys */
5220 1682 : useful_pathkeys = list_concat_unique_ptr(useful_pathkeys,
5221 : needed_pathkeys);
5222 :
5223 : /*
5224 : * If the resulting pathkey list is the same as the 'needed_pathkeys',
5225 : * just drop it.
5226 : */
5227 1682 : if (compare_pathkeys(needed_pathkeys,
5228 : useful_pathkeys) == PATHKEYS_EQUAL)
5229 1184 : return useful_pathkeys_list;
5230 :
5231 498 : useful_pathkeys_list = lappend(useful_pathkeys_list,
5232 : useful_pathkeys);
5233 :
5234 498 : return useful_pathkeys_list;
5235 : }
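/*
 * Illustrative sketch (hypothetical table "tab"): for
 * "SELECT DISTINCT a, b FROM tab" with needed_pathkeys (a, b) and an
 * input path already sorted by (b), this returns both (a, b) and the
 * reordered (b, a); with incremental sort enabled, the latter has one
 * presorted key, so an incremental sort on the remaining key can replace
 * a full sort.
 */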
5236 :
5237 : /*
5238 : * create_ordered_paths
5239 : *
5240 : * Build a new upperrel containing Paths for ORDER BY evaluation.
5241 : *
5242 : * All paths in the result must satisfy the ORDER BY ordering.
5243 : * The only new paths we need consider are an explicit full sort
5244 : * and incremental sort on the cheapest-total existing path.
5245 : *
5246 : * input_rel: contains the source-data Paths
5247 : * target: the output tlist the result Paths must emit
5248 : * limit_tuples: estimated bound on the number of output tuples,
5249 : * or -1 if no LIMIT or couldn't estimate
5250 : *
5251 : * XXX This only looks at sort_pathkeys. I wonder if it needs to look at the
5252 : * other pathkeys (grouping, ...) like generate_useful_gather_paths.
5253 : */
5254 : static RelOptInfo *
5255 75732 : create_ordered_paths(PlannerInfo *root,
5256 : RelOptInfo *input_rel,
5257 : PathTarget *target,
5258 : bool target_parallel_safe,
5259 : double limit_tuples)
5260 : {
5261 75732 : Path *cheapest_input_path = input_rel->cheapest_total_path;
5262 : RelOptInfo *ordered_rel;
5263 : ListCell *lc;
5264 :
5265 : /* For now, do all work in the (ORDERED, NULL) upperrel */
5266 75732 : ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
5267 :
5268 : /*
5269 : * If the input relation is not parallel-safe, then the ordered relation
5270 : * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
5271 : * target list is parallel-safe.
5272 : */
5273 75732 : if (input_rel->consider_parallel && target_parallel_safe)
5274 52438 : ordered_rel->consider_parallel = true;
5275 :
5276 : /*
5277 : * If the input rel belongs to a single FDW, so does the ordered_rel.
5278 : */
5279 75732 : ordered_rel->serverid = input_rel->serverid;
5280 75732 : ordered_rel->userid = input_rel->userid;
5281 75732 : ordered_rel->useridiscurrent = input_rel->useridiscurrent;
5282 75732 : ordered_rel->fdwroutine = input_rel->fdwroutine;
5283 :
5284 191522 : foreach(lc, input_rel->pathlist)
5285 : {
5286 115790 : Path *input_path = (Path *) lfirst(lc);
5287 : Path *sorted_path;
5288 : bool is_sorted;
5289 : int presorted_keys;
5290 :
5291 115790 : is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
5292 : input_path->pathkeys, &presorted_keys);
5293 :
5294 115790 : if (is_sorted)
5295 42938 : sorted_path = input_path;
5296 : else
5297 : {
5298 : /*
5299 : * Try at least sorting the cheapest path and also try
5300 : * incrementally sorting any path which is partially sorted
5301 : * already (no need to deal with paths which have presorted keys
5302 : * when incremental sort is disabled unless it's the cheapest
5303 : * input path).
5304 : */
5305 72852 : if (input_path != cheapest_input_path &&
5306 6316 : (presorted_keys == 0 || !enable_incremental_sort))
5307 1852 : continue;
5308 :
5309 : /*
5310 : * We've no need to consider both a sort and incremental sort.
5311 : * We'll just do a sort if there are no presorted keys and an
5312 : * incremental sort when there are presorted keys.
5313 : */
5314 71000 : if (presorted_keys == 0 || !enable_incremental_sort)
5315 65932 : sorted_path = (Path *) create_sort_path(root,
5316 : ordered_rel,
5317 : input_path,
5318 : root->sort_pathkeys,
5319 : limit_tuples);
5320 : else
5321 5068 : sorted_path = (Path *) create_incremental_sort_path(root,
5322 : ordered_rel,
5323 : input_path,
5324 : root->sort_pathkeys,
5325 : presorted_keys,
5326 : limit_tuples);
5327 : }
5328 :
5329 : /*
5330 : * If the pathtarget of the result path has different expressions from
5331 : * the target to be applied, a projection step is needed.
5332 : */
5333 113938 : if (!equal(sorted_path->pathtarget->exprs, target->exprs))
5334 294 : sorted_path = apply_projection_to_path(root, ordered_rel,
5335 : sorted_path, target);
5336 :
5337 113938 : add_path(ordered_rel, sorted_path);
5338 : }
5339 :
5340 : /*
5341 : * generate_gather_paths() will have already generated a simple Gather
5342 : * path for the best parallel path, if any, and the loop above will have
5343 : * considered sorting it. Similarly, generate_gather_paths() will also
5344 : * have generated order-preserving Gather Merge plans which can be used
5345 : * without sorting if they happen to match the sort_pathkeys, and the loop
5346 : * above will have handled those as well. However, there's one more
5347 : * possibility: it may make sense to sort the cheapest partial path or
5348 : * incrementally sort any partial path that is partially sorted according
5349 : * to the required output order and then use Gather Merge.
5350 : */
5351 75732 : if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
5352 52300 : input_rel->partial_pathlist != NIL)
5353 : {
5354 : Path *cheapest_partial_path;
5355 :
5356 2216 : cheapest_partial_path = linitial(input_rel->partial_pathlist);
5357 :
5358 4638 : foreach(lc, input_rel->partial_pathlist)
5359 : {
5360 2422 : Path *input_path = (Path *) lfirst(lc);
5361 : Path *sorted_path;
5362 : bool is_sorted;
5363 : int presorted_keys;
5364 : double total_groups;
5365 :
5366 2422 : is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
5367 : input_path->pathkeys,
5368 : &presorted_keys);
5369 :
5370 2422 : if (is_sorted)
5371 182 : continue;
5372 :
5373 : /*
5374 : * Try at least sorting the cheapest path and also try
5375 : * incrementally sorting any path which is partially sorted
5376 : * already (no need to deal with paths which have presorted keys
5377 : * when incremental sort is disabled unless it's the cheapest
5378 : * partial path).
5379 : */
5380 2240 : if (input_path != cheapest_partial_path &&
5381 42 : (presorted_keys == 0 || !enable_incremental_sort))
5382 0 : continue;
5383 :
5384 : /*
5385 : * We've no need to consider both a sort and incremental sort.
5386 : * We'll just do a sort if there are no presorted keys and an
5387 : * incremental sort when there are presorted keys.
5388 : */
5389 2240 : if (presorted_keys == 0 || !enable_incremental_sort)
5390 2180 : sorted_path = (Path *) create_sort_path(root,
5391 : ordered_rel,
5392 : input_path,
5393 : root->sort_pathkeys,
5394 : limit_tuples);
5395 : else
5396 60 : sorted_path = (Path *) create_incremental_sort_path(root,
5397 : ordered_rel,
5398 : input_path,
5399 : root->sort_pathkeys,
5400 : presorted_keys,
5401 : limit_tuples);
5402 2240 : total_groups = compute_gather_rows(sorted_path);
5403 : sorted_path = (Path *)
5404 2240 : create_gather_merge_path(root, ordered_rel,
5405 : sorted_path,
5406 : sorted_path->pathtarget,
5407 : root->sort_pathkeys, NULL,
5408 : &total_groups);
5409 :
5410 : /*
5411 : * If the pathtarget of the result path has different expressions
5412 : * from the target to be applied, a projection step is needed.
5413 : */
5414 2240 : if (!equal(sorted_path->pathtarget->exprs, target->exprs))
5415 6 : sorted_path = apply_projection_to_path(root, ordered_rel,
5416 : sorted_path, target);
5417 :
5418 2240 : add_path(ordered_rel, sorted_path);
5419 : }
5420 : }
5421 :
5422 : /*
5423 : * If there is an FDW that's responsible for all baserels of the query,
5424 : * let it consider adding ForeignPaths.
5425 : */
5426 75732 : if (ordered_rel->fdwroutine &&
5427 384 : ordered_rel->fdwroutine->GetForeignUpperPaths)
5428 370 : ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
5429 : input_rel, ordered_rel,
5430 : NULL);
5431 :
5432 : /* Let extensions possibly add some more paths */
5433 75732 : if (create_upper_paths_hook)
5434 0 : (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
5435 : input_rel, ordered_rel, NULL);
5436 :
5437 : /*
5438 : * No need to bother with set_cheapest here; grouping_planner does not
5439 : * need us to do it.
5440 : */
5441 : Assert(ordered_rel->pathlist != NIL);
5442 :
5443 75732 : return ordered_rel;
5444 : }
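/*
 * Illustrative sketch (hypothetical table "tab"): the parallel branch
 * above can turn "SELECT * FROM tab ORDER BY a" into
 *
 *   Gather Merge
 *     -> Sort (by a)
 *          -> Parallel Seq Scan on tab
 *
 * where each worker sorts its share of the rows and Gather Merge merges
 * the presorted streams, preserving the required output order.
 */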
5445 :
5446 :
5447 : /*
5448 : * make_group_input_target
5449 : * Generate appropriate PathTarget for initial input to grouping nodes.
5450 : *
5451 : * If there is grouping or aggregation, the scan/join subplan cannot emit
5452 : * the query's final targetlist; for example, it certainly can't emit any
5453 : * aggregate function calls. This routine generates the correct target
5454 : * for the scan/join subplan.
5455 : *
5456 : * The query target list passed from the parser already contains entries
5457 : * for all ORDER BY and GROUP BY expressions, but it will not have entries
5458 : * for variables used only in HAVING clauses; so we need to add those
5459 : * variables to the subplan target list. Also, we flatten all expressions
5460 : * except GROUP BY items into their component variables; other expressions
5461 : * will be computed by the upper plan nodes rather than by the subplan.
5462 : * For example, given a query like
5463 : * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
5464 : * we want to pass this targetlist to the subplan:
5465 : * a+b,c,d
5466 : * where the a+b target will be used by the Sort/Group steps, and the
5467 : * other targets will be used for computing the final results.
5468 : *
5469 : * 'final_target' is the query's final target list (in PathTarget form)
5470 : *
5471 : * The result is the PathTarget to be computed by the Paths returned from
5472 : * query_planner().
5473 : */
5474 : static PathTarget *
5475 39040 : make_group_input_target(PlannerInfo *root, PathTarget *final_target)
5476 : {
5477 39040 : Query *parse = root->parse;
5478 : PathTarget *input_target;
5479 : List *non_group_cols;
5480 : List *non_group_vars;
5481 : int i;
5482 : ListCell *lc;
5483 :
5484 : /*
5485 : * We must build a target containing all grouping columns, plus any other
5486 : * Vars mentioned in the query's targetlist and HAVING qual.
5487 : */
5488 39040 : input_target = create_empty_pathtarget();
5489 39040 : non_group_cols = NIL;
5490 :
5491 39040 : i = 0;
5492 94960 : foreach(lc, final_target->exprs)
5493 : {
5494 55920 : Expr *expr = (Expr *) lfirst(lc);
5495 55920 : Index sgref = get_pathtarget_sortgroupref(final_target, i);
5496 :
5497 64672 : if (sgref && root->processed_groupClause &&
5498 8752 : get_sortgroupref_clause_noerr(sgref,
5499 : root->processed_groupClause) != NULL)
5500 : {
5501 : /*
5502 : * It's a grouping column, so add it to the input target as-is.
5503 : *
5504 : * Note that the target is logically below the grouping step. So
5505 : * with grouping sets we need to remove the RT index of the
5506 : * grouping step if there is any from the target expression.
5507 : */
5508 7010 : if (parse->hasGroupRTE && parse->groupingSets != NIL)
5509 : {
5510 : Assert(root->group_rtindex > 0);
5511 : expr = (Expr *)
5512 1824 : remove_nulling_relids((Node *) expr,
5513 1824 : bms_make_singleton(root->group_rtindex),
5514 : NULL);
5515 : }
5516 7010 : add_column_to_pathtarget(input_target, expr, sgref);
5517 : }
5518 : else
5519 : {
5520 : /*
5521 : * Non-grouping column, so just remember the expression for later
5522 : * call to pull_var_clause.
5523 : */
5524 48910 : non_group_cols = lappend(non_group_cols, expr);
5525 : }
5526 :
5527 55920 : i++;
5528 : }
5529 :
5530 : /*
5531 : * If there's a HAVING clause, we'll need the Vars it uses, too.
5532 : */
5533 39040 : if (parse->havingQual)
5534 880 : non_group_cols = lappend(non_group_cols, parse->havingQual);
5535 :
5536 : /*
5537 : * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
5538 : * add them to the input target if not already present. (A Var used
5539 : * directly as a GROUP BY item will be present already.) Note this
5540 : * includes Vars used in resjunk items, so we are covering the needs of
5541 : * ORDER BY and window specifications. Vars used within Aggrefs and
5542 : * WindowFuncs will be pulled out here, too.
5543 : *
5544 : * Note that the target is logically below the grouping step. So with
5545 : * grouping sets we need to remove the RT index of the grouping step if
5546 : * there is any from the non-group Vars.
5547 : */
5548 39040 : non_group_vars = pull_var_clause((Node *) non_group_cols,
5549 : PVC_RECURSE_AGGREGATES |
5550 : PVC_RECURSE_WINDOWFUNCS |
5551 : PVC_INCLUDE_PLACEHOLDERS);
5552 39040 : if (parse->hasGroupRTE && parse->groupingSets != NIL)
5553 : {
5554 : Assert(root->group_rtindex > 0);
5555 : non_group_vars = (List *)
5556 830 : remove_nulling_relids((Node *) non_group_vars,
5557 830 : bms_make_singleton(root->group_rtindex),
5558 : NULL);
5559 : }
5560 39040 : add_new_columns_to_pathtarget(input_target, non_group_vars);
5561 :
5562 : /* clean up cruft */
5563 39040 : list_free(non_group_vars);
5564 39040 : list_free(non_group_cols);
5565 :
5566 : /* XXX this causes some redundant cost calculation ... */
5567 39040 : return set_pathtarget_cost_width(root, input_target);
5568 : }
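/*
 * Illustrative sketch extending the example in the header comment
 * (hypothetical table "tab"): for
 *
 *   SELECT a+b, SUM(c+d) FROM tab GROUP BY a+b HAVING MAX(e) > 10;
 *
 * the subplan target becomes "a+b, c, d, e"; the grouping expression is
 * kept whole, while c, d, and e (the last used only in HAVING) are pulled
 * out as plain Vars for the upper Agg node to consume.
 */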
5569 :
5570 : /*
5571 : * make_partial_grouping_target
5572 : * Generate appropriate PathTarget for output of partial aggregate
5573 : * (or partial grouping, if there are no aggregates) nodes.
5574 : *
5575 : * A partial aggregation node needs to emit all the same aggregates that
5576 : * a regular aggregation node would, plus any aggregates used in HAVING;
5577 : * except that the Aggref nodes should be marked as partial aggregates.
5578 : *
5579 : * In addition, we'd better emit any Vars and PlaceHolderVars that are
5580 : * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
5581 : * these would be Vars that are grouped by or used in grouping expressions.)
5582 : *
5583 : * grouping_target is the tlist to be emitted by the topmost aggregation step.
5584 : * havingQual represents the HAVING clause.
5585 : */
5586 : static PathTarget *
5587 2200 : make_partial_grouping_target(PlannerInfo *root,
5588 : PathTarget *grouping_target,
5589 : Node *havingQual)
5590 : {
5591 : PathTarget *partial_target;
5592 : List *non_group_cols;
5593 : List *non_group_exprs;
5594 : int i;
5595 : ListCell *lc;
5596 :
5597 2200 : partial_target = create_empty_pathtarget();
5598 2200 : non_group_cols = NIL;
5599 :
5600 2200 : i = 0;
5601 7818 : foreach(lc, grouping_target->exprs)
5602 : {
5603 5618 : Expr *expr = (Expr *) lfirst(lc);
5604 5618 : Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
5605 :
5606 9438 : if (sgref && root->processed_groupClause &&
5607 3820 : get_sortgroupref_clause_noerr(sgref,
5608 : root->processed_groupClause) != NULL)
5609 : {
5610 : /*
5611 : * It's a grouping column, so add it to the partial_target as-is.
5612 : * (This allows the upper agg step to repeat the grouping calcs.)
5613 : */
5614 1906 : add_column_to_pathtarget(partial_target, expr, sgref);
5615 : }
5616 : else
5617 : {
5618 : /*
5619 : * Non-grouping column, so just remember the expression for later
5620 : * call to pull_var_clause.
5621 : */
5622 3712 : non_group_cols = lappend(non_group_cols, expr);
5623 : }
5624 :
5625 5618 : i++;
5626 : }
5627 :
5628 : /*
5629 : * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
5630 : */
5631 2200 : if (havingQual)
5632 824 : non_group_cols = lappend(non_group_cols, havingQual);
5633 :
5634 : /*
5635 : * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
5636 : * non-group cols (plus HAVING), and add them to the partial_target if not
5637 : * already present. (An expression used directly as a GROUP BY item will
5638 : * be present already.) Note this includes Vars used in resjunk items, so
5639 : * we are covering the needs of ORDER BY and window specifications.
5640 : */
5641 2200 : non_group_exprs = pull_var_clause((Node *) non_group_cols,
5642 : PVC_INCLUDE_AGGREGATES |
5643 : PVC_RECURSE_WINDOWFUNCS |
5644 : PVC_INCLUDE_PLACEHOLDERS);
5645 :
5646 2200 : add_new_columns_to_pathtarget(partial_target, non_group_exprs);
5647 :
5648 : /*
5649 : * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
5650 : * are at the top level of the target list, so we can just scan the list
5651 : * rather than recursing through the expression trees.
5652 : */
5653 8390 : foreach(lc, partial_target->exprs)
5654 : {
5655 6190 : Aggref *aggref = (Aggref *) lfirst(lc);
5656 :
5657 6190 : if (IsA(aggref, Aggref))
5658 : {
5659 : Aggref *newaggref;
5660 :
5661 : /*
5662 : * We shouldn't need to copy the substructure of the Aggref node,
5663 : * but flat-copy the node itself to avoid damaging other trees.
5664 : */
5665 4254 : newaggref = makeNode(Aggref);
5666 4254 : memcpy(newaggref, aggref, sizeof(Aggref));
5667 :
5668 : /* For now, assume serialization is required */
5669 4254 : mark_partial_aggref(newaggref, AGGSPLIT_INITIAL_SERIAL);
5670 :
5671 4254 : lfirst(lc) = newaggref;
5672 : }
5673 : }
5674 :
5675 : /* clean up cruft */
5676 2200 : list_free(non_group_exprs);
5677 2200 : list_free(non_group_cols);
5678 :
5679 : /* XXX this causes some redundant cost calculation ... */
5680 2200 : return set_pathtarget_cost_width(root, partial_target);
5681 : }
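/*
 * Illustrative sketch: the partial target built here is what the lower
 * step of a two-phase (parallel) aggregation emits, e.g.
 *
 *   Finalize GroupAggregate
 *     -> Gather Merge
 *          -> Partial GroupAggregate    <- emits partial_target
 *               -> Parallel Seq Scan
 *
 * with each Aggref marked AGGSPLIT_INITIAL_SERIAL so that it outputs a
 * (possibly serialized) transition value rather than a finalized
 * aggregate result.
 */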
5682 :
5683 : /*
5684 : * mark_partial_aggref
5685 : * Adjust an Aggref to make it represent a partial-aggregation step.
5686 : *
5687 : * The Aggref node is modified in-place; caller must do any copying required.
5688 : */
5689 : void
5690 7066 : mark_partial_aggref(Aggref *agg, AggSplit aggsplit)
5691 : {
5692 : /* aggtranstype should be computed by this point */
5693 : Assert(OidIsValid(agg->aggtranstype));
5694 : /* ... but aggsplit should still be as the parser left it */
5695 : Assert(agg->aggsplit == AGGSPLIT_SIMPLE);
5696 :
5697 : /* Mark the Aggref with the intended partial-aggregation mode */
5698 7066 : agg->aggsplit = aggsplit;
5699 :
5700 : /*
5701 : * Adjust result type if needed. Normally, a partial aggregate returns
5702 : * the aggregate's transition type; but if that's INTERNAL and we're
5703 : * serializing, it returns BYTEA instead.
5704 : */
5705 7066 : if (DO_AGGSPLIT_SKIPFINAL(aggsplit))
5706 : {
5707 5660 : if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit))
5708 242 : agg->aggtype = BYTEAOID;
5709 : else
5710 5418 : agg->aggtype = agg->aggtranstype;
5711 : }
5712 7066 : }
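/*
 * Illustrative sketch: avg(int8) carries its state in an INTERNAL-type
 * transition value, so marking its Aggref with AGGSPLIT_INITIAL_SERIAL
 * switches the result type to BYTEA (the serialized state that crosses
 * the worker/leader boundary); sum(int4), whose transition type is int8,
 * simply reports int8.
 */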
5713 :
5714 : /*
5715 : * postprocess_setop_tlist
5716 : * Fix up targetlist returned by plan_set_operations().
5717 : *
5718 : * We need to transpose sort key info from the orig_tlist into new_tlist.
5719 : * NOTE: this would not be good enough if we supported resjunk sort keys
5720 : * for results of set operations --- then, we'd need to project a whole
5721 : * new tlist to evaluate the resjunk columns. For now, just elog if we
5722 : * find any resjunk columns in orig_tlist.
5723 : */
5724 : static List *
5725 6158 : postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
5726 : {
5727 : ListCell *l;
5728 6158 : ListCell *orig_tlist_item = list_head(orig_tlist);
5729 :
5730 24134 : foreach(l, new_tlist)
5731 : {
5732 17976 : TargetEntry *new_tle = lfirst_node(TargetEntry, l);
5733 : TargetEntry *orig_tle;
5734 :
5735 : /* ignore resjunk columns in setop result */
5736 17976 : if (new_tle->resjunk)
5737 0 : continue;
5738 :
5739 : Assert(orig_tlist_item != NULL);
5740 17976 : orig_tle = lfirst_node(TargetEntry, orig_tlist_item);
5741 17976 : orig_tlist_item = lnext(orig_tlist, orig_tlist_item);
5742 17976 : if (orig_tle->resjunk) /* should not happen */
5743 0 : elog(ERROR, "resjunk output columns are not implemented");
5744 : Assert(new_tle->resno == orig_tle->resno);
5745 17976 : new_tle->ressortgroupref = orig_tle->ressortgroupref;
5746 : }
5747 6158 : if (orig_tlist_item != NULL)
5748 0 : elog(ERROR, "resjunk output columns are not implemented");
5749 6158 : return new_tlist;
5750 : }
5751 :
5752 : /*
5753 : * optimize_window_clauses
5754 : * Call each WindowFunc's prosupport function to see if we're able to
5755 : * make any adjustments to any of the WindowClause's so that the executor
5756 : * can execute the window functions in a more optimal way.
5757 : *
5758 : * Currently we only allow adjustments to the WindowClause's frameOptions. We
5759 : * may allow more things to be done here in the future.
5760 : */
5761 : static void
5762 2378 : optimize_window_clauses(PlannerInfo *root, WindowFuncLists *wflists)
5763 : {
5764 2378 : List *windowClause = root->parse->windowClause;
5765 : ListCell *lc;
5766 :
5767 4984 : foreach(lc, windowClause)
5768 : {
5769 2606 : WindowClause *wc = lfirst_node(WindowClause, lc);
5770 : ListCell *lc2;
5771 2606 : int optimizedFrameOptions = 0;
5772 :
5773 : Assert(wc->winref <= wflists->maxWinRef);
5774 :
5775 : /* skip any WindowClauses that have no WindowFuncs */
5776 2606 : if (wflists->windowFuncs[wc->winref] == NIL)
5777 24 : continue;
5778 :
5779 3122 : foreach(lc2, wflists->windowFuncs[wc->winref])
5780 : {
5781 : SupportRequestOptimizeWindowClause req;
5782 : SupportRequestOptimizeWindowClause *res;
5783 2624 : WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
5784 : Oid prosupport;
5785 :
5786 2624 : prosupport = get_func_support(wfunc->winfnoid);
5787 :
5788 : /* Check if there's a support function for 'wfunc' */
5789 2624 : if (!OidIsValid(prosupport))
5790 2084 : break; /* can't optimize this WindowClause */
5791 :
5792 760 : req.type = T_SupportRequestOptimizeWindowClause;
5793 760 : req.window_clause = wc;
5794 760 : req.window_func = wfunc;
5795 760 : req.frameOptions = wc->frameOptions;
5796 :
5797 : /* call the support function */
5798 : res = (SupportRequestOptimizeWindowClause *)
5799 760 : DatumGetPointer(OidFunctionCall1(prosupport,
5800 : PointerGetDatum(&req)));
5801 :
5802 : /*
5803 : * Skip to the next WindowClause if the support function does not
5804 : * support this request type.
5805 : */
5806 760 : if (res == NULL)
5807 220 : break;
5808 :
5809 : /*
5810 : * Save these frameOptions for the first WindowFunc for this
5811 : * WindowClause.
5812 : */
5813 540 : if (foreach_current_index(lc2) == 0)
5814 516 : optimizedFrameOptions = res->frameOptions;
5815 :
5816 : /*
5817 : * On subsequent WindowFuncs, if the frameOptions are not the same
5818 : * then we're unable to optimize the frameOptions for this
5819 : * WindowClause.
5820 : */
5821 24 : else if (optimizedFrameOptions != res->frameOptions)
5822 0 : break; /* skip to the next WindowClause, if any */
5823 : }
5824 :
5825 : /* adjust the frameOptions if all WindowFuncs agree that it's ok */
5826 2582 : if (lc2 == NULL && wc->frameOptions != optimizedFrameOptions)
5827 : {
5828 : ListCell *lc3;
5829 :
5830 : /* apply the new frame options */
5831 498 : wc->frameOptions = optimizedFrameOptions;
5832 :
5833 : /*
5834 : * We now check to see if changing the frameOptions has caused
5835 : * this WindowClause to be a duplicate of some other WindowClause.
5836 : * This can only happen if we have multiple WindowClauses, so
5837 : * don't bother if there's only 1.
5838 : */
5839 498 : if (list_length(windowClause) == 1)
5840 408 : continue;
5841 :
5842 : /*
5843 : * Do the duplicate check and reuse the existing WindowClause if
5844 : * we find a duplicate.
5845 : */
5846 228 : foreach(lc3, windowClause)
5847 : {
5848 174 : WindowClause *existing_wc = lfirst_node(WindowClause, lc3);
5849 :
5850 : /* skip over the WindowClause we're currently editing */
5851 174 : if (existing_wc == wc)
5852 54 : continue;
5853 :
5854 : /*
5855 : * Perform the same duplicate check that is done in
5856 : * transformWindowFuncCall.
5857 : */
5858 240 : if (equal(wc->partitionClause, existing_wc->partitionClause) &&
5859 120 : equal(wc->orderClause, existing_wc->orderClause) &&
5860 120 : wc->frameOptions == existing_wc->frameOptions &&
5861 72 : equal(wc->startOffset, existing_wc->startOffset) &&
5862 36 : equal(wc->endOffset, existing_wc->endOffset))
5863 : {
5864 : ListCell *lc4;
5865 :
5866 : /*
5867 : * Now move each WindowFunc in 'wc' into 'existing_wc'.
5868 : * This required adjusting each WindowFunc's winref and
5869 : * moving the WindowFuncs in 'wc' to the list of
5870 : * WindowFuncs in 'existing_wc'.
5871 : */
5872 78 : foreach(lc4, wflists->windowFuncs[wc->winref])
5873 : {
5874 42 : WindowFunc *wfunc = lfirst_node(WindowFunc, lc4);
5875 :
5876 42 : wfunc->winref = existing_wc->winref;
5877 : }
5878 :
5879 : /* move list items */
5880 72 : wflists->windowFuncs[existing_wc->winref] = list_concat(wflists->windowFuncs[existing_wc->winref],
5881 36 : wflists->windowFuncs[wc->winref]);
5882 36 : wflists->windowFuncs[wc->winref] = NIL;
5883 :
5884 : /*
5885 : * transformWindowFuncCall() should have made sure there
5886 : * are no other duplicates, so we needn't bother looking
5887 : * any further.
5888 : */
5889 36 : break;
5890 : }
5891 : }
5892 : }
5893 : }
5894 2378 : }
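/*
 * Illustrative sketch (hypothetical table "tab"): row_number()'s
 * prosupport function reports that the function does not depend on the
 * window frame, so for
 *
 *   SELECT row_number() OVER (ORDER BY a) FROM tab;
 *
 * the default RANGE UNBOUNDED PRECEDING frame can be replaced by the
 * cheaper ROWS UNBOUNDED PRECEDING variant, avoiding peer-row checks at
 * execution time.
 */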
5895 :
5896 : /*
5897 : * select_active_windows
5898 : * Create a list of the "active" window clauses (ie, those referenced
5899 : * by non-deleted WindowFuncs) in the order they are to be executed.
5900 : */
5901 : static List *
5902 2378 : select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
5903 : {
5904 2378 : List *windowClause = root->parse->windowClause;
5905 2378 : List *result = NIL;
5906 : ListCell *lc;
5907 2378 : int nActive = 0;
5908 2378 : WindowClauseSortData *actives = palloc(sizeof(WindowClauseSortData)
5909 2378 : * list_length(windowClause));
5910 :
5911 : /* First, construct an array of the active windows */
5912 4984 : foreach(lc, windowClause)
5913 : {
5914 2606 : WindowClause *wc = lfirst_node(WindowClause, lc);
5915 :
5916 : /* It's only active if wflists shows some related WindowFuncs */
5917 : Assert(wc->winref <= wflists->maxWinRef);
5918 2606 : if (wflists->windowFuncs[wc->winref] == NIL)
5919 60 : continue;
5920 :
5921 2546 : actives[nActive].wc = wc; /* original clause */
5922 :
5923 : /*
5924 : * For sorting, we want the list of partition keys followed by the
5925 : * list of sort keys. But pathkeys construction will remove duplicates
5926 : * between the two, so we can as well (even though we can't detect all
5927 : * of the duplicates, since some may come from ECs - that might mean
5928 : * we miss optimization chances here). We must, however, ensure that
5929 : * the order of entries is preserved with respect to the ones we do
5930 : * keep.
5931 : *
5932 : * partitionClause and orderClause had their own duplicates removed in
5933 : * parse analysis, so we're only concerned here with removing
5934 : * orderClause entries that also appear in partitionClause.
5935 : */
5936 5092 : actives[nActive].uniqueOrder =
5937 2546 : list_concat_unique(list_copy(wc->partitionClause),
5938 2546 : wc->orderClause);
5939 2546 : nActive++;
5940 : }
5941 :
5942 : /*
5943 : * Sort active windows by their partitioning/ordering clauses, ignoring
5944 : * any framing clauses, so that the windows that need the same sorting are
5945 : * adjacent in the list. When we come to generate paths, this will avoid
5946 : * inserting additional Sort nodes.
5947 : *
5948 : * This is how we implement a specific requirement from the SQL standard,
5949 : * which says that when two or more windows are order-equivalent (i.e.
5950 : * have matching partition and order clauses, even if their names or
5951 : * framing clauses differ), then all peer rows must be presented in the
5952 : * same order in all of them. If we allowed multiple sort nodes for such
5953 : * cases, we'd risk having the peer rows end up in different orders in
5954 : * equivalent windows due to sort instability. (See General Rule 4 of
5955 : * <window clause> in SQL2008 - SQL2016.)
5956 : *
5957 : * Additionally, if the entire list of clauses of one window is a prefix
5958 : * of another, put first the window with stronger sorting requirements.
5959 : * This way we will first sort for stronger window, and won't have to sort
5960 : * again for the weaker one.
5961 : */
5962 2378 : qsort(actives, nActive, sizeof(WindowClauseSortData), common_prefix_cmp);
5963 :
5964 : /* build ordered list of the original WindowClause nodes */
5965 4924 : for (int i = 0; i < nActive; i++)
5966 2546 : result = lappend(result, actives[i].wc);
5967 :
5968 2378 : pfree(actives);
5969 :
5970 2378 : return result;
5971 : }
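/*
 * Illustrative sketch: given windows w1 AS (PARTITION BY a ORDER BY b)
 * and w2 AS (PARTITION BY a), w2's sort requirement (a) is a prefix of
 * w1's (a, b), so w1 is placed first; a single sort on (a, b) then
 * serves both WindowAgg levels.
 */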
5972 :
5973 : /*
5974 : * name_active_windows
5975 : * Ensure all active windows have unique names.
5976 : *
5977 : * The parser will have checked that user-assigned window names are unique
5978 : * within the Query. Here we assign made-up names to any unnamed
5979 : * WindowClauses for the benefit of EXPLAIN. (We don't want to do this
5980 : * at parse time, because it'd mess up decompilation of views.)
5981 : *
5982 : * activeWindows: result of select_active_windows
5983 : */
5984 : static void
5985 2378 : name_active_windows(List *activeWindows)
5986 : {
5987 2378 : int next_n = 1;
5988 : char newname[16];
5989 : ListCell *lc;
5990 :
5991 4924 : foreach(lc, activeWindows)
5992 : {
5993 2546 : WindowClause *wc = lfirst_node(WindowClause, lc);
5994 :
5995 : /* Nothing to do if it has a name already. */
5996 2546 : if (wc->name)
5997 498 : continue;
5998 :
5999 : /* Select a name not currently present in the list. */
6000 : for (;;)
6001 6 : {
6002 : ListCell *lc2;
6003 :
6004 2054 : snprintf(newname, sizeof(newname), "w%d", next_n++);
6005 4456 : foreach(lc2, activeWindows)
6006 : {
6007 2408 : WindowClause *wc2 = lfirst_node(WindowClause, lc2);
6008 :
6009 2408 : if (wc2->name && strcmp(wc2->name, newname) == 0)
6010 6 : break; /* matched */
6011 : }
6012 2054 : if (lc2 == NULL)
6013 2048 : break; /* reached the end with no match */
6014 : }
6015 2048 : wc->name = pstrdup(newname);
6016 : }
6017 2378 : }
6018 :
6019 : /*
6020 : * common_prefix_cmp
6021 : * QSort comparison function for WindowClauseSortData
6022 : *
6023 : * Sort the windows by the required sorting clauses. First, compare the sort
6024 : * clauses themselves. Second, if one window's clauses are a prefix of another
6025 : * one's clauses, put the window with more sort clauses first.
6026 : *
6027 : * We purposefully sort by the highest tleSortGroupRef first. Since
6028 : * tleSortGroupRefs are assigned first for the query's DISTINCT and ORDER
6029 : * BY, and since we sort the lowest tleSortGroupRefs last here, when a
6030 : * WindowClause shares a tleSortGroupRef with the query's DISTINCT or
6031 : * ORDER BY clause it becomes more likely that the final WindowAgg will
6032 : * provide presorted input for that clause, thus reducing the total
6033 : * number of sorts required for the query.
6034 : */
6035 : static int
6036 186 : common_prefix_cmp(const void *a, const void *b)
6037 : {
6038 186 : const WindowClauseSortData *wcsa = a;
6039 186 : const WindowClauseSortData *wcsb = b;
6040 : ListCell *item_a;
6041 : ListCell *item_b;
6042 :
6043 330 : forboth(item_a, wcsa->uniqueOrder, item_b, wcsb->uniqueOrder)
6044 : {
6045 246 : SortGroupClause *sca = lfirst_node(SortGroupClause, item_a);
6046 246 : SortGroupClause *scb = lfirst_node(SortGroupClause, item_b);
6047 :
6048 246 : if (sca->tleSortGroupRef > scb->tleSortGroupRef)
6049 102 : return -1;
6050 234 : else if (sca->tleSortGroupRef < scb->tleSortGroupRef)
6051 66 : return 1;
6052 168 : else if (sca->sortop > scb->sortop)
6053 0 : return -1;
6054 168 : else if (sca->sortop < scb->sortop)
6055 24 : return 1;
6056 144 : else if (sca->nulls_first && !scb->nulls_first)
6057 0 : return -1;
6058 144 : else if (!sca->nulls_first && scb->nulls_first)
6059 0 : return 1;
6060 : /* no need to compare eqop, since it is fully determined by sortop */
6061 : }
6062 :
6063 84 : if (list_length(wcsa->uniqueOrder) > list_length(wcsb->uniqueOrder))
6064 6 : return -1;
6065 78 : else if (list_length(wcsa->uniqueOrder) < list_length(wcsb->uniqueOrder))
6066 30 : return 1;
6067 :
6068 48 : return 0;
6069 : }
6070 :
6071 : /*
6072 : * make_window_input_target
6073 : * Generate appropriate PathTarget for initial input to WindowAgg nodes.
6074 : *
6075 : * When the query has window functions, this function computes the desired
6076 : * target to be computed by the node just below the first WindowAgg.
6077 : * This tlist must contain all values needed to evaluate the window functions,
6078 : * compute the final target list, and perform any required final sort step.
6079 : * If multiple WindowAggs are needed, each intermediate one adds its window
6080 : * function results onto this base tlist; only the topmost WindowAgg computes
6081 : * the actual desired target list.
6082 : *
6083 : * This function is much like make_group_input_target, though not quite enough
6084 : * like it to share code. As in that function, we flatten most expressions
6085 : * into their component variables. But we do not want to flatten window
6086 : * PARTITION BY/ORDER BY clauses, since that might result in multiple
6087 : * evaluations of them, which would be bad (possibly even resulting in
6088 : * inconsistent answers, if they contain volatile functions).
6089 : * Also, we must not flatten GROUP BY clauses that were left unflattened by
6090 : * make_group_input_target, because we may no longer have access to the
6091 : * individual Vars in them.
6092 : *
6093 : * Another key difference from make_group_input_target is that we don't
6094 : * flatten Aggref expressions, since those are to be computed below the
6095 : * window functions and just referenced like Vars above that.
6096 : *
6097 : * 'final_target' is the query's final target list (in PathTarget form)
6098 : * 'activeWindows' is the list of active windows previously identified by
6099 : * select_active_windows.
6100 : *
6101 : * The result is the PathTarget to be computed by the plan node immediately
6102 : * below the first WindowAgg node.
6103 : */
6104 : static PathTarget *
6105 2378 : make_window_input_target(PlannerInfo *root,
6106 : PathTarget *final_target,
6107 : List *activeWindows)
6108 : {
6109 : PathTarget *input_target;
6110 : Bitmapset *sgrefs;
6111 : List *flattenable_cols;
6112 : List *flattenable_vars;
6113 : int i;
6114 : ListCell *lc;
6115 :
6116 : Assert(root->parse->hasWindowFuncs);
6117 :
6118 : /*
6119 : * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
6120 : * into a bitmapset for convenient reference below.
6121 : */
6122 2378 : sgrefs = NULL;
6123 4924 : foreach(lc, activeWindows)
6124 : {
6125 2546 : WindowClause *wc = lfirst_node(WindowClause, lc);
6126 : ListCell *lc2;
6127 :
6128 3290 : foreach(lc2, wc->partitionClause)
6129 : {
6130 744 : SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2);
6131 :
6132 744 : sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
6133 : }
6134 4722 : foreach(lc2, wc->orderClause)
6135 : {
6136 2176 : SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2);
6137 :
6138 2176 : sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
6139 : }
6140 : }
6141 :
6142 : /* Add in sortgroupref numbers of GROUP BY clauses, too */
6143 2564 : foreach(lc, root->processed_groupClause)
6144 : {
6145 186 : SortGroupClause *grpcl = lfirst_node(SortGroupClause, lc);
6146 :
6147 186 : sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef);
6148 : }
6149 :
6150 : /*
6151 : * Construct a target containing all the non-flattenable targetlist items,
6152 : * and save aside the others for a moment.
6153 : */
6154 2378 : input_target = create_empty_pathtarget();
6155 2378 : flattenable_cols = NIL;
6156 :
6157 2378 : i = 0;
6158 10238 : foreach(lc, final_target->exprs)
6159 : {
6160 7860 : Expr *expr = (Expr *) lfirst(lc);
6161 7860 : Index sgref = get_pathtarget_sortgroupref(final_target, i);
6162 :
6163 : /*
6164 : * Don't want to deconstruct window clauses or GROUP BY items. (Note
6165 : * that such items can't contain window functions, so it's okay to
6166 : * compute them below the WindowAgg nodes.)
6167 : */
6168 7860 : if (sgref != 0 && bms_is_member(sgref, sgrefs))
6169 : {
6170 : /*
6171 : * Don't want to deconstruct this value, so add it to the input
6172 : * target as-is.
6173 : */
6174 2774 : add_column_to_pathtarget(input_target, expr, sgref);
6175 : }
6176 : else
6177 : {
6178 : /*
6179 : * Column is to be flattened, so just remember the expression for
6180 : * later call to pull_var_clause.
6181 : */
6182 5086 : flattenable_cols = lappend(flattenable_cols, expr);
6183 : }
6184 :
6185 7860 : i++;
6186 : }
6187 :
6188 : /*
6189 : * Pull out all the Vars and Aggrefs mentioned in flattenable columns, and
6190 : * add them to the input target if not already present. (Some might be
6191 : * there already because they're used directly as window/group clauses.)
6192 : *
6193 : * Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that any
6194 : * Aggrefs are placed in the Agg node's tlist and not left to be computed
6195 : * at higher levels. On the other hand, we should recurse into
6196 : * WindowFuncs to make sure their input expressions are available.
6197 : */
6198 2378 : flattenable_vars = pull_var_clause((Node *) flattenable_cols,
6199 : PVC_INCLUDE_AGGREGATES |
6200 : PVC_RECURSE_WINDOWFUNCS |
6201 : PVC_INCLUDE_PLACEHOLDERS);
6202 2378 : add_new_columns_to_pathtarget(input_target, flattenable_vars);
6203 :
6204 : /* clean up cruft */
6205 2378 : list_free(flattenable_vars);
6206 2378 : list_free(flattenable_cols);
6207 :
6208 : /* XXX this causes some redundant cost calculation ... */
6209 2378 : return set_pathtarget_cost_width(root, input_target);
6210 : }
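/*
 * Illustrative aside: the sgrefs bookkeeping above is a plain
 * set-membership filter: collect every sortgroupref used by a window or
 * GROUP BY clause into a set, keep tlist columns whose sortgroupref is in
 * the set as-is, and flatten everything else.  A hedged standalone sketch
 * using a bitmask in place of PostgreSQL's Bitmapset (all values invented):
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t	sgrefs = 0;
	int			clause_refs[] = {2, 5}; /* refs used by window/GROUP BY */
	int			tlist_refs[] = {0, 2, 3, 5};	/* 0 = no sortgroupref */

	for (int i = 0; i < 2; i++)
		sgrefs |= UINT64_C(1) << clause_refs[i];

	for (int i = 0; i < 4; i++)
	{
		int			ref = tlist_refs[i];

		if (ref != 0 && (sgrefs & (UINT64_C(1) << ref)) != 0)
			printf("tlist col %d: add to input target as-is\n", i);
		else
			printf("tlist col %d: flatten into component Vars\n", i);
	}
	return 0;
}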
6211 :
6212 : /*
6213 : * make_pathkeys_for_window
6214 : * Create a pathkeys list describing the required input ordering
6215 : * for the given WindowClause.
6216 : *
6217 : * Modifies wc's partitionClause to remove any clauses which are deemed
6218 : * redundant by the pathkey logic.
6219 : *
6220 : * The required ordering is first the PARTITION keys, then the ORDER keys.
6221 : * In the future we might try to implement windowing using hashing, in which
6222 : * case the ordering could be relaxed, but for now we always sort.
6223 : */
6224 : static List *
6225 5132 : make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
6226 : List *tlist)
6227 : {
6228 5132 : List *window_pathkeys = NIL;
6229 :
6230 : /* Throw error if can't sort */
6231 5132 : if (!grouping_is_sortable(wc->partitionClause))
6232 0 : ereport(ERROR,
6233 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
6234 : errmsg("could not implement window PARTITION BY"),
6235 : errdetail("Window partitioning columns must be of sortable datatypes.")));
6236 5132 : if (!grouping_is_sortable(wc->orderClause))
6237 0 : ereport(ERROR,
6238 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
6239 : errmsg("could not implement window ORDER BY"),
6240 : errdetail("Window ordering columns must be of sortable datatypes.")));
6241 :
6242 : /*
6243 : * First fetch the pathkeys for the PARTITION BY clause. We can safely
6244 : * remove any clauses from the wc->partitionClause for redundant pathkeys.
6245 : */
6246 5132 : if (wc->partitionClause != NIL)
6247 : {
6248 : bool sortable;
6249 :
6250 1290 : window_pathkeys = make_pathkeys_for_sortclauses_extended(root,
6251 : &wc->partitionClause,
6252 : tlist,
6253 : true,
6254 : false,
6255 : &sortable,
6256 : false);
6257 :
6258 : Assert(sortable);
6259 : }
6260 :
6261 : /*
6262 : * In principle, we could also consider removing redundant ORDER BY items
6263 : * too as doing so does not alter the result of peer row checks done by
6264 : * the executor. However, we must *not* remove the ordering column for
6265 : * RANGE OFFSET cases, as the executor needs that for in_range tests even
6266 : * if it's known to be equal to some partitioning column.
6267 : */
6268 5132 : if (wc->orderClause != NIL)
6269 : {
6270 : List *orderby_pathkeys;
6271 :
6272 4276 : orderby_pathkeys = make_pathkeys_for_sortclauses(root,
6273 : wc->orderClause,
6274 : tlist);
6275 :
6276 : /* Okay, make the combined pathkeys */
6277 4276 : if (window_pathkeys != NIL)
6278 934 : window_pathkeys = append_pathkeys(window_pathkeys, orderby_pathkeys);
6279 : else
6280 3342 : window_pathkeys = orderby_pathkeys;
6281 : }
6282 :
6283 5132 : return window_pathkeys;
6284 : }
6285 :
6286 : /*
6287 : * make_sort_input_target
6288 : * Generate appropriate PathTarget for initial input to Sort step.
6289 : *
6290 : * If the query has ORDER BY, this function chooses the target to be computed
6291 : * by the node just below the Sort (and DISTINCT, if any, since Unique can't
6292 : * project) steps. This might or might not be identical to the query's final
6293 : * output target.
6294 : *
6295 :  * The main argument for keeping the sort-input tlist the same as the final
6296 :  * target is that we avoid a separate projection node (which will be needed
6297 :  * if they're different, because Sort can't project).  However, there are also
6298 : * advantages to postponing tlist evaluation till after the Sort: it ensures
6299 : * a consistent order of evaluation for any volatile functions in the tlist,
6300 : * and if there's also a LIMIT, we can stop the query without ever computing
6301 : * tlist functions for later rows, which is beneficial for both volatile and
6302 : * expensive functions.
6303 : *
6304 : * Our current policy is to postpone volatile expressions till after the sort
6305 : * unconditionally (assuming that that's possible, ie they are in plain tlist
6306 : * columns and not ORDER BY/GROUP BY/DISTINCT columns). We also prefer to
6307 : * postpone set-returning expressions, because running them beforehand would
6308 : * bloat the sort dataset, and because it might cause unexpected output order
6309 : * if the sort isn't stable. However there's a constraint on that: all SRFs
6310 : * in the tlist should be evaluated at the same plan step, so that they can
6311 : * run in sync in nodeProjectSet. So if any SRFs are in sort columns, we
6312 : * mustn't postpone any SRFs. (Note that in principle that policy should
6313 : * probably get applied to the group/window input targetlists too, but we
6314 : * have not done that historically.) Lastly, expensive expressions are
6315 : * postponed if there is a LIMIT, or if root->tuple_fraction shows that
6316 : * partial evaluation of the query is possible (if neither is true, we expect
6317 : * to have to evaluate the expressions for every row anyway), or if there are
6318 : * any volatile or set-returning expressions (since once we've put in a
6319 : * projection at all, it won't cost any more to postpone more stuff).
6320 : *
6321 : * Another issue that could potentially be considered here is that
6322 : * evaluating tlist expressions could result in data that's either wider
6323 : * or narrower than the input Vars, thus changing the volume of data that
6324 : * has to go through the Sort. However, we usually have only a very bad
6325 : * idea of the output width of any expression more complex than a Var,
6326 : * so for now it seems too risky to try to optimize on that basis.
6327 : *
6328 : * Note that if we do produce a modified sort-input target, and then the
6329 : * query ends up not using an explicit Sort, no particular harm is done:
6330 : * we'll initially use the modified target for the preceding path nodes,
6331 : * but then change them to the final target with apply_projection_to_path.
6332 : * Moreover, in such a case the guarantees about evaluation order of
6333 : * volatile functions still hold, since the rows are sorted already.
6334 : *
6335 : * This function has some things in common with make_group_input_target and
6336 : * make_window_input_target, though the detailed rules for what to do are
6337 : * different. We never flatten/postpone any grouping or ordering columns;
6338 : * those are needed before the sort. If we do flatten a particular
6339 : * expression, we leave Aggref and WindowFunc nodes alone, since those were
6340 : * computed earlier.
6341 : *
6342 : * 'final_target' is the query's final target list (in PathTarget form)
6343 : * 'have_postponed_srfs' is an output argument, see below
6344 : *
6345 : * The result is the PathTarget to be computed by the plan node immediately
6346 : * below the Sort step (and the Distinct step, if any). This will be
6347 : * exactly final_target if we decide a projection step wouldn't be helpful.
6348 : *
6349 : * In addition, *have_postponed_srfs is set to true if we choose to postpone
6350 : * any set-returning functions to after the Sort.
6351 : */
6352 : static PathTarget *
6353 71776 : make_sort_input_target(PlannerInfo *root,
6354 : PathTarget *final_target,
6355 : bool *have_postponed_srfs)
6356 : {
6357 71776 : Query *parse = root->parse;
6358 : PathTarget *input_target;
6359 : int ncols;
6360 : bool *col_is_srf;
6361 : bool *postpone_col;
6362 : bool have_srf;
6363 : bool have_volatile;
6364 : bool have_expensive;
6365 : bool have_srf_sortcols;
6366 : bool postpone_srfs;
6367 : List *postponable_cols;
6368 : List *postponable_vars;
6369 : int i;
6370 : ListCell *lc;
6371 :
6372 : /* Shouldn't get here unless query has ORDER BY */
6373 : Assert(parse->sortClause);
6374 :
6375 71776 : *have_postponed_srfs = false; /* default result */
6376 :
6377 : /* Inspect tlist and collect per-column information */
6378 71776 : ncols = list_length(final_target->exprs);
6379 71776 : col_is_srf = (bool *) palloc0(ncols * sizeof(bool));
6380 71776 : postpone_col = (bool *) palloc0(ncols * sizeof(bool));
6381 71776 : have_srf = have_volatile = have_expensive = have_srf_sortcols = false;
6382 :
6383 71776 : i = 0;
6384 442842 : foreach(lc, final_target->exprs)
6385 : {
6386 371066 : Expr *expr = (Expr *) lfirst(lc);
6387 :
6388 : /*
6389 : * If the column has a sortgroupref, assume it has to be evaluated
6390 : * before sorting. Generally such columns would be ORDER BY, GROUP
6391 : * BY, etc targets. One exception is columns that were removed from
6392 : * GROUP BY by remove_useless_groupby_columns() ... but those would
6393 : * only be Vars anyway. There don't seem to be any cases where it
6394 : * would be worth the trouble to double-check.
6395 : */
6396 371066 : if (get_pathtarget_sortgroupref(final_target, i) == 0)
6397 : {
6398 : /*
6399 : * Check for SRF or volatile functions. Check the SRF case first
6400 : * because we must know whether we have any postponed SRFs.
6401 : */
6402 269594 : if (parse->hasTargetSRFs &&
6403 216 : expression_returns_set((Node *) expr))
6404 : {
6405 : /* We'll decide below whether these are postponable */
6406 96 : col_is_srf[i] = true;
6407 96 : have_srf = true;
6408 : }
6409 269282 : else if (contain_volatile_functions((Node *) expr))
6410 : {
6411 : /* Unconditionally postpone */
6412 148 : postpone_col[i] = true;
6413 148 : have_volatile = true;
6414 : }
6415 : else
6416 : {
6417 : /*
6418 : * Else check the cost. XXX it's annoying to have to do this
6419 : * when set_pathtarget_cost_width() just did it. Refactor to
6420 : * allow sharing the work?
6421 : */
6422 : QualCost cost;
6423 :
6424 269134 : cost_qual_eval_node(&cost, (Node *) expr, root);
6425 :
6426 : /*
6427 : * We arbitrarily define "expensive" as "more than 10X
6428 :  * cpu_operator_cost".  Note this will catch any PL function
6429 :  * with default cost (procost = 100, i.e. 100X cpu_operator_cost).
6430 : */
6431 269134 : if (cost.per_tuple > 10 * cpu_operator_cost)
6432 : {
6433 17234 : postpone_col[i] = true;
6434 17234 : have_expensive = true;
6435 : }
6436 : }
6437 : }
6438 : else
6439 : {
6440 : /* For sortgroupref cols, just check if any contain SRFs */
6441 101688 : if (!have_srf_sortcols &&
6442 101998 : parse->hasTargetSRFs &&
6443 310 : expression_returns_set((Node *) expr))
6444 124 : have_srf_sortcols = true;
6445 : }
6446 :
6447 371066 : i++;
6448 : }
6449 :
6450 : /*
6451 : * We can postpone SRFs if we have some but none are in sortgroupref cols.
6452 : */
6453 71776 : postpone_srfs = (have_srf && !have_srf_sortcols);
6454 :
6455 : /*
6456 : * If we don't need a post-sort projection, just return final_target.
6457 : */
6458 71776 : if (!(postpone_srfs || have_volatile ||
6459 71572 : (have_expensive &&
6460 10116 : (parse->limitCount || root->tuple_fraction > 0))))
6461 71536 : return final_target;
6462 :
6463 : /*
6464 : * Report whether the post-sort projection will contain set-returning
6465 : * functions. This is important because it affects whether the Sort can
6466 : * rely on the query's LIMIT (if any) to bound the number of rows it needs
6467 : * to return.
6468 : */
6469 240 : *have_postponed_srfs = postpone_srfs;
6470 :
6471 : /*
6472 : * Construct the sort-input target, taking all non-postponable columns and
6473 : * then adding Vars, PlaceHolderVars, Aggrefs, and WindowFuncs found in
6474 : * the postponable ones.
6475 : */
6476 240 : input_target = create_empty_pathtarget();
6477 240 : postponable_cols = NIL;
6478 :
6479 240 : i = 0;
6480 1990 : foreach(lc, final_target->exprs)
6481 : {
6482 1750 : Expr *expr = (Expr *) lfirst(lc);
6483 :
6484 1750 : if (postpone_col[i] || (postpone_srfs && col_is_srf[i]))
6485 298 : postponable_cols = lappend(postponable_cols, expr);
6486 : else
6487 1452 : add_column_to_pathtarget(input_target, expr,
6488 1452 : get_pathtarget_sortgroupref(final_target, i));
6489 :
6490 1750 : i++;
6491 : }
6492 :
6493 : /*
6494 : * Pull out all the Vars, Aggrefs, and WindowFuncs mentioned in
6495 : * postponable columns, and add them to the sort-input target if not
6496 : * already present. (Some might be there already.) We mustn't
6497 : * deconstruct Aggrefs or WindowFuncs here, since the projection node
6498 : * would be unable to recompute them.
6499 : */
6500 240 : postponable_vars = pull_var_clause((Node *) postponable_cols,
6501 : PVC_INCLUDE_AGGREGATES |
6502 : PVC_INCLUDE_WINDOWFUNCS |
6503 : PVC_INCLUDE_PLACEHOLDERS);
6504 240 : add_new_columns_to_pathtarget(input_target, postponable_vars);
6505 :
6506 : /* clean up cruft */
6507 240 : list_free(postponable_vars);
6508 240 : list_free(postponable_cols);
6509 :
6510 : /* XXX this represents even more redundant cost calculation ... */
6511 240 : return set_pathtarget_cost_width(root, input_target);
6512 : }
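/*
 * Illustrative aside: the "expensive" test above is pure arithmetic: a
 * column is postponable when its per-tuple cost exceeds ten times
 * cpu_operator_cost.  A hedged standalone sketch using the GUC's default
 * value of 0.0025 (the sample per-tuple costs are invented; 0.25 matches a
 * function with the default procost of 100):
 */
#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
	const double cpu_operator_cost = 0.0025;	/* default GUC value */
	const double threshold = 10 * cpu_operator_cost;	/* 0.025 */
	double		per_tuple[] = {0.0025, 0.01, 0.25};

	for (int i = 0; i < 3; i++)
	{
		bool		expensive = per_tuple[i] > threshold;

		printf("cost %.4f -> %s\n", per_tuple[i],
			   expensive ? "postpone past the Sort" : "evaluate before the Sort");
	}
	return 0;
}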
6513 :
6514 : /*
6515 : * get_cheapest_fractional_path
6516 : * Find the cheapest path for retrieving a specified fraction of all
6517 : * the tuples expected to be returned by the given relation.
6518 : *
6519 :  * Do not consider parameterized paths.  If the caller needs a path for an
6520 :  * upper rel, it can't use parameterized paths.  If the caller needs an
6521 :  * append subpath, the requirement that all subpaths share a similar
6522 :  * parameterization could become too limiting.
6523 : *
6524 : * We interpret tuple_fraction the same way as grouping_planner.
6525 : *
6526 : * We assume set_cheapest() has been run on the given rel.
6527 : */
6528 : Path *
6529 493658 : get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction)
6530 : {
6531 493658 : Path *best_path = rel->cheapest_total_path;
6532 : ListCell *l;
6533 :
6534 : /* If all tuples will be retrieved, just return the cheapest-total path */
6535 493658 : if (tuple_fraction <= 0.0)
6536 484160 : return best_path;
6537 :
6538 : /* Convert absolute # of tuples to a fraction; no need to clamp to 0..1 */
6539 9498 : if (tuple_fraction >= 1.0 && best_path->rows > 0)
6540 3942 : tuple_fraction /= best_path->rows;
6541 :
6542 24870 : foreach(l, rel->pathlist)
6543 : {
6544 15372 : Path *path = (Path *) lfirst(l);
6545 :
6546 15372 : if (path->param_info)
6547 200 : continue;
6548 :
6549 20846 : if (path == rel->cheapest_total_path ||
6550 5674 : compare_fractional_path_costs(best_path, path, tuple_fraction) <= 0)
6551 14656 : continue;
6552 :
6553 516 : best_path = path;
6554 : }
6555 :
6556 9498 : return best_path;
6557 : }
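/*
 * Illustrative aside: the tuple_fraction convention above (shared with
 * grouping_planner) treats values >= 1.0 as an absolute row count, which
 * is converted to a fraction of the path's estimated rowcount before any
 * cost comparison.  A hedged arithmetic sketch (the numbers are invented):
 */
#include <stdio.h>

int
main(void)
{
	double		tuple_fraction = 50.0;	/* caller wants ~50 rows, e.g. LIMIT 50 */
	double		est_rows = 10000.0; /* stand-in for best_path->rows */

	/* values >= 1.0 are absolute counts; convert to a fraction */
	if (tuple_fraction >= 1.0 && est_rows > 0)
		tuple_fraction /= est_rows;

	printf("fraction = %.4f\n", tuple_fraction);	/* prints 0.0050 */
	return 0;
}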
6558 :
6559 : /*
6560 : * adjust_paths_for_srfs
6561 : * Fix up the Paths of the given upperrel to handle tSRFs properly.
6562 : *
6563 : * The executor can only handle set-returning functions that appear at the
6564 : * top level of the targetlist of a ProjectSet plan node. If we have any SRFs
6565 : * that are not at top level, we need to split up the evaluation into multiple
6566 : * plan levels in which each level satisfies this constraint. This function
6567 : * modifies each Path of an upperrel that (might) compute any SRFs in its
6568 : * output tlist to insert appropriate projection steps.
6569 : *
6570 : * The given targets and targets_contain_srfs lists are from
6571 : * split_pathtarget_at_srfs(). We assume the existing Paths emit the first
6572 : * target in targets.
6573 : */
6574 : static void
6575 12664 : adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
6576 : List *targets, List *targets_contain_srfs)
6577 : {
6578 : ListCell *lc;
6579 :
6580 : Assert(list_length(targets) == list_length(targets_contain_srfs));
6581 : Assert(!linitial_int(targets_contain_srfs));
6582 :
6583 : /* If no SRFs appear at this plan level, nothing to do */
6584 12664 : if (list_length(targets) == 1)
6585 628 : return;
6586 :
6587 : /*
6588 : * Stack SRF-evaluation nodes atop each path for the rel.
6589 : *
6590 : * In principle we should re-run set_cheapest() here to identify the
6591 : * cheapest path, but it seems unlikely that adding the same tlist eval
6592 : * costs to all the paths would change that, so we don't bother. Instead,
6593 : * just assume that the cheapest-startup and cheapest-total paths remain
6594 : * so. (There should be no parameterized paths anymore, so we needn't
6595 : * worry about updating cheapest_parameterized_paths.)
6596 : */
6597 24098 : foreach(lc, rel->pathlist)
6598 : {
6599 12062 : Path *subpath = (Path *) lfirst(lc);
6600 12062 : Path *newpath = subpath;
6601 : ListCell *lc1,
6602 : *lc2;
6603 :
6604 : Assert(subpath->param_info == NULL);
6605 37362 : forboth(lc1, targets, lc2, targets_contain_srfs)
6606 : {
6607 25300 : PathTarget *thistarget = lfirst_node(PathTarget, lc1);
6608 25300 : bool contains_srfs = (bool) lfirst_int(lc2);
6609 :
6610 : /* If this level doesn't contain SRFs, do regular projection */
6611 25300 : if (contains_srfs)
6612 12122 : newpath = (Path *) create_set_projection_path(root,
6613 : rel,
6614 : newpath,
6615 : thistarget);
6616 : else
6617 13178 : newpath = (Path *) apply_projection_to_path(root,
6618 : rel,
6619 : newpath,
6620 : thistarget);
6621 : }
6622 12062 : lfirst(lc) = newpath;
6623 12062 : if (subpath == rel->cheapest_startup_path)
6624 372 : rel->cheapest_startup_path = newpath;
6625 12062 : if (subpath == rel->cheapest_total_path)
6626 372 : rel->cheapest_total_path = newpath;
6627 : }
6628 :
6629 : /* Likewise for partial paths, if any */
6630 12042 : foreach(lc, rel->partial_pathlist)
6631 : {
6632 6 : Path *subpath = (Path *) lfirst(lc);
6633 6 : Path *newpath = subpath;
6634 : ListCell *lc1,
6635 : *lc2;
6636 :
6637 : Assert(subpath->param_info == NULL);
6638 24 : forboth(lc1, targets, lc2, targets_contain_srfs)
6639 : {
6640 18 : PathTarget *thistarget = lfirst_node(PathTarget, lc1);
6641 18 : bool contains_srfs = (bool) lfirst_int(lc2);
6642 :
6643 : /* If this level doesn't contain SRFs, do regular projection */
6644 18 : if (contains_srfs)
6645 6 : newpath = (Path *) create_set_projection_path(root,
6646 : rel,
6647 : newpath,
6648 : thistarget);
6649 : else
6650 : {
6651 : /* avoid apply_projection_to_path, in case of multiple refs */
6652 12 : newpath = (Path *) create_projection_path(root,
6653 : rel,
6654 : newpath,
6655 : thistarget);
6656 : }
6657 : }
6658 6 : lfirst(lc) = newpath;
6659 : }
6660 : }
6661 :
6662 : /*
6663 : * expression_planner
6664 : * Perform planner's transformations on a standalone expression.
6665 : *
6666 : * Various utility commands need to evaluate expressions that are not part
6667 : * of a plannable query. They can do so using the executor's regular
6668 : * expression-execution machinery, but first the expression has to be fed
6669 : * through here to transform it from parser output to something executable.
6670 : *
6671 : * Currently, we disallow sublinks in standalone expressions, so there's no
6672 : * real "planning" involved here. (That might not always be true though.)
6673 : * What we must do is run eval_const_expressions to ensure that any function
6674 : * calls are converted to positional notation and function default arguments
6675 : * get inserted. The fact that constant subexpressions get simplified is a
6676 : * side-effect that is useful when the expression will get evaluated more than
6677 : * once. Also, we must fix operator function IDs.
6678 : *
6679 : * This does not return any information about dependencies of the expression.
6680 : * Hence callers should use the results only for the duration of the current
6681 : * query. Callers that would like to cache the results for longer should use
6682 : * expression_planner_with_deps, probably via the plancache.
6683 : *
6684 : * Note: this must not make any damaging changes to the passed-in expression
6685 : * tree. (It would actually be okay to apply fix_opfuncids to it, but since
6686 : * we first do an expression_tree_mutator-based walk, what is returned will
6687 : * be a new node tree.) The result is constructed in the current memory
6688 : * context; beware that this can leak a lot of additional stuff there, too.
6689 : */
6690 : Expr *
6691 246018 : expression_planner(Expr *expr)
6692 : {
6693 : Node *result;
6694 :
6695 : /*
6696 : * Convert named-argument function calls, insert default arguments and
6697 : * simplify constant subexprs
6698 : */
6699 246018 : result = eval_const_expressions(NULL, (Node *) expr);
6700 :
6701 : /* Fill in opfuncid values if missing */
6702 246000 : fix_opfuncids(result);
6703 :
6704 246000 : return (Expr *) result;
6705 : }
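/*
 * Illustrative aside: a typical calling pattern, hedged and abridged (this
 * fragment is not a complete backend function; "raw_expr" is assumed to be
 * parser output already transformed into an Expr tree):
 *
 *     Expr       *exec_expr = expression_planner(raw_expr);
 *     ExprState  *exec_state = ExecInitExpr(exec_expr, NULL);
 *
 * i.e. run the expression through the planner's simplifications once, then
 * compile it with the executor's regular expression machinery.
 */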
6706 :
6707 : /*
6708 : * expression_planner_with_deps
6709 : * Perform planner's transformations on a standalone expression,
6710 : * returning expression dependency information along with the result.
6711 : *
6712 : * This is identical to expression_planner() except that it also returns
6713 : * information about possible dependencies of the expression, ie identities of
6714 : * objects whose definitions affect the result. As in a PlannedStmt, these
6715 : * are expressed as a list of relation Oids and a list of PlanInvalItems.
6716 : */
6717 : Expr *
6718 358 : expression_planner_with_deps(Expr *expr,
6719 : List **relationOids,
6720 : List **invalItems)
6721 : {
6722 : Node *result;
6723 : PlannerGlobal glob;
6724 : PlannerInfo root;
6725 :
6726 : /* Make up dummy planner state so we can use setrefs machinery */
6727 7876 : MemSet(&glob, 0, sizeof(glob));
6728 358 : glob.type = T_PlannerGlobal;
6729 358 : glob.relationOids = NIL;
6730 358 : glob.invalItems = NIL;
6731 :
6732 31862 : MemSet(&root, 0, sizeof(root));
6733 358 : root.type = T_PlannerInfo;
6734 358 : root.glob = &glob;
6735 :
6736 : /*
6737 : * Convert named-argument function calls, insert default arguments and
6738 : * simplify constant subexprs. Collect identities of inlined functions
6739 : * and elided domains, too.
6740 : */
6741 358 : result = eval_const_expressions(&root, (Node *) expr);
6742 :
6743 : /* Fill in opfuncid values if missing */
6744 358 : fix_opfuncids(result);
6745 :
6746 : /*
6747 : * Now walk the finished expression to find anything else we ought to
6748 : * record as an expression dependency.
6749 : */
6750 358 : (void) extract_query_dependencies_walker(result, &root);
6751 :
6752 358 : *relationOids = glob.relationOids;
6753 358 : *invalItems = glob.invalItems;
6754 :
6755 358 : return (Expr *) result;
6756 : }
6757 :
6758 :
6759 : /*
6760 : * plan_cluster_use_sort
6761 : * Use the planner to decide how CLUSTER should implement sorting
6762 : *
6763 : * tableOid is the OID of a table to be clustered on its index indexOid
6764 : * (which is already known to be a btree index). Decide whether it's
6765 : * cheaper to do an indexscan or a seqscan-plus-sort to execute the CLUSTER.
6766 : * Return true to use sorting, false to use an indexscan.
6767 : *
6768 : * Note: caller had better already hold some type of lock on the table.
6769 : */
6770 : bool
6771 186 : plan_cluster_use_sort(Oid tableOid, Oid indexOid)
6772 : {
6773 : PlannerInfo *root;
6774 : Query *query;
6775 : PlannerGlobal *glob;
6776 : RangeTblEntry *rte;
6777 : RelOptInfo *rel;
6778 : IndexOptInfo *indexInfo;
6779 : QualCost indexExprCost;
6780 : Cost comparisonCost;
6781 : Path *seqScanPath;
6782 : Path seqScanAndSortPath;
6783 : IndexPath *indexScanPath;
6784 : ListCell *lc;
6785 :
6786 : /* We can short-circuit the cost comparison if indexscans are disabled */
6787 186 : if (!enable_indexscan)
6788 30 : return true; /* use sort */
6789 :
6790 : /* Set up mostly-dummy planner state */
6791 156 : query = makeNode(Query);
6792 156 : query->commandType = CMD_SELECT;
6793 :
6794 156 : glob = makeNode(PlannerGlobal);
6795 :
6796 156 : root = makeNode(PlannerInfo);
6797 156 : root->parse = query;
6798 156 : root->glob = glob;
6799 156 : root->query_level = 1;
6800 156 : root->planner_cxt = CurrentMemoryContext;
6801 156 : root->wt_param_id = -1;
6802 156 : root->join_domains = list_make1(makeNode(JoinDomain));
6803 :
6804 : /* Build a minimal RTE for the rel */
6805 156 : rte = makeNode(RangeTblEntry);
6806 156 : rte->rtekind = RTE_RELATION;
6807 156 : rte->relid = tableOid;
6808 156 : rte->relkind = RELKIND_RELATION; /* Don't be too picky. */
6809 156 : rte->rellockmode = AccessShareLock;
6810 156 : rte->lateral = false;
6811 156 : rte->inh = false;
6812 156 : rte->inFromCl = true;
6813 156 : query->rtable = list_make1(rte);
6814 156 : addRTEPermissionInfo(&query->rteperminfos, rte);
6815 :
6816 : /* Set up RTE/RelOptInfo arrays */
6817 156 : setup_simple_rel_arrays(root);
6818 :
6819 : /* Build RelOptInfo */
6820 156 : rel = build_simple_rel(root, 1, NULL);
6821 :
6822 : /* Locate IndexOptInfo for the target index */
6823 156 : indexInfo = NULL;
6824 194 : foreach(lc, rel->indexlist)
6825 : {
6826 194 : indexInfo = lfirst_node(IndexOptInfo, lc);
6827 194 : if (indexInfo->indexoid == indexOid)
6828 156 : break;
6829 : }
6830 :
6831 : /*
6832 : * It's possible that get_relation_info did not generate an IndexOptInfo
6833 : * for the desired index; this could happen if it's not yet reached its
6834 : * indcheckxmin usability horizon, or if it's a system index and we're
6835 : * ignoring system indexes. In such cases we should tell CLUSTER to not
6836 : * trust the index contents but use seqscan-and-sort.
6837 : */
6838 156 : if (lc == NULL) /* not in the list? */
6839 0 : return true; /* use sort */
6840 :
6841 : /*
6842 : * Rather than doing all the pushups that would be needed to use
6843 : * set_baserel_size_estimates, just do a quick hack for rows and width.
6844 : */
6845 156 : rel->rows = rel->tuples;
6846 156 : rel->reltarget->width = get_relation_data_width(tableOid, NULL);
6847 :
6848 156 : root->total_table_pages = rel->pages;
6849 :
6850 : /*
6851 : * Determine eval cost of the index expressions, if any. We need to
6852 : * charge twice that amount for each tuple comparison that happens during
6853 : * the sort, since tuplesort.c will have to re-evaluate the index
6854 : * expressions each time. (XXX that's pretty inefficient...)
6855 : */
6856 156 : cost_qual_eval(&indexExprCost, indexInfo->indexprs, root);
6857 156 : comparisonCost = 2.0 * (indexExprCost.startup + indexExprCost.per_tuple);
6858 :
6859 : /* Estimate the cost of seq scan + sort */
6860 156 : seqScanPath = create_seqscan_path(root, rel, NULL, 0);
6861 156 : cost_sort(&seqScanAndSortPath, root, NIL,
6862 : seqScanPath->disabled_nodes,
6863 156 : seqScanPath->total_cost, rel->tuples, rel->reltarget->width,
6864 : comparisonCost, maintenance_work_mem, -1.0);
6865 :
6866 : /* Estimate the cost of index scan */
6867 156 : indexScanPath = create_index_path(root, indexInfo,
6868 : NIL, NIL, NIL, NIL,
6869 : ForwardScanDirection, false,
6870 : NULL, 1.0, false);
6871 :
6872 156 : return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
6873 : }
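/*
 * Illustrative aside: the doubled comparison cost above reflects that each
 * sort comparison must re-evaluate the index expressions for both input
 * tuples.  A hedged arithmetic sketch (the eval costs are invented):
 */
#include <stdio.h>

int
main(void)
{
	double		startup = 0.0;
	double		per_tuple = 0.01;	/* eval cost of one index expression */

	/* each comparison evaluates the expression for two tuples */
	double		comparison_cost = 2.0 * (startup + per_tuple);

	printf("comparison cost = %.4f\n", comparison_cost);	/* 0.0200 */
	return 0;
}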
6874 :
6875 : /*
6876 : * plan_create_index_workers
6877 : * Use the planner to decide how many parallel worker processes
6878 : * CREATE INDEX should request for use
6879 : *
6880 : * tableOid is the table on which the index is to be built. indexOid is the
6881 : * OID of an index to be created or reindexed (which must be an index with
6882 : * support for parallel builds - currently btree or BRIN).
6883 : *
6884 : * Return value is the number of parallel worker processes to request. It
6885 : * may be unsafe to proceed if this is 0. Note that this does not include the
6886 : * leader participating as a worker (value is always a number of parallel
6887 : * worker processes).
6888 : *
6889 : * Note: caller had better already hold some type of lock on the table and
6890 : * index.
6891 : */
6892 : int
6893 36214 : plan_create_index_workers(Oid tableOid, Oid indexOid)
6894 : {
6895 : PlannerInfo *root;
6896 : Query *query;
6897 : PlannerGlobal *glob;
6898 : RangeTblEntry *rte;
6899 : Relation heap;
6900 : Relation index;
6901 : RelOptInfo *rel;
6902 : int parallel_workers;
6903 : BlockNumber heap_blocks;
6904 : double reltuples;
6905 : double allvisfrac;
6906 :
6907 : /*
6908 : * We don't allow performing parallel operation in standalone backend or
6909 : * when parallelism is disabled.
6910 : */
6911 36214 : if (!IsUnderPostmaster || max_parallel_maintenance_workers == 0)
6912 498 : return 0;
6913 :
6914 : /* Set up largely-dummy planner state */
6915 35716 : query = makeNode(Query);
6916 35716 : query->commandType = CMD_SELECT;
6917 :
6918 35716 : glob = makeNode(PlannerGlobal);
6919 :
6920 35716 : root = makeNode(PlannerInfo);
6921 35716 : root->parse = query;
6922 35716 : root->glob = glob;
6923 35716 : root->query_level = 1;
6924 35716 : root->planner_cxt = CurrentMemoryContext;
6925 35716 : root->wt_param_id = -1;
6926 35716 : root->join_domains = list_make1(makeNode(JoinDomain));
6927 :
6928 : /*
6929 : * Build a minimal RTE.
6930 : *
6931 :  * Mark the RTE with inh = true.  This is a kludge to prevent
6932 :  * get_relation_info() from fetching index info, which we must avoid
6933 :  * because that function does not expect any IndexOptInfo to be
6934 :  * currently undergoing REINDEX.
6935 : */
6936 35716 : rte = makeNode(RangeTblEntry);
6937 35716 : rte->rtekind = RTE_RELATION;
6938 35716 : rte->relid = tableOid;
6939 35716 : rte->relkind = RELKIND_RELATION; /* Don't be too picky. */
6940 35716 : rte->rellockmode = AccessShareLock;
6941 35716 : rte->lateral = false;
6942 35716 : rte->inh = true;
6943 35716 : rte->inFromCl = true;
6944 35716 : query->rtable = list_make1(rte);
6945 35716 : addRTEPermissionInfo(&query->rteperminfos, rte);
6946 :
6947 : /* Set up RTE/RelOptInfo arrays */
6948 35716 : setup_simple_rel_arrays(root);
6949 :
6950 : /* Build RelOptInfo */
6951 35716 : rel = build_simple_rel(root, 1, NULL);
6952 :
6953 : /* Rels are assumed already locked by the caller */
6954 35716 : heap = table_open(tableOid, NoLock);
6955 35716 : index = index_open(indexOid, NoLock);
6956 :
6957 : /*
6958 : * Determine if it's safe to proceed.
6959 : *
6960 : * Currently, parallel workers can't access the leader's temporary tables.
6961 : * Furthermore, any index predicate or index expressions must be parallel
6962 : * safe.
6963 : */
6964 35716 : if (heap->rd_rel->relpersistence == RELPERSISTENCE_TEMP ||
6965 33688 : !is_parallel_safe(root, (Node *) RelationGetIndexExpressions(index)) ||
6966 33568 : !is_parallel_safe(root, (Node *) RelationGetIndexPredicate(index)))
6967 : {
6968 2148 : parallel_workers = 0;
6969 2148 : goto done;
6970 : }
6971 :
6972 : /*
6973 : * If parallel_workers storage parameter is set for the table, accept that
6974 : * as the number of parallel worker processes to launch (though still cap
6975 : * at max_parallel_maintenance_workers). Note that we deliberately do not
6976 : * consider any other factor when parallel_workers is set. (e.g., memory
6977 : * use by workers.)
6978 : */
6979 33568 : if (rel->rel_parallel_workers != -1)
6980 : {
6981 18 : parallel_workers = Min(rel->rel_parallel_workers,
6982 : max_parallel_maintenance_workers);
6983 18 : goto done;
6984 : }
6985 :
6986 : /*
6987 : * Estimate heap relation size ourselves, since rel->pages cannot be
6988 : * trusted (heap RTE was marked as inheritance parent)
6989 : */
6990 33550 : estimate_rel_size(heap, NULL, &heap_blocks, &reltuples, &allvisfrac);
6991 :
6992 : /*
6993 : * Determine number of workers to scan the heap relation using generic
6994 : * model
6995 : */
6996 33550 : parallel_workers = compute_parallel_worker(rel, heap_blocks, -1,
6997 : max_parallel_maintenance_workers);
6998 :
6999 : /*
7000 : * Cap workers based on available maintenance_work_mem as needed.
7001 : *
7002 : * Note that each tuplesort participant receives an even share of the
7003 : * total maintenance_work_mem budget. Aim to leave participants
7004 : * (including the leader as a participant) with no less than 32MB of
7005 :  * memory.  This puts a maintenance_work_mem setting of 64MB immediately
7006 :  * past the threshold needed to be capable of launching a single parallel
7007 :  * worker to sort.
7008 : */
7009 33706 : while (parallel_workers > 0 &&
7010 314 : maintenance_work_mem / (parallel_workers + 1) < 32 * 1024)
7011 156 : parallel_workers--;
7012 :
7013 33550 : done:
7014 35716 : index_close(index, NoLock);
7015 35716 : table_close(heap, NoLock);
7016 :
7017 35716 : return parallel_workers;
7018 : }
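/*
 * Illustrative aside: the memory cap above is easy to trace by hand; each
 * of the (workers + 1) tuplesort participants gets an even share of
 * maintenance_work_mem, and workers are shed until every share is at least
 * 32MB.  A standalone sketch, with values in KB to match the GUC's
 * internal unit:
 */
#include <stdio.h>

int
main(void)
{
	int			maintenance_work_mem = 64 * 1024;	/* 64MB, stored in KB */
	int			parallel_workers = 2;

	/* shed workers until each participant (workers + leader) gets >= 32MB */
	while (parallel_workers > 0 &&
		   maintenance_work_mem / (parallel_workers + 1) < 32 * 1024)
		parallel_workers--;

	/* 64MB/3 < 32MB but 64MB/2 >= 32MB, so exactly one worker survives */
	printf("workers = %d\n", parallel_workers);
	return 0;
}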
7019 :
7020 : /*
7021 : * add_paths_to_grouping_rel
7022 : *
7023 : * Add non-partial paths to grouping relation.
7024 : */
7025 : static void
7026 39898 : add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
7027 : RelOptInfo *grouped_rel,
7028 : RelOptInfo *partially_grouped_rel,
7029 : const AggClauseCosts *agg_costs,
7030 : grouping_sets_data *gd, double dNumGroups,
7031 : GroupPathExtraData *extra)
7032 : {
7033 39898 : Query *parse = root->parse;
7034 39898 : Path *cheapest_path = input_rel->cheapest_total_path;
7035 : ListCell *lc;
7036 39898 : bool can_hash = (extra->flags & GROUPING_CAN_USE_HASH) != 0;
7037 39898 : bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0;
7038 39898 : List *havingQual = (List *) extra->havingQual;
7039 39898 : AggClauseCosts *agg_final_costs = &extra->agg_final_costs;
7040 :
7041 39898 : if (can_sort)
7042 : {
7043 : /*
7044 : * Use any available suitably-sorted path as input, and also consider
7045 : * sorting the cheapest-total path and incremental sort on any paths
7046 : * with presorted keys.
7047 : */
7048 82458 : foreach(lc, input_rel->pathlist)
7049 : {
7050 : ListCell *lc2;
7051 42566 : Path *path = (Path *) lfirst(lc);
7052 42566 : Path *path_save = path;
7053 42566 : List *pathkey_orderings = NIL;
7054 :
7055 : /* generate alternative group orderings that might be useful */
7056 42566 : pathkey_orderings = get_useful_group_keys_orderings(root, path);
7057 :
7058 : Assert(list_length(pathkey_orderings) > 0);
7059 :
7060 85276 : foreach(lc2, pathkey_orderings)
7061 : {
7062 42710 : GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
7063 :
7064 : /* restore the path (we replace it in the loop) */
7065 42710 : path = path_save;
7066 :
7067 42710 : path = make_ordered_path(root,
7068 : grouped_rel,
7069 : path,
7070 : cheapest_path,
7071 : info->pathkeys,
7072 : -1.0);
7073 42710 : if (path == NULL)
7074 368 : continue;
7075 :
7076 : /* Now decide what to stick atop it */
7077 42342 : if (parse->groupingSets)
7078 : {
7079 938 : consider_groupingsets_paths(root, grouped_rel,
7080 : path, true, can_hash,
7081 : gd, agg_costs, dNumGroups);
7082 : }
7083 41404 : else if (parse->hasAggs)
7084 : {
7085 : /*
7086 : * We have aggregation, possibly with plain GROUP BY. Make
7087 : * an AggPath.
7088 : */
7089 40632 : add_path(grouped_rel, (Path *)
7090 40632 : create_agg_path(root,
7091 : grouped_rel,
7092 : path,
7093 40632 : grouped_rel->reltarget,
7094 40632 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
7095 : AGGSPLIT_SIMPLE,
7096 : info->clauses,
7097 : havingQual,
7098 : agg_costs,
7099 : dNumGroups));
7100 : }
7101 772 : else if (parse->groupClause)
7102 : {
7103 : /*
7104 : * We have GROUP BY without aggregation or grouping sets.
7105 : * Make a GroupPath.
7106 : */
7107 772 : add_path(grouped_rel, (Path *)
7108 772 : create_group_path(root,
7109 : grouped_rel,
7110 : path,
7111 : info->clauses,
7112 : havingQual,
7113 : dNumGroups));
7114 : }
7115 : else
7116 : {
7117 : /* Other cases should have been handled above */
7118 : Assert(false);
7119 : }
7120 : }
7121 : }
7122 :
7123 : /*
7124 : * Instead of operating directly on the input relation, we can
7125 : * consider finalizing a partially aggregated path.
7126 : */
7127 39892 : if (partially_grouped_rel != NULL)
7128 : {
7129 3994 : foreach(lc, partially_grouped_rel->pathlist)
7130 : {
7131 : ListCell *lc2;
7132 2412 : Path *path = (Path *) lfirst(lc);
7133 2412 : Path *path_save = path;
7134 2412 : List *pathkey_orderings = NIL;
7135 :
7136 : /* generate alternative group orderings that might be useful */
7137 2412 : pathkey_orderings = get_useful_group_keys_orderings(root, path);
7138 :
7139 : Assert(list_length(pathkey_orderings) > 0);
7140 :
7141 : /* process all potentially interesting grouping reorderings */
7142 4824 : foreach(lc2, pathkey_orderings)
7143 : {
7144 2412 : GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
7145 :
7146 : /* restore the path (we replace it in the loop) */
7147 2412 : path = path_save;
7148 :
7149 2412 : path = make_ordered_path(root,
7150 : grouped_rel,
7151 : path,
7152 2412 : partially_grouped_rel->cheapest_total_path,
7153 : info->pathkeys,
7154 : -1.0);
7155 :
7156 2412 : if (path == NULL)
7157 108 : continue;
7158 :
7159 2304 : if (parse->hasAggs)
7160 2056 : add_path(grouped_rel, (Path *)
7161 2056 : create_agg_path(root,
7162 : grouped_rel,
7163 : path,
7164 2056 : grouped_rel->reltarget,
7165 2056 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
7166 : AGGSPLIT_FINAL_DESERIAL,
7167 : info->clauses,
7168 : havingQual,
7169 : agg_final_costs,
7170 : dNumGroups));
7171 : else
7172 248 : add_path(grouped_rel, (Path *)
7173 248 : create_group_path(root,
7174 : grouped_rel,
7175 : path,
7176 : info->clauses,
7177 : havingQual,
7178 : dNumGroups));
7179 :
7181 : }
7182 : }
7183 : }
7184 :
7185 39898 : if (can_hash)
7186 : {
7187 4984 : if (parse->groupingSets)
7188 : {
7189 : /*
7190 : * Try for a hash-only groupingsets path over unsorted input.
7191 : */
7192 794 : consider_groupingsets_paths(root, grouped_rel,
7193 : cheapest_path, false, true,
7194 : gd, agg_costs, dNumGroups);
7195 : }
7196 : else
7197 : {
7198 : /*
7199 : * Generate a HashAgg Path. We just need an Agg over the
7200 : * cheapest-total input path, since input order won't matter.
7201 : */
7202 4190 : add_path(grouped_rel, (Path *)
7203 4190 : create_agg_path(root, grouped_rel,
7204 : cheapest_path,
7205 4190 : grouped_rel->reltarget,
7206 : AGG_HASHED,
7207 : AGGSPLIT_SIMPLE,
7208 : root->processed_groupClause,
7209 : havingQual,
7210 : agg_costs,
7211 : dNumGroups));
7212 : }
7213 :
7214 : /*
7215 : * Generate a Finalize HashAgg Path atop of the cheapest partially
7216 : * grouped path, assuming there is one
7217 : */
7218 4984 : if (partially_grouped_rel && partially_grouped_rel->pathlist)
7219 : {
7220 784 : Path *path = partially_grouped_rel->cheapest_total_path;
7221 :
7222 784 : add_path(grouped_rel, (Path *)
7223 784 : create_agg_path(root,
7224 : grouped_rel,
7225 : path,
7226 784 : grouped_rel->reltarget,
7227 : AGG_HASHED,
7228 : AGGSPLIT_FINAL_DESERIAL,
7229 : root->processed_groupClause,
7230 : havingQual,
7231 : agg_final_costs,
7232 : dNumGroups));
7233 : }
7234 : }
7235 :
7236 : /*
7237 : * When partitionwise aggregate is used, we might have fully aggregated
7238 : * paths in the partial pathlist, because add_paths_to_append_rel() will
7239 : * consider a path for grouped_rel consisting of a Parallel Append of
7240 : * non-partial paths from each child.
7241 : */
7242 39898 : if (grouped_rel->partial_pathlist != NIL)
7243 162 : gather_grouping_paths(root, grouped_rel);
7244 39898 : }
7245 :
7246 : /*
7247 : * create_partial_grouping_paths
7248 : *
7249 : * Create a new upper relation representing the result of partial aggregation
7250 : * and populate it with appropriate paths. Note that we don't finalize the
7251 : * lists of paths here, so the caller can add additional partial or non-partial
7252 : * paths and must afterward call gather_grouping_paths and set_cheapest on
7253 : * the returned upper relation.
7254 : *
7255 : * All paths for this new upper relation -- both partial and non-partial --
7256 : * have been partially aggregated but require a subsequent FinalizeAggregate
7257 : * step.
7258 : *
7259 : * NB: This function is allowed to return NULL if it determines that there is
7260 : * no real need to create a new RelOptInfo.
7261 : */
7262 : static RelOptInfo *
7263 35490 : create_partial_grouping_paths(PlannerInfo *root,
7264 : RelOptInfo *grouped_rel,
7265 : RelOptInfo *input_rel,
7266 : grouping_sets_data *gd,
7267 : GroupPathExtraData *extra,
7268 : bool force_rel_creation)
7269 : {
7270 35490 : Query *parse = root->parse;
7271 : RelOptInfo *partially_grouped_rel;
7272 35490 : AggClauseCosts *agg_partial_costs = &extra->agg_partial_costs;
7273 35490 : AggClauseCosts *agg_final_costs = &extra->agg_final_costs;
7274 35490 : Path *cheapest_partial_path = NULL;
7275 35490 : Path *cheapest_total_path = NULL;
7276 35490 : double dNumPartialGroups = 0;
7277 35490 : double dNumPartialPartialGroups = 0;
7278 : ListCell *lc;
7279 35490 : bool can_hash = (extra->flags & GROUPING_CAN_USE_HASH) != 0;
7280 35490 : bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0;
7281 :
7282 : /*
7283 : * Consider whether we should generate partially aggregated non-partial
7284 : * paths. We can only do this if we have a non-partial path, and only if
7285 : * the parent of the input rel is performing partial partitionwise
7286 : * aggregation. (Note that extra->patype is the type of partitionwise
7287 : * aggregation being used at the parent level, not this level.)
7288 : */
7289 35490 : if (input_rel->pathlist != NIL &&
7290 35490 : extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL)
7291 618 : cheapest_total_path = input_rel->cheapest_total_path;
7292 :
7293 : /*
7294 : * If parallelism is possible for grouped_rel, then we should consider
7295 : * generating partially-grouped partial paths. However, if the input rel
7296 : * has no partial paths, then we can't.
7297 : */
7298 35490 : if (grouped_rel->consider_parallel && input_rel->partial_pathlist != NIL)
7299 1784 : cheapest_partial_path = linitial(input_rel->partial_pathlist);
7300 :
7301 : /*
7302 : * If we can't partially aggregate partial paths, and we can't partially
7303 : * aggregate non-partial paths, then don't bother creating the new
7304 : * RelOptInfo at all, unless the caller specified force_rel_creation.
7305 : */
7306 35490 : if (cheapest_total_path == NULL &&
7307 33388 : cheapest_partial_path == NULL &&
7308 33388 : !force_rel_creation)
7309 33290 : return NULL;
7310 :
7311 : /*
7312 : * Build a new upper relation to represent the result of partially
7313 : * aggregating the rows from the input relation.
7314 : */
7315 2200 : partially_grouped_rel = fetch_upper_rel(root,
7316 : UPPERREL_PARTIAL_GROUP_AGG,
7317 : grouped_rel->relids);
7318 2200 : partially_grouped_rel->consider_parallel =
7319 2200 : grouped_rel->consider_parallel;
7320 2200 : partially_grouped_rel->reloptkind = grouped_rel->reloptkind;
7321 2200 : partially_grouped_rel->serverid = grouped_rel->serverid;
7322 2200 : partially_grouped_rel->userid = grouped_rel->userid;
7323 2200 : partially_grouped_rel->useridiscurrent = grouped_rel->useridiscurrent;
7324 2200 : partially_grouped_rel->fdwroutine = grouped_rel->fdwroutine;
7325 :
7326 : /*
7327 : * Build target list for partial aggregate paths. These paths cannot just
7328 : * emit the same tlist as regular aggregate paths, because (1) we must
7329 : * include Vars and Aggrefs needed in HAVING, which might not appear in
7330 : * the result tlist, and (2) the Aggrefs must be set in partial mode.
7331 : */
7332 2200 : partially_grouped_rel->reltarget =
7333 2200 : make_partial_grouping_target(root, grouped_rel->reltarget,
7334 : extra->havingQual);
7335 :
7336 2200 : if (!extra->partial_costs_set)
7337 : {
7338 : /*
7339 : * Collect statistics about aggregates for estimating costs of
7340 : * performing aggregation in parallel.
7341 : */
7342 7764 : MemSet(agg_partial_costs, 0, sizeof(AggClauseCosts));
7343 7764 : MemSet(agg_final_costs, 0, sizeof(AggClauseCosts));
7344 1294 : if (parse->hasAggs)
7345 : {
7346 : /* partial phase */
7347 1160 : get_agg_clause_costs(root, AGGSPLIT_INITIAL_SERIAL,
7348 : agg_partial_costs);
7349 :
7350 : /* final phase */
7351 1160 : get_agg_clause_costs(root, AGGSPLIT_FINAL_DESERIAL,
7352 : agg_final_costs);
7353 : }
7354 :
7355 1294 : extra->partial_costs_set = true;
7356 : }
7357 :
7358 : /* Estimate number of partial groups. */
7359 2200 : if (cheapest_total_path != NULL)
7360 : dNumPartialGroups =
7361 618 : get_number_of_groups(root,
7362 : cheapest_total_path->rows,
7363 : gd,
7364 : extra->targetList);
7365 2200 : if (cheapest_partial_path != NULL)
7366 : dNumPartialPartialGroups =
7367 1784 : get_number_of_groups(root,
7368 : cheapest_partial_path->rows,
7369 : gd,
7370 : extra->targetList);
7371 :
7372 2200 : if (can_sort && cheapest_total_path != NULL)
7373 : {
7374 : /* This should have been checked previously */
7375 : Assert(parse->hasAggs || parse->groupClause);
7376 :
7377 : /*
7378 : * Use any available suitably-sorted path as input, and also consider
7379 : * sorting the cheapest partial path.
7380 : */
7381 1236 : foreach(lc, input_rel->pathlist)
7382 : {
7383 : ListCell *lc2;
7384 618 : Path *path = (Path *) lfirst(lc);
7385 618 : Path *path_save = path;
7386 618 : List *pathkey_orderings = NIL;
7387 :
7388 : /* generate alternative group orderings that might be useful */
7389 618 : pathkey_orderings = get_useful_group_keys_orderings(root, path);
7390 :
7391 : Assert(list_length(pathkey_orderings) > 0);
7392 :
7393 : /* process all potentially interesting grouping reorderings */
7394 1236 : foreach(lc2, pathkey_orderings)
7395 : {
7396 618 : GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
7397 :
7398 : /* restore the path (we replace it in the loop) */
7399 618 : path = path_save;
7400 :
7401 618 : path = make_ordered_path(root,
7402 : partially_grouped_rel,
7403 : path,
7404 : cheapest_total_path,
7405 : info->pathkeys,
7406 : -1.0);
7407 :
7408 618 : if (path == NULL)
7409 0 : continue;
7410 :
7411 618 : if (parse->hasAggs)
7412 546 : add_path(partially_grouped_rel, (Path *)
7413 546 : create_agg_path(root,
7414 : partially_grouped_rel,
7415 : path,
7416 546 : partially_grouped_rel->reltarget,
7417 546 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
7418 : AGGSPLIT_INITIAL_SERIAL,
7419 : info->clauses,
7420 : NIL,
7421 : agg_partial_costs,
7422 : dNumPartialGroups));
7423 : else
7424 72 : add_path(partially_grouped_rel, (Path *)
7425 72 : create_group_path(root,
7426 : partially_grouped_rel,
7427 : path,
7428 : info->clauses,
7429 : NIL,
7430 : dNumPartialGroups));
7431 : }
7432 : }
7433 : }
7434 :
7435 2200 : if (can_sort && cheapest_partial_path != NULL)
7436 : {
7437 : /* Similar to above logic, but for partial paths. */
7438 3580 : foreach(lc, input_rel->partial_pathlist)
7439 : {
7440 : ListCell *lc2;
7441 1796 : Path *path = (Path *) lfirst(lc);
7442 1796 : Path *path_save = path;
7443 1796 : List *pathkey_orderings = NIL;
7444 :
7445 : /* generate alternative group orderings that might be useful */
7446 1796 : pathkey_orderings = get_useful_group_keys_orderings(root, path);
7447 :
7448 : Assert(list_length(pathkey_orderings) > 0);
7449 :
7450 : /* process all potentially interesting grouping reorderings */
7451 3592 : foreach(lc2, pathkey_orderings)
7452 : {
7453 1796 : GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
7454 :
7456 : /* restore the path (we replace it in the loop) */
7457 1796 : path = path_save;
7458 :
7459 1796 : path = make_ordered_path(root,
7460 : partially_grouped_rel,
7461 : path,
7462 : cheapest_partial_path,
7463 : info->pathkeys,
7464 : -1.0);
7465 :
7466 1796 : if (path == NULL)
7467 6 : continue;
7468 :
7469 1790 : if (parse->hasAggs)
7470 1668 : add_partial_path(partially_grouped_rel, (Path *)
7471 1668 : create_agg_path(root,
7472 : partially_grouped_rel,
7473 : path,
7474 1668 : partially_grouped_rel->reltarget,
7475 1668 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
7476 : AGGSPLIT_INITIAL_SERIAL,
7477 : info->clauses,
7478 : NIL,
7479 : agg_partial_costs,
7480 : dNumPartialPartialGroups));
7481 : else
7482 122 : add_partial_path(partially_grouped_rel, (Path *)
7483 122 : create_group_path(root,
7484 : partially_grouped_rel,
7485 : path,
7486 : info->clauses,
7487 : NIL,
7488 : dNumPartialPartialGroups));
7489 : }
7490 : }
7491 : }
7492 :
7493 : /*
7494 : * Add a partially-grouped HashAgg Path where possible
7495 : */
7496 2200 : if (can_hash && cheapest_total_path != NULL)
7497 : {
7498 : /* Checked above */
7499 : Assert(parse->hasAggs || parse->groupClause);
7500 :
7501 618 : add_path(partially_grouped_rel, (Path *)
7502 618 : create_agg_path(root,
7503 : partially_grouped_rel,
7504 : cheapest_total_path,
7505 618 : partially_grouped_rel->reltarget,
7506 : AGG_HASHED,
7507 : AGGSPLIT_INITIAL_SERIAL,
7508 : root->processed_groupClause,
7509 : NIL,
7510 : agg_partial_costs,
7511 : dNumPartialGroups));
7512 : }
7513 :
7514 : /*
7515 : * Now add a partially-grouped HashAgg partial Path where possible
7516 : */
7517 2200 : if (can_hash && cheapest_partial_path != NULL)
7518 : {
7519 986 : add_partial_path(partially_grouped_rel, (Path *)
7520 986 : create_agg_path(root,
7521 : partially_grouped_rel,
7522 : cheapest_partial_path,
7523 986 : partially_grouped_rel->reltarget,
7524 : AGG_HASHED,
7525 : AGGSPLIT_INITIAL_SERIAL,
7526 : root->processed_groupClause,
7527 : NIL,
7528 : agg_partial_costs,
7529 : dNumPartialPartialGroups));
7530 : }
7531 :
7532 : /*
7533 : * If there is an FDW that's responsible for all baserels of the query,
7534 : * let it consider adding partially grouped ForeignPaths.
7535 : */
7536 2200 : if (partially_grouped_rel->fdwroutine &&
7537 6 : partially_grouped_rel->fdwroutine->GetForeignUpperPaths)
7538 : {
7539 6 : FdwRoutine *fdwroutine = partially_grouped_rel->fdwroutine;
7540 :
7541 6 : fdwroutine->GetForeignUpperPaths(root,
7542 : UPPERREL_PARTIAL_GROUP_AGG,
7543 : input_rel, partially_grouped_rel,
7544 : extra);
7545 : }
7546 :
7547 2200 : return partially_grouped_rel;
7548 : }
7549 :
7550 : /*
7551 : * make_ordered_path
7552 : * Return a path ordered by 'pathkeys' based on the given 'path'. May
7553 : * return NULL if it doesn't make sense to generate an ordered path in
7554 : * this case.
7555 : */
7556 : static Path *
7557 52542 : make_ordered_path(PlannerInfo *root, RelOptInfo *rel, Path *path,
7558 : Path *cheapest_path, List *pathkeys, double limit_tuples)
7559 : {
7560 : bool is_sorted;
7561 : int presorted_keys;
7562 :
7563 52542 : is_sorted = pathkeys_count_contained_in(pathkeys,
7564 : path->pathkeys,
7565 : &presorted_keys);
7566 :
7567 52542 : if (!is_sorted)
7568 : {
7569 : /*
7570 : * Try at least sorting the cheapest path and also try incrementally
7571 : * sorting any path which is partially sorted already (no need to deal
7572 : * with paths which have presorted keys when incremental sort is
7573 : * disabled unless it's the cheapest input path).
7574 : */
7575 12902 : if (path != cheapest_path &&
7576 2032 : (presorted_keys == 0 || !enable_incremental_sort))
7577 1038 : return NULL;
7578 :
7579 : /*
7580 : * We've no need to consider both a sort and incremental sort. We'll
7581 : * just do a sort if there are no presorted keys and an incremental
7582 : * sort when there are presorted keys.
7583 : */
7584 11864 : if (presorted_keys == 0 || !enable_incremental_sort)
7585 10702 : path = (Path *) create_sort_path(root,
7586 : rel,
7587 : path,
7588 : pathkeys,
7589 : limit_tuples);
7590 : else
7591 1162 : path = (Path *) create_incremental_sort_path(root,
7592 : rel,
7593 : path,
7594 : pathkeys,
7595 : presorted_keys,
7596 : limit_tuples);
7597 : }
7598 :
7599 51504 : return path;
7600 : }
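/*
 * Illustrative aside: the branching above reduces to a small decision
 * table.  A hedged standalone restatement of just that logic (the enum and
 * function are invented for this sketch):
 */
#include <stdbool.h>
#include <stdio.h>

typedef enum
{
	REJECT,						/* corresponds to returning NULL above */
	FULL_SORT,
	INCREMENTAL_SORT,
	ALREADY_SORTED
} SortChoice;

static SortChoice
choose_sort(bool is_sorted, bool is_cheapest, int presorted_keys,
			bool enable_incremental_sort)
{
	if (is_sorted)
		return ALREADY_SORTED;
	if (!is_cheapest && (presorted_keys == 0 || !enable_incremental_sort))
		return REJECT;
	if (presorted_keys == 0 || !enable_incremental_sort)
		return FULL_SORT;
	return INCREMENTAL_SORT;
}

int
main(void)
{
	printf("%d\n", choose_sort(false, true, 0, true));	/* FULL_SORT */
	printf("%d\n", choose_sort(false, false, 2, true)); /* INCREMENTAL_SORT */
	printf("%d\n", choose_sort(false, false, 0, true)); /* REJECT */
	return 0;
}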
7601 :
7602 : /*
7603 : * Generate Gather and Gather Merge paths for a grouping relation or partial
7604 : * grouping relation.
7605 : *
7606 : * generate_useful_gather_paths does most of the work, but we also consider a
7607 : * special case: we could try sorting the data by the group_pathkeys and then
7608 : * applying Gather Merge.
7609 : *
7610 :  * NB: This function shouldn't be used for anything other than a grouped or
7611 :  * partially grouped relation, not only because it explicitly references
7612 :  * group_pathkeys but also because we pass "true" as the third argument to
7613 :  * generate_useful_gather_paths().
7614 : */
7615 : static void
7616 1646 : gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
7617 : {
7618 : ListCell *lc;
7619 : Path *cheapest_partial_path;
7620 : List *groupby_pathkeys;
7621 :
7622 : /*
7623 : * This occurs after any partial aggregation has taken place, so trim off
7624 : * any pathkeys added for ORDER BY / DISTINCT aggregates.
7625 : */
7626 1646 : if (list_length(root->group_pathkeys) > root->num_groupby_pathkeys)
7627 18 : groupby_pathkeys = list_copy_head(root->group_pathkeys,
7628 : root->num_groupby_pathkeys);
7629 : else
7630 1628 : groupby_pathkeys = root->group_pathkeys;
7631 :
7632 : /* Try Gather for unordered paths and Gather Merge for ordered ones. */
7633 1646 : generate_useful_gather_paths(root, rel, true);
7634 :
7635 1646 : cheapest_partial_path = linitial(rel->partial_pathlist);
7636 :
7637 : /* XXX Shouldn't this also consider the group-key-reordering? */
7638 3898 : foreach(lc, rel->partial_pathlist)
7639 : {
7640 2252 : Path *path = (Path *) lfirst(lc);
7641 : bool is_sorted;
7642 : int presorted_keys;
7643 : double total_groups;
7644 :
7645 2252 : is_sorted = pathkeys_count_contained_in(groupby_pathkeys,
7646 : path->pathkeys,
7647 : &presorted_keys);
7648 :
7649 2252 : if (is_sorted)
7650 1472 : continue;
7651 :
7652 : /*
7653 : * Consider sorting the cheapest partial path, and also consider
7654 : * incrementally sorting any partial path that already has presorted
7655 : * keys. A non-cheapest path with no presorted keys, or any
7656 : * non-cheapest path when incremental sort is disabled, is skipped.
7657 : */
7658 780 : if (path != cheapest_partial_path &&
7659 0 : (presorted_keys == 0 || !enable_incremental_sort))
7660 0 : continue;
7661 :
7662 : /*
7663 : * There's no need to consider both a sort and an incremental sort:
7664 : * do a plain sort if there are no presorted keys, and an incremental
7665 : * sort when there are.
7666 : */
7667 780 : if (presorted_keys == 0 || !enable_incremental_sort)
7668 780 : path = (Path *) create_sort_path(root, rel, path,
7669 : groupby_pathkeys,
7670 : -1.0);
7671 : else
7672 0 : path = (Path *) create_incremental_sort_path(root,
7673 : rel,
7674 : path,
7675 : groupby_pathkeys,
7676 : presorted_keys,
7677 : -1.0);
7678 780 : total_groups = compute_gather_rows(path);
7679 : path = (Path *)
7680 780 : create_gather_merge_path(root,
7681 : rel,
7682 : path,
7683 780 : rel->reltarget,
7684 : groupby_pathkeys,
7685 : NULL,
7686 : &total_groups);
7687 :
7688 780 : add_path(rel, path);
7689 : }
7690 1646 : }
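/*
 * The trimming step at the top of this function keeps only the first
 * num_groupby_pathkeys entries of group_pathkeys (list_copy_head in the
 * real code).  A toy equivalent on a simple singly linked list; Cell and
 * copy_head are illustrative stand-ins, not the pg_list.h API.
 */
#include <stdlib.h>

typedef struct Cell
{
	int			key;
	struct Cell *next;
} Cell;

/* Return a new list holding copies of the first n cells of 'src'. */
static Cell *
copy_head(const Cell *src, int n)
{
	Cell	   *head = NULL;
	Cell	  **tail = &head;

	while (src != NULL && n-- > 0)
	{
		Cell	   *c = malloc(sizeof(Cell));

		c->key = src->key;
		c->next = NULL;
		*tail = c;
		tail = &c->next;
		src = src->next;
	}
	return head;
}

int
main(void)
{
	Cell		c3 = {3, NULL};
	Cell		c2 = {2, &c3};
	Cell		c1 = {1, &c2};
	Cell	   *trimmed = copy_head(&c1, 2);	/* keeps cells 1 and 2 */

	return trimmed->next->key == 2 ? 0 : 1;
}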
7691 :
7692 : /*
7693 : * can_partial_agg
7694 : *
7695 : * Determines whether partial grouping and/or aggregation is possible.
7696 : * Returns true when possible, false otherwise.
7697 : */
7698 : static bool
7699 39022 : can_partial_agg(PlannerInfo *root)
7700 : {
7701 39022 : Query *parse = root->parse;
7702 :
7703 39022 : if (!parse->hasAggs && parse->groupClause == NIL)
7704 : {
7705 : /*
7706 : * We don't know how to do parallel aggregation unless we have either
7707 : * some aggregates or a grouping clause.
7708 : */
7709 0 : return false;
7710 : }
7711 39022 : else if (parse->groupingSets)
7712 : {
7713 : /* We don't know how to do grouping sets in parallel. */
7714 872 : return false;
7715 : }
7716 38150 : else if (root->hasNonPartialAggs || root->hasNonSerialAggs)
7717 : {
7718 : /* Insufficient support for partial mode. */
7719 4082 : return false;
7720 : }
7721 :
7722 : /* Everything looks good. */
7723 34068 : return true;
7724 : }
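/*
 * A standalone restatement of the three disqualifying tests above, with a
 * toy struct standing in for PlannerInfo/Query; all field and function
 * names here are illustrative only.
 */
#include <stdbool.h>

typedef struct ToyQuery
{
	bool		has_aggs;			/* any aggregate functions? */
	bool		has_group_clause;	/* a GROUP BY clause? */
	bool		has_grouping_sets;	/* GROUPING SETS in use? */
	bool		has_nonpartial_aggs;	/* some agg lacks partial support? */
	bool		has_nonserial_aggs;	/* some agg can't be serialized? */
} ToyQuery;

static bool
toy_can_partial_agg(const ToyQuery *q)
{
	/* Nothing to partially aggregate without aggs or grouping. */
	if (!q->has_aggs && !q->has_group_clause)
		return false;

	/* Grouping sets can't be split into partial and final stages. */
	if (q->has_grouping_sets)
		return false;

	/* Every aggregate must support partial mode and serialization. */
	if (q->has_nonpartial_aggs || q->has_nonserial_aggs)
		return false;

	return true;
}

int
main(void)
{
	ToyQuery	q = {true, true, false, false, false};

	return toy_can_partial_agg(&q) ? 0 : 1;		/* exits 0: partial agg OK */
}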
7725 :
7726 : /*
7727 : * apply_scanjoin_target_to_paths
7728 : *
7729 : * Adjust the final scan/join relation, and recursively all of its children,
7730 : * to generate the final scan/join target. It would be more correct to model
7731 : * this as a separate planning step with a new RelOptInfo at the toplevel and
7732 : * for each child relation, but doing it this way is noticeably cheaper.
7733 : * Maybe that problem can be solved at some point, but for now we do this.
7734 : *
7735 : * If tlist_same_exprs is true, then the scan/join target to be applied has
7736 : * the same expressions as the existing reltarget, so we need only insert the
7737 : * appropriate sortgroupref information. By avoiding the creation of
7738 : * projection paths we save effort both immediately and at plan creation time.
7739 : */
7740 : static void
7741 540846 : apply_scanjoin_target_to_paths(PlannerInfo *root,
7742 : RelOptInfo *rel,
7743 : List *scanjoin_targets,
7744 : List *scanjoin_targets_contain_srfs,
7745 : bool scanjoin_target_parallel_safe,
7746 : bool tlist_same_exprs)
7747 : {
7748 540846 : bool rel_is_partitioned = IS_PARTITIONED_REL(rel);
7749 : PathTarget *scanjoin_target;
7750 : ListCell *lc;
7751 :
7752 : /* This recurses, so be paranoid. */
7753 540846 : check_stack_depth();
7754 :
7755 : /*
7756 : * If the rel is partitioned, we want to drop its existing paths and
7757 : * generate new ones. This function would still be correct if we kept the
7758 : * existing paths: we'd modify them to generate the correct target above
7759 : * the partitioning Append, and then they'd compete on cost with paths
7760 : * generating the target below the Append. However, in our current cost
7761 : * model the latter way is always the same or cheaper cost, so modifying
7762 : * the existing paths would just be useless work. Moreover, when the cost
7763 : * is the same, varying roundoff errors might sometimes allow an existing
7764 : * path to be picked, resulting in undesirable cross-platform plan
7765 : * variations. So we drop old paths and thereby force the work to be done
7766 : * below the Append, except in the case of a non-parallel-safe target.
7767 : *
7768 : * Some care is needed, because we have to allow
7769 : * generate_useful_gather_paths to see the old partial paths in the next
7770 : * stanza. Hence, zap the main pathlist here, then allow
7771 : * generate_useful_gather_paths to add path(s) to the main list, and
7772 : * finally zap the partial pathlist.
7773 : */
7774 540846 : if (rel_is_partitioned)
7775 12548 : rel->pathlist = NIL;
7776 :
7777 : /*
7778 : * If the scan/join target is not parallel-safe, partial paths cannot
7779 : * generate it.
7780 : */
7781 540846 : if (!scanjoin_target_parallel_safe)
7782 : {
7783 : /*
7784 : * Since we can't generate the final scan/join target in parallel
7785 : * workers, this is our last opportunity to use any partial paths that
7786 : * exist; so build Gather path(s) that use them and emit whatever the
7787 : * current reltarget is. We don't do this in the case where the
7788 : * target is parallel-safe, since we will be able to generate superior
7789 : * paths by doing it after the final scan/join target has been
7790 : * applied.
7791 : */
7792 79760 : generate_useful_gather_paths(root, rel, false);
7793 :
7794 : /* Can't use parallel query above this level. */
7795 79760 : rel->partial_pathlist = NIL;
7796 79760 : rel->consider_parallel = false;
7797 : }
7798 :
7799 : /* Finish dropping old paths for a partitioned rel, per comment above */
7800 540846 : if (rel_is_partitioned)
7801 12548 : rel->partial_pathlist = NIL;
7802 :
7803 : /* Extract SRF-free scan/join target. */
7804 540846 : scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
7805 :
7806 : /*
7807 : * Apply the SRF-free scan/join target to each existing path.
7808 : *
7809 : * If the tlist exprs are the same, we can just inject the sortgroupref
7810 : * information into the existing pathtargets. Otherwise, replace each
7811 : * path with a projection path that generates the SRF-free scan/join
7812 : * target. This can't change the ordering of paths within rel->pathlist,
7813 : * so we just modify the list in place.
7814 : */
7815 1122722 : foreach(lc, rel->pathlist)
7816 : {
7817 581876 : Path *subpath = (Path *) lfirst(lc);
7818 :
7819 : /* Shouldn't have any parameterized paths anymore */
7820 : Assert(subpath->param_info == NULL);
7821 :
7822 581876 : if (tlist_same_exprs)
7823 205600 : subpath->pathtarget->sortgrouprefs =
7824 205600 : scanjoin_target->sortgrouprefs;
7825 : else
7826 : {
7827 : Path *newpath;
7828 :
7829 376276 : newpath = (Path *) create_projection_path(root, rel, subpath,
7830 : scanjoin_target);
7831 376276 : lfirst(lc) = newpath;
7832 : }
7833 : }
7834 :
7835 : /* Likewise adjust the targets for any partial paths. */
7836 560438 : foreach(lc, rel->partial_pathlist)
7837 : {
7838 19592 : Path *subpath = (Path *) lfirst(lc);
7839 :
7840 : /* Shouldn't have any parameterized paths anymore */
7841 : Assert(subpath->param_info == NULL);
7842 :
7843 19592 : if (tlist_same_exprs)
7844 15958 : subpath->pathtarget->sortgrouprefs =
7845 15958 : scanjoin_target->sortgrouprefs;
7846 : else
7847 : {
7848 : Path *newpath;
7849 :
7850 3634 : newpath = (Path *) create_projection_path(root, rel, subpath,
7851 : scanjoin_target);
7852 3634 : lfirst(lc) = newpath;
7853 : }
7854 : }
7855 :
7856 : /*
7857 : * Now, if final scan/join target contains SRFs, insert ProjectSetPath(s)
7858 : * atop each existing path. (Note that this function doesn't look at the
7859 : * cheapest-path fields, which is a good thing because they're bogus right
7860 : * now.)
7861 : */
7862 540846 : if (root->parse->hasTargetSRFs)
7863 12036 : adjust_paths_for_srfs(root, rel,
7864 : scanjoin_targets,
7865 : scanjoin_targets_contain_srfs);
7866 :
7867 : /*
7868 : * Update the rel's target to be the final (with SRFs) scan/join target.
7869 : * This now matches the actual output of all the paths, and we might get
7870 : * confused in createplan.c if they don't agree. We must do this now so
7871 : * that any append paths made in the next part will use the correct
7872 : * pathtarget (cf. create_append_path).
7873 : *
7874 : * Note that this is also necessary if GetForeignUpperPaths() gets called
7875 : * on the final scan/join relation or on any of its children, since the
7876 : * FDW might look at the rel's target to create ForeignPaths.
7877 : */
7878 540846 : rel->reltarget = llast_node(PathTarget, scanjoin_targets);
7879 :
7880 : /*
7881 : * If the relation is partitioned, recursively apply the scan/join target
7882 : * to all partitions, and generate brand-new Append paths in which the
7883 : * scan/join target is computed below the Append rather than above it.
7884 : * Since Append is not projection-capable, that might save a separate
7885 : * Result node, and it also is important for partitionwise aggregate.
7886 : */
7887 540846 : if (rel_is_partitioned)
7888 : {
7889 12548 : List *live_children = NIL;
7890 : int i;
7891 :
7892 : /* Adjust each partition. */
7893 12548 : i = -1;
7894 35456 : while ((i = bms_next_member(rel->live_parts, i)) >= 0)
7895 : {
7896 22908 : RelOptInfo *child_rel = rel->part_rels[i];
7897 : AppendRelInfo **appinfos;
7898 : int nappinfos;
7899 22908 : List *child_scanjoin_targets = NIL;
7900 :
7901 : Assert(child_rel != NULL);
7902 :
7903 : /* Dummy children can be ignored. */
7904 22908 : if (IS_DUMMY_REL(child_rel))
7905 42 : continue;
7906 :
7907 : /* Translate scan/join targets for this child. */
7908 22866 : appinfos = find_appinfos_by_relids(root, child_rel->relids,
7909 : &nappinfos);
7910 45732 : foreach(lc, scanjoin_targets)
7911 : {
7912 22866 : PathTarget *target = lfirst_node(PathTarget, lc);
7913 :
7914 22866 : target = copy_pathtarget(target);
7915 22866 : target->exprs = (List *)
7916 22866 : adjust_appendrel_attrs(root,
7917 22866 : (Node *) target->exprs,
7918 : nappinfos, appinfos);
7919 22866 : child_scanjoin_targets = lappend(child_scanjoin_targets,
7920 : target);
7921 : }
7922 22866 : pfree(appinfos);
7923 :
7924 : /* Recursion does the real work. */
7925 22866 : apply_scanjoin_target_to_paths(root, child_rel,
7926 : child_scanjoin_targets,
7927 : scanjoin_targets_contain_srfs,
7928 : scanjoin_target_parallel_safe,
7929 : tlist_same_exprs);
7930 :
7931 : /* Save non-dummy children for Append paths. */
7932 22866 : if (!IS_DUMMY_REL(child_rel))
7933 22866 : live_children = lappend(live_children, child_rel);
7934 : }
7935 :
7936 : /* Build new paths for this relation by appending child paths. */
7937 12548 : add_paths_to_append_rel(root, rel, live_children);
7938 : }
7939 :
7940 : /*
7941 : * Consider generating Gather or Gather Merge paths. We must only do this
7942 : * if the relation is parallel safe, and we don't do it for child rels to
7943 : * avoid creating multiple Gather nodes within the same plan. We must do
7944 : * this after all paths have been generated and before set_cheapest, since
7945 : * one of the generated paths may turn out to be the cheapest one.
7946 : */
7947 540846 : if (rel->consider_parallel && !IS_OTHER_REL(rel))
7948 175038 : generate_useful_gather_paths(root, rel, false);
7949 :
7950 : /*
7951 : * Reassess which paths are the cheapest, now that we've potentially added
7952 : * new Gather (or Gather Merge) and/or Append (or MergeAppend) paths to
7953 : * this relation.
7954 : */
7955 540846 : set_cheapest(rel);
7956 540846 : }
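/*
 * The two pathlist loops above either share the new target's sortgrouprefs
 * with each path (when the expressions are identical) or wrap each path in
 * a projection, replacing list cells in place.  A compact sketch of that
 * pattern with toy types; nothing here is the real Path/PathTarget API.
 */
#include <stdbool.h>
#include <stdlib.h>

typedef struct ToyPath
{
	struct ToyPath *subpath;	/* non-NULL for a projection wrapper */
	const int  *sortgrouprefs;	/* borrowed from the target, not owned */
} ToyPath;

/* Wrap 'sub' in a projection node computing the given refs. */
static ToyPath *
make_projection(ToyPath *sub, const int *refs)
{
	ToyPath    *p = malloc(sizeof(ToyPath));

	p->subpath = sub;
	p->sortgrouprefs = refs;
	return p;
}

/* Apply a new target to every path, mutating the list in place. */
static void
apply_target(ToyPath **paths, int npaths, const int *refs, bool same_exprs)
{
	for (int i = 0; i < npaths; i++)
	{
		if (same_exprs)
			paths[i]->sortgrouprefs = refs;		/* just inject the refs */
		else
			paths[i] = make_projection(paths[i], refs); /* wrap + replace */
	}
}

int
main(void)
{
	int			refs[] = {1, 0};
	ToyPath		base = {NULL, NULL};
	ToyPath    *paths[] = {&base};

	apply_target(paths, 1, refs, false);
	return paths[0]->subpath == &base ? 0 : 1;	/* wrapped as expected */
}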
7957 :
7958 : /*
7959 : * create_partitionwise_grouping_paths
7960 : *
7961 : * If the partition keys of the input relation are part of the GROUP BY clause,
7962 : * all the rows belonging to a given group come from a single partition. This
7963 : * allows aggregation/grouping over a partitioned relation to be broken down
7964 : * into aggregation/grouping on each partition. This should be no worse, and
7965 : * often better, than the normal approach.
7966 : *
7967 : * However, if the GROUP BY clause does not contain all the partition keys,
7968 : * rows from a given group may be spread across multiple partitions. In that
7969 : * case, we perform partial aggregation for each group, append the results,
7970 : * and then finalize aggregation. This is less certain to win than the
7971 : * previous case. It may win if the PartialAggregate stage greatly reduces
7972 : * the number of groups, because fewer rows will pass through the Append node.
7973 : * It may lose if we have lots of small groups.
7974 : */
7975 : static void
7976 562 : create_partitionwise_grouping_paths(PlannerInfo *root,
7977 : RelOptInfo *input_rel,
7978 : RelOptInfo *grouped_rel,
7979 : RelOptInfo *partially_grouped_rel,
7980 : const AggClauseCosts *agg_costs,
7981 : grouping_sets_data *gd,
7982 : PartitionwiseAggregateType patype,
7983 : GroupPathExtraData *extra)
7984 : {
7985 562 : List *grouped_live_children = NIL;
7986 562 : List *partially_grouped_live_children = NIL;
7987 562 : PathTarget *target = grouped_rel->reltarget;
7988 562 : bool partial_grouping_valid = true;
7989 : int i;
7990 :
7991 : Assert(patype != PARTITIONWISE_AGGREGATE_NONE);
7992 : Assert(patype != PARTITIONWISE_AGGREGATE_PARTIAL ||
7993 : partially_grouped_rel != NULL);
7994 :
7995 : /* Add paths for partitionwise aggregation/grouping. */
7996 562 : i = -1;
7997 2056 : while ((i = bms_next_member(input_rel->live_parts, i)) >= 0)
7998 : {
7999 1494 : RelOptInfo *child_input_rel = input_rel->part_rels[i];
8000 : PathTarget *child_target;
8001 : AppendRelInfo **appinfos;
8002 : int nappinfos;
8003 : GroupPathExtraData child_extra;
8004 : RelOptInfo *child_grouped_rel;
8005 : RelOptInfo *child_partially_grouped_rel;
8006 :
8007 : Assert(child_input_rel != NULL);
8008 :
8009 : /* Dummy children can be ignored. */
8010 1494 : if (IS_DUMMY_REL(child_input_rel))
8011 0 : continue;
8012 :
8013 1494 : child_target = copy_pathtarget(target);
8014 :
8015 : /*
8016 : * Copy the given "extra" structure as is and then override the
8017 : * members specific to this child.
8018 : */
8019 1494 : memcpy(&child_extra, extra, sizeof(child_extra));
8020 :
8021 1494 : appinfos = find_appinfos_by_relids(root, child_input_rel->relids,
8022 : &nappinfos);
8023 :
8024 1494 : child_target->exprs = (List *)
8025 1494 : adjust_appendrel_attrs(root,
8026 1494 : (Node *) target->exprs,
8027 : nappinfos, appinfos);
8028 :
8029 : /* Translate havingQual and targetList. */
8030 1494 : child_extra.havingQual = (Node *)
8031 : adjust_appendrel_attrs(root,
8032 : extra->havingQual,
8033 : nappinfos, appinfos);
8034 1494 : child_extra.targetList = (List *)
8035 1494 : adjust_appendrel_attrs(root,
8036 1494 : (Node *) extra->targetList,
8037 : nappinfos, appinfos);
8038 :
8039 : /*
8040 : * extra->patype was the value computed for our parent rel; patype is
8041 : * the value for this relation. From the child's point of view, our
8042 : * value is its parent rel's value, so that is what we pass down.
8043 : */
8044 1494 : child_extra.patype = patype;
8045 :
8046 : /*
8047 : * Create grouping relation to hold fully aggregated grouping and/or
8048 : * aggregation paths for the child.
8049 : */
8050 1494 : child_grouped_rel = make_grouping_rel(root, child_input_rel,
8051 : child_target,
8052 1494 : extra->target_parallel_safe,
8053 : child_extra.havingQual);
8054 :
8055 : /* Create grouping paths for this child relation. */
8056 1494 : create_ordinary_grouping_paths(root, child_input_rel,
8057 : child_grouped_rel,
8058 : agg_costs, gd, &child_extra,
8059 : &child_partially_grouped_rel);
8060 :
8061 1494 : if (child_partially_grouped_rel)
8062 : {
8063 : partially_grouped_live_children =
8064 906 : lappend(partially_grouped_live_children,
8065 : child_partially_grouped_rel);
8066 : }
8067 : else
8068 588 : partial_grouping_valid = false;
8069 :
8070 1494 : if (patype == PARTITIONWISE_AGGREGATE_FULL)
8071 : {
8072 876 : set_cheapest(child_grouped_rel);
8073 876 : grouped_live_children = lappend(grouped_live_children,
8074 : child_grouped_rel);
8075 : }
8076 :
8077 1494 : pfree(appinfos);
8078 : }
8079 :
8080 : /*
8081 : * Try to create append paths for partially grouped children. For full
8082 : * partitionwise aggregation, we might have paths in the partial_pathlist
8083 : * if parallel aggregation is possible. For partial partitionwise
8084 : * aggregation, we may have paths in both pathlist and partial_pathlist.
8085 : *
8086 : * NB: We must have a partially grouped path for every child in order to
8087 : * generate a partially grouped path for this relation.
8088 : */
8089 562 : if (partially_grouped_rel && partial_grouping_valid)
8090 : {
8091 : Assert(partially_grouped_live_children != NIL);
8092 :
8093 350 : add_paths_to_append_rel(root, partially_grouped_rel,
8094 : partially_grouped_live_children);
8095 :
8096 : /*
8097 : * We need to call set_cheapest, since the finalization step will use the
8098 : * cheapest path from the rel.
8099 : */
8100 350 : if (partially_grouped_rel->pathlist)
8101 350 : set_cheapest(partially_grouped_rel);
8102 : }
8103 :
8104 : /* If possible, create append paths for fully grouped children. */
8105 562 : if (patype == PARTITIONWISE_AGGREGATE_FULL)
8106 : {
8107 : Assert(grouped_live_children != NIL);
8108 :
8109 320 : add_paths_to_append_rel(root, grouped_rel, grouped_live_children);
8110 : }
8111 562 : }
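/*
 * The partial_grouping_valid bookkeeping above is an all-or-nothing
 * accumulation: the parent may build a partially grouped Append only if
 * every non-dummy child produced a partially grouped rel.  A tiny sketch
 * of that pattern; ToyRel and the function name are illustrative only.
 */
#include <stdbool.h>
#include <stddef.h>

typedef struct ToyRel
{
	int			relid;
} ToyRel;

/*
 * Collect non-NULL children into 'live_out'; return true only if no child
 * was missing (NULL standing in for "couldn't be partially grouped").
 */
static bool
collect_partial_children(ToyRel **children, int nchildren,
						 ToyRel **live_out, int *nlive_out)
{
	bool		valid = true;
	int			n = 0;

	for (int i = 0; i < nchildren; i++)
	{
		if (children[i] != NULL)
			live_out[n++] = children[i];
		else
			valid = false;		/* one gap invalidates the whole set */
	}
	*nlive_out = n;
	return valid;
}

int
main(void)
{
	ToyRel		a = {1};
	ToyRel	   *kids[] = {&a, NULL};
	ToyRel	   *live[2];
	int			nlive;

	/* One child failed to produce a partial rel => not valid. */
	return collect_partial_children(kids, 2, live, &nlive) ? 1 : 0;
}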
8112 :
8113 : /*
8114 : * group_by_has_partkey
8115 : *
8116 : * Returns true if all the partition keys of the given relation appear in
8117 : * the GROUP BY clause with matching collations; false otherwise.
8118 : */
8119 : static bool
8120 556 : group_by_has_partkey(RelOptInfo *input_rel,
8121 : List *targetList,
8122 : List *groupClause)
8123 : {
8124 556 : List *groupexprs = get_sortgrouplist_exprs(groupClause, targetList);
8125 556 : int cnt = 0;
8126 : int partnatts;
8127 :
8128 : /* Input relation should be partitioned. */
8129 : Assert(input_rel->part_scheme);
8130 :
8131 : /* Rule out early if there are no partition keys present. */
8132 556 : if (!input_rel->partexprs)
8133 0 : return false;
8134 :
8135 556 : partnatts = input_rel->part_scheme->partnatts;
8136 :
8137 912 : for (cnt = 0; cnt < partnatts; cnt++)
8138 : {
8139 592 : List *partexprs = input_rel->partexprs[cnt];
8140 : ListCell *lc;
8141 592 : bool found = false;
8142 :
8143 810 : foreach(lc, partexprs)
8144 : {
8145 : ListCell *lg;
8146 586 : Expr *partexpr = lfirst(lc);
8147 586 : Oid partcoll = input_rel->part_scheme->partcollation[cnt];
8148 :
8149 924 : foreach(lg, groupexprs)
8150 : {
8151 706 : Expr *groupexpr = lfirst(lg);
8152 706 : Oid groupcoll = exprCollation((Node *) groupexpr);
8153 :
8154 : /*
8155 : * Note: we can assume there is at most one RelabelType node;
8156 : * eval_const_expressions() will have simplified the expression
8157 : * if there were more.
8158 : */
8159 706 : if (IsA(groupexpr, RelabelType))
8160 24 : groupexpr = ((RelabelType *) groupexpr)->arg;
8161 :
8162 706 : if (equal(groupexpr, partexpr))
8163 : {
8164 : /*
8165 : * Reject a match if the grouping collation does not match
8166 : * the partitioning collation.
8167 : */
8168 368 : if (OidIsValid(partcoll) && OidIsValid(groupcoll) &&
8169 : partcoll != groupcoll)
8170 12 : return false;
8171 :
8172 356 : found = true;
8173 356 : break;
8174 : }
8175 : }
8176 :
8177 574 : if (found)
8178 356 : break;
8179 : }
8180 :
8181 : /*
8182 : * If none of the expressions for this partition key matched any
8183 : * GROUP BY expression, return false.
8184 : */
8185 580 : if (!found)
8186 224 : return false;
8187 : }
8188 :
8189 320 : return true;
8190 : }
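/*
 * A standalone sketch of the containment test above: every partition key
 * must equal some GROUP BY expression, and where both sides carry a
 * collation the collations must agree.  Strings stand in for expression
 * trees and ints for collation OIDs (0 meaning "no collation"); none of
 * this is the real planner API.
 */
#include <stdbool.h>
#include <string.h>

typedef struct ToyExpr
{
	const char *expr;			/* stand-in for an expression tree */
	int			collation;		/* 0 = no collation */
} ToyExpr;

static bool
toy_group_by_has_partkey(const ToyExpr *partkeys, int npartkeys,
						 const ToyExpr *groupexprs, int ngroupexprs)
{
	for (int i = 0; i < npartkeys; i++)
	{
		bool		found = false;

		for (int j = 0; j < ngroupexprs; j++)
		{
			if (strcmp(partkeys[i].expr, groupexprs[j].expr) != 0)
				continue;

			/* Reject if both collations are set but disagree. */
			if (partkeys[i].collation != 0 &&
				groupexprs[j].collation != 0 &&
				partkeys[i].collation != groupexprs[j].collation)
				return false;

			found = true;
			break;
		}

		if (!found)
			return false;		/* this partition key isn't grouped on */
	}
	return true;
}

int
main(void)
{
	ToyExpr		keys[] = {{"a", 0}};
	ToyExpr		groups[] = {{"b", 0}, {"a", 0}};

	return toy_group_by_has_partkey(keys, 1, groups, 2) ? 0 : 1;
}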
8191 :
8192 : /*
8193 : * generate_setop_child_grouplist
8194 : * Build a SortGroupClause list defining the sort/grouping properties
8195 : * of the child of a set operation.
8196 : *
8197 : * This is similar to generate_setop_grouplist() but differs in that the
8198 : * setop child query's targetlist entries may already have a tleSortGroupRef
8199 : * assigned for other purposes, such as GROUP BYs. Here we keep the
8200 : * SortGroupClause list in the same order as 'op' groupClauses and just adjust
8201 : * the tleSortGroupRef to reference the TargetEntry's 'ressortgroupref'. If
8202 : * any of the columns in the targetlist don't match the setop's colTypes,
8203 : * we return an empty list. This may leave some TLEs with unreferenced
8204 : * ressortgroupref markings, but that's harmless.
8205 : */
8206 : static List *
8207 12268 : generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist)
8208 : {
8209 12268 : List *grouplist = copyObject(op->groupClauses);
8210 : ListCell *lg;
8211 : ListCell *lt;
8212 : ListCell *ct;
8213 :
8214 12268 : lg = list_head(grouplist);
8215 12268 : ct = list_head(op->colTypes);
8216 47758 : foreach(lt, targetlist)
8217 : {
8218 35904 : TargetEntry *tle = (TargetEntry *) lfirst(lt);
8219 : SortGroupClause *sgc;
8220 : Oid coltype;
8221 :
8222 : /* resjunk columns could have sortgrouprefs. Leave these alone */
8223 : /* Resjunk columns could have sortgrouprefs; leave them alone. */
8224 0 : continue;
8225 :
8226 : /*
8227 : * We expect every non-resjunk target to have a SortGroupClause and
8228 : * a colTypes entry.
8229 : */
8230 : Assert(lg != NULL);
8231 : Assert(ct != NULL);
8232 35904 : sgc = (SortGroupClause *) lfirst(lg);
8233 35904 : coltype = lfirst_oid(ct);
8234 :
8235 : /* reject if target type isn't the same as the setop target type */
8236 35904 : if (coltype != exprType((Node *) tle->expr))
8237 414 : return NIL;
8238 :
8239 35490 : lg = lnext(grouplist, lg);
8240 35490 : ct = lnext(op->colTypes, ct);
8241 :
8242 : /* assign a tleSortGroupRef, or reuse the existing one */
8243 35490 : sgc->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
8244 : }
8245 :
8246 : Assert(lg == NULL);
8247 : Assert(ct == NULL);
8248 :
8249 11854 : return grouplist;
8250 : }
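/*
 * A sketch of the lockstep walk above: step through the target list while
 * advancing two companion lists in parallel, skipping junk entries and
 * giving up entirely on the first type mismatch.  Arrays stand in for the
 * three Lists; the names are illustrative only.
 */
#include <stdbool.h>

typedef struct ToyTle
{
	bool		resjunk;
	int			exprtype;		/* stand-in for exprType() */
	int			sortgroupref;
} ToyTle;

/*
 * Copy refs for non-junk targets into 'refs_out'; return false ("empty
 * list") if any non-junk column's type disagrees with 'coltypes'.
 */
static bool
assign_setop_refs(const ToyTle *tles, int ntles,
				  const int *coltypes, int *refs_out)
{
	int			k = 0;			/* lockstep position in coltypes/refs_out */

	for (int i = 0; i < ntles; i++)
	{
		if (tles[i].resjunk)
			continue;			/* junk columns keep their own refs */
		if (coltypes[k] != tles[i].exprtype)
			return false;		/* type mismatch: abandon the whole list */
		refs_out[k] = tles[i].sortgroupref;
		k++;
	}
	return true;
}

int
main(void)
{
	ToyTle		tles[] = {{false, 23, 1}, {true, 25, 0}, {false, 25, 2}};
	int			coltypes[] = {23, 25};
	int			refs[2];

	return assign_setop_refs(tles, 3, coltypes, refs) ? 0 : 1;
}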
|