Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * planner.c
4 : * The query optimizer external interface.
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/plan/planner.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include <limits.h>
19 : #include <math.h>
20 :
21 : #include "access/genam.h"
22 : #include "access/parallel.h"
23 : #include "access/sysattr.h"
24 : #include "access/table.h"
25 : #include "catalog/pg_aggregate.h"
26 : #include "catalog/pg_inherits.h"
27 : #include "catalog/pg_proc.h"
28 : #include "catalog/pg_type.h"
29 : #include "executor/executor.h"
30 : #include "foreign/fdwapi.h"
31 : #include "jit/jit.h"
32 : #include "lib/bipartite_match.h"
33 : #include "lib/knapsack.h"
34 : #include "miscadmin.h"
35 : #include "nodes/makefuncs.h"
36 : #include "nodes/nodeFuncs.h"
37 : #ifdef OPTIMIZER_DEBUG
38 : #include "nodes/print.h"
39 : #endif
40 : #include "nodes/supportnodes.h"
41 : #include "optimizer/appendinfo.h"
42 : #include "optimizer/clauses.h"
43 : #include "optimizer/cost.h"
44 : #include "optimizer/optimizer.h"
45 : #include "optimizer/paramassign.h"
46 : #include "optimizer/pathnode.h"
47 : #include "optimizer/paths.h"
48 : #include "optimizer/plancat.h"
49 : #include "optimizer/planmain.h"
50 : #include "optimizer/planner.h"
51 : #include "optimizer/prep.h"
52 : #include "optimizer/subselect.h"
53 : #include "optimizer/tlist.h"
54 : #include "parser/analyze.h"
55 : #include "parser/parse_agg.h"
56 : #include "parser/parse_clause.h"
57 : #include "parser/parse_relation.h"
58 : #include "parser/parsetree.h"
59 : #include "partitioning/partdesc.h"
60 : #include "rewrite/rewriteManip.h"
61 : #include "utils/lsyscache.h"
62 : #include "utils/rel.h"
63 : #include "utils/selfuncs.h"
64 :
65 : /* GUC parameters */
66 : double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
67 : int debug_parallel_query = DEBUG_PARALLEL_OFF;
68 : bool parallel_leader_participation = true;
69 : bool enable_distinct_reordering = true;
70 :
71 : /* Hook for plugins to get control in planner() */
72 : planner_hook_type planner_hook = NULL;
73 :
74 : /* Hook for plugins to get control when grouping_planner() plans upper rels */
75 : create_upper_paths_hook_type create_upper_paths_hook = NULL;
76 :
77 :
78 : /* Expression kind codes for preprocess_expression */
79 : #define EXPRKIND_QUAL 0
80 : #define EXPRKIND_TARGET 1
81 : #define EXPRKIND_RTFUNC 2
82 : #define EXPRKIND_RTFUNC_LATERAL 3
83 : #define EXPRKIND_VALUES 4
84 : #define EXPRKIND_VALUES_LATERAL 5
85 : #define EXPRKIND_LIMIT 6
86 : #define EXPRKIND_APPINFO 7
87 : #define EXPRKIND_PHV 8
88 : #define EXPRKIND_TABLESAMPLE 9
89 : #define EXPRKIND_ARBITER_ELEM 10
90 : #define EXPRKIND_TABLEFUNC 11
91 : #define EXPRKIND_TABLEFUNC_LATERAL 12
92 : #define EXPRKIND_GROUPEXPR 13
93 :
94 : /*
95 : * Data specific to grouping sets
96 : */
97 : typedef struct
98 : {
99 : List *rollups;
100 : List *hash_sets_idx;
101 : double dNumHashGroups;
102 : bool any_hashable;
103 : Bitmapset *unsortable_refs;
104 : Bitmapset *unhashable_refs;
105 : List *unsortable_sets;
106 : int *tleref_to_colnum_map;
107 : } grouping_sets_data;
108 :
109 : /*
110 : * Temporary structure for use during WindowClause reordering in order to be
111 : * able to sort WindowClauses on partitioning/ordering prefix.
112 : */
113 : typedef struct
114 : {
115 : WindowClause *wc;
116 : List *uniqueOrder; /* A List of unique ordering/partitioning
117 : * clauses per Window */
118 : } WindowClauseSortData;
119 :
120 : /* Passthrough data for standard_qp_callback */
121 : typedef struct
122 : {
123 : List *activeWindows; /* active windows, if any */
124 : grouping_sets_data *gset_data; /* grouping sets data, if any */
125 : SetOperationStmt *setop; /* parent set operation or NULL if not a
126 : * subquery belonging to a set operation */
127 : } standard_qp_extra;
128 :
129 : /* Local functions */
130 : static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
131 : static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
132 : static void grouping_planner(PlannerInfo *root, double tuple_fraction,
133 : SetOperationStmt *setops);
134 : static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
135 : static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
136 : int *tleref_to_colnum_map);
137 : static void preprocess_rowmarks(PlannerInfo *root);
138 : static double preprocess_limit(PlannerInfo *root,
139 : double tuple_fraction,
140 : int64 *offset_est, int64 *count_est);
141 : static List *preprocess_groupclause(PlannerInfo *root, List *force);
142 : static List *extract_rollup_sets(List *groupingSets);
143 : static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
144 : static void standard_qp_callback(PlannerInfo *root, void *extra);
145 : static double get_number_of_groups(PlannerInfo *root,
146 : double path_rows,
147 : grouping_sets_data *gd,
148 : List *target_list);
149 : static RelOptInfo *create_grouping_paths(PlannerInfo *root,
150 : RelOptInfo *input_rel,
151 : PathTarget *target,
152 : bool target_parallel_safe,
153 : grouping_sets_data *gd);
154 : static bool is_degenerate_grouping(PlannerInfo *root);
155 : static void create_degenerate_grouping_paths(PlannerInfo *root,
156 : RelOptInfo *input_rel,
157 : RelOptInfo *grouped_rel);
158 : static RelOptInfo *make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
159 : PathTarget *target, bool target_parallel_safe,
160 : Node *havingQual);
161 : static void create_ordinary_grouping_paths(PlannerInfo *root,
162 : RelOptInfo *input_rel,
163 : RelOptInfo *grouped_rel,
164 : const AggClauseCosts *agg_costs,
165 : grouping_sets_data *gd,
166 : GroupPathExtraData *extra,
167 : RelOptInfo **partially_grouped_rel_p);
168 : static void consider_groupingsets_paths(PlannerInfo *root,
169 : RelOptInfo *grouped_rel,
170 : Path *path,
171 : bool is_sorted,
172 : bool can_hash,
173 : grouping_sets_data *gd,
174 : const AggClauseCosts *agg_costs,
175 : double dNumGroups);
176 : static RelOptInfo *create_window_paths(PlannerInfo *root,
177 : RelOptInfo *input_rel,
178 : PathTarget *input_target,
179 : PathTarget *output_target,
180 : bool output_target_parallel_safe,
181 : WindowFuncLists *wflists,
182 : List *activeWindows);
183 : static void create_one_window_path(PlannerInfo *root,
184 : RelOptInfo *window_rel,
185 : Path *path,
186 : PathTarget *input_target,
187 : PathTarget *output_target,
188 : WindowFuncLists *wflists,
189 : List *activeWindows);
190 : static RelOptInfo *create_distinct_paths(PlannerInfo *root,
191 : RelOptInfo *input_rel,
192 : PathTarget *target);
193 : static void create_partial_distinct_paths(PlannerInfo *root,
194 : RelOptInfo *input_rel,
195 : RelOptInfo *final_distinct_rel,
196 : PathTarget *target);
197 : static RelOptInfo *create_final_distinct_paths(PlannerInfo *root,
198 : RelOptInfo *input_rel,
199 : RelOptInfo *distinct_rel);
200 : static List *get_useful_pathkeys_for_distinct(PlannerInfo *root,
201 : List *needed_pathkeys,
202 : List *path_pathkeys);
203 : static RelOptInfo *create_ordered_paths(PlannerInfo *root,
204 : RelOptInfo *input_rel,
205 : PathTarget *target,
206 : bool target_parallel_safe,
207 : double limit_tuples);
208 : static PathTarget *make_group_input_target(PlannerInfo *root,
209 : PathTarget *final_target);
210 : static PathTarget *make_partial_grouping_target(PlannerInfo *root,
211 : PathTarget *grouping_target,
212 : Node *havingQual);
213 : static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
214 : static void optimize_window_clauses(PlannerInfo *root,
215 : WindowFuncLists *wflists);
216 : static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
217 : static PathTarget *make_window_input_target(PlannerInfo *root,
218 : PathTarget *final_target,
219 : List *activeWindows);
220 : static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
221 : List *tlist);
222 : static PathTarget *make_sort_input_target(PlannerInfo *root,
223 : PathTarget *final_target,
224 : bool *have_postponed_srfs);
225 : static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
226 : List *targets, List *targets_contain_srfs);
227 : static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
228 : RelOptInfo *grouped_rel,
229 : RelOptInfo *partially_grouped_rel,
230 : const AggClauseCosts *agg_costs,
231 : grouping_sets_data *gd,
232 : double dNumGroups,
233 : GroupPathExtraData *extra);
234 : static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root,
235 : RelOptInfo *grouped_rel,
236 : RelOptInfo *input_rel,
237 : grouping_sets_data *gd,
238 : GroupPathExtraData *extra,
239 : bool force_rel_creation);
240 : static Path *make_ordered_path(PlannerInfo *root,
241 : RelOptInfo *rel,
242 : Path *path,
243 : Path *cheapest_path,
244 : List *pathkeys,
245 : double limit_tuples);
246 : static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel);
247 : static bool can_partial_agg(PlannerInfo *root);
248 : static void apply_scanjoin_target_to_paths(PlannerInfo *root,
249 : RelOptInfo *rel,
250 : List *scanjoin_targets,
251 : List *scanjoin_targets_contain_srfs,
252 : bool scanjoin_target_parallel_safe,
253 : bool tlist_same_exprs);
254 : static void create_partitionwise_grouping_paths(PlannerInfo *root,
255 : RelOptInfo *input_rel,
256 : RelOptInfo *grouped_rel,
257 : RelOptInfo *partially_grouped_rel,
258 : const AggClauseCosts *agg_costs,
259 : grouping_sets_data *gd,
260 : PartitionwiseAggregateType patype,
261 : GroupPathExtraData *extra);
262 : static bool group_by_has_partkey(RelOptInfo *input_rel,
263 : List *targetList,
264 : List *groupClause);
265 : static int common_prefix_cmp(const void *a, const void *b);
266 : static List *generate_setop_child_grouplist(SetOperationStmt *op,
267 : List *targetlist);
268 :
269 :
270 : /*****************************************************************************
271 : *
272 : * Query optimizer entry point
273 : *
274 : * To support loadable plugins that monitor or modify planner behavior,
275 : * we provide a hook variable that lets a plugin get control before and
276 : * after the standard planning process. The plugin would normally call
277 : * standard_planner().
278 : *
279 : * Note to plugin authors: standard_planner() scribbles on its Query input,
280 : * so you'd better copy that data structure if you want to plan more than once.
281 : *
282 : *****************************************************************************/
283 : PlannedStmt *
284 484132 : planner(Query *parse, const char *query_string, int cursorOptions,
285 : ParamListInfo boundParams)
286 : {
287 : PlannedStmt *result;
288 :
289 484132 : if (planner_hook)
290 94598 : result = (*planner_hook) (parse, query_string, cursorOptions, boundParams);
291 : else
292 389534 : result = standard_planner(parse, query_string, cursorOptions, boundParams);
293 479984 : return result;
294 : }
295 :
296 : PlannedStmt *
297 484132 : standard_planner(Query *parse, const char *query_string, int cursorOptions,
298 : ParamListInfo boundParams)
299 : {
300 : PlannedStmt *result;
301 : PlannerGlobal *glob;
302 : double tuple_fraction;
303 : PlannerInfo *root;
304 : RelOptInfo *final_rel;
305 : Path *best_path;
306 : Plan *top_plan;
307 : ListCell *lp,
308 : *lr;
309 :
310 : /*
311 : * Set up global state for this planner invocation. This data is needed
312 : * across all levels of sub-Query that might exist in the given command,
313 : * so we keep it in a separate struct that's linked to by each per-Query
314 : * PlannerInfo.
315 : */
316 484132 : glob = makeNode(PlannerGlobal);
317 :
318 484132 : glob->boundParams = boundParams;
319 484132 : glob->subplans = NIL;
320 484132 : glob->subpaths = NIL;
321 484132 : glob->subroots = NIL;
322 484132 : glob->rewindPlanIDs = NULL;
323 484132 : glob->finalrtable = NIL;
324 484132 : glob->finalrteperminfos = NIL;
325 484132 : glob->finalrowmarks = NIL;
326 484132 : glob->resultRelations = NIL;
327 484132 : glob->appendRelations = NIL;
328 484132 : glob->relationOids = NIL;
329 484132 : glob->invalItems = NIL;
330 484132 : glob->paramExecTypes = NIL;
331 484132 : glob->lastPHId = 0;
332 484132 : glob->lastRowMarkId = 0;
333 484132 : glob->lastPlanNodeId = 0;
334 484132 : glob->transientPlan = false;
335 484132 : glob->dependsOnRole = false;
336 :
337 : /*
338 : * Assess whether it's feasible to use parallel mode for this query. We
339 : * can't do this in a standalone backend, or if the command will try to
340 : * modify any data, or if this is a cursor operation, or if GUCs are set
341 : * to values that don't permit parallelism, or if parallel-unsafe
342 : * functions are present in the query tree.
343 : *
344 : * (Note that we do allow CREATE TABLE AS, SELECT INTO, and CREATE
345 : * MATERIALIZED VIEW to use parallel plans, but this is safe only because
346 : * the command is writing into a completely new table which workers won't
347 : * be able to see. If the workers could see the table, the fact that
348 : * group locking would cause them to ignore the leader's heavyweight GIN
349 : * page locks would make this unsafe. We'll have to fix that somehow if
350 : * we want to allow parallel inserts in general; updates and deletes have
351 : * additional problems especially around combo CIDs.)
352 : *
353 : * For now, we don't try to use parallel mode if we're running inside a
354 : * parallel worker. We might eventually be able to relax this
355 : * restriction, but for now it seems best not to have parallel workers
356 : * trying to create their own parallel workers.
357 : */
358 484132 : if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
359 458214 : IsUnderPostmaster &&
360 458214 : parse->commandType == CMD_SELECT &&
361 372432 : !parse->hasModifyingCTE &&
362 372292 : max_parallel_workers_per_gather > 0 &&
363 371714 : !IsParallelWorker())
364 : {
365 : /* all the cheap tests pass, so scan the query tree */
366 371638 : glob->maxParallelHazard = max_parallel_hazard(parse);
367 371638 : glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE);
368 : }
369 : else
370 : {
371 : /* skip the query tree scan, just assume it's unsafe */
372 112494 : glob->maxParallelHazard = PROPARALLEL_UNSAFE;
373 112494 : glob->parallelModeOK = false;
374 : }
375 :
376 : /*
377 : * glob->parallelModeNeeded is normally set to false here and changed to
378 : * true during plan creation if a Gather or Gather Merge plan is actually
379 : * created (cf. create_gather_plan, create_gather_merge_plan).
380 : *
381 : * However, if debug_parallel_query = on or debug_parallel_query =
382 : * regress, then we impose parallel mode whenever it's safe to do so, even
383 : * if the final plan doesn't use parallelism. It's not safe to do so if
384 : * the query contains anything parallel-unsafe; parallelModeOK will be
385 : * false in that case. Note that parallelModeOK can't change after this
386 : * point. Otherwise, everything in the query is either parallel-safe or
387 : * parallel-restricted, and in either case it should be OK to impose
388 : * parallel-mode restrictions. If that ends up breaking something, then
389 : * either some function the user included in the query is incorrectly
390 : * labeled as parallel-safe or parallel-restricted when in reality it's
391 : * parallel-unsafe, or else the query planner itself has a bug.
392 : */
393 795480 : glob->parallelModeNeeded = glob->parallelModeOK &&
394 311348 : (debug_parallel_query != DEBUG_PARALLEL_OFF);
395 :
396 : /* Determine what fraction of the plan is likely to be scanned */
397 484132 : if (cursorOptions & CURSOR_OPT_FAST_PLAN)
398 : {
399 : /*
400 : * We have no real idea how many tuples the user will ultimately FETCH
401 : * from a cursor, but it is often the case that he doesn't want 'em
402 : * all, or would prefer a fast-start plan anyway so that he can
403 : * process some of the tuples sooner. Use a GUC parameter to decide
404 : * what fraction to optimize for.
405 : */
406 2906 : tuple_fraction = cursor_tuple_fraction;
407 :
408 : /*
409 : * We document cursor_tuple_fraction as simply being a fraction, which
410 : * means the edge cases 0 and 1 have to be treated specially here. We
411 : * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
412 : */
413 2906 : if (tuple_fraction >= 1.0)
414 0 : tuple_fraction = 0.0;
415 2906 : else if (tuple_fraction <= 0.0)
416 0 : tuple_fraction = 1e-10;
417 : }
418 : else
419 : {
420 : /* Default assumption is we need all the tuples */
421 481226 : tuple_fraction = 0.0;
422 : }
423 :
424 : /* primary planning entry point (may recurse for subqueries) */
425 484132 : root = subquery_planner(glob, parse, NULL, false, tuple_fraction, NULL);
426 :
427 : /* Select best Path and turn it into a Plan */
428 480380 : final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
429 480380 : best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
430 :
431 480380 : top_plan = create_plan(root, best_path);
432 :
433 : /*
434 : * If creating a plan for a scrollable cursor, make sure it can run
435 : * backwards on demand. Add a Material node at the top at need.
436 : */
437 479984 : if (cursorOptions & CURSOR_OPT_SCROLL)
438 : {
439 264 : if (!ExecSupportsBackwardScan(top_plan))
440 30 : top_plan = materialize_finished_plan(top_plan);
441 : }
442 :
443 : /*
444 : * Optionally add a Gather node for testing purposes, provided this is
445 : * actually a safe thing to do.
446 : *
447 : * We can add Gather even when top_plan has parallel-safe initPlans, but
448 : * then we have to move the initPlans to the Gather node because of
449 : * SS_finalize_plan's limitations. That would cause cosmetic breakage of
450 : * regression tests when debug_parallel_query = regress, because initPlans
451 : * that would normally appear on the top_plan move to the Gather, causing
452 : * them to disappear from EXPLAIN output. That doesn't seem worth kluging
453 : * EXPLAIN to hide, so skip it when debug_parallel_query = regress.
454 : */
455 479984 : if (debug_parallel_query != DEBUG_PARALLEL_OFF &&
456 182 : top_plan->parallel_safe &&
457 84 : (top_plan->initPlan == NIL ||
458 0 : debug_parallel_query != DEBUG_PARALLEL_REGRESS))
459 : {
460 84 : Gather *gather = makeNode(Gather);
461 : Cost initplan_cost;
462 : bool unsafe_initplans;
463 :
464 84 : gather->plan.targetlist = top_plan->targetlist;
465 84 : gather->plan.qual = NIL;
466 84 : gather->plan.lefttree = top_plan;
467 84 : gather->plan.righttree = NULL;
468 84 : gather->num_workers = 1;
469 84 : gather->single_copy = true;
470 84 : gather->invisible = (debug_parallel_query == DEBUG_PARALLEL_REGRESS);
471 :
472 : /* Transfer any initPlans to the new top node */
473 84 : gather->plan.initPlan = top_plan->initPlan;
474 84 : top_plan->initPlan = NIL;
475 :
476 : /*
477 : * Since this Gather has no parallel-aware descendants to signal to,
478 : * we don't need a rescan Param.
479 : */
480 84 : gather->rescan_param = -1;
481 :
482 : /*
483 : * Ideally we'd use cost_gather here, but setting up dummy path data
484 : * to satisfy it doesn't seem much cleaner than knowing what it does.
485 : */
486 84 : gather->plan.startup_cost = top_plan->startup_cost +
487 : parallel_setup_cost;
488 84 : gather->plan.total_cost = top_plan->total_cost +
489 84 : parallel_setup_cost + parallel_tuple_cost * top_plan->plan_rows;
490 84 : gather->plan.plan_rows = top_plan->plan_rows;
491 84 : gather->plan.plan_width = top_plan->plan_width;
492 84 : gather->plan.parallel_aware = false;
493 84 : gather->plan.parallel_safe = false;
494 :
495 : /*
496 : * Delete the initplans' cost from top_plan. We needn't add it to the
497 : * Gather node, since the above coding already included it there.
498 : */
499 84 : SS_compute_initplan_cost(gather->plan.initPlan,
500 : &initplan_cost, &unsafe_initplans);
501 84 : top_plan->startup_cost -= initplan_cost;
502 84 : top_plan->total_cost -= initplan_cost;
503 :
504 : /* use parallel mode for parallel plans. */
505 84 : root->glob->parallelModeNeeded = true;
506 :
507 84 : top_plan = &gather->plan;
508 : }
509 :
510 : /*
511 : * If any Params were generated, run through the plan tree and compute
512 : * each plan node's extParam/allParam sets. Ideally we'd merge this into
513 : * set_plan_references' tree traversal, but for now it has to be separate
514 : * because we need to visit subplans before not after main plan.
515 : */
516 479984 : if (glob->paramExecTypes != NIL)
517 : {
518 : Assert(list_length(glob->subplans) == list_length(glob->subroots));
519 205748 : forboth(lp, glob->subplans, lr, glob->subroots)
520 : {
521 40428 : Plan *subplan = (Plan *) lfirst(lp);
522 40428 : PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
523 :
524 40428 : SS_finalize_plan(subroot, subplan);
525 : }
526 165320 : SS_finalize_plan(root, top_plan);
527 : }
528 :
529 : /* final cleanup of the plan */
530 : Assert(glob->finalrtable == NIL);
531 : Assert(glob->finalrteperminfos == NIL);
532 : Assert(glob->finalrowmarks == NIL);
533 : Assert(glob->resultRelations == NIL);
534 : Assert(glob->appendRelations == NIL);
535 479984 : top_plan = set_plan_references(root, top_plan);
536 : /* ... and the subplans (both regular subplans and initplans) */
537 : Assert(list_length(glob->subplans) == list_length(glob->subroots));
538 520412 : forboth(lp, glob->subplans, lr, glob->subroots)
539 : {
540 40428 : Plan *subplan = (Plan *) lfirst(lp);
541 40428 : PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
542 :
543 40428 : lfirst(lp) = set_plan_references(subroot, subplan);
544 : }
545 :
546 : /* build the PlannedStmt result */
547 479984 : result = makeNode(PlannedStmt);
548 :
549 479984 : result->commandType = parse->commandType;
550 479984 : result->queryId = parse->queryId;
551 479984 : result->hasReturning = (parse->returningList != NIL);
552 479984 : result->hasModifyingCTE = parse->hasModifyingCTE;
553 479984 : result->canSetTag = parse->canSetTag;
554 479984 : result->transientPlan = glob->transientPlan;
555 479984 : result->dependsOnRole = glob->dependsOnRole;
556 479984 : result->parallelModeNeeded = glob->parallelModeNeeded;
557 479984 : result->planTree = top_plan;
558 479984 : result->partPruneInfos = glob->partPruneInfos;
559 479984 : result->rtable = glob->finalrtable;
560 959968 : result->unprunableRelids = bms_difference(glob->allRelids,
561 479984 : glob->prunableRelids);
562 479984 : result->permInfos = glob->finalrteperminfos;
563 479984 : result->resultRelations = glob->resultRelations;
564 479984 : result->appendRelations = glob->appendRelations;
565 479984 : result->subplans = glob->subplans;
566 479984 : result->rewindPlanIDs = glob->rewindPlanIDs;
567 479984 : result->rowMarks = glob->finalrowmarks;
568 479984 : result->relationOids = glob->relationOids;
569 479984 : result->invalItems = glob->invalItems;
570 479984 : result->paramExecTypes = glob->paramExecTypes;
571 : /* utilityStmt should be null, but we might as well copy it */
572 479984 : result->utilityStmt = parse->utilityStmt;
573 479984 : result->stmt_location = parse->stmt_location;
574 479984 : result->stmt_len = parse->stmt_len;
575 :
576 479984 : result->jitFlags = PGJIT_NONE;
577 479984 : if (jit_enabled && jit_above_cost >= 0 &&
578 479350 : top_plan->total_cost > jit_above_cost)
579 : {
580 950 : result->jitFlags |= PGJIT_PERFORM;
581 :
582 : /*
583 : * Decide how much effort should be put into generating better code.
584 : */
585 950 : if (jit_optimize_above_cost >= 0 &&
586 950 : top_plan->total_cost > jit_optimize_above_cost)
587 444 : result->jitFlags |= PGJIT_OPT3;
588 950 : if (jit_inline_above_cost >= 0 &&
589 950 : top_plan->total_cost > jit_inline_above_cost)
590 444 : result->jitFlags |= PGJIT_INLINE;
591 :
592 : /*
593 : * Decide which operations should be JITed.
594 : */
595 950 : if (jit_expressions)
596 950 : result->jitFlags |= PGJIT_EXPR;
597 950 : if (jit_tuple_deforming)
598 950 : result->jitFlags |= PGJIT_DEFORM;
599 : }
600 :
601 479984 : if (glob->partition_directory != NULL)
602 11474 : DestroyPartitionDirectory(glob->partition_directory);
603 :
604 479984 : return result;
605 : }
606 :
607 :
608 : /*--------------------
609 : * subquery_planner
610 : * Invokes the planner on a subquery. We recurse to here for each
611 : * sub-SELECT found in the query tree.
612 : *
613 : * glob is the global state for the current planner run.
614 : * parse is the querytree produced by the parser & rewriter.
615 : * parent_root is the immediate parent Query's info (NULL at the top level).
616 : * hasRecursion is true if this is a recursive WITH query.
617 : * tuple_fraction is the fraction of tuples we expect will be retrieved.
618 : * tuple_fraction is interpreted as explained for grouping_planner, below.
619 : * setops is used for set operation subqueries to provide the subquery with
620 : * the context in which it's being used so that Paths correctly sorted for the
621 : * set operation can be generated. NULL when not planning a set operation
622 : * child, or when a child of a set op that isn't interested in sorted input.
623 : *
624 : * Basically, this routine does the stuff that should only be done once
625 : * per Query object. It then calls grouping_planner. At one time,
626 : * grouping_planner could be invoked recursively on the same Query object;
627 : * that's not currently true, but we keep the separation between the two
628 : * routines anyway, in case we need it again someday.
629 : *
630 : * subquery_planner will be called recursively to handle sub-Query nodes
631 : * found within the query's expressions and rangetable.
632 : *
633 : * Returns the PlannerInfo struct ("root") that contains all data generated
634 : * while planning the subquery. In particular, the Path(s) attached to
635 : * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
636 : * cheapest way(s) to implement the query. The top level will select the
637 : * best Path and pass it through createplan.c to produce a finished Plan.
638 : *--------------------
639 : */
640 : PlannerInfo *
641 559666 : subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
642 : bool hasRecursion, double tuple_fraction,
643 : SetOperationStmt *setops)
644 : {
645 : PlannerInfo *root;
646 : List *newWithCheckOptions;
647 : List *newHaving;
648 : bool hasOuterJoins;
649 : bool hasResultRTEs;
650 : RelOptInfo *final_rel;
651 : ListCell *l;
652 :
653 : /* Create a PlannerInfo data structure for this subquery */
654 559666 : root = makeNode(PlannerInfo);
655 559666 : root->parse = parse;
656 559666 : root->glob = glob;
657 559666 : root->query_level = parent_root ? parent_root->query_level + 1 : 1;
658 559666 : root->parent_root = parent_root;
659 559666 : root->plan_params = NIL;
660 559666 : root->outer_params = NULL;
661 559666 : root->planner_cxt = CurrentMemoryContext;
662 559666 : root->init_plans = NIL;
663 559666 : root->cte_plan_ids = NIL;
664 559666 : root->multiexpr_params = NIL;
665 559666 : root->join_domains = NIL;
666 559666 : root->eq_classes = NIL;
667 559666 : root->ec_merging_done = false;
668 559666 : root->last_rinfo_serial = 0;
669 559666 : root->all_result_relids =
670 559666 : parse->resultRelation ? bms_make_singleton(parse->resultRelation) : NULL;
671 559666 : root->leaf_result_relids = NULL; /* we'll find out leaf-ness later */
672 559666 : root->append_rel_list = NIL;
673 559666 : root->row_identity_vars = NIL;
674 559666 : root->rowMarks = NIL;
675 559666 : memset(root->upper_rels, 0, sizeof(root->upper_rels));
676 559666 : memset(root->upper_targets, 0, sizeof(root->upper_targets));
677 559666 : root->processed_groupClause = NIL;
678 559666 : root->processed_distinctClause = NIL;
679 559666 : root->processed_tlist = NIL;
680 559666 : root->update_colnos = NIL;
681 559666 : root->grouping_map = NULL;
682 559666 : root->minmax_aggs = NIL;
683 559666 : root->qual_security_level = 0;
684 559666 : root->hasPseudoConstantQuals = false;
685 559666 : root->hasAlternativeSubPlans = false;
686 559666 : root->placeholdersFrozen = false;
687 559666 : root->hasRecursion = hasRecursion;
688 559666 : if (hasRecursion)
689 834 : root->wt_param_id = assign_special_exec_param(root);
690 : else
691 558832 : root->wt_param_id = -1;
692 559666 : root->non_recursive_path = NULL;
693 559666 : root->partColsUpdated = false;
694 :
695 : /*
696 : * Create the top-level join domain. This won't have valid contents until
697 : * deconstruct_jointree fills it in, but the node needs to exist before
698 : * that so we can build EquivalenceClasses referencing it.
699 : */
700 559666 : root->join_domains = list_make1(makeNode(JoinDomain));
701 :
702 : /*
703 : * If there is a WITH list, process each WITH query and either convert it
704 : * to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it.
705 : */
706 559666 : if (parse->cteList)
707 2522 : SS_process_ctes(root);
708 :
709 : /*
710 : * If it's a MERGE command, transform the joinlist as appropriate.
711 : */
712 559660 : transform_MERGE_to_join(parse);
713 :
714 : /*
715 : * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so
716 : * that we don't need so many special cases to deal with that situation.
717 : */
718 559660 : replace_empty_jointree(parse);
719 :
720 : /*
721 : * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
722 : * to transform them into joins. Note that this step does not descend
723 : * into subqueries; if we pull up any subqueries below, their SubLinks are
724 : * processed just before pulling them up.
725 : */
726 559660 : if (parse->hasSubLinks)
727 31684 : pull_up_sublinks(root);
728 :
729 : /*
730 : * Scan the rangetable for function RTEs, do const-simplification on them,
731 : * and then inline them if possible (producing subqueries that might get
732 : * pulled up next). Recursion issues here are handled in the same way as
733 : * for SubLinks.
734 : */
735 559660 : preprocess_function_rtes(root);
736 :
737 : /*
738 : * Check to see if any subqueries in the jointree can be merged into this
739 : * query.
740 : */
741 559654 : pull_up_subqueries(root);
742 :
743 : /*
744 : * If this is a simple UNION ALL query, flatten it into an appendrel. We
745 : * do this now because it requires applying pull_up_subqueries to the leaf
746 : * queries of the UNION ALL, which weren't touched above because they
747 : * weren't referenced by the jointree (they will be after we do this).
748 : */
749 559648 : if (parse->setOperations)
750 6098 : flatten_simple_union_all(root);
751 :
752 : /*
753 : * Survey the rangetable to see what kinds of entries are present. We can
754 : * skip some later processing if relevant SQL features are not used; for
755 : * example if there are no JOIN RTEs we can avoid the expense of doing
756 : * flatten_join_alias_vars(). This must be done after we have finished
757 : * adding rangetable entries, of course. (Note: actually, processing of
758 : * inherited or partitioned rels can cause RTEs for their child tables to
759 : * get added later; but those must all be RTE_RELATION entries, so they
760 : * don't invalidate the conclusions drawn here.)
761 : */
762 559648 : root->hasJoinRTEs = false;
763 559648 : root->hasLateralRTEs = false;
764 559648 : root->group_rtindex = 0;
765 559648 : hasOuterJoins = false;
766 559648 : hasResultRTEs = false;
767 1604690 : foreach(l, parse->rtable)
768 : {
769 1045042 : RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
770 :
771 1045042 : switch (rte->rtekind)
772 : {
773 538134 : case RTE_RELATION:
774 538134 : if (rte->inh)
775 : {
776 : /*
777 : * Check to see if the relation actually has any children;
778 : * if not, clear the inh flag so we can treat it as a
779 : * plain base relation.
780 : *
781 : * Note: this could give a false-positive result, if the
782 : * rel once had children but no longer does. We used to
783 : * be able to clear rte->inh later on when we discovered
784 : * that, but no more; we have to handle such cases as
785 : * full-fledged inheritance.
786 : */
787 452266 : rte->inh = has_subclass(rte->relid);
788 : }
789 538134 : break;
790 151090 : case RTE_JOIN:
791 151090 : root->hasJoinRTEs = true;
792 151090 : if (IS_OUTER_JOIN(rte->jointype))
793 56384 : hasOuterJoins = true;
794 151090 : break;
795 221560 : case RTE_RESULT:
796 221560 : hasResultRTEs = true;
797 221560 : break;
798 4424 : case RTE_GROUP:
799 : Assert(parse->hasGroupRTE);
800 4424 : root->group_rtindex = list_cell_number(parse->rtable, l) + 1;
801 4424 : break;
802 129834 : default:
803 : /* No work here for other RTE types */
804 129834 : break;
805 : }
806 :
807 1045042 : if (rte->lateral)
808 10518 : root->hasLateralRTEs = true;
809 :
810 : /*
811 : * We can also determine the maximum security level required for any
812 : * securityQuals now. Addition of inheritance-child RTEs won't affect
813 : * this, because child tables don't have their own securityQuals; see
814 : * expand_single_inheritance_child().
815 : */
816 1045042 : if (rte->securityQuals)
817 2472 : root->qual_security_level = Max(root->qual_security_level,
818 : list_length(rte->securityQuals));
819 : }
820 :
821 : /*
822 : * If we have now verified that the query target relation is
823 : * non-inheriting, mark it as a leaf target.
824 : */
825 559648 : if (parse->resultRelation)
826 : {
827 92070 : RangeTblEntry *rte = rt_fetch(parse->resultRelation, parse->rtable);
828 :
829 92070 : if (!rte->inh)
830 89266 : root->leaf_result_relids =
831 89266 : bms_make_singleton(parse->resultRelation);
832 : }
833 :
834 : /*
835 : * Preprocess RowMark information. We need to do this after subquery
836 : * pullup, so that all base relations are present.
837 : */
838 559648 : preprocess_rowmarks(root);
839 :
840 : /*
841 : * Set hasHavingQual to remember if HAVING clause is present. Needed
842 : * because preprocess_expression will reduce a constant-true condition to
843 : * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
844 : */
845 559648 : root->hasHavingQual = (parse->havingQual != NULL);
846 :
847 : /*
848 : * Do expression preprocessing on targetlist and quals, as well as other
849 : * random expressions in the querytree. Note that we do not need to
850 : * handle sort/group expressions explicitly, because they are actually
851 : * part of the targetlist.
852 : */
853 555974 : parse->targetList = (List *)
854 559648 : preprocess_expression(root, (Node *) parse->targetList,
855 : EXPRKIND_TARGET);
856 :
857 555974 : newWithCheckOptions = NIL;
858 558366 : foreach(l, parse->withCheckOptions)
859 : {
860 2392 : WithCheckOption *wco = lfirst_node(WithCheckOption, l);
861 :
862 2392 : wco->qual = preprocess_expression(root, wco->qual,
863 : EXPRKIND_QUAL);
864 2392 : if (wco->qual != NULL)
865 1992 : newWithCheckOptions = lappend(newWithCheckOptions, wco);
866 : }
867 555974 : parse->withCheckOptions = newWithCheckOptions;
868 :
869 555974 : parse->returningList = (List *)
870 555974 : preprocess_expression(root, (Node *) parse->returningList,
871 : EXPRKIND_TARGET);
872 :
873 555974 : preprocess_qual_conditions(root, (Node *) parse->jointree);
874 :
875 555974 : parse->havingQual = preprocess_expression(root, parse->havingQual,
876 : EXPRKIND_QUAL);
877 :
878 558532 : foreach(l, parse->windowClause)
879 : {
880 2558 : WindowClause *wc = lfirst_node(WindowClause, l);
881 :
882 : /* partitionClause/orderClause are sort/group expressions */
883 2558 : wc->startOffset = preprocess_expression(root, wc->startOffset,
884 : EXPRKIND_LIMIT);
885 2558 : wc->endOffset = preprocess_expression(root, wc->endOffset,
886 : EXPRKIND_LIMIT);
887 : }
888 :
889 555974 : parse->limitOffset = preprocess_expression(root, parse->limitOffset,
890 : EXPRKIND_LIMIT);
891 555974 : parse->limitCount = preprocess_expression(root, parse->limitCount,
892 : EXPRKIND_LIMIT);
893 :
894 555974 : if (parse->onConflict)
895 : {
896 3628 : parse->onConflict->arbiterElems = (List *)
897 1814 : preprocess_expression(root,
898 1814 : (Node *) parse->onConflict->arbiterElems,
899 : EXPRKIND_ARBITER_ELEM);
900 3628 : parse->onConflict->arbiterWhere =
901 1814 : preprocess_expression(root,
902 1814 : parse->onConflict->arbiterWhere,
903 : EXPRKIND_QUAL);
904 3628 : parse->onConflict->onConflictSet = (List *)
905 1814 : preprocess_expression(root,
906 1814 : (Node *) parse->onConflict->onConflictSet,
907 : EXPRKIND_TARGET);
908 1814 : parse->onConflict->onConflictWhere =
909 1814 : preprocess_expression(root,
910 1814 : parse->onConflict->onConflictWhere,
911 : EXPRKIND_QUAL);
912 : /* exclRelTlist contains only Vars, so no preprocessing needed */
913 : }
914 :
915 558716 : foreach(l, parse->mergeActionList)
916 : {
917 2742 : MergeAction *action = (MergeAction *) lfirst(l);
918 :
919 2742 : action->targetList = (List *)
920 2742 : preprocess_expression(root,
921 2742 : (Node *) action->targetList,
922 : EXPRKIND_TARGET);
923 2742 : action->qual =
924 2742 : preprocess_expression(root,
925 : (Node *) action->qual,
926 : EXPRKIND_QUAL);
927 : }
928 :
929 555974 : parse->mergeJoinCondition =
930 555974 : preprocess_expression(root, parse->mergeJoinCondition, EXPRKIND_QUAL);
931 :
932 555974 : root->append_rel_list = (List *)
933 555974 : preprocess_expression(root, (Node *) root->append_rel_list,
934 : EXPRKIND_APPINFO);
935 :
936 : /* Also need to preprocess expressions within RTEs */
937 1597064 : foreach(l, parse->rtable)
938 : {
939 1041090 : RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
940 : int kind;
941 : ListCell *lcsq;
942 :
943 1041090 : if (rte->rtekind == RTE_RELATION)
944 : {
945 537868 : if (rte->tablesample)
946 228 : rte->tablesample = (TableSampleClause *)
947 228 : preprocess_expression(root,
948 228 : (Node *) rte->tablesample,
949 : EXPRKIND_TABLESAMPLE);
950 : }
951 503222 : else if (rte->rtekind == RTE_SUBQUERY)
952 : {
953 : /*
954 : * We don't want to do all preprocessing yet on the subquery's
955 : * expressions, since that will happen when we plan it. But if it
956 : * contains any join aliases of our level, those have to get
957 : * expanded now, because planning of the subquery won't do it.
958 : * That's only possible if the subquery is LATERAL.
959 : */
960 68424 : if (rte->lateral && root->hasJoinRTEs)
961 1040 : rte->subquery = (Query *)
962 1040 : flatten_join_alias_vars(root, root->parse,
963 1040 : (Node *) rte->subquery);
964 : }
965 434798 : else if (rte->rtekind == RTE_FUNCTION)
966 : {
967 : /* Preprocess the function expression(s) fully */
968 47988 : kind = rte->lateral ? EXPRKIND_RTFUNC_LATERAL : EXPRKIND_RTFUNC;
969 47988 : rte->functions = (List *)
970 47988 : preprocess_expression(root, (Node *) rte->functions, kind);
971 : }
972 386810 : else if (rte->rtekind == RTE_TABLEFUNC)
973 : {
974 : /* Preprocess the function expression(s) fully */
975 626 : kind = rte->lateral ? EXPRKIND_TABLEFUNC_LATERAL : EXPRKIND_TABLEFUNC;
976 626 : rte->tablefunc = (TableFunc *)
977 626 : preprocess_expression(root, (Node *) rte->tablefunc, kind);
978 : }
979 386184 : else if (rte->rtekind == RTE_VALUES)
980 : {
981 : /* Preprocess the values lists fully */
982 8164 : kind = rte->lateral ? EXPRKIND_VALUES_LATERAL : EXPRKIND_VALUES;
983 8164 : rte->values_lists = (List *)
984 8164 : preprocess_expression(root, (Node *) rte->values_lists, kind);
985 : }
986 378020 : else if (rte->rtekind == RTE_GROUP)
987 : {
988 : /* Preprocess the groupexprs list fully */
989 4424 : rte->groupexprs = (List *)
990 4424 : preprocess_expression(root, (Node *) rte->groupexprs,
991 : EXPRKIND_GROUPEXPR);
992 : }
993 :
994 : /*
995 : * Process each element of the securityQuals list as if it were a
996 : * separate qual expression (as indeed it is). We need to do it this
997 : * way to get proper canonicalization of AND/OR structure. Note that
998 : * this converts each element into an implicit-AND sublist.
999 : */
1000 1043920 : foreach(lcsq, rte->securityQuals)
1001 : {
1002 2830 : lfirst(lcsq) = preprocess_expression(root,
1003 2830 : (Node *) lfirst(lcsq),
1004 : EXPRKIND_QUAL);
1005 : }
1006 : }
1007 :
1008 : /*
1009 : * Now that we are done preprocessing expressions, and in particular done
1010 : * flattening join alias variables, get rid of the joinaliasvars lists.
1011 : * They no longer match what expressions in the rest of the tree look
1012 : * like, because we have not preprocessed expressions in those lists (and
1013 : * do not want to; for example, expanding a SubLink there would result in
1014 : * a useless unreferenced subplan). Leaving them in place simply creates
1015 : * a hazard for later scans of the tree. We could try to prevent that by
1016 : * using QTW_IGNORE_JOINALIASES in every tree scan done after this point,
1017 : * but that doesn't sound very reliable.
1018 : */
1019 555974 : if (root->hasJoinRTEs)
1020 : {
1021 490506 : foreach(l, parse->rtable)
1022 : {
1023 406044 : RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
1024 :
1025 406044 : rte->joinaliasvars = NIL;
1026 : }
1027 : }
1028 :
1029 : /*
1030 : * Replace any Vars in the subquery's targetlist and havingQual that
1031 : * reference GROUP outputs with the underlying grouping expressions.
1032 : *
1033 : * Note that we need to perform this replacement after we've preprocessed
1034 : * the grouping expressions. This is to ensure that there is only one
1035 : * instance of SubPlan for each SubLink contained within the grouping
1036 : * expressions.
1037 : */
1038 555974 : if (parse->hasGroupRTE)
1039 : {
1040 4424 : parse->targetList = (List *)
1041 4424 : flatten_group_exprs(root, root->parse, (Node *) parse->targetList);
1042 4424 : parse->havingQual =
1043 4424 : flatten_group_exprs(root, root->parse, parse->havingQual);
1044 : }
1045 :
1046 : /* Constant-folding might have removed all set-returning functions */
1047 555974 : if (parse->hasTargetSRFs)
1048 8766 : parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
1049 :
1050 : /*
1051 : * In some cases we may want to transfer a HAVING clause into WHERE. We
1052 : * cannot do so if the HAVING clause contains aggregates (obviously) or
1053 : * volatile functions (since a HAVING clause is supposed to be executed
1054 : * only once per group). We also can't do this if there are any nonempty
1055 : * grouping sets and the clause references any columns that are nullable
1056 : * by the grouping sets; moving such a clause into WHERE would potentially
1057 : * change the results. (If there are only empty grouping sets, then the
1058 : * HAVING clause must be degenerate as discussed below.)
1059 : *
1060 : * Also, it may be that the clause is so expensive to execute that we're
1061 : * better off doing it only once per group, despite the loss of
1062 : * selectivity. This is hard to estimate short of doing the entire
1063 : * planning process twice, so we use a heuristic: clauses containing
1064 : * subplans are left in HAVING. Otherwise, we move or copy the HAVING
1065 : * clause into WHERE, in hopes of eliminating tuples before aggregation
1066 : * instead of after.
1067 : *
1068 : * If the query has explicit grouping then we can simply move such a
1069 : * clause into WHERE; any group that fails the clause will not be in the
1070 : * output because none of its tuples will reach the grouping or
1071 : * aggregation stage. Otherwise we must have a degenerate (variable-free)
1072 : * HAVING clause, which we put in WHERE so that query_planner() can use it
1073 : * in a gating Result node, but also keep in HAVING to ensure that we
1074 : * don't emit a bogus aggregated row. (This could be done better, but it
1075 : * seems not worth optimizing.)
1076 : *
1077 : * Note that a HAVING clause may contain expressions that are not fully
1078 : * preprocessed. This can happen if these expressions are part of
1079 : * grouping items. In such cases, they are replaced with GROUP Vars in
1080 : * the parser and then replaced back after we've done with expression
1081 : * preprocessing on havingQual. This is not an issue if the clause
1082 : * remains in HAVING, because these expressions will be matched to lower
1083 : * target items in setrefs.c. However, if the clause is moved or copied
1084 : * into WHERE, we need to ensure that these expressions are fully
1085 : * preprocessed.
1086 : *
1087 : * Note that both havingQual and parse->jointree->quals are in
1088 : * implicitly-ANDed-list form at this point, even though they are declared
1089 : * as Node *.
1090 : */
1091 555974 : newHaving = NIL;
1092 557186 : foreach(l, (List *) parse->havingQual)
1093 : {
1094 1212 : Node *havingclause = (Node *) lfirst(l);
1095 :
1096 1528 : if (contain_agg_clause(havingclause) ||
1097 632 : contain_volatile_functions(havingclause) ||
1098 316 : contain_subplans(havingclause) ||
1099 382 : (parse->groupClause && parse->groupingSets &&
1100 66 : bms_is_member(root->group_rtindex, pull_varnos(root, havingclause))))
1101 : {
1102 : /* keep it in HAVING */
1103 950 : newHaving = lappend(newHaving, havingclause);
1104 : }
1105 262 : else if (parse->groupClause)
1106 : {
1107 : Node *whereclause;
1108 :
1109 : /* Preprocess the HAVING clause fully */
1110 244 : whereclause = preprocess_expression(root, havingclause,
1111 : EXPRKIND_QUAL);
1112 : /* ... and move it to WHERE */
1113 244 : parse->jointree->quals = (Node *)
1114 244 : list_concat((List *) parse->jointree->quals,
1115 : (List *) whereclause);
1116 : }
1117 : else
1118 : {
1119 : Node *whereclause;
1120 :
1121 : /* Preprocess the HAVING clause fully */
1122 18 : whereclause = preprocess_expression(root, copyObject(havingclause),
1123 : EXPRKIND_QUAL);
1124 : /* ... and put a copy in WHERE */
1125 36 : parse->jointree->quals = (Node *)
1126 18 : list_concat((List *) parse->jointree->quals,
1127 : (List *) whereclause);
1128 : /* ... and also keep it in HAVING */
1129 18 : newHaving = lappend(newHaving, havingclause);
1130 : }
1131 : }
1132 555974 : parse->havingQual = (Node *) newHaving;
1133 :
1134 : /*
1135 : * If we have any outer joins, try to reduce them to plain inner joins.
1136 : * This step is most easily done after we've done expression
1137 : * preprocessing.
1138 : */
1139 555974 : if (hasOuterJoins)
1140 43242 : reduce_outer_joins(root);
1141 :
1142 : /*
1143 : * If we have any RTE_RESULT relations, see if they can be deleted from
1144 : * the jointree. We also rely on this processing to flatten single-child
1145 : * FromExprs underneath outer joins. This step is most effectively done
1146 : * after we've done expression preprocessing and outer join reduction.
1147 : */
1148 555974 : if (hasResultRTEs || hasOuterJoins)
1149 260082 : remove_useless_result_rtes(root);
1150 :
1151 : /*
1152 : * Do the main planning.
1153 : */
1154 555974 : grouping_planner(root, tuple_fraction, setops);
1155 :
1156 : /*
1157 : * Capture the set of outer-level param IDs we have access to, for use in
1158 : * extParam/allParam calculations later.
1159 : */
1160 555908 : SS_identify_outer_params(root);
1161 :
1162 : /*
1163 : * If any initPlans were created in this query level, adjust the surviving
1164 : * Paths' costs and parallel-safety flags to account for them. The
1165 : * initPlans won't actually get attached to the plan tree till
1166 : * create_plan() runs, but we must include their effects now.
1167 : */
1168 555908 : final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1169 555908 : SS_charge_for_initplans(root, final_rel);
1170 :
1171 : /*
1172 : * Make sure we've identified the cheapest Path for the final rel. (By
1173 : * doing this here not in grouping_planner, we include initPlan costs in
1174 : * the decision, though it's unlikely that will change anything.)
1175 : */
1176 555908 : set_cheapest(final_rel);
1177 :
1178 555908 : return root;
1179 : }
1180 :
1181 : /*
1182 : * preprocess_expression
1183 : * Do subquery_planner's preprocessing work for an expression,
1184 : * which can be a targetlist, a WHERE clause (including JOIN/ON
1185 : * conditions), a HAVING clause, or a few other things.
1186 : */
1187 : static Node *
1188 4703314 : preprocess_expression(PlannerInfo *root, Node *expr, int kind)
1189 : {
1190 : /*
1191 : * Fall out quickly if expression is empty. This occurs often enough to
1192 : * be worth checking. Note that null->null is the correct conversion for
1193 : * implicit-AND result format, too.
1194 : */
1195 4703314 : if (expr == NULL)
1196 3679360 : return NULL;
1197 :
1198 : /*
1199 : * If the query has any join RTEs, replace join alias variables with
1200 : * base-relation variables. We must do this first, since any expressions
1201 : * we may extract from the joinaliasvars lists have not been preprocessed.
1202 : * For example, if we did this after sublink processing, sublinks expanded
1203 : * out from join aliases would not get processed. But we can skip this in
1204 : * non-lateral RTE functions, VALUES lists, and TABLESAMPLE clauses, since
1205 : * they can't contain any Vars of the current query level.
1206 : */
1207 1023954 : if (root->hasJoinRTEs &&
1208 636340 : !(kind == EXPRKIND_RTFUNC ||
1209 317972 : kind == EXPRKIND_VALUES ||
1210 : kind == EXPRKIND_TABLESAMPLE ||
1211 : kind == EXPRKIND_TABLEFUNC))
1212 317954 : expr = flatten_join_alias_vars(root, root->parse, expr);
1213 :
1214 : /*
1215 : * Simplify constant expressions. For function RTEs, this was already
1216 : * done by preprocess_function_rtes. (But note we must do it again for
1217 : * EXPRKIND_RTFUNC_LATERAL, because those might by now contain
1218 : * un-simplified subexpressions inserted by flattening of subqueries or
1219 : * join alias variables.)
1220 : *
1221 : * Note: an essential effect of this is to convert named-argument function
1222 : * calls to positional notation and insert the current actual values of
1223 : * any default arguments for functions. To ensure that happens, we *must*
1224 : * process all expressions here. Previous PG versions sometimes skipped
1225 : * const-simplification if it didn't seem worth the trouble, but we can't
1226 : * do that anymore.
1227 : *
1228 : * Note: this also flattens nested AND and OR expressions into N-argument
1229 : * form. All processing of a qual expression after this point must be
1230 : * careful to maintain AND/OR flatness --- that is, do not generate a tree
1231 : * with AND directly under AND, nor OR directly under OR.
1232 : */
1233 1023954 : if (kind != EXPRKIND_RTFUNC)
1234 984574 : expr = eval_const_expressions(root, expr);
1235 :
1236 : /*
1237 : * If it's a qual or havingQual, canonicalize it.
1238 : */
1239 1020280 : if (kind == EXPRKIND_QUAL)
1240 : {
1241 405322 : expr = (Node *) canonicalize_qual((Expr *) expr, false);
1242 :
1243 : #ifdef OPTIMIZER_DEBUG
1244 : printf("After canonicalize_qual()\n");
1245 : pprint(expr);
1246 : #endif
1247 : }
1248 :
1249 : /*
1250 : * Check for ANY ScalarArrayOpExpr with Const arrays and set the
1251 : * hashfuncid of any that might execute more quickly by using hash lookups
1252 : * instead of a linear search.
1253 : */
1254 1020280 : if (kind == EXPRKIND_QUAL || kind == EXPRKIND_TARGET)
1255 : {
1256 948752 : convert_saop_to_hashed_saop(expr);
1257 : }
1258 :
1259 : /* Expand SubLinks to SubPlans */
1260 1020280 : if (root->parse->hasSubLinks)
1261 87924 : expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
1262 :
1263 : /*
1264 : * XXX do not insert anything here unless you have grokked the comments in
1265 : * SS_replace_correlation_vars ...
1266 : */
1267 :
1268 : /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
1269 1020280 : if (root->query_level > 1)
1270 202068 : expr = SS_replace_correlation_vars(root, expr);
1271 :
1272 : /*
1273 : * If it's a qual or havingQual, convert it to implicit-AND format. (We
1274 : * don't want to do this before eval_const_expressions, since the latter
1275 : * would be unable to simplify a top-level AND correctly. Also,
1276 : * SS_process_sublinks expects explicit-AND format.)
1277 : */
1278 1020280 : if (kind == EXPRKIND_QUAL)
1279 405322 : expr = (Node *) make_ands_implicit((Expr *) expr);
1280 :
1281 1020280 : return expr;
1282 : }
1283 :
1284 : /*
1285 : * preprocess_qual_conditions
1286 : * Recursively scan the query's jointree and do subquery_planner's
1287 : * preprocessing work on each qual condition found therein.
1288 : */
1289 : static void
1290 1477514 : preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
1291 : {
1292 1477514 : if (jtnode == NULL)
1293 0 : return;
1294 1477514 : if (IsA(jtnode, RangeTblRef))
1295 : {
1296 : /* nothing to do here */
1297 : }
1298 722980 : else if (IsA(jtnode, FromExpr))
1299 : {
1300 567326 : FromExpr *f = (FromExpr *) jtnode;
1301 : ListCell *l;
1302 :
1303 1177558 : foreach(l, f->fromlist)
1304 610232 : preprocess_qual_conditions(root, lfirst(l));
1305 :
1306 567326 : f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
1307 : }
1308 155654 : else if (IsA(jtnode, JoinExpr))
1309 : {
1310 155654 : JoinExpr *j = (JoinExpr *) jtnode;
1311 :
1312 155654 : preprocess_qual_conditions(root, j->larg);
1313 155654 : preprocess_qual_conditions(root, j->rarg);
1314 :
1315 155654 : j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
1316 : }
1317 : else
1318 0 : elog(ERROR, "unrecognized node type: %d",
1319 : (int) nodeTag(jtnode));
1320 : }
1321 :
1322 : /*
1323 : * preprocess_phv_expression
1324 : * Do preprocessing on a PlaceHolderVar expression that's been pulled up.
1325 : *
1326 : * If a LATERAL subquery references an output of another subquery, and that
1327 : * output must be wrapped in a PlaceHolderVar because of an intermediate outer
1328 : * join, then we'll push the PlaceHolderVar expression down into the subquery
1329 : * and later pull it back up during find_lateral_references, which runs after
1330 : * subquery_planner has preprocessed all the expressions that were in the
1331 : * current query level to start with. So we need to preprocess it then.
1332 : */
1333 : Expr *
1334 72 : preprocess_phv_expression(PlannerInfo *root, Expr *expr)
1335 : {
1336 72 : return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
1337 : }
1338 :
1339 : /*--------------------
1340 : * grouping_planner
1341 : * Perform planning steps related to grouping, aggregation, etc.
1342 : *
1343 : * This function adds all required top-level processing to the scan/join
1344 : * Path(s) produced by query_planner.
1345 : *
1346 : * tuple_fraction is the fraction of tuples we expect will be retrieved.
1347 : * tuple_fraction is interpreted as follows:
1348 : * 0: expect all tuples to be retrieved (normal case)
1349 : * 0 < tuple_fraction < 1: expect the given fraction of tuples available
1350 : * from the plan to be retrieved
1351 : * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1352 : * expected to be retrieved (ie, a LIMIT specification).
1353 : * setops is used for set operation subqueries to provide the subquery with
1354 : * the context in which it's being used so that Paths correctly sorted for the
1355 : * set operation can be generated. NULL when not planning a set operation
1356 : * child, or when a child of a set op that isn't interested in sorted input.
1357 : *
1358 : * Returns nothing; the useful output is in the Paths we attach to the
1359 : * (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
1360 : * root->processed_tlist contains the final processed targetlist.
1361 : *
1362 : * Note that we have not done set_cheapest() on the final rel; it's convenient
1363 : * to leave this to the caller.
1364 : *--------------------
1365 : */
1366 : static void
1367 555974 : grouping_planner(PlannerInfo *root, double tuple_fraction,
1368 : SetOperationStmt *setops)
1369 : {
1370 555974 : Query *parse = root->parse;
1371 555974 : int64 offset_est = 0;
1372 555974 : int64 count_est = 0;
1373 555974 : double limit_tuples = -1.0;
1374 555974 : bool have_postponed_srfs = false;
1375 : PathTarget *final_target;
1376 : List *final_targets;
1377 : List *final_targets_contain_srfs;
1378 : bool final_target_parallel_safe;
1379 : RelOptInfo *current_rel;
1380 : RelOptInfo *final_rel;
1381 : FinalPathExtraData extra;
1382 : ListCell *lc;
1383 :
1384 : /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1385 555974 : if (parse->limitCount || parse->limitOffset)
1386 : {
1387 4750 : tuple_fraction = preprocess_limit(root, tuple_fraction,
1388 : &offset_est, &count_est);
1389 :
1390 : /*
1391 : * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1392 : * estimate the effects of using a bounded sort.
1393 : */
1394 4750 : if (count_est > 0 && offset_est >= 0)
1395 4256 : limit_tuples = (double) count_est + (double) offset_est;
1396 : }
1397 :
1398 : /* Make tuple_fraction accessible to lower-level routines */
1399 555974 : root->tuple_fraction = tuple_fraction;
1400 :
1401 555974 : if (parse->setOperations)
1402 : {
1403 : /*
1404 : * Construct Paths for set operations. The results will not need any
1405 : * work except perhaps a top-level sort and/or LIMIT. Note that any
1406 : * special work for recursive unions is the responsibility of
1407 : * plan_set_operations.
1408 : */
1409 5742 : current_rel = plan_set_operations(root);
1410 :
1411 : /*
1412 : * We should not need to call preprocess_targetlist, since we must be
1413 : * in a SELECT query node. Instead, use the processed_tlist returned
1414 : * by plan_set_operations (since this tells whether it returned any
1415 : * resjunk columns!), and transfer any sort key information from the
1416 : * original tlist.
1417 : */
1418 : Assert(parse->commandType == CMD_SELECT);
1419 :
1420 : /* for safety, copy processed_tlist instead of modifying in-place */
1421 5736 : root->processed_tlist =
1422 5736 : postprocess_setop_tlist(copyObject(root->processed_tlist),
1423 : parse->targetList);
1424 :
1425 : /* Also extract the PathTarget form of the setop result tlist */
1426 5736 : final_target = current_rel->cheapest_total_path->pathtarget;
1427 :
1428 : /* And check whether it's parallel safe */
1429 : final_target_parallel_safe =
1430 5736 : is_parallel_safe(root, (Node *) final_target->exprs);
1431 :
1432 : /* The setop result tlist couldn't contain any SRFs */
1433 : Assert(!parse->hasTargetSRFs);
1434 5736 : final_targets = final_targets_contain_srfs = NIL;
1435 :
1436 : /*
1437 : * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1438 : * checked already, but let's make sure).
1439 : */
1440 5736 : if (parse->rowMarks)
1441 0 : ereport(ERROR,
1442 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1443 : /*------
1444 : translator: %s is a SQL row locking clause such as FOR UPDATE */
1445 : errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1446 : LCS_asString(linitial_node(RowMarkClause,
1447 : parse->rowMarks)->strength))));
1448 :
1449 : /*
1450 : * Calculate pathkeys that represent result ordering requirements
1451 : */
1452 : Assert(parse->distinctClause == NIL);
1453 5736 : root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
1454 : parse->sortClause,
1455 : root->processed_tlist);
1456 : }
1457 : else
1458 : {
1459 : /* No set operations, do regular planning */
1460 : PathTarget *sort_input_target;
1461 : List *sort_input_targets;
1462 : List *sort_input_targets_contain_srfs;
1463 : bool sort_input_target_parallel_safe;
1464 : PathTarget *grouping_target;
1465 : List *grouping_targets;
1466 : List *grouping_targets_contain_srfs;
1467 : bool grouping_target_parallel_safe;
1468 : PathTarget *scanjoin_target;
1469 : List *scanjoin_targets;
1470 : List *scanjoin_targets_contain_srfs;
1471 : bool scanjoin_target_parallel_safe;
1472 : bool scanjoin_target_same_exprs;
1473 : bool have_grouping;
1474 550232 : WindowFuncLists *wflists = NULL;
1475 550232 : List *activeWindows = NIL;
1476 550232 : grouping_sets_data *gset_data = NULL;
1477 : standard_qp_extra qp_extra;
1478 :
1479 : /* A recursive query should always have setOperations */
1480 : Assert(!root->hasRecursion);
1481 :
1482 : /* Preprocess grouping sets and GROUP BY clause, if any */
1483 550232 : if (parse->groupingSets)
1484 : {
1485 854 : gset_data = preprocess_grouping_sets(root);
1486 : }
1487 549378 : else if (parse->groupClause)
1488 : {
1489 : /* Preprocess regular GROUP BY clause, if any */
1490 3612 : root->processed_groupClause = preprocess_groupclause(root, NIL);
1491 : }
1492 :
1493 : /*
1494 : * Preprocess targetlist. Note that much of the remaining planning
1495 : * work will be done with the PathTarget representation of tlists, but
1496 : * we must also maintain the full representation of the final tlist so
1497 : * that we can transfer its decoration (resnames etc) to the topmost
1498 : * tlist of the finished Plan. This is kept in processed_tlist.
1499 : */
1500 550226 : preprocess_targetlist(root);
1501 :
1502 : /*
1503 : * Mark all the aggregates with resolved aggtranstypes, and detect
1504 : * aggregates that are duplicates or can share transition state. We
1505 : * must do this before slicing and dicing the tlist into various
1506 : * pathtargets, else some copies of the Aggref nodes might escape
1507 : * being marked.
1508 : */
1509 550226 : if (parse->hasAggs)
1510 : {
1511 36952 : preprocess_aggrefs(root, (Node *) root->processed_tlist);
1512 36952 : preprocess_aggrefs(root, (Node *) parse->havingQual);
1513 : }
1514 :
1515 : /*
1516 : * Locate any window functions in the tlist. (We don't need to look
1517 : * anywhere else, since expressions used in ORDER BY will be in there
1518 : * too.) Note that they could all have been eliminated by constant
1519 : * folding, in which case we don't need to do any more work.
1520 : */
1521 550226 : if (parse->hasWindowFuncs)
1522 : {
1523 2342 : wflists = find_window_functions((Node *) root->processed_tlist,
1524 2342 : list_length(parse->windowClause));
1525 2342 : if (wflists->numWindowFuncs > 0)
1526 : {
1527 : /*
1528 : * See if any modifications can be made to each WindowClause
1529 : * to allow the executor to execute the WindowFuncs more
1530 : * quickly.
1531 : */
1532 2336 : optimize_window_clauses(root, wflists);
1533 :
1534 2336 : activeWindows = select_active_windows(root, wflists);
1535 : }
1536 : else
1537 6 : parse->hasWindowFuncs = false;
1538 : }
1539 :
1540 : /*
1541 : * Preprocess MIN/MAX aggregates, if any. Note: be careful about
1542 : * adding logic between here and the query_planner() call. Anything
1543 : * that is needed in MIN/MAX-optimizable cases will have to be
1544 : * duplicated in planagg.c.
1545 : */
1546 550226 : if (parse->hasAggs)
1547 36952 : preprocess_minmax_aggregates(root);
1548 :
1549 : /*
1550 : * Figure out whether there's a hard limit on the number of rows that
1551 : * query_planner's result subplan needs to return. Even if we know a
1552 : * hard limit overall, it doesn't apply if the query has any
1553 : * grouping/aggregation operations, or SRFs in the tlist.
1554 : */
1555 550226 : if (parse->groupClause ||
1556 545808 : parse->groupingSets ||
1557 545766 : parse->distinctClause ||
1558 543346 : parse->hasAggs ||
1559 510266 : parse->hasWindowFuncs ||
1560 508068 : parse->hasTargetSRFs ||
1561 499750 : root->hasHavingQual)
1562 50494 : root->limit_tuples = -1.0;
1563 : else
1564 499732 : root->limit_tuples = limit_tuples;
1565 :
1566 : /* Set up data needed by standard_qp_callback */
1567 550226 : qp_extra.activeWindows = activeWindows;
1568 550226 : qp_extra.gset_data = gset_data;
1569 :
1570 : /*
1571 : * If we're a subquery for a set operation, store the SetOperationStmt
1572 : * in qp_extra.
1573 : */
1574 550226 : qp_extra.setop = setops;
1575 :
1576 : /*
1577 : * Generate the best unsorted and presorted paths for the scan/join
1578 : * portion of this Query, ie the processing represented by the
1579 : * FROM/WHERE clauses. (Note there may not be any presorted paths.)
1580 : * We also generate (in standard_qp_callback) pathkey representations
1581 : * of the query's sort clause, distinct clause, etc.
1582 : */
1583 550226 : current_rel = query_planner(root, standard_qp_callback, &qp_extra);
1584 :
1585 : /*
1586 : * Convert the query's result tlist into PathTarget format.
1587 : *
1588 : * Note: this cannot be done before query_planner() has performed
1589 : * appendrel expansion, because that might add resjunk entries to
1590 : * root->processed_tlist. Waiting till afterwards is also helpful
1591 : * because the target width estimates can use per-Var width numbers
1592 : * that were obtained within query_planner().
1593 : */
1594 550178 : final_target = create_pathtarget(root, root->processed_tlist);
1595 : final_target_parallel_safe =
1596 550178 : is_parallel_safe(root, (Node *) final_target->exprs);
1597 :
1598 : /*
1599 : * If ORDER BY was given, consider whether we should use a post-sort
1600 : * projection, and compute the adjusted target for preceding steps if
1601 : * so.
1602 : */
1603 550178 : if (parse->sortClause)
1604 : {
1605 73712 : sort_input_target = make_sort_input_target(root,
1606 : final_target,
1607 : &have_postponed_srfs);
1608 : sort_input_target_parallel_safe =
1609 73712 : is_parallel_safe(root, (Node *) sort_input_target->exprs);
1610 : }
1611 : else
1612 : {
1613 476466 : sort_input_target = final_target;
1614 476466 : sort_input_target_parallel_safe = final_target_parallel_safe;
1615 : }
1616 :
1617 : /*
1618 : * If we have window functions to deal with, the output from any
1619 : * grouping step needs to be what the window functions want;
1620 : * otherwise, it should be sort_input_target.
1621 : */
1622 550178 : if (activeWindows)
1623 : {
1624 2336 : grouping_target = make_window_input_target(root,
1625 : final_target,
1626 : activeWindows);
1627 : grouping_target_parallel_safe =
1628 2336 : is_parallel_safe(root, (Node *) grouping_target->exprs);
1629 : }
1630 : else
1631 : {
1632 547842 : grouping_target = sort_input_target;
1633 547842 : grouping_target_parallel_safe = sort_input_target_parallel_safe;
1634 : }
1635 :
1636 : /*
1637 : * If we have grouping or aggregation to do, the topmost scan/join
1638 : * plan node must emit what the grouping step wants; otherwise, it
1639 : * should emit grouping_target.
1640 : */
1641 545760 : have_grouping = (parse->groupClause || parse->groupingSets ||
1642 1095938 : parse->hasAggs || root->hasHavingQual);
1643 550178 : if (have_grouping)
1644 : {
1645 37596 : scanjoin_target = make_group_input_target(root, final_target);
1646 : scanjoin_target_parallel_safe =
1647 37596 : is_parallel_safe(root, (Node *) scanjoin_target->exprs);
1648 : }
1649 : else
1650 : {
1651 512582 : scanjoin_target = grouping_target;
1652 512582 : scanjoin_target_parallel_safe = grouping_target_parallel_safe;
1653 : }
1654 :
1655 : /*
1656 : * If there are any SRFs in the targetlist, we must separate each of
1657 : * these PathTargets into SRF-computing and SRF-free targets. Replace
1658 : * each of the named targets with a SRF-free version, and remember the
1659 : * list of additional projection steps we need to add afterwards.
1660 : */
1661 550178 : if (parse->hasTargetSRFs)
1662 : {
1663 : /* final_target doesn't recompute any SRFs in sort_input_target */
1664 8766 : split_pathtarget_at_srfs(root, final_target, sort_input_target,
1665 : &final_targets,
1666 : &final_targets_contain_srfs);
1667 8766 : final_target = linitial_node(PathTarget, final_targets);
1668 : Assert(!linitial_int(final_targets_contain_srfs));
1669 : /* likewise for sort_input_target vs. grouping_target */
1670 8766 : split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
1671 : &sort_input_targets,
1672 : &sort_input_targets_contain_srfs);
1673 8766 : sort_input_target = linitial_node(PathTarget, sort_input_targets);
1674 : Assert(!linitial_int(sort_input_targets_contain_srfs));
1675 : /* likewise for grouping_target vs. scanjoin_target */
1676 8766 : split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
1677 : &grouping_targets,
1678 : &grouping_targets_contain_srfs);
1679 8766 : grouping_target = linitial_node(PathTarget, grouping_targets);
1680 : Assert(!linitial_int(grouping_targets_contain_srfs));
1681 : /* scanjoin_target will not have any SRFs precomputed for it */
1682 8766 : split_pathtarget_at_srfs(root, scanjoin_target, NULL,
1683 : &scanjoin_targets,
1684 : &scanjoin_targets_contain_srfs);
1685 8766 : scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
1686 : Assert(!linitial_int(scanjoin_targets_contain_srfs));
1687 : }
1688 : else
1689 : {
1690 : /* initialize lists; for most of these, dummy values are OK */
1691 541412 : final_targets = final_targets_contain_srfs = NIL;
1692 541412 : sort_input_targets = sort_input_targets_contain_srfs = NIL;
1693 541412 : grouping_targets = grouping_targets_contain_srfs = NIL;
1694 541412 : scanjoin_targets = list_make1(scanjoin_target);
1695 541412 : scanjoin_targets_contain_srfs = NIL;
1696 : }
1697 :
1698 : /* Apply scan/join target. */
1699 550178 : scanjoin_target_same_exprs = list_length(scanjoin_targets) == 1
1700 550178 : && equal(scanjoin_target->exprs, current_rel->reltarget->exprs);
1701 550178 : apply_scanjoin_target_to_paths(root, current_rel, scanjoin_targets,
1702 : scanjoin_targets_contain_srfs,
1703 : scanjoin_target_parallel_safe,
1704 : scanjoin_target_same_exprs);
1705 :
1706 : /*
1707 : * Save the various upper-rel PathTargets we just computed into
1708 : * root->upper_targets[]. The core code doesn't use this, but it
1709 : * provides a convenient place for extensions to get at the info. For
1710 : * consistency, we save all the intermediate targets, even though some
1711 : * of the corresponding upperrels might not be needed for this query.
1712 : */
1713 550178 : root->upper_targets[UPPERREL_FINAL] = final_target;
1714 550178 : root->upper_targets[UPPERREL_ORDERED] = final_target;
1715 550178 : root->upper_targets[UPPERREL_DISTINCT] = sort_input_target;
1716 550178 : root->upper_targets[UPPERREL_PARTIAL_DISTINCT] = sort_input_target;
1717 550178 : root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
1718 550178 : root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
1719 :
1720 : /*
1721 : * If we have grouping and/or aggregation, consider ways to implement
1722 : * that. We build a new upperrel representing the output of this
1723 : * phase.
1724 : */
1725 550178 : if (have_grouping)
1726 : {
1727 37596 : current_rel = create_grouping_paths(root,
1728 : current_rel,
1729 : grouping_target,
1730 : grouping_target_parallel_safe,
1731 : gset_data);
1732 : /* Fix things up if grouping_target contains SRFs */
1733 37590 : if (parse->hasTargetSRFs)
1734 406 : adjust_paths_for_srfs(root, current_rel,
1735 : grouping_targets,
1736 : grouping_targets_contain_srfs);
1737 : }
1738 :
1739 : /*
1740 : * If we have window functions, consider ways to implement those. We
1741 : * build a new upperrel representing the output of this phase.
1742 : */
1743 550172 : if (activeWindows)
1744 : {
1745 2336 : current_rel = create_window_paths(root,
1746 : current_rel,
1747 : grouping_target,
1748 : sort_input_target,
1749 : sort_input_target_parallel_safe,
1750 : wflists,
1751 : activeWindows);
1752 : /* Fix things up if sort_input_target contains SRFs */
1753 2336 : if (parse->hasTargetSRFs)
1754 12 : adjust_paths_for_srfs(root, current_rel,
1755 : sort_input_targets,
1756 : sort_input_targets_contain_srfs);
1757 : }
1758 :
1759 : /*
1760 : * If there is a DISTINCT clause, consider ways to implement that. We
1761 : * build a new upperrel representing the output of this phase.
1762 : */
1763 550172 : if (parse->distinctClause)
1764 : {
1765 2454 : current_rel = create_distinct_paths(root,
1766 : current_rel,
1767 : sort_input_target);
1768 : }
1769 : } /* end of if (setOperations) */
1770 :
1771 : /*
1772 : * If ORDER BY was given, consider ways to implement that, and generate a
1773 : * new upperrel containing only paths that emit the correct ordering and
1774 : * project the correct final_target. We can apply the original
1775 : * limit_tuples limit in sort costing here, but only if there are no
1776 : * postponed SRFs.
1777 : */
1778 555908 : if (parse->sortClause)
1779 : {
1780 77466 : current_rel = create_ordered_paths(root,
1781 : current_rel,
1782 : final_target,
1783 : final_target_parallel_safe,
1784 : have_postponed_srfs ? -1.0 :
1785 : limit_tuples);
1786 : /* Fix things up if final_target contains SRFs */
1787 77466 : if (parse->hasTargetSRFs)
1788 196 : adjust_paths_for_srfs(root, current_rel,
1789 : final_targets,
1790 : final_targets_contain_srfs);
1791 : }
1792 :
1793 : /*
1794 : * Now we are prepared to build the final-output upperrel.
1795 : */
1796 555908 : final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1797 :
1798 : /*
1799 : * If the input rel is marked consider_parallel and there's nothing that's
1800 : * not parallel-safe in the LIMIT clause, then the final_rel can be marked
1801 : * consider_parallel as well. Note that if the query has rowMarks or is
1802 : * not a SELECT, consider_parallel will be false for every relation in the
1803 : * query.
1804 : */
1805 746728 : if (current_rel->consider_parallel &&
1806 381616 : is_parallel_safe(root, parse->limitOffset) &&
1807 190796 : is_parallel_safe(root, parse->limitCount))
1808 190790 : final_rel->consider_parallel = true;
1809 :
1810 : /*
1811 : * If the current_rel belongs to a single FDW, so does the final_rel.
1812 : */
1813 555908 : final_rel->serverid = current_rel->serverid;
1814 555908 : final_rel->userid = current_rel->userid;
1815 555908 : final_rel->useridiscurrent = current_rel->useridiscurrent;
1816 555908 : final_rel->fdwroutine = current_rel->fdwroutine;
1817 :
1818 : /*
1819 : * Generate paths for the final_rel. Insert all surviving paths, with
1820 : * LockRows, Limit, and/or ModifyTable steps added if needed.
1821 : */
1822 1129394 : foreach(lc, current_rel->pathlist)
1823 : {
1824 573486 : Path *path = (Path *) lfirst(lc);
1825 :
1826 : /*
1827 : * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
1828 : * (Note: we intentionally test parse->rowMarks not root->rowMarks
1829 : * here. If there are only non-locking rowmarks, they should be
1830 : * handled by the ModifyTable node instead. However, root->rowMarks
1831 : * is what goes into the LockRows node.)
1832 : */
1833 573486 : if (parse->rowMarks)
1834 : {
1835 8394 : path = (Path *) create_lockrows_path(root, final_rel, path,
1836 : root->rowMarks,
1837 : assign_special_exec_param(root));
1838 : }
1839 :
1840 : /*
1841 : * If there is a LIMIT/OFFSET clause, add the LIMIT node.
1842 : */
1843 573486 : if (limit_needed(parse))
1844 : {
1845 5656 : path = (Path *) create_limit_path(root, final_rel, path,
1846 : parse->limitOffset,
1847 : parse->limitCount,
1848 : parse->limitOption,
1849 : offset_est, count_est);
1850 : }
1851 :
1852 : /*
1853 : * If this is an INSERT/UPDATE/DELETE/MERGE, add the ModifyTable node.
1854 : */
1855 573486 : if (parse->commandType != CMD_SELECT)
1856 : {
1857 : Index rootRelation;
1858 91822 : List *resultRelations = NIL;
1859 91822 : List *updateColnosLists = NIL;
1860 91822 : List *withCheckOptionLists = NIL;
1861 91822 : List *returningLists = NIL;
1862 91822 : List *mergeActionLists = NIL;
1863 91822 : List *mergeJoinConditions = NIL;
1864 : List *rowMarks;
1865 :
1866 91822 : if (bms_membership(root->all_result_relids) == BMS_MULTIPLE)
1867 : {
1868 : /* Inherited UPDATE/DELETE/MERGE */
1869 2774 : RelOptInfo *top_result_rel = find_base_rel(root,
1870 : parse->resultRelation);
1871 2774 : int resultRelation = -1;
1872 :
1873 : /* Pass the root result rel forward to the executor. */
1874 2774 : rootRelation = parse->resultRelation;
1875 :
1876 : /* Add only leaf children to ModifyTable. */
1877 8020 : while ((resultRelation = bms_next_member(root->leaf_result_relids,
1878 : resultRelation)) >= 0)
1879 : {
1880 5246 : RelOptInfo *this_result_rel = find_base_rel(root,
1881 : resultRelation);
1882 :
1883 : /*
1884 : * Also exclude any leaf rels that have turned dummy since
1885 : * being added to the list, for example, by being excluded
1886 : * by constraint exclusion.
1887 : */
1888 5246 : if (IS_DUMMY_REL(this_result_rel))
1889 84 : continue;
1890 :
1891 : /* Build per-target-rel lists needed by ModifyTable */
1892 5162 : resultRelations = lappend_int(resultRelations,
1893 : resultRelation);
1894 5162 : if (parse->commandType == CMD_UPDATE)
1895 : {
1896 3624 : List *update_colnos = root->update_colnos;
1897 :
1898 3624 : if (this_result_rel != top_result_rel)
1899 : update_colnos =
1900 3624 : adjust_inherited_attnums_multilevel(root,
1901 : update_colnos,
1902 : this_result_rel->relid,
1903 : top_result_rel->relid);
1904 3624 : updateColnosLists = lappend(updateColnosLists,
1905 : update_colnos);
1906 : }
1907 5162 : if (parse->withCheckOptions)
1908 : {
1909 438 : List *withCheckOptions = parse->withCheckOptions;
1910 :
1911 438 : if (this_result_rel != top_result_rel)
1912 : withCheckOptions = (List *)
1913 438 : adjust_appendrel_attrs_multilevel(root,
1914 : (Node *) withCheckOptions,
1915 : this_result_rel,
1916 : top_result_rel);
1917 438 : withCheckOptionLists = lappend(withCheckOptionLists,
1918 : withCheckOptions);
1919 : }
1920 5162 : if (parse->returningList)
1921 : {
1922 750 : List *returningList = parse->returningList;
1923 :
1924 750 : if (this_result_rel != top_result_rel)
1925 : returningList = (List *)
1926 750 : adjust_appendrel_attrs_multilevel(root,
1927 : (Node *) returningList,
1928 : this_result_rel,
1929 : top_result_rel);
1930 750 : returningLists = lappend(returningLists,
1931 : returningList);
1932 : }
1933 5162 : if (parse->mergeActionList)
1934 : {
1935 : ListCell *l;
1936 438 : List *mergeActionList = NIL;
1937 :
1938 : /*
1939 : * Copy MergeActions and translate stuff that
1940 : * references attribute numbers.
1941 : */
1942 1440 : foreach(l, parse->mergeActionList)
1943 : {
1944 1002 : MergeAction *action = lfirst(l),
1945 1002 : *leaf_action = copyObject(action);
1946 :
1947 1002 : leaf_action->qual =
1948 1002 : adjust_appendrel_attrs_multilevel(root,
1949 : (Node *) action->qual,
1950 : this_result_rel,
1951 : top_result_rel);
1952 1002 : leaf_action->targetList = (List *)
1953 1002 : adjust_appendrel_attrs_multilevel(root,
1954 1002 : (Node *) action->targetList,
1955 : this_result_rel,
1956 : top_result_rel);
1957 1002 : if (leaf_action->commandType == CMD_UPDATE)
1958 592 : leaf_action->updateColnos =
1959 592 : adjust_inherited_attnums_multilevel(root,
1960 : action->updateColnos,
1961 : this_result_rel->relid,
1962 : top_result_rel->relid);
1963 1002 : mergeActionList = lappend(mergeActionList,
1964 : leaf_action);
1965 : }
1966 :
1967 438 : mergeActionLists = lappend(mergeActionLists,
1968 : mergeActionList);
1969 : }
1970 5162 : if (parse->commandType == CMD_MERGE)
1971 : {
1972 438 : Node *mergeJoinCondition = parse->mergeJoinCondition;
1973 :
1974 438 : if (this_result_rel != top_result_rel)
1975 : mergeJoinCondition =
1976 438 : adjust_appendrel_attrs_multilevel(root,
1977 : mergeJoinCondition,
1978 : this_result_rel,
1979 : top_result_rel);
1980 438 : mergeJoinConditions = lappend(mergeJoinConditions,
1981 : mergeJoinCondition);
1982 : }
1983 : }
1984 :
1985 2774 : if (resultRelations == NIL)
1986 : {
1987 : /*
1988 : * We managed to exclude every child rel, so generate a
1989 : * dummy one-relation plan using info for the top target
1990 : * rel (even though that may not be a leaf target).
1991 : * Although it's clear that no data will be updated or
1992 : * deleted, we still need to have a ModifyTable node so
1993 : * that any statement triggers will be executed. (This
1994 : * could be cleaner if we fixed nodeModifyTable.c to allow
1995 : * zero target relations, but that probably wouldn't be a
1996 : * net win.)
1997 : */
1998 30 : resultRelations = list_make1_int(parse->resultRelation);
1999 30 : if (parse->commandType == CMD_UPDATE)
2000 30 : updateColnosLists = list_make1(root->update_colnos);
2001 30 : if (parse->withCheckOptions)
2002 0 : withCheckOptionLists = list_make1(parse->withCheckOptions);
2003 30 : if (parse->returningList)
2004 18 : returningLists = list_make1(parse->returningList);
2005 30 : if (parse->mergeActionList)
2006 0 : mergeActionLists = list_make1(parse->mergeActionList);
2007 30 : if (parse->commandType == CMD_MERGE)
2008 0 : mergeJoinConditions = list_make1(parse->mergeJoinCondition);
2009 : }
2010 : }
2011 : else
2012 : {
2013 : /* Single-relation INSERT/UPDATE/DELETE/MERGE. */
2014 89048 : rootRelation = 0; /* there's no separate root rel */
2015 89048 : resultRelations = list_make1_int(parse->resultRelation);
2016 89048 : if (parse->commandType == CMD_UPDATE)
2017 11668 : updateColnosLists = list_make1(root->update_colnos);
2018 89048 : if (parse->withCheckOptions)
2019 926 : withCheckOptionLists = list_make1(parse->withCheckOptions);
2020 89048 : if (parse->returningList)
2021 2386 : returningLists = list_make1(parse->returningList);
2022 89048 : if (parse->mergeActionList)
2023 1620 : mergeActionLists = list_make1(parse->mergeActionList);
2024 89048 : if (parse->commandType == CMD_MERGE)
2025 1620 : mergeJoinConditions = list_make1(parse->mergeJoinCondition);
2026 : }
2027 :
2028 : /*
2029 : * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
2030 : * will have dealt with fetching non-locked marked rows, else we
2031 : * need to have ModifyTable do that.
2032 : */
2033 91822 : if (parse->rowMarks)
2034 0 : rowMarks = NIL;
2035 : else
2036 91822 : rowMarks = root->rowMarks;
2037 :
2038 : path = (Path *)
2039 91822 : create_modifytable_path(root, final_rel,
2040 : path,
2041 : parse->commandType,
2042 91822 : parse->canSetTag,
2043 91822 : parse->resultRelation,
2044 : rootRelation,
2045 91822 : root->partColsUpdated,
2046 : resultRelations,
2047 : updateColnosLists,
2048 : withCheckOptionLists,
2049 : returningLists,
2050 : rowMarks,
2051 : parse->onConflict,
2052 : mergeActionLists,
2053 : mergeJoinConditions,
2054 : assign_special_exec_param(root));
2055 : }
2056 :
2057 : /* And shove it into final_rel */
2058 573486 : add_path(final_rel, path);
2059 : }
2060 :
2061 : /*
2062 : * Generate partial paths for final_rel, too, if outer query levels might
2063 : * be able to make use of them.
2064 : */
2065 555908 : if (final_rel->consider_parallel && root->query_level > 1 &&
2066 31898 : !limit_needed(parse))
2067 : {
2068 : Assert(!parse->rowMarks && parse->commandType == CMD_SELECT);
2069 31844 : foreach(lc, current_rel->partial_pathlist)
2070 : {
2071 102 : Path *partial_path = (Path *) lfirst(lc);
2072 :
2073 102 : add_partial_path(final_rel, partial_path);
2074 : }
2075 : }
2076 :
2077 555908 : extra.limit_needed = limit_needed(parse);
2078 555908 : extra.limit_tuples = limit_tuples;
2079 555908 : extra.count_est = count_est;
2080 555908 : extra.offset_est = offset_est;
2081 :
2082 : /*
2083 : * If there is an FDW that's responsible for all baserels of the query,
2084 : * let it consider adding ForeignPaths.
2085 : */
2086 555908 : if (final_rel->fdwroutine &&
2087 1248 : final_rel->fdwroutine->GetForeignUpperPaths)
2088 1180 : final_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_FINAL,
2089 : current_rel, final_rel,
2090 : &extra);
2091 :
2092 : /* Let extensions possibly add some more paths */
2093 555908 : if (create_upper_paths_hook)
2094 0 : (*create_upper_paths_hook) (root, UPPERREL_FINAL,
2095 : current_rel, final_rel, &extra);
2096 :
2097 : /* Note: currently, we leave it to callers to do set_cheapest() */
2098 555908 : }
2099 :
2100 : /*
2101 : * Do preprocessing for groupingSets clause and related data. This handles the
2102 : * preliminary steps of expanding the grouping sets, organizing them into lists
2103 : * of rollups, and preparing annotations which will later be filled in with
2104 : * size estimates.
2105 : */
2106 : static grouping_sets_data *
2107 854 : preprocess_grouping_sets(PlannerInfo *root)
2108 : {
2109 854 : Query *parse = root->parse;
2110 : List *sets;
2111 854 : int maxref = 0;
2112 : ListCell *lc_set;
2113 854 : grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
2114 :
2115 854 : parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1);
2116 :
2117 854 : gd->any_hashable = false;
2118 854 : gd->unhashable_refs = NULL;
2119 854 : gd->unsortable_refs = NULL;
2120 854 : gd->unsortable_sets = NIL;
2121 :
2122 : /*
2123 : * We don't currently make any attempt to optimize the groupClause when
2124 : * there are grouping sets, so just duplicate it in processed_groupClause.
2125 : */
2126 854 : root->processed_groupClause = parse->groupClause;
2127 :
2128 854 : if (parse->groupClause)
2129 : {
2130 : ListCell *lc;
2131 :
2132 2576 : foreach(lc, parse->groupClause)
2133 : {
2134 1764 : SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2135 1764 : Index ref = gc->tleSortGroupRef;
2136 :
2137 1764 : if (ref > maxref)
2138 1728 : maxref = ref;
2139 :
2140 1764 : if (!gc->hashable)
2141 30 : gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
2142 :
2143 1764 : if (!OidIsValid(gc->sortop))
2144 42 : gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
2145 : }
2146 : }
2147 :
2148 : /* Allocate workspace array for remapping */
2149 854 : gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
2150 :
2151 : /*
2152 : * If we have any unsortable sets, we must extract them before trying to
2153 : * prepare rollups. Unsortable sets don't go through
2154 : * reorder_grouping_sets, so we must apply the GroupingSetData annotation
2155 : * here.
2156 : */
2157 854 : if (!bms_is_empty(gd->unsortable_refs))
2158 : {
2159 42 : List *sortable_sets = NIL;
2160 : ListCell *lc;
2161 :
2162 126 : foreach(lc, parse->groupingSets)
2163 : {
2164 90 : List *gset = (List *) lfirst(lc);
2165 :
2166 90 : if (bms_overlap_list(gd->unsortable_refs, gset))
2167 : {
2168 48 : GroupingSetData *gs = makeNode(GroupingSetData);
2169 :
2170 48 : gs->set = gset;
2171 48 : gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
2172 :
2173 : /*
2174 : * We must enforce here that an unsortable set is hashable;
2175 : * later code assumes this. Parse analysis only checks that
2176 : * every individual column is either hashable or sortable.
2177 : *
2178 : * Note that passing this test doesn't guarantee we can
2179 : * generate a plan; there might be other showstoppers.
2180 : */
2181 48 : if (bms_overlap_list(gd->unhashable_refs, gset))
2182 6 : ereport(ERROR,
2183 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2184 : errmsg("could not implement GROUP BY"),
2185 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2186 : }
2187 : else
2188 42 : sortable_sets = lappend(sortable_sets, gset);
2189 : }
2190 :
2191 36 : if (sortable_sets)
2192 30 : sets = extract_rollup_sets(sortable_sets);
2193 : else
2194 6 : sets = NIL;
2195 : }
2196 : else
2197 812 : sets = extract_rollup_sets(parse->groupingSets);
2198 :
2199 2202 : foreach(lc_set, sets)
2200 : {
2201 1354 : List *current_sets = (List *) lfirst(lc_set);
2202 1354 : RollupData *rollup = makeNode(RollupData);
2203 : GroupingSetData *gs;
2204 :
2205 : /*
2206 : * Reorder the current list of grouping sets into correct prefix
2207 : * order. If only one aggregation pass is needed, try to make the
2208 : * list match the ORDER BY clause; if more than one pass is needed, we
2209 : * don't bother with that.
2210 : *
2211 : * Note that this reorders the sets from smallest-member-first to
2212 : * largest-member-first, and applies the GroupingSetData annotations,
2213 : * though the data will be filled in later.
2214 : */
2215 1354 : current_sets = reorder_grouping_sets(current_sets,
2216 1354 : (list_length(sets) == 1
2217 : ? parse->sortClause
2218 : : NIL));
2219 :
2220 : /*
2221 : * Get the initial (and therefore largest) grouping set.
2222 : */
2223 1354 : gs = linitial_node(GroupingSetData, current_sets);
2224 :
2225 : /*
2226 : * Order the groupClause appropriately. If the first grouping set is
2227 : * empty, then the groupClause must also be empty; otherwise we have
2228 : * to force the groupClause to match that grouping set's order.
2229 : *
2230 : * (The first grouping set can be empty even though parse->groupClause
2231 : * is not empty only if all non-empty grouping sets are unsortable.
2232 : * The groupClauses for hashed grouping sets are built later on.)
2233 : */
2234 1354 : if (gs->set)
2235 1312 : rollup->groupClause = preprocess_groupclause(root, gs->set);
2236 : else
2237 42 : rollup->groupClause = NIL;
2238 :
2239 : /*
2240 : * Is it hashable? We pretend empty sets are hashable even though we
2241 : * actually force them not to be hashed later. But don't bother if
2242 : * there's nothing but empty sets (since in that case we can't hash
2243 : * anything).
2244 : */
2245 1354 : if (gs->set &&
2246 1312 : !bms_overlap_list(gd->unhashable_refs, gs->set))
2247 : {
2248 1288 : rollup->hashable = true;
2249 1288 : gd->any_hashable = true;
2250 : }
2251 :
2252 : /*
2253 : * Now that we've pinned down an order for the groupClause for this
2254 : * list of grouping sets, we need to remap the entries in the grouping
2255 : * sets from sortgrouprefs to plain indices (0-based) into the
2256 : * groupClause for this collection of grouping sets. We keep the
2257 : * original form for later use, though.
2258 : */
2259 1354 : rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
2260 : current_sets,
2261 : gd->tleref_to_colnum_map);
2262 1354 : rollup->gsets_data = current_sets;
2263 :
2264 1354 : gd->rollups = lappend(gd->rollups, rollup);
2265 : }
2266 :
2267 848 : if (gd->unsortable_sets)
2268 : {
2269 : /*
2270 : * We have not yet pinned down a groupclause for this, but we will
2271 : * need index-based lists for estimation purposes. Construct
2272 : * hash_sets_idx based on the entire original groupclause for now.
2273 : */
2274 36 : gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
2275 : gd->unsortable_sets,
2276 : gd->tleref_to_colnum_map);
2277 36 : gd->any_hashable = true;
2278 : }
2279 :
2280 848 : return gd;
2281 : }
2282 :
2283 : /*
2284 : * Given a groupclause and a list of GroupingSetData, return equivalent sets
2285 : * (without annotation) mapped to indexes into the given groupclause.
2286 : */
2287 : static List *
2288 3972 : remap_to_groupclause_idx(List *groupClause,
2289 : List *gsets,
2290 : int *tleref_to_colnum_map)
2291 : {
2292 3972 : int ref = 0;
2293 3972 : List *result = NIL;
2294 : ListCell *lc;
2295 :
2296 9808 : foreach(lc, groupClause)
2297 : {
2298 5836 : SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2299 :
2300 5836 : tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
2301 : }
2302 :
2303 9246 : foreach(lc, gsets)
2304 : {
2305 5274 : List *set = NIL;
2306 : ListCell *lc2;
2307 5274 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
2308 :
2309 11948 : foreach(lc2, gs->set)
2310 : {
2311 6674 : set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
2312 : }
2313 :
2314 5274 : result = lappend(result, set);
2315 : }
2316 :
2317 3972 : return result;
2318 : }
2319 :
2320 :
2321 : /*
2322 : * preprocess_rowmarks - set up PlanRowMarks if needed
2323 : */
2324 : static void
2325 559648 : preprocess_rowmarks(PlannerInfo *root)
2326 : {
2327 559648 : Query *parse = root->parse;
2328 : Bitmapset *rels;
2329 : List *prowmarks;
2330 : ListCell *l;
2331 : int i;
2332 :
2333 559648 : if (parse->rowMarks)
2334 : {
2335 : /*
2336 : * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2337 : * grouping, since grouping renders a reference to individual tuple
2338 : * CTIDs invalid. This is also checked at parse time, but that's
2339 : * insufficient because of rule substitution, query pullup, etc.
2340 : */
2341 7920 : CheckSelectLocking(parse, linitial_node(RowMarkClause,
2342 : parse->rowMarks)->strength);
2343 : }
2344 : else
2345 : {
2346 : /*
2347 : * We only need rowmarks for UPDATE, DELETE, MERGE, or FOR [KEY]
2348 : * UPDATE/SHARE.
2349 : */
2350 551728 : if (parse->commandType != CMD_UPDATE &&
2351 538064 : parse->commandType != CMD_DELETE &&
2352 533652 : parse->commandType != CMD_MERGE)
2353 531852 : return;
2354 : }
2355 :
2356 : /*
2357 : * We need to have rowmarks for all base relations except the target. We
2358 : * make a bitmapset of all base rels and then remove the items we don't
2359 : * need or have FOR [KEY] UPDATE/SHARE marks for.
2360 : */
2361 27796 : rels = get_relids_in_jointree((Node *) parse->jointree, false, false);
2362 27796 : if (parse->resultRelation)
2363 19876 : rels = bms_del_member(rels, parse->resultRelation);
2364 :
2365 : /*
2366 : * Convert RowMarkClauses to PlanRowMark representation.
2367 : */
2368 27796 : prowmarks = NIL;
2369 35978 : foreach(l, parse->rowMarks)
2370 : {
2371 8182 : RowMarkClause *rc = lfirst_node(RowMarkClause, l);
2372 8182 : RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2373 : PlanRowMark *newrc;
2374 :
2375 : /*
2376 : * Currently, it is syntactically impossible to have FOR UPDATE et al
2377 : * applied to an update/delete target rel. If that ever becomes
2378 : * possible, we should drop the target from the PlanRowMark list.
2379 : */
2380 : Assert(rc->rti != parse->resultRelation);
2381 :
2382 : /*
2383 : * Ignore RowMarkClauses for subqueries; they aren't real tables and
2384 : * can't support true locking. Subqueries that got flattened into the
2385 : * main query should be ignored completely. Any that didn't will get
2386 : * ROW_MARK_COPY items in the next loop.
2387 : */
2388 8182 : if (rte->rtekind != RTE_RELATION)
2389 108 : continue;
2390 :
2391 8074 : rels = bms_del_member(rels, rc->rti);
2392 :
2393 8074 : newrc = makeNode(PlanRowMark);
2394 8074 : newrc->rti = newrc->prti = rc->rti;
2395 8074 : newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2396 8074 : newrc->markType = select_rowmark_type(rte, rc->strength);
2397 8074 : newrc->allMarkTypes = (1 << newrc->markType);
2398 8074 : newrc->strength = rc->strength;
2399 8074 : newrc->waitPolicy = rc->waitPolicy;
2400 8074 : newrc->isParent = false;
2401 :
2402 8074 : prowmarks = lappend(prowmarks, newrc);
2403 : }
2404 :
2405 : /*
2406 : * Now, add rowmarks for any non-target, non-locked base relations.
2407 : */
2408 27796 : i = 0;
2409 66676 : foreach(l, parse->rtable)
2410 : {
2411 38880 : RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
2412 : PlanRowMark *newrc;
2413 :
2414 38880 : i++;
2415 38880 : if (!bms_is_member(i, rels))
2416 35232 : continue;
2417 :
2418 3648 : newrc = makeNode(PlanRowMark);
2419 3648 : newrc->rti = newrc->prti = i;
2420 3648 : newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2421 3648 : newrc->markType = select_rowmark_type(rte, LCS_NONE);
2422 3648 : newrc->allMarkTypes = (1 << newrc->markType);
2423 3648 : newrc->strength = LCS_NONE;
2424 3648 : newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
2425 3648 : newrc->isParent = false;
2426 :
2427 3648 : prowmarks = lappend(prowmarks, newrc);
2428 : }
2429 :
2430 27796 : root->rowMarks = prowmarks;
2431 : }
2432 :
2433 : /*
2434 : * Select RowMarkType to use for a given table
2435 : */
2436 : RowMarkType
2437 14092 : select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
2438 : {
2439 14092 : if (rte->rtekind != RTE_RELATION)
2440 : {
2441 : /* If it's not a table at all, use ROW_MARK_COPY */
2442 1422 : return ROW_MARK_COPY;
2443 : }
2444 12670 : else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2445 : {
2446 : /* Let the FDW select the rowmark type, if it wants to */
2447 200 : FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2448 :
2449 200 : if (fdwroutine->GetForeignRowMarkType != NULL)
2450 0 : return fdwroutine->GetForeignRowMarkType(rte, strength);
2451 : /* Otherwise, use ROW_MARK_COPY by default */
2452 200 : return ROW_MARK_COPY;
2453 : }
2454 : else
2455 : {
2456 : /* Regular table, apply the appropriate lock type */
2457 12470 : switch (strength)
2458 : {
2459 2456 : case LCS_NONE:
2460 :
2461 : /*
2462 : * We don't need a tuple lock, only the ability to re-fetch
2463 : * the row.
2464 : */
2465 2456 : return ROW_MARK_REFERENCE;
2466 : break;
2467 8128 : case LCS_FORKEYSHARE:
2468 8128 : return ROW_MARK_KEYSHARE;
2469 : break;
2470 300 : case LCS_FORSHARE:
2471 300 : return ROW_MARK_SHARE;
2472 : break;
2473 72 : case LCS_FORNOKEYUPDATE:
2474 72 : return ROW_MARK_NOKEYEXCLUSIVE;
2475 : break;
2476 1514 : case LCS_FORUPDATE:
2477 1514 : return ROW_MARK_EXCLUSIVE;
2478 : break;
2479 : }
2480 0 : elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2481 : return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */
2482 : }
2483 : }
2484 :
2485 : /*
2486 : * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2487 : *
2488 : * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2489 : * results back in *count_est and *offset_est. These variables are set to
2490 : * 0 if the corresponding clause is not present, and -1 if it's present
2491 : * but we couldn't estimate the value for it. (The "0" convention is OK
2492 : * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2493 : * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
2494 : * usual practice of never estimating less than one row.) These values will
2495 : * be passed to create_limit_path, which see if you change this code.
2496 : *
2497 : * The return value is the suitably adjusted tuple_fraction to use for
2498 : * planning the query. This adjustment is not overridable, since it reflects
2499 : * plan actions that grouping_planner() will certainly take, not assumptions
2500 : * about context.
2501 : */
2502 : static double
2503 4750 : preprocess_limit(PlannerInfo *root, double tuple_fraction,
2504 : int64 *offset_est, int64 *count_est)
2505 : {
2506 4750 : Query *parse = root->parse;
2507 : Node *est;
2508 : double limit_fraction;
2509 :
2510 : /* Should not be called unless LIMIT or OFFSET */
2511 : Assert(parse->limitCount || parse->limitOffset);
2512 :
2513 : /*
2514 : * Try to obtain the clause values. We use estimate_expression_value
2515 : * primarily because it can sometimes do something useful with Params.
2516 : */
2517 4750 : if (parse->limitCount)
2518 : {
2519 4280 : est = estimate_expression_value(root, parse->limitCount);
2520 4280 : if (est && IsA(est, Const))
2521 : {
2522 4274 : if (((Const *) est)->constisnull)
2523 : {
2524 : /* NULL indicates LIMIT ALL, ie, no limit */
2525 0 : *count_est = 0; /* treat as not present */
2526 : }
2527 : else
2528 : {
2529 4274 : *count_est = DatumGetInt64(((Const *) est)->constvalue);
2530 4274 : if (*count_est <= 0)
2531 150 : *count_est = 1; /* force to at least 1 */
2532 : }
2533 : }
2534 : else
2535 6 : *count_est = -1; /* can't estimate */
2536 : }
2537 : else
2538 470 : *count_est = 0; /* not present */
2539 :
2540 4750 : if (parse->limitOffset)
2541 : {
2542 830 : est = estimate_expression_value(root, parse->limitOffset);
2543 830 : if (est && IsA(est, Const))
2544 : {
2545 806 : if (((Const *) est)->constisnull)
2546 : {
2547 : /* Treat NULL as no offset; the executor will too */
2548 0 : *offset_est = 0; /* treat as not present */
2549 : }
2550 : else
2551 : {
2552 806 : *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2553 806 : if (*offset_est < 0)
2554 0 : *offset_est = 0; /* treat as not present */
2555 : }
2556 : }
2557 : else
2558 24 : *offset_est = -1; /* can't estimate */
2559 : }
2560 : else
2561 3920 : *offset_est = 0; /* not present */
2562 :
2563 4750 : if (*count_est != 0)
2564 : {
2565 : /*
2566 : * A LIMIT clause limits the absolute number of tuples returned.
2567 : * However, if it's not a constant LIMIT then we have to guess; for
2568 : * lack of a better idea, assume 10% of the plan's result is wanted.
2569 : */
2570 4280 : if (*count_est < 0 || *offset_est < 0)
2571 : {
2572 : /* LIMIT or OFFSET is an expression ... punt ... */
2573 24 : limit_fraction = 0.10;
2574 : }
2575 : else
2576 : {
2577 : /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2578 4256 : limit_fraction = (double) *count_est + (double) *offset_est;
2579 : }
2580 :
2581 : /*
2582 : * If we have absolute limits from both caller and LIMIT, use the
2583 : * smaller value; likewise if they are both fractional. If one is
2584 : * fractional and the other absolute, we can't easily determine which
2585 : * is smaller, but we use the heuristic that the absolute will usually
2586 : * be smaller.
2587 : */
2588 4280 : if (tuple_fraction >= 1.0)
2589 : {
2590 6 : if (limit_fraction >= 1.0)
2591 : {
2592 : /* both absolute */
2593 6 : tuple_fraction = Min(tuple_fraction, limit_fraction);
2594 : }
2595 : else
2596 : {
2597 : /* caller absolute, limit fractional; use caller's value */
2598 : }
2599 : }
2600 4274 : else if (tuple_fraction > 0.0)
2601 : {
2602 146 : if (limit_fraction >= 1.0)
2603 : {
2604 : /* caller fractional, limit absolute; use limit */
2605 146 : tuple_fraction = limit_fraction;
2606 : }
2607 : else
2608 : {
2609 : /* both fractional */
2610 0 : tuple_fraction = Min(tuple_fraction, limit_fraction);
2611 : }
2612 : }
2613 : else
2614 : {
2615 : /* no info from caller, just use limit */
2616 4128 : tuple_fraction = limit_fraction;
2617 : }
2618 : }
2619 470 : else if (*offset_est != 0 && tuple_fraction > 0.0)
2620 : {
2621 : /*
2622 : * We have an OFFSET but no LIMIT. This acts entirely differently
2623 : * from the LIMIT case: here, we need to increase rather than decrease
2624 : * the caller's tuple_fraction, because the OFFSET acts to cause more
2625 : * tuples to be fetched instead of fewer. This only matters if we got
2626 : * a tuple_fraction > 0, however.
2627 : *
2628 : * As above, use 10% if OFFSET is present but unestimatable.
2629 : */
2630 12 : if (*offset_est < 0)
2631 0 : limit_fraction = 0.10;
2632 : else
2633 12 : limit_fraction = (double) *offset_est;
2634 :
2635 : /*
2636 : * If we have absolute counts from both caller and OFFSET, add them
2637 : * together; likewise if they are both fractional. If one is
2638 : * fractional and the other absolute, we want to take the larger, and
2639 : * we heuristically assume that's the fractional one.
2640 : */
2641 12 : if (tuple_fraction >= 1.0)
2642 : {
2643 0 : if (limit_fraction >= 1.0)
2644 : {
2645 : /* both absolute, so add them together */
2646 0 : tuple_fraction += limit_fraction;
2647 : }
2648 : else
2649 : {
2650 : /* caller absolute, limit fractional; use limit */
2651 0 : tuple_fraction = limit_fraction;
2652 : }
2653 : }
2654 : else
2655 : {
2656 12 : if (limit_fraction >= 1.0)
2657 : {
2658 : /* caller fractional, limit absolute; use caller's value */
2659 : }
2660 : else
2661 : {
2662 : /* both fractional, so add them together */
2663 0 : tuple_fraction += limit_fraction;
2664 0 : if (tuple_fraction >= 1.0)
2665 0 : tuple_fraction = 0.0; /* assume fetch all */
2666 : }
2667 : }
2668 : }
2669 :
2670 4750 : return tuple_fraction;
2671 : }
2672 :
2673 : /*
2674 : * limit_needed - do we actually need a Limit plan node?
2675 : *
2676 : * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2677 : * a Limit node. This is worth checking for because "OFFSET 0" is a common
2678 : * locution for an optimization fence. (Because other places in the planner
2679 : * merely check whether parse->limitOffset isn't NULL, it will still work as
2680 : * an optimization fence --- we're just suppressing unnecessary run-time
2681 : * overhead.)
2682 : *
2683 : * This might look like it could be merged into preprocess_limit, but there's
2684 : * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
2685 : * in preprocess_limit it's good enough to consider estimated values.
2686 : */
2687 : bool
2688 1179976 : limit_needed(Query *parse)
2689 : {
2690 : Node *node;
2691 :
2692 1179976 : node = parse->limitCount;
2693 1179976 : if (node)
2694 : {
2695 10214 : if (IsA(node, Const))
2696 : {
2697 : /* NULL indicates LIMIT ALL, ie, no limit */
2698 10020 : if (!((Const *) node)->constisnull)
2699 10020 : return true; /* LIMIT with a constant value */
2700 : }
2701 : else
2702 194 : return true; /* non-constant LIMIT */
2703 : }
2704 :
2705 1169762 : node = parse->limitOffset;
2706 1169762 : if (node)
2707 : {
2708 1388 : if (IsA(node, Const))
2709 : {
2710 : /* Treat NULL as no offset; the executor would too */
2711 1098 : if (!((Const *) node)->constisnull)
2712 : {
2713 1098 : int64 offset = DatumGetInt64(((Const *) node)->constvalue);
2714 :
2715 1098 : if (offset != 0)
2716 84 : return true; /* OFFSET with a nonzero value */
2717 : }
2718 : }
2719 : else
2720 290 : return true; /* non-constant OFFSET */
2721 : }
2722 :
2723 1169388 : return false; /* don't need a Limit plan node */
2724 : }
2725 :
2726 : /*
2727 : * preprocess_groupclause - do preparatory work on GROUP BY clause
2728 : *
2729 : * The idea here is to adjust the ordering of the GROUP BY elements
2730 : * (which in itself is semantically insignificant) to match ORDER BY,
2731 : * thereby allowing a single sort operation to both implement the ORDER BY
2732 : * requirement and set up for a Unique step that implements GROUP BY.
2733 : * We also consider partial match between GROUP BY and ORDER BY elements,
2734 : * which could allow to implement ORDER BY using the incremental sort.
2735 : *
2736 : * We also consider other orderings of the GROUP BY elements, which could
2737 : * match the sort ordering of other possible plans (eg an indexscan) and
2738 : * thereby reduce cost. This is implemented during the generation of grouping
2739 : * paths. See get_useful_group_keys_orderings() for details.
2740 : *
2741 : * Note: we need no comparable processing of the distinctClause because
2742 : * the parser already enforced that that matches ORDER BY.
2743 : *
2744 : * Note: we return a fresh List, but its elements are the same
2745 : * SortGroupClauses appearing in parse->groupClause. This is important
2746 : * because later processing may modify the processed_groupClause list.
2747 : *
2748 : * For grouping sets, the order of items is instead forced to agree with that
2749 : * of the grouping set (and items not in the grouping set are skipped). The
2750 : * work of sorting the order of grouping set elements to match the ORDER BY if
2751 : * possible is done elsewhere.
2752 : */
2753 : static List *
2754 7506 : preprocess_groupclause(PlannerInfo *root, List *force)
2755 : {
2756 7506 : Query *parse = root->parse;
2757 7506 : List *new_groupclause = NIL;
2758 : ListCell *sl;
2759 : ListCell *gl;
2760 :
2761 : /* For grouping sets, we need to force the ordering */
2762 7506 : if (force)
2763 : {
2764 9652 : foreach(sl, force)
2765 : {
2766 5758 : Index ref = lfirst_int(sl);
2767 5758 : SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
2768 :
2769 5758 : new_groupclause = lappend(new_groupclause, cl);
2770 : }
2771 :
2772 3894 : return new_groupclause;
2773 : }
2774 :
2775 : /* If no ORDER BY, nothing useful to do here */
2776 3612 : if (parse->sortClause == NIL)
2777 2032 : return list_copy(parse->groupClause);
2778 :
2779 : /*
2780 : * Scan the ORDER BY clause and construct a list of matching GROUP BY
2781 : * items, but only as far as we can make a matching prefix.
2782 : *
2783 : * This code assumes that the sortClause contains no duplicate items.
2784 : */
2785 3066 : foreach(sl, parse->sortClause)
2786 : {
2787 2132 : SortGroupClause *sc = lfirst_node(SortGroupClause, sl);
2788 :
2789 3228 : foreach(gl, parse->groupClause)
2790 : {
2791 2582 : SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
2792 :
2793 2582 : if (equal(gc, sc))
2794 : {
2795 1486 : new_groupclause = lappend(new_groupclause, gc);
2796 1486 : break;
2797 : }
2798 : }
2799 2132 : if (gl == NULL)
2800 646 : break; /* no match, so stop scanning */
2801 : }
2802 :
2803 :
2804 : /* If no match at all, no point in reordering GROUP BY */
2805 1580 : if (new_groupclause == NIL)
2806 298 : return list_copy(parse->groupClause);
2807 :
2808 : /*
2809 : * Add any remaining GROUP BY items to the new list. We don't require a
2810 : * complete match, because even partial match allows ORDER BY to be
2811 : * implemented using incremental sort. Also, give up if there are any
2812 : * non-sortable GROUP BY items, since then there's no hope anyway.
2813 : */
2814 2934 : foreach(gl, parse->groupClause)
2815 : {
2816 1652 : SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
2817 :
2818 1652 : if (list_member_ptr(new_groupclause, gc))
2819 1486 : continue; /* it matched an ORDER BY item */
2820 166 : if (!OidIsValid(gc->sortop)) /* give up, GROUP BY can't be sorted */
2821 0 : return list_copy(parse->groupClause);
2822 166 : new_groupclause = lappend(new_groupclause, gc);
2823 : }
2824 :
2825 : /* Success --- install the rearranged GROUP BY list */
2826 : Assert(list_length(parse->groupClause) == list_length(new_groupclause));
2827 1282 : return new_groupclause;
2828 : }
2829 :
2830 : /*
2831 : * Extract lists of grouping sets that can be implemented using a single
2832 : * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
2833 : *
2834 : * Input must be sorted with smallest sets first. Result has each sublist
2835 : * sorted with smallest sets first.
2836 : *
2837 : * We want to produce the absolute minimum possible number of lists here to
2838 : * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
2839 : * of finding the minimal partition of a partially-ordered set into chains
2840 : * (which is what we need, taking the list of grouping sets as a poset ordered
2841 : * by set inclusion) can be mapped to the problem of finding the maximum
2842 : * cardinality matching on a bipartite graph, which is solvable in polynomial
2843 : * time with a worst case of no worse than O(n^2.5) and usually much
2844 : * better. Since our N is at most 4096, we don't need to consider fallbacks to
2845 : * heuristic or approximate methods. (Planning time for a 12-d cube is under
2846 : * half a second on my modest system even with optimization off and assertions
2847 : * on.)
2848 : */
2849 : static List *
2850 842 : extract_rollup_sets(List *groupingSets)
2851 : {
2852 842 : int num_sets_raw = list_length(groupingSets);
2853 842 : int num_empty = 0;
2854 842 : int num_sets = 0; /* distinct sets */
2855 842 : int num_chains = 0;
2856 842 : List *result = NIL;
2857 : List **results;
2858 : List **orig_sets;
2859 : Bitmapset **set_masks;
2860 : int *chains;
2861 : short **adjacency;
2862 : short *adjacency_buf;
2863 : BipartiteMatchState *state;
2864 : int i;
2865 : int j;
2866 : int j_size;
2867 842 : ListCell *lc1 = list_head(groupingSets);
2868 : ListCell *lc;
2869 :
2870 : /*
2871 : * Start by stripping out empty sets. The algorithm doesn't require this,
2872 : * but the planner currently needs all empty sets to be returned in the
2873 : * first list, so we strip them here and add them back after.
2874 : */
2875 1452 : while (lc1 && lfirst(lc1) == NIL)
2876 : {
2877 610 : ++num_empty;
2878 610 : lc1 = lnext(groupingSets, lc1);
2879 : }
2880 :
2881 : /* bail out now if it turns out that all we had were empty sets. */
2882 842 : if (!lc1)
2883 42 : return list_make1(groupingSets);
2884 :
2885 : /*----------
2886 : * We don't strictly need to remove duplicate sets here, but if we don't,
2887 : * they tend to become scattered through the result, which is a bit
2888 : * confusing (and irritating if we ever decide to optimize them out).
2889 : * So we remove them here and add them back after.
2890 : *
2891 : * For each non-duplicate set, we fill in the following:
2892 : *
2893 : * orig_sets[i] = list of the original set lists
2894 : * set_masks[i] = bitmapset for testing inclusion
2895 : * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
2896 : *
2897 : * chains[i] will be the result group this set is assigned to.
2898 : *
2899 : * We index all of these from 1 rather than 0 because it is convenient
2900 : * to leave 0 free for the NIL node in the graph algorithm.
2901 : *----------
2902 : */
2903 800 : orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
2904 800 : set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
2905 800 : adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
2906 800 : adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
2907 :
2908 800 : j_size = 0;
2909 800 : j = 0;
2910 800 : i = 1;
2911 :
2912 2840 : for_each_cell(lc, groupingSets, lc1)
2913 : {
2914 2040 : List *candidate = (List *) lfirst(lc);
2915 2040 : Bitmapset *candidate_set = NULL;
2916 : ListCell *lc2;
2917 2040 : int dup_of = 0;
2918 :
2919 4974 : foreach(lc2, candidate)
2920 : {
2921 2934 : candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
2922 : }
2923 :
2924 : /* we can only be a dup if we're the same length as a previous set */
2925 2040 : if (j_size == list_length(candidate))
2926 : {
2927 : int k;
2928 :
2929 1712 : for (k = j; k < i; ++k)
2930 : {
2931 1104 : if (bms_equal(set_masks[k], candidate_set))
2932 : {
2933 158 : dup_of = k;
2934 158 : break;
2935 : }
2936 : }
2937 : }
2938 1274 : else if (j_size < list_length(candidate))
2939 : {
2940 1274 : j_size = list_length(candidate);
2941 1274 : j = i;
2942 : }
2943 :
2944 2040 : if (dup_of > 0)
2945 : {
2946 158 : orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
2947 158 : bms_free(candidate_set);
2948 : }
2949 : else
2950 : {
2951 : int k;
2952 1882 : int n_adj = 0;
2953 :
2954 1882 : orig_sets[i] = list_make1(candidate);
2955 1882 : set_masks[i] = candidate_set;
2956 :
2957 : /* fill in adjacency list; no need to compare equal-size sets */
2958 :
2959 3154 : for (k = j - 1; k > 0; --k)
2960 : {
2961 1272 : if (bms_is_subset(set_masks[k], candidate_set))
2962 1110 : adjacency_buf[++n_adj] = k;
2963 : }
2964 :
2965 1882 : if (n_adj > 0)
2966 : {
2967 598 : adjacency_buf[0] = n_adj;
2968 598 : adjacency[i] = palloc((n_adj + 1) * sizeof(short));
2969 598 : memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
2970 : }
2971 : else
2972 1284 : adjacency[i] = NULL;
2973 :
2974 1882 : ++i;
2975 : }
2976 : }
2977 :
2978 800 : num_sets = i - 1;
2979 :
2980 : /*
2981 : * Apply the graph matching algorithm to do the work.
2982 : */
2983 800 : state = BipartiteMatch(num_sets, num_sets, adjacency);
2984 :
2985 : /*
2986 : * Now, the state->pair* fields have the info we need to assign sets to
2987 : * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
2988 : * pair_vu[v] = u (both will be true, but we check both so that we can do
2989 : * it in one pass)
2990 : */
2991 800 : chains = palloc0((num_sets + 1) * sizeof(int));
2992 :
2993 2682 : for (i = 1; i <= num_sets; ++i)
2994 : {
2995 1882 : int u = state->pair_vu[i];
2996 1882 : int v = state->pair_uv[i];
2997 :
2998 1882 : if (u > 0 && u < i)
2999 0 : chains[i] = chains[u];
3000 1882 : else if (v > 0 && v < i)
3001 570 : chains[i] = chains[v];
3002 : else
3003 1312 : chains[i] = ++num_chains;
3004 : }
3005 :
3006 : /* build result lists. */
3007 800 : results = palloc0((num_chains + 1) * sizeof(List *));
3008 :
3009 2682 : for (i = 1; i <= num_sets; ++i)
3010 : {
3011 1882 : int c = chains[i];
3012 :
3013 : Assert(c > 0);
3014 :
3015 1882 : results[c] = list_concat(results[c], orig_sets[i]);
3016 : }
3017 :
3018 : /* push any empty sets back on the first list. */
3019 1320 : while (num_empty-- > 0)
3020 520 : results[1] = lcons(NIL, results[1]);
3021 :
3022 : /* make result list */
3023 2112 : for (i = 1; i <= num_chains; ++i)
3024 1312 : result = lappend(result, results[i]);
3025 :
3026 : /*
3027 : * Free all the things.
3028 : *
3029 : * (This is over-fussy for small sets but for large sets we could have
3030 : * tied up a nontrivial amount of memory.)
3031 : */
3032 800 : BipartiteMatchFree(state);
3033 800 : pfree(results);
3034 800 : pfree(chains);
3035 2682 : for (i = 1; i <= num_sets; ++i)
3036 1882 : if (adjacency[i])
3037 598 : pfree(adjacency[i]);
3038 800 : pfree(adjacency);
3039 800 : pfree(adjacency_buf);
3040 800 : pfree(orig_sets);
3041 2682 : for (i = 1; i <= num_sets; ++i)
3042 1882 : bms_free(set_masks[i]);
3043 800 : pfree(set_masks);
3044 :
3045 800 : return result;
3046 : }
3047 :
3048 : /*
3049 : * Reorder the elements of a list of grouping sets such that they have correct
3050 : * prefix relationships. Also inserts the GroupingSetData annotations.
3051 : *
3052 : * The input must be ordered with smallest sets first; the result is returned
3053 : * with largest sets first. Note that the result shares no list substructure
3054 : * with the input, so it's safe for the caller to modify it later.
3055 : *
3056 : * If we're passed in a sortclause, we follow its order of columns to the
3057 : * extent possible, to minimize the chance that we add unnecessary sorts.
3058 : * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3059 : * gets implemented in one pass.)
3060 : */
3061 : static List *
3062 1354 : reorder_grouping_sets(List *groupingSets, List *sortclause)
3063 : {
3064 : ListCell *lc;
3065 1354 : List *previous = NIL;
3066 1354 : List *result = NIL;
3067 :
3068 4004 : foreach(lc, groupingSets)
3069 : {
3070 2650 : List *candidate = (List *) lfirst(lc);
3071 2650 : List *new_elems = list_difference_int(candidate, previous);
3072 2650 : GroupingSetData *gs = makeNode(GroupingSetData);
3073 :
3074 2814 : while (list_length(sortclause) > list_length(previous) &&
3075 : new_elems != NIL)
3076 : {
3077 272 : SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3078 272 : int ref = sc->tleSortGroupRef;
3079 :
3080 272 : if (list_member_int(new_elems, ref))
3081 : {
3082 164 : previous = lappend_int(previous, ref);
3083 164 : new_elems = list_delete_int(new_elems, ref);
3084 : }
3085 : else
3086 : {
3087 : /* diverged from the sortclause; give up on it */
3088 108 : sortclause = NIL;
3089 108 : break;
3090 : }
3091 : }
3092 :
3093 2650 : previous = list_concat(previous, new_elems);
3094 :
3095 2650 : gs->set = list_copy(previous);
3096 2650 : result = lcons(gs, result);
3097 : }
3098 :
3099 1354 : list_free(previous);
3100 :
3101 1354 : return result;
3102 : }
3103 :
3104 : /*
3105 : * has_volatile_pathkey
3106 : * Returns true if any PathKey in 'keys' has an EquivalenceClass
3107 : * containing a volatile function. Otherwise returns false.
3108 : */
3109 : static bool
3110 2302 : has_volatile_pathkey(List *keys)
3111 : {
3112 : ListCell *lc;
3113 :
3114 4760 : foreach(lc, keys)
3115 : {
3116 2476 : PathKey *pathkey = lfirst_node(PathKey, lc);
3117 :
3118 2476 : if (pathkey->pk_eclass->ec_has_volatile)
3119 18 : return true;
3120 : }
3121 :
3122 2284 : return false;
3123 : }
3124 :
3125 : /*
3126 : * adjust_group_pathkeys_for_groupagg
3127 : * Add pathkeys to root->group_pathkeys to reflect the best set of
3128 : * pre-ordered input for ordered aggregates.
3129 : *
3130 : * We define "best" as the pathkeys that suit the largest number of
3131 : * aggregate functions. We find these by looking at the first ORDER BY /
3132 : * DISTINCT aggregate and take the pathkeys for that before searching for
3133 : * other aggregates that require the same or a more strict variation of the
3134 : * same pathkeys. We then repeat that process for any remaining aggregates
3135 : * with different pathkeys and if we find another set of pathkeys that suits a
3136 : * larger number of aggregates then we select those pathkeys instead.
3137 : *
3138 : * When the best pathkeys are found we also mark each Aggref that can use
3139 : * those pathkeys as aggpresorted = true.
3140 : *
3141 : * Note: When an aggregate function's ORDER BY / DISTINCT clause contains any
3142 : * volatile functions, we never make use of these pathkeys. We want to ensure
3143 : * that sorts using volatile functions are done independently in each Aggref
3144 : * rather than once at the query level. If we were to allow this then Aggrefs
3145 : * with compatible sort orders would all transition their rows in the same
3146 : * order if those pathkeys were deemed to be the best pathkeys to sort on.
3147 : * Whereas, if some other set of Aggref's pathkeys happened to be deemed
3148 : * better pathkeys to sort on, then the volatile function Aggrefs would be
3149 : * left to perform their sorts individually. To avoid this inconsistent
3150 : * behavior which could make Aggref results depend on what other Aggrefs the
3151 : * query contains, we always force Aggrefs with volatile functions to perform
3152 : * their own sorts.
3153 : */
3154 : static void
3155 1882 : adjust_group_pathkeys_for_groupagg(PlannerInfo *root)
3156 : {
3157 1882 : List *grouppathkeys = root->group_pathkeys;
3158 : List *bestpathkeys;
3159 : Bitmapset *bestaggs;
3160 : Bitmapset *unprocessed_aggs;
3161 : ListCell *lc;
3162 : int i;
3163 :
3164 : /* Shouldn't be here if there are grouping sets */
3165 : Assert(root->parse->groupingSets == NIL);
3166 : /* Shouldn't be here unless there are some ordered aggregates */
3167 : Assert(root->numOrderedAggs > 0);
3168 :
3169 : /* Do nothing if disabled */
3170 1882 : if (!enable_presorted_aggregate)
3171 6 : return;
3172 :
3173 : /*
3174 : * Make a first pass over all AggInfos to collect a Bitmapset containing
3175 : * the indexes of all AggInfos to be processed below.
3176 : */
3177 1876 : unprocessed_aggs = NULL;
3178 4436 : foreach(lc, root->agginfos)
3179 : {
3180 2560 : AggInfo *agginfo = lfirst_node(AggInfo, lc);
3181 2560 : Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3182 :
3183 2560 : if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
3184 264 : continue;
3185 :
3186 : /* only add aggregates with a DISTINCT or ORDER BY */
3187 2296 : if (aggref->aggdistinct != NIL || aggref->aggorder != NIL)
3188 1996 : unprocessed_aggs = bms_add_member(unprocessed_aggs,
3189 : foreach_current_index(lc));
3190 : }
3191 :
3192 : /*
3193 : * Now process all the unprocessed_aggs to find the best set of pathkeys
3194 : * for the given set of aggregates.
3195 : *
3196 : * On the first outer loop here 'bestaggs' will be empty. We'll populate
3197 : * this during the first loop using the pathkeys for the very first
3198 : * AggInfo then taking any stronger pathkeys from any other AggInfos with
3199 : * a more strict set of compatible pathkeys. Once the outer loop is
3200 : * complete, we mark off all the aggregates with compatible pathkeys then
3201 : * remove those from the unprocessed_aggs and repeat the process to try to
3202 : * find another set of pathkeys that are suitable for a larger number of
3203 : * aggregates. The outer loop will stop when there are not enough
3204 : * unprocessed aggregates for it to be possible to find a set of pathkeys
3205 : * to suit a larger number of aggregates.
3206 : */
3207 1876 : bestpathkeys = NIL;
3208 1876 : bestaggs = NULL;
3209 3704 : while (bms_num_members(unprocessed_aggs) > bms_num_members(bestaggs))
3210 : {
3211 1828 : Bitmapset *aggindexes = NULL;
3212 1828 : List *currpathkeys = NIL;
3213 :
3214 1828 : i = -1;
3215 5958 : while ((i = bms_next_member(unprocessed_aggs, i)) >= 0)
3216 : {
3217 2302 : AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3218 2302 : Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3219 : List *sortlist;
3220 : List *pathkeys;
3221 :
3222 2302 : if (aggref->aggdistinct != NIL)
3223 718 : sortlist = aggref->aggdistinct;
3224 : else
3225 1584 : sortlist = aggref->aggorder;
3226 :
3227 2302 : pathkeys = make_pathkeys_for_sortclauses(root, sortlist,
3228 : aggref->args);
3229 :
3230 : /*
3231 : * Ignore Aggrefs which have volatile functions in their ORDER BY
3232 : * or DISTINCT clause.
3233 : */
3234 2302 : if (has_volatile_pathkey(pathkeys))
3235 : {
3236 18 : unprocessed_aggs = bms_del_member(unprocessed_aggs, i);
3237 18 : continue;
3238 : }
3239 :
3240 : /*
3241 : * When not set yet, take the pathkeys from the first unprocessed
3242 : * aggregate.
3243 : */
3244 2284 : if (currpathkeys == NIL)
3245 : {
3246 1822 : currpathkeys = pathkeys;
3247 :
3248 : /* include the GROUP BY pathkeys, if they exist */
3249 1822 : if (grouppathkeys != NIL)
3250 270 : currpathkeys = append_pathkeys(list_copy(grouppathkeys),
3251 : currpathkeys);
3252 :
3253 : /* record that we found pathkeys for this aggregate */
3254 1822 : aggindexes = bms_add_member(aggindexes, i);
3255 : }
3256 : else
3257 : {
3258 : /* now look for a stronger set of matching pathkeys */
3259 :
3260 : /* include the GROUP BY pathkeys, if they exist */
3261 462 : if (grouppathkeys != NIL)
3262 288 : pathkeys = append_pathkeys(list_copy(grouppathkeys),
3263 : pathkeys);
3264 :
3265 : /* are 'pathkeys' compatible or better than 'currpathkeys'? */
3266 462 : switch (compare_pathkeys(currpathkeys, pathkeys))
3267 : {
3268 12 : case PATHKEYS_BETTER2:
3269 : /* 'pathkeys' are stronger, use these ones instead */
3270 12 : currpathkeys = pathkeys;
3271 : /* FALLTHROUGH */
3272 :
3273 72 : case PATHKEYS_BETTER1:
3274 : /* 'pathkeys' are less strict */
3275 : /* FALLTHROUGH */
3276 :
3277 : case PATHKEYS_EQUAL:
3278 : /* mark this aggregate as covered by 'currpathkeys' */
3279 72 : aggindexes = bms_add_member(aggindexes, i);
3280 72 : break;
3281 :
3282 390 : case PATHKEYS_DIFFERENT:
3283 390 : break;
3284 : }
3285 4130 : }
3286 : }
3287 :
3288 : /* remove the aggregates that we've just processed */
3289 1828 : unprocessed_aggs = bms_del_members(unprocessed_aggs, aggindexes);
3290 :
3291 : /*
3292 : * If this pass included more aggregates than the previous best then
3293 : * use these ones as the best set.
3294 : */
3295 1828 : if (bms_num_members(aggindexes) > bms_num_members(bestaggs))
3296 : {
3297 1720 : bestaggs = aggindexes;
3298 1720 : bestpathkeys = currpathkeys;
3299 : }
3300 : }
3301 :
3302 : /*
3303 : * If we found any ordered aggregates, update root->group_pathkeys to add
3304 : * the best set of aggregate pathkeys. Note that bestpathkeys includes
3305 : * the original GROUP BY pathkeys already.
3306 : */
3307 1876 : if (bestpathkeys != NIL)
3308 1672 : root->group_pathkeys = bestpathkeys;
3309 :
3310 : /*
3311 : * Now that we've found the best set of aggregates we can set the
3312 : * presorted flag to indicate to the executor that it needn't bother
3313 : * performing a sort for these Aggrefs. We're able to do this now as
3314 : * there's no chance of a Hash Aggregate plan as create_grouping_paths
3315 : * will not mark the GROUP BY as GROUPING_CAN_USE_HASH due to the presence
3316 : * of ordered aggregates.
3317 : */
3318 1876 : i = -1;
3319 3638 : while ((i = bms_next_member(bestaggs, i)) >= 0)
3320 : {
3321 1762 : AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3322 :
3323 3542 : foreach(lc, agginfo->aggrefs)
3324 : {
3325 1780 : Aggref *aggref = lfirst_node(Aggref, lc);
3326 :
3327 1780 : aggref->aggpresorted = true;
3328 : }
3329 : }
3330 : }
3331 :
3332 : /*
3333 : * Compute query_pathkeys and other pathkeys during plan generation
3334 : */
3335 : static void
3336 550208 : standard_qp_callback(PlannerInfo *root, void *extra)
3337 : {
3338 550208 : Query *parse = root->parse;
3339 550208 : standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3340 550208 : List *tlist = root->processed_tlist;
3341 550208 : List *activeWindows = qp_extra->activeWindows;
3342 :
3343 : /*
3344 : * Calculate pathkeys that represent grouping/ordering and/or ordered
3345 : * aggregate requirements.
3346 : */
3347 550208 : if (qp_extra->gset_data)
3348 : {
3349 : /*
3350 : * With grouping sets, just use the first RollupData's groupClause. We
3351 : * don't make any effort to optimize grouping clauses when there are
3352 : * grouping sets, nor can we combine aggregate ordering keys with
3353 : * grouping.
3354 : */
3355 848 : List *rollups = qp_extra->gset_data->rollups;
3356 848 : List *groupClause = (rollups ? linitial_node(RollupData, rollups)->groupClause : NIL);
3357 :
3358 848 : if (grouping_is_sortable(groupClause))
3359 : {
3360 : bool sortable;
3361 :
3362 : /*
3363 : * The groupClause is logically below the grouping step. So if
3364 : * there is an RTE entry for the grouping step, we need to remove
3365 : * its RT index from the sort expressions before we make PathKeys
3366 : * for them.
3367 : */
3368 848 : root->group_pathkeys =
3369 848 : make_pathkeys_for_sortclauses_extended(root,
3370 : &groupClause,
3371 : tlist,
3372 : false,
3373 848 : parse->hasGroupRTE,
3374 : &sortable,
3375 : false);
3376 : Assert(sortable);
3377 848 : root->num_groupby_pathkeys = list_length(root->group_pathkeys);
3378 : }
3379 : else
3380 : {
3381 0 : root->group_pathkeys = NIL;
3382 0 : root->num_groupby_pathkeys = 0;
3383 : }
3384 : }
3385 549360 : else if (parse->groupClause || root->numOrderedAggs > 0)
3386 5256 : {
3387 : /*
3388 : * With a plain GROUP BY list, we can remove any grouping items that
3389 : * are proven redundant by EquivalenceClass processing. For example,
3390 : * we can remove y given "WHERE x = y GROUP BY x, y". These aren't
3391 : * especially common cases, but they're nearly free to detect. Note
3392 : * that we remove redundant items from processed_groupClause but not
3393 : * the original parse->groupClause.
3394 : */
3395 : bool sortable;
3396 :
3397 : /*
3398 : * Convert group clauses into pathkeys. Set the ec_sortref field of
3399 : * EquivalenceClass'es if it's not set yet.
3400 : */
3401 5256 : root->group_pathkeys =
3402 5256 : make_pathkeys_for_sortclauses_extended(root,
3403 : &root->processed_groupClause,
3404 : tlist,
3405 : true,
3406 : false,
3407 : &sortable,
3408 : true);
3409 5256 : if (!sortable)
3410 : {
3411 : /* Can't sort; no point in considering aggregate ordering either */
3412 0 : root->group_pathkeys = NIL;
3413 0 : root->num_groupby_pathkeys = 0;
3414 : }
3415 : else
3416 : {
3417 5256 : root->num_groupby_pathkeys = list_length(root->group_pathkeys);
3418 : /* If we have ordered aggs, consider adding onto group_pathkeys */
3419 5256 : if (root->numOrderedAggs > 0)
3420 1882 : adjust_group_pathkeys_for_groupagg(root);
3421 : }
3422 : }
3423 : else
3424 : {
3425 544104 : root->group_pathkeys = NIL;
3426 544104 : root->num_groupby_pathkeys = 0;
3427 : }
3428 :
3429 : /* We consider only the first (bottom) window in pathkeys logic */
3430 550208 : if (activeWindows != NIL)
3431 : {
3432 2336 : WindowClause *wc = linitial_node(WindowClause, activeWindows);
3433 :
3434 2336 : root->window_pathkeys = make_pathkeys_for_window(root,
3435 : wc,
3436 : tlist);
3437 : }
3438 : else
3439 547872 : root->window_pathkeys = NIL;
3440 :
3441 : /*
3442 : * As with GROUP BY, we can discard any DISTINCT items that are proven
3443 : * redundant by EquivalenceClass processing. The non-redundant list is
3444 : * kept in root->processed_distinctClause, leaving the original
3445 : * parse->distinctClause alone.
3446 : */
3447 550208 : if (parse->distinctClause)
3448 : {
3449 : bool sortable;
3450 :
3451 : /* Make a copy since pathkey processing can modify the list */
3452 2454 : root->processed_distinctClause = list_copy(parse->distinctClause);
3453 2454 : root->distinct_pathkeys =
3454 2454 : make_pathkeys_for_sortclauses_extended(root,
3455 : &root->processed_distinctClause,
3456 : tlist,
3457 : true,
3458 : false,
3459 : &sortable,
3460 : false);
3461 2454 : if (!sortable)
3462 6 : root->distinct_pathkeys = NIL;
3463 : }
3464 : else
3465 547754 : root->distinct_pathkeys = NIL;
3466 :
3467 550208 : root->sort_pathkeys =
3468 550208 : make_pathkeys_for_sortclauses(root,
3469 : parse->sortClause,
3470 : tlist);
3471 :
3472 : /* setting setop_pathkeys might be useful to the union planner */
3473 550208 : if (qp_extra->setop != NULL)
3474 : {
3475 : List *groupClauses;
3476 : bool sortable;
3477 :
3478 12158 : groupClauses = generate_setop_child_grouplist(qp_extra->setop, tlist);
3479 :
3480 12158 : root->setop_pathkeys =
3481 12158 : make_pathkeys_for_sortclauses_extended(root,
3482 : &groupClauses,
3483 : tlist,
3484 : false,
3485 : false,
3486 : &sortable,
3487 : false);
3488 12158 : if (!sortable)
3489 184 : root->setop_pathkeys = NIL;
3490 : }
3491 : else
3492 538050 : root->setop_pathkeys = NIL;
3493 :
3494 : /*
3495 : * Figure out whether we want a sorted result from query_planner.
3496 : *
3497 : * If we have a sortable GROUP BY clause, then we want a result sorted
3498 : * properly for grouping. Otherwise, if we have window functions to
3499 : * evaluate, we try to sort for the first window. Otherwise, if there's a
3500 : * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3501 : * we try to produce output that's sufficiently well sorted for the
3502 : * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3503 : * by the ORDER BY clause. Otherwise, if we're a subquery being planned
3504 : * for a set operation which can benefit from presorted results and have a
3505 : * sortable targetlist, we want to sort by the target list.
3506 : *
3507 : * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3508 : * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3509 : * that might just leave us failing to exploit an available sort order at
3510 : * all. Needs more thought. The choice for DISTINCT versus ORDER BY is
3511 : * much easier, since we know that the parser ensured that one is a
3512 : * superset of the other.
3513 : */
3514 550208 : if (root->group_pathkeys)
3515 5778 : root->query_pathkeys = root->group_pathkeys;
3516 544430 : else if (root->window_pathkeys)
3517 1994 : root->query_pathkeys = root->window_pathkeys;
3518 1084872 : else if (list_length(root->distinct_pathkeys) >
3519 542436 : list_length(root->sort_pathkeys))
3520 2004 : root->query_pathkeys = root->distinct_pathkeys;
3521 540432 : else if (root->sort_pathkeys)
3522 71300 : root->query_pathkeys = root->sort_pathkeys;
3523 469132 : else if (root->setop_pathkeys != NIL)
3524 10766 : root->query_pathkeys = root->setop_pathkeys;
3525 : else
3526 458366 : root->query_pathkeys = NIL;
3527 550208 : }
3528 :
3529 : /*
3530 : * Estimate number of groups produced by grouping clauses (1 if not grouping)
3531 : *
3532 : * path_rows: number of output rows from scan/join step
3533 : * gd: grouping sets data including list of grouping sets and their clauses
3534 : * target_list: target list containing group clause references
3535 : *
3536 : * If doing grouping sets, we also annotate the gsets data with the estimates
3537 : * for each set and each individual rollup list, with a view to later
3538 : * determining whether some combination of them could be hashed instead.
3539 : */
3540 : static double
3541 40854 : get_number_of_groups(PlannerInfo *root,
3542 : double path_rows,
3543 : grouping_sets_data *gd,
3544 : List *target_list)
3545 : {
3546 40854 : Query *parse = root->parse;
3547 : double dNumGroups;
3548 :
3549 40854 : if (parse->groupClause)
3550 : {
3551 : List *groupExprs;
3552 :
3553 6898 : if (parse->groupingSets)
3554 : {
3555 : /* Add up the estimates for each grouping set */
3556 : ListCell *lc;
3557 :
3558 : Assert(gd); /* keep Coverity happy */
3559 :
3560 806 : dNumGroups = 0;
3561 :
3562 2118 : foreach(lc, gd->rollups)
3563 : {
3564 1312 : RollupData *rollup = lfirst_node(RollupData, lc);
3565 : ListCell *lc2;
3566 : ListCell *lc3;
3567 :
3568 1312 : groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
3569 : target_list);
3570 :
3571 1312 : rollup->numGroups = 0.0;
3572 :
3573 3872 : forboth(lc2, rollup->gsets, lc3, rollup->gsets_data)
3574 : {
3575 2560 : List *gset = (List *) lfirst(lc2);
3576 2560 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc3);
3577 2560 : double numGroups = estimate_num_groups(root,
3578 : groupExprs,
3579 : path_rows,
3580 : &gset,
3581 : NULL);
3582 :
3583 2560 : gs->numGroups = numGroups;
3584 2560 : rollup->numGroups += numGroups;
3585 : }
3586 :
3587 1312 : dNumGroups += rollup->numGroups;
3588 : }
3589 :
3590 806 : if (gd->hash_sets_idx)
3591 : {
3592 : ListCell *lc2;
3593 :
3594 36 : gd->dNumHashGroups = 0;
3595 :
3596 36 : groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3597 : target_list);
3598 :
3599 78 : forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
3600 : {
3601 42 : List *gset = (List *) lfirst(lc);
3602 42 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc2);
3603 42 : double numGroups = estimate_num_groups(root,
3604 : groupExprs,
3605 : path_rows,
3606 : &gset,
3607 : NULL);
3608 :
3609 42 : gs->numGroups = numGroups;
3610 42 : gd->dNumHashGroups += numGroups;
3611 : }
3612 :
3613 36 : dNumGroups += gd->dNumHashGroups;
3614 : }
3615 : }
3616 : else
3617 : {
3618 : /* Plain GROUP BY -- estimate based on optimized groupClause */
3619 6092 : groupExprs = get_sortgrouplist_exprs(root->processed_groupClause,
3620 : target_list);
3621 :
3622 6092 : dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3623 : NULL, NULL);
3624 : }
3625 : }
3626 33956 : else if (parse->groupingSets)
3627 : {
3628 : /* Empty grouping sets ... one result row for each one */
3629 42 : dNumGroups = list_length(parse->groupingSets);
3630 : }
3631 33914 : else if (parse->hasAggs || root->hasHavingQual)
3632 : {
3633 : /* Plain aggregation, one result row */
3634 33914 : dNumGroups = 1;
3635 : }
3636 : else
3637 : {
3638 : /* Not grouping */
3639 0 : dNumGroups = 1;
3640 : }
3641 :
3642 40854 : return dNumGroups;
3643 : }
3644 :
3645 : /*
3646 : * create_grouping_paths
3647 : *
3648 : * Build a new upperrel containing Paths for grouping and/or aggregation.
3649 : * Along the way, we also build an upperrel for Paths which are partially
3650 : * grouped and/or aggregated. A partially grouped and/or aggregated path
3651 : * needs a FinalizeAggregate node to complete the aggregation. Currently,
3652 : * the only partially grouped paths we build are also partial paths; that
3653 : * is, they need a Gather and then a FinalizeAggregate.
3654 : *
3655 : * input_rel: contains the source-data Paths
3656 : * target: the pathtarget for the result Paths to compute
3657 : * gd: grouping sets data including list of grouping sets and their clauses
3658 : *
3659 : * Note: all Paths in input_rel are expected to return the target computed
3660 : * by make_group_input_target.
3661 : */
3662 : static RelOptInfo *
3663 37596 : create_grouping_paths(PlannerInfo *root,
3664 : RelOptInfo *input_rel,
3665 : PathTarget *target,
3666 : bool target_parallel_safe,
3667 : grouping_sets_data *gd)
3668 : {
3669 37596 : Query *parse = root->parse;
3670 : RelOptInfo *grouped_rel;
3671 : RelOptInfo *partially_grouped_rel;
3672 : AggClauseCosts agg_costs;
3673 :
3674 225576 : MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
3675 37596 : get_agg_clause_costs(root, AGGSPLIT_SIMPLE, &agg_costs);
3676 :
3677 : /*
3678 : * Create grouping relation to hold fully aggregated grouping and/or
3679 : * aggregation paths.
3680 : */
3681 37596 : grouped_rel = make_grouping_rel(root, input_rel, target,
3682 : target_parallel_safe, parse->havingQual);
3683 :
3684 : /*
3685 : * Create either paths for a degenerate grouping or paths for ordinary
3686 : * grouping, as appropriate.
3687 : */
3688 37596 : if (is_degenerate_grouping(root))
3689 18 : create_degenerate_grouping_paths(root, input_rel, grouped_rel);
3690 : else
3691 : {
3692 37578 : int flags = 0;
3693 : GroupPathExtraData extra;
3694 :
3695 : /*
3696 : * Determine whether it's possible to perform sort-based
3697 : * implementations of grouping. (Note that if processed_groupClause
3698 : * is empty, grouping_is_sortable() is trivially true, and all the
3699 : * pathkeys_contained_in() tests will succeed too, so that we'll
3700 : * consider every surviving input path.)
3701 : *
3702 : * If we have grouping sets, we might be able to sort some but not all
3703 : * of them; in this case, we need can_sort to be true as long as we
3704 : * must consider any sorted-input plan.
3705 : */
3706 37578 : if ((gd && gd->rollups != NIL)
3707 36736 : || grouping_is_sortable(root->processed_groupClause))
3708 37572 : flags |= GROUPING_CAN_USE_SORT;
3709 :
3710 : /*
3711 : * Determine whether we should consider hash-based implementations of
3712 : * grouping.
3713 : *
3714 : * Hashed aggregation only applies if we're grouping. If we have
3715 : * grouping sets, some groups might be hashable but others not; in
3716 : * this case we set can_hash true as long as there is nothing globally
3717 : * preventing us from hashing (and we should therefore consider plans
3718 : * with hashes).
3719 : *
3720 : * Executor doesn't support hashed aggregation with DISTINCT or ORDER
3721 : * BY aggregates. (Doing so would imply storing *all* the input
3722 : * values in the hash table, and/or running many sorts in parallel,
3723 : * either of which seems like a certain loser.) We similarly don't
3724 : * support ordered-set aggregates in hashed aggregation, but that case
3725 : * is also included in the numOrderedAggs count.
3726 : *
3727 : * Note: grouping_is_hashable() is much more expensive to check than
3728 : * the other gating conditions, so we want to do it last.
3729 : */
3730 37578 : if ((parse->groupClause != NIL &&
3731 8562 : root->numOrderedAggs == 0 &&
3732 4144 : (gd ? gd->any_hashable : grouping_is_hashable(root->processed_groupClause))))
3733 4140 : flags |= GROUPING_CAN_USE_HASH;
3734 :
3735 : /*
3736 : * Determine whether partial aggregation is possible.
3737 : */
3738 37578 : if (can_partial_agg(root))
3739 33398 : flags |= GROUPING_CAN_PARTIAL_AGG;
3740 :
3741 37578 : extra.flags = flags;
3742 37578 : extra.target_parallel_safe = target_parallel_safe;
3743 37578 : extra.havingQual = parse->havingQual;
3744 37578 : extra.targetList = parse->targetList;
3745 37578 : extra.partial_costs_set = false;
3746 :
3747 : /*
3748 : * Determine whether partitionwise aggregation is in theory possible.
3749 : * It can be disabled by the user, and for now, we don't try to
3750 : * support grouping sets. create_ordinary_grouping_paths() will check
3751 : * additional conditions, such as whether input_rel is partitioned.
3752 : */
3753 37578 : if (enable_partitionwise_aggregate && !parse->groupingSets)
3754 556 : extra.patype = PARTITIONWISE_AGGREGATE_FULL;
3755 : else
3756 37022 : extra.patype = PARTITIONWISE_AGGREGATE_NONE;
3757 :
3758 37578 : create_ordinary_grouping_paths(root, input_rel, grouped_rel,
3759 : &agg_costs, gd, &extra,
3760 : &partially_grouped_rel);
3761 : }
3762 :
3763 37590 : set_cheapest(grouped_rel);
3764 37590 : return grouped_rel;
3765 : }
3766 :
3767 : /*
3768 : * make_grouping_rel
3769 : *
3770 : * Create a new grouping rel and set basic properties.
3771 : *
3772 : * input_rel represents the underlying scan/join relation.
3773 : * target is the output expected from the grouping relation.
3774 : */
3775 : static RelOptInfo *
3776 39090 : make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
3777 : PathTarget *target, bool target_parallel_safe,
3778 : Node *havingQual)
3779 : {
3780 : RelOptInfo *grouped_rel;
3781 :
3782 39090 : if (IS_OTHER_REL(input_rel))
3783 : {
3784 1494 : grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG,
3785 : input_rel->relids);
3786 1494 : grouped_rel->reloptkind = RELOPT_OTHER_UPPER_REL;
3787 : }
3788 : else
3789 : {
3790 : /*
3791 : * By tradition, the relids set for the main grouping relation is
3792 : * NULL. (This could be changed, but might require adjustments
3793 : * elsewhere.)
3794 : */
3795 37596 : grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3796 : }
3797 :
3798 : /* Set target. */
3799 39090 : grouped_rel->reltarget = target;
3800 :
3801 : /*
3802 : * If the input relation is not parallel-safe, then the grouped relation
3803 : * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
3804 : * target list and HAVING quals are parallel-safe.
3805 : */
3806 66774 : if (input_rel->consider_parallel && target_parallel_safe &&
3807 27684 : is_parallel_safe(root, (Node *) havingQual))
3808 27666 : grouped_rel->consider_parallel = true;
3809 :
3810 : /*
3811 : * If the input rel belongs to a single FDW, so does the grouped rel.
3812 : */
3813 39090 : grouped_rel->serverid = input_rel->serverid;
3814 39090 : grouped_rel->userid = input_rel->userid;
3815 39090 : grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3816 39090 : grouped_rel->fdwroutine = input_rel->fdwroutine;
3817 :
3818 39090 : return grouped_rel;
3819 : }
3820 :
3821 : /*
3822 : * is_degenerate_grouping
3823 : *
3824 : * A degenerate grouping is one in which the query has a HAVING qual and/or
3825 : * grouping sets, but no aggregates and no GROUP BY (which implies that the
3826 : * grouping sets are all empty).
3827 : */
3828 : static bool
3829 37596 : is_degenerate_grouping(PlannerInfo *root)
3830 : {
3831 37596 : Query *parse = root->parse;
3832 :
3833 36522 : return (root->hasHavingQual || parse->groupingSets) &&
3834 74118 : !parse->hasAggs && parse->groupClause == NIL;
3835 : }
3836 :
3837 : /*
3838 : * create_degenerate_grouping_paths
3839 : *
3840 : * When the grouping is degenerate (see is_degenerate_grouping), we are
3841 : * supposed to emit either zero or one row for each grouping set depending on
3842 : * whether HAVING succeeds. Furthermore, there cannot be any variables in
3843 : * either HAVING or the targetlist, so we actually do not need the FROM table
3844 : * at all! We can just throw away the plan-so-far and generate a Result node.
3845 : * This is a sufficiently unusual corner case that it's not worth contorting
3846 : * the structure of this module to avoid having to generate the earlier paths
3847 : * in the first place.
3848 : */
3849 : static void
3850 18 : create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
3851 : RelOptInfo *grouped_rel)
3852 : {
3853 18 : Query *parse = root->parse;
3854 : int nrows;
3855 : Path *path;
3856 :
3857 18 : nrows = list_length(parse->groupingSets);
3858 18 : if (nrows > 1)
3859 : {
3860 : /*
3861 : * Doesn't seem worthwhile writing code to cons up a generate_series
3862 : * or a values scan to emit multiple rows. Instead just make N clones
3863 : * and append them. (With a volatile HAVING clause, this means you
3864 : * might get between 0 and N output rows. Offhand I think that's
3865 : * desired.)
3866 : */
3867 0 : List *paths = NIL;
3868 :
3869 0 : while (--nrows >= 0)
3870 : {
3871 : path = (Path *)
3872 0 : create_group_result_path(root, grouped_rel,
3873 0 : grouped_rel->reltarget,
3874 0 : (List *) parse->havingQual);
3875 0 : paths = lappend(paths, path);
3876 : }
3877 : path = (Path *)
3878 0 : create_append_path(root,
3879 : grouped_rel,
3880 : paths,
3881 : NIL,
3882 : NIL,
3883 : NULL,
3884 : 0,
3885 : false,
3886 : -1);
3887 : }
3888 : else
3889 : {
3890 : /* No grouping sets, or just one, so one output row */
3891 : path = (Path *)
3892 18 : create_group_result_path(root, grouped_rel,
3893 18 : grouped_rel->reltarget,
3894 18 : (List *) parse->havingQual);
3895 : }
3896 :
3897 18 : add_path(grouped_rel, path);
3898 18 : }
3899 :
3900 : /*
3901 : * create_ordinary_grouping_paths
3902 : *
3903 : * Create grouping paths for the ordinary (that is, non-degenerate) case.
3904 : *
3905 : * We need to consider sorted and hashed aggregation in the same function,
3906 : * because otherwise (1) it would be harder to throw an appropriate error
3907 : * message if neither way works, and (2) we should not allow hashtable size
3908 : * considerations to dissuade us from using hashing if sorting is not possible.
3909 : *
3910 : * *partially_grouped_rel_p will be set to the partially grouped rel which this
3911 : * function creates, or to NULL if it doesn't create one.
3912 : */
3913 : static void
3914 39072 : create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
3915 : RelOptInfo *grouped_rel,
3916 : const AggClauseCosts *agg_costs,
3917 : grouping_sets_data *gd,
3918 : GroupPathExtraData *extra,
3919 : RelOptInfo **partially_grouped_rel_p)
3920 : {
3921 39072 : Path *cheapest_path = input_rel->cheapest_total_path;
3922 39072 : RelOptInfo *partially_grouped_rel = NULL;
3923 : double dNumGroups;
3924 39072 : PartitionwiseAggregateType patype = PARTITIONWISE_AGGREGATE_NONE;
3925 :
3926 : /*
3927 : * If this is the topmost grouping relation or if the parent relation is
3928 : * doing some form of partitionwise aggregation, then we may be able to do
3929 : * it at this level also. However, if the input relation is not
3930 : * partitioned, partitionwise aggregate is impossible.
3931 : */
3932 39072 : if (extra->patype != PARTITIONWISE_AGGREGATE_NONE &&
3933 2050 : IS_PARTITIONED_REL(input_rel))
3934 : {
3935 : /*
3936 : * If this is the topmost relation or if the parent relation is doing
3937 : * full partitionwise aggregation, then we can do full partitionwise
3938 : * aggregation provided that the GROUP BY clause contains all of the
3939 : * partitioning columns at this level and the collation used by GROUP
3940 : * BY matches the partitioning collation. Otherwise, we can do at
3941 : * most partial partitionwise aggregation. But if partial aggregation
3942 : * is not supported in general then we can't use it for partitionwise
3943 : * aggregation either.
3944 : *
3945 : * Check parse->groupClause not processed_groupClause, because it's
3946 : * okay if some of the partitioning columns were proved redundant.
3947 : */
3948 1160 : if (extra->patype == PARTITIONWISE_AGGREGATE_FULL &&
3949 556 : group_by_has_partkey(input_rel, extra->targetList,
3950 556 : root->parse->groupClause))
3951 320 : patype = PARTITIONWISE_AGGREGATE_FULL;
3952 284 : else if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
3953 242 : patype = PARTITIONWISE_AGGREGATE_PARTIAL;
3954 : else
3955 42 : patype = PARTITIONWISE_AGGREGATE_NONE;
3956 : }
3957 :
3958 : /*
3959 : * Before generating paths for grouped_rel, we first generate any possible
3960 : * partially grouped paths; that way, later code can easily consider both
3961 : * parallel and non-parallel approaches to grouping.
3962 : */
3963 39072 : if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
3964 : {
3965 : bool force_rel_creation;
3966 :
3967 : /*
3968 : * If we're doing partitionwise aggregation at this level, force
3969 : * creation of a partially_grouped_rel so we can add partitionwise
3970 : * paths to it.
3971 : */
3972 34820 : force_rel_creation = (patype == PARTITIONWISE_AGGREGATE_PARTIAL);
3973 :
3974 : partially_grouped_rel =
3975 34820 : create_partial_grouping_paths(root,
3976 : grouped_rel,
3977 : input_rel,
3978 : gd,
3979 : extra,
3980 : force_rel_creation);
3981 : }
3982 :
3983 : /* Set out parameter. */
3984 39072 : *partially_grouped_rel_p = partially_grouped_rel;
3985 :
3986 : /* Apply partitionwise aggregation technique, if possible. */
3987 39072 : if (patype != PARTITIONWISE_AGGREGATE_NONE)
3988 562 : create_partitionwise_grouping_paths(root, input_rel, grouped_rel,
3989 : partially_grouped_rel, agg_costs,
3990 : gd, patype, extra);
3991 :
3992 : /* If we are doing partial aggregation only, return. */
3993 39072 : if (extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL)
3994 : {
3995 : Assert(partially_grouped_rel);
3996 :
3997 618 : if (partially_grouped_rel->pathlist)
3998 618 : set_cheapest(partially_grouped_rel);
3999 :
4000 618 : return;
4001 : }
4002 :
4003 : /* Gather any partially grouped partial paths. */
4004 38454 : if (partially_grouped_rel && partially_grouped_rel->partial_pathlist)
4005 : {
4006 1482 : gather_grouping_paths(root, partially_grouped_rel);
4007 1482 : set_cheapest(partially_grouped_rel);
4008 : }
4009 :
4010 : /*
4011 : * Estimate number of groups.
4012 : */
4013 38454 : dNumGroups = get_number_of_groups(root,
4014 : cheapest_path->rows,
4015 : gd,
4016 : extra->targetList);
4017 :
4018 : /* Build final grouping paths */
4019 38454 : add_paths_to_grouping_rel(root, input_rel, grouped_rel,
4020 : partially_grouped_rel, agg_costs, gd,
4021 : dNumGroups, extra);
4022 :
4023 : /* Give a helpful error if we failed to find any implementation */
4024 38454 : if (grouped_rel->pathlist == NIL)
4025 6 : ereport(ERROR,
4026 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4027 : errmsg("could not implement GROUP BY"),
4028 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4029 :
4030 : /*
4031 : * If there is an FDW that's responsible for all baserels of the query,
4032 : * let it consider adding ForeignPaths.
4033 : */
4034 38448 : if (grouped_rel->fdwroutine &&
4035 332 : grouped_rel->fdwroutine->GetForeignUpperPaths)
4036 332 : grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
4037 : input_rel, grouped_rel,
4038 : extra);
4039 :
4040 : /* Let extensions possibly add some more paths */
4041 38448 : if (create_upper_paths_hook)
4042 0 : (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
4043 : input_rel, grouped_rel,
4044 : extra);
4045 : }
4046 :
4047 : /*
4048 : * For a given input path, consider the possible ways of doing grouping sets on
4049 : * it, by combinations of hashing and sorting. This can be called multiple
4050 : * times, so it's important that it not scribble on input. No result is
4051 : * returned, but any generated paths are added to grouped_rel.
4052 : */
4053 : static void
4054 1684 : consider_groupingsets_paths(PlannerInfo *root,
4055 : RelOptInfo *grouped_rel,
4056 : Path *path,
4057 : bool is_sorted,
4058 : bool can_hash,
4059 : grouping_sets_data *gd,
4060 : const AggClauseCosts *agg_costs,
4061 : double dNumGroups)
4062 : {
4063 1684 : Query *parse = root->parse;
4064 1684 : Size hash_mem_limit = get_hash_memory_limit();
4065 :
4066 : /*
4067 : * If we're not being offered sorted input, then only consider plans that
4068 : * can be done entirely by hashing.
4069 : *
4070 : * We can hash everything if it looks like it'll fit in hash_mem. But if
4071 : * the input is actually sorted despite not being advertised as such, we
4072 : * prefer to make use of that in order to use less memory.
4073 : *
4074 : * If none of the grouping sets are sortable, then ignore the hash_mem
4075 : * limit and generate a path anyway, since otherwise we'll just fail.
4076 : */
4077 1684 : if (!is_sorted)
4078 : {
4079 770 : List *new_rollups = NIL;
4080 770 : RollupData *unhashed_rollup = NULL;
4081 : List *sets_data;
4082 770 : List *empty_sets_data = NIL;
4083 770 : List *empty_sets = NIL;
4084 : ListCell *lc;
4085 770 : ListCell *l_start = list_head(gd->rollups);
4086 770 : AggStrategy strat = AGG_HASHED;
4087 : double hashsize;
4088 770 : double exclude_groups = 0.0;
4089 :
4090 : Assert(can_hash);
4091 :
4092 : /*
4093 : * If the input is coincidentally sorted usefully (which can happen
4094 : * even if is_sorted is false, since that only means that our caller
4095 : * has set up the sorting for us), then save some hashtable space by
4096 : * making use of that. But we need to watch out for degenerate cases:
4097 : *
4098 : * 1) If there are any empty grouping sets, then group_pathkeys might
4099 : * be NIL if all non-empty grouping sets are unsortable. In this case,
4100 : * there will be a rollup containing only empty groups, and the
4101 : * pathkeys_contained_in test is vacuously true; this is ok.
4102 : *
4103 : * XXX: the above relies on the fact that group_pathkeys is generated
4104 : * from the first rollup. If we add the ability to consider multiple
4105 : * sort orders for grouping input, this assumption might fail.
4106 : *
4107 : * 2) If there are no empty sets and only unsortable sets, then the
4108 : * rollups list will be empty (and thus l_start == NULL), and
4109 : * group_pathkeys will be NIL; we must ensure that the vacuously-true
4110 : * pathkeys_contained_in test doesn't cause us to crash.
4111 : */
4112 1534 : if (l_start != NULL &&
4113 764 : pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
4114 : {
4115 12 : unhashed_rollup = lfirst_node(RollupData, l_start);
4116 12 : exclude_groups = unhashed_rollup->numGroups;
4117 12 : l_start = lnext(gd->rollups, l_start);
4118 : }
4119 :
4120 770 : hashsize = estimate_hashagg_tablesize(root,
4121 : path,
4122 : agg_costs,
4123 : dNumGroups - exclude_groups);
4124 :
4125 : /*
4126 : * gd->rollups is empty if we have only unsortable columns to work
4127 : * with. Override hash_mem in that case; otherwise, we'll rely on the
4128 : * sorted-input case to generate usable mixed paths.
4129 : */
4130 770 : if (hashsize > hash_mem_limit && gd->rollups)
4131 18 : return; /* nope, won't fit */
4132 :
4133 : /*
4134 : * We need to burst the existing rollups list into individual grouping
4135 : * sets and recompute a groupClause for each set.
4136 : */
4137 752 : sets_data = list_copy(gd->unsortable_sets);
4138 :
4139 1872 : for_each_cell(lc, gd->rollups, l_start)
4140 : {
4141 1144 : RollupData *rollup = lfirst_node(RollupData, lc);
4142 :
4143 : /*
4144 : * If we find an unhashable rollup that's not been skipped by the
4145 : * "actually sorted" check above, we can't cope; we'd need sorted
4146 : * input (with a different sort order) but we can't get that here.
4147 : * So bail out; we'll get a valid path from the is_sorted case
4148 : * instead.
4149 : *
4150 : * The mere presence of empty grouping sets doesn't make a rollup
4151 : * unhashable (see preprocess_grouping_sets), we handle those
4152 : * specially below.
4153 : */
4154 1144 : if (!rollup->hashable)
4155 24 : return;
4156 :
4157 1120 : sets_data = list_concat(sets_data, rollup->gsets_data);
4158 : }
4159 3054 : foreach(lc, sets_data)
4160 : {
4161 2326 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
4162 2326 : List *gset = gs->set;
4163 : RollupData *rollup;
4164 :
4165 2326 : if (gset == NIL)
4166 : {
4167 : /* Empty grouping sets can't be hashed. */
4168 484 : empty_sets_data = lappend(empty_sets_data, gs);
4169 484 : empty_sets = lappend(empty_sets, NIL);
4170 : }
4171 : else
4172 : {
4173 1842 : rollup = makeNode(RollupData);
4174 :
4175 1842 : rollup->groupClause = preprocess_groupclause(root, gset);
4176 1842 : rollup->gsets_data = list_make1(gs);
4177 1842 : rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4178 : rollup->gsets_data,
4179 : gd->tleref_to_colnum_map);
4180 1842 : rollup->numGroups = gs->numGroups;
4181 1842 : rollup->hashable = true;
4182 1842 : rollup->is_hashed = true;
4183 1842 : new_rollups = lappend(new_rollups, rollup);
4184 : }
4185 : }
4186 :
4187 : /*
4188 : * If we didn't find anything nonempty to hash, then bail. We'll
4189 : * generate a path from the is_sorted case.
4190 : */
4191 728 : if (new_rollups == NIL)
4192 0 : return;
4193 :
4194 : /*
4195 : * If there were empty grouping sets they should have been in the
4196 : * first rollup.
4197 : */
4198 : Assert(!unhashed_rollup || !empty_sets);
4199 :
4200 728 : if (unhashed_rollup)
4201 : {
4202 12 : new_rollups = lappend(new_rollups, unhashed_rollup);
4203 12 : strat = AGG_MIXED;
4204 : }
4205 716 : else if (empty_sets)
4206 : {
4207 436 : RollupData *rollup = makeNode(RollupData);
4208 :
4209 436 : rollup->groupClause = NIL;
4210 436 : rollup->gsets_data = empty_sets_data;
4211 436 : rollup->gsets = empty_sets;
4212 436 : rollup->numGroups = list_length(empty_sets);
4213 436 : rollup->hashable = false;
4214 436 : rollup->is_hashed = false;
4215 436 : new_rollups = lappend(new_rollups, rollup);
4216 436 : strat = AGG_MIXED;
4217 : }
4218 :
4219 728 : add_path(grouped_rel, (Path *)
4220 728 : create_groupingsets_path(root,
4221 : grouped_rel,
4222 : path,
4223 728 : (List *) parse->havingQual,
4224 : strat,
4225 : new_rollups,
4226 : agg_costs));
4227 728 : return;
4228 : }
4229 :
4230 : /*
4231 : * If we have sorted input but nothing we can do with it, bail.
4232 : */
4233 914 : if (gd->rollups == NIL)
4234 0 : return;
4235 :
4236 : /*
4237 : * Given sorted input, we try and make two paths: one sorted and one mixed
4238 : * sort/hash. (We need to try both because hashagg might be disabled, or
4239 : * some columns might not be sortable.)
4240 : *
4241 : * can_hash is passed in as false if some obstacle elsewhere (such as
4242 : * ordered aggs) means that we shouldn't consider hashing at all.
4243 : */
4244 914 : if (can_hash && gd->any_hashable)
4245 : {
4246 836 : List *rollups = NIL;
4247 836 : List *hash_sets = list_copy(gd->unsortable_sets);
4248 836 : double availspace = hash_mem_limit;
4249 : ListCell *lc;
4250 :
4251 : /*
4252 : * Account first for space needed for groups we can't sort at all.
4253 : */
4254 836 : availspace -= estimate_hashagg_tablesize(root,
4255 : path,
4256 : agg_costs,
4257 : gd->dNumHashGroups);
4258 :
4259 836 : if (availspace > 0 && list_length(gd->rollups) > 1)
4260 : {
4261 : double scale;
4262 420 : int num_rollups = list_length(gd->rollups);
4263 : int k_capacity;
4264 420 : int *k_weights = palloc(num_rollups * sizeof(int));
4265 420 : Bitmapset *hash_items = NULL;
4266 : int i;
4267 :
4268 : /*
4269 : * We treat this as a knapsack problem: the knapsack capacity
4270 : * represents hash_mem, the item weights are the estimated memory
4271 : * usage of the hashtables needed to implement a single rollup,
4272 : * and we really ought to use the cost saving as the item value;
4273 : * however, currently the costs assigned to sort nodes don't
4274 : * reflect the comparison costs well, and so we treat all items as
4275 : * of equal value (each rollup we hash instead saves us one sort).
4276 : *
4277 : * To use the discrete knapsack, we need to scale the values to a
4278 : * reasonably small bounded range. We choose to allow a 5% error
4279 : * margin; we have no more than 4096 rollups in the worst possible
4280 : * case, which with a 5% error margin will require a bit over 42MB
4281 : * of workspace. (Anyone wanting to plan queries that complex had
4282 : * better have the memory for it. In more reasonable cases, with
4283 : * no more than a couple of dozen rollups, the memory usage will
4284 : * be negligible.)
4285 : *
4286 : * k_capacity is naturally bounded, but we clamp the values for
4287 : * scale and weight (below) to avoid overflows or underflows (or
4288 : * uselessly trying to use a scale factor less than 1 byte).
4289 : */
4290 420 : scale = Max(availspace / (20.0 * num_rollups), 1.0);
4291 420 : k_capacity = (int) floor(availspace / scale);
4292 :
4293 : /*
4294 : * We leave the first rollup out of consideration since it's the
4295 : * one that matches the input sort order. We assign indexes "i"
4296 : * to only those entries considered for hashing; the second loop,
4297 : * below, must use the same condition.
4298 : */
4299 420 : i = 0;
4300 1056 : for_each_from(lc, gd->rollups, 1)
4301 : {
4302 636 : RollupData *rollup = lfirst_node(RollupData, lc);
4303 :
4304 636 : if (rollup->hashable)
4305 : {
4306 636 : double sz = estimate_hashagg_tablesize(root,
4307 : path,
4308 : agg_costs,
4309 : rollup->numGroups);
4310 :
4311 : /*
4312 : * If sz is enormous, but hash_mem (and hence scale) is
4313 : * small, avoid integer overflow here.
4314 : */
4315 636 : k_weights[i] = (int) Min(floor(sz / scale),
4316 : k_capacity + 1.0);
4317 636 : ++i;
4318 : }
4319 : }
4320 :
4321 : /*
4322 : * Apply knapsack algorithm; compute the set of items which
4323 : * maximizes the value stored (in this case the number of sorts
4324 : * saved) while keeping the total size (approximately) within
4325 : * capacity.
4326 : */
4327 420 : if (i > 0)
4328 420 : hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
4329 :
4330 420 : if (!bms_is_empty(hash_items))
4331 : {
4332 420 : rollups = list_make1(linitial(gd->rollups));
4333 :
4334 420 : i = 0;
4335 1056 : for_each_from(lc, gd->rollups, 1)
4336 : {
4337 636 : RollupData *rollup = lfirst_node(RollupData, lc);
4338 :
4339 636 : if (rollup->hashable)
4340 : {
4341 636 : if (bms_is_member(i, hash_items))
4342 600 : hash_sets = list_concat(hash_sets,
4343 600 : rollup->gsets_data);
4344 : else
4345 36 : rollups = lappend(rollups, rollup);
4346 636 : ++i;
4347 : }
4348 : else
4349 0 : rollups = lappend(rollups, rollup);
4350 : }
4351 : }
4352 : }
4353 :
4354 836 : if (!rollups && hash_sets)
4355 24 : rollups = list_copy(gd->rollups);
4356 :
4357 1576 : foreach(lc, hash_sets)
4358 : {
4359 740 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
4360 740 : RollupData *rollup = makeNode(RollupData);
4361 :
4362 : Assert(gs->set != NIL);
4363 :
4364 740 : rollup->groupClause = preprocess_groupclause(root, gs->set);
4365 740 : rollup->gsets_data = list_make1(gs);
4366 740 : rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4367 : rollup->gsets_data,
4368 : gd->tleref_to_colnum_map);
4369 740 : rollup->numGroups = gs->numGroups;
4370 740 : rollup->hashable = true;
4371 740 : rollup->is_hashed = true;
4372 740 : rollups = lcons(rollup, rollups);
4373 : }
4374 :
4375 836 : if (rollups)
4376 : {
4377 444 : add_path(grouped_rel, (Path *)
4378 444 : create_groupingsets_path(root,
4379 : grouped_rel,
4380 : path,
4381 444 : (List *) parse->havingQual,
4382 : AGG_MIXED,
4383 : rollups,
4384 : agg_costs));
4385 : }
4386 : }
4387 :
4388 : /*
4389 : * Now try the simple sorted case.
4390 : */
4391 914 : if (!gd->unsortable_sets)
4392 884 : add_path(grouped_rel, (Path *)
4393 884 : create_groupingsets_path(root,
4394 : grouped_rel,
4395 : path,
4396 884 : (List *) parse->havingQual,
4397 : AGG_SORTED,
4398 : gd->rollups,
4399 : agg_costs));
4400 : }
4401 :
4402 : /*
4403 : * create_window_paths
4404 : *
4405 : * Build a new upperrel containing Paths for window-function evaluation.
4406 : *
4407 : * input_rel: contains the source-data Paths
4408 : * input_target: result of make_window_input_target
4409 : * output_target: what the topmost WindowAggPath should return
4410 : * wflists: result of find_window_functions
4411 : * activeWindows: result of select_active_windows
4412 : *
4413 : * Note: all Paths in input_rel are expected to return input_target.
4414 : */
4415 : static RelOptInfo *
4416 2336 : create_window_paths(PlannerInfo *root,
4417 : RelOptInfo *input_rel,
4418 : PathTarget *input_target,
4419 : PathTarget *output_target,
4420 : bool output_target_parallel_safe,
4421 : WindowFuncLists *wflists,
4422 : List *activeWindows)
4423 : {
4424 : RelOptInfo *window_rel;
4425 : ListCell *lc;
4426 :
4427 : /* For now, do all work in the (WINDOW, NULL) upperrel */
4428 2336 : window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
4429 :
4430 : /*
4431 : * If the input relation is not parallel-safe, then the window relation
4432 : * can't be parallel-safe, either. Otherwise, we need to examine the
4433 : * target list and active windows for non-parallel-safe constructs.
4434 : */
4435 2336 : if (input_rel->consider_parallel && output_target_parallel_safe &&
4436 0 : is_parallel_safe(root, (Node *) activeWindows))
4437 0 : window_rel->consider_parallel = true;
4438 :
4439 : /*
4440 : * If the input rel belongs to a single FDW, so does the window rel.
4441 : */
4442 2336 : window_rel->serverid = input_rel->serverid;
4443 2336 : window_rel->userid = input_rel->userid;
4444 2336 : window_rel->useridiscurrent = input_rel->useridiscurrent;
4445 2336 : window_rel->fdwroutine = input_rel->fdwroutine;
4446 :
4447 : /*
4448 : * Consider computing window functions starting from the existing
4449 : * cheapest-total path (which will likely require a sort) as well as any
4450 : * existing paths that satisfy or partially satisfy root->window_pathkeys.
4451 : */
4452 4984 : foreach(lc, input_rel->pathlist)
4453 : {
4454 2648 : Path *path = (Path *) lfirst(lc);
4455 : int presorted_keys;
4456 :
4457 2960 : if (path == input_rel->cheapest_total_path ||
4458 312 : pathkeys_count_contained_in(root->window_pathkeys, path->pathkeys,
4459 144 : &presorted_keys) ||
4460 144 : presorted_keys > 0)
4461 2534 : create_one_window_path(root,
4462 : window_rel,
4463 : path,
4464 : input_target,
4465 : output_target,
4466 : wflists,
4467 : activeWindows);
4468 : }
4469 :
4470 : /*
4471 : * If there is an FDW that's responsible for all baserels of the query,
4472 : * let it consider adding ForeignPaths.
4473 : */
4474 2336 : if (window_rel->fdwroutine &&
4475 12 : window_rel->fdwroutine->GetForeignUpperPaths)
4476 12 : window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
4477 : input_rel, window_rel,
4478 : NULL);
4479 :
4480 : /* Let extensions possibly add some more paths */
4481 2336 : if (create_upper_paths_hook)
4482 0 : (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
4483 : input_rel, window_rel, NULL);
4484 :
4485 : /* Now choose the best path(s) */
4486 2336 : set_cheapest(window_rel);
4487 :
4488 2336 : return window_rel;
4489 : }
4490 :
4491 : /*
4492 : * Stack window-function implementation steps atop the given Path, and
4493 : * add the result to window_rel.
4494 : *
4495 : * window_rel: upperrel to contain result
4496 : * path: input Path to use (must return input_target)
4497 : * input_target: result of make_window_input_target
4498 : * output_target: what the topmost WindowAggPath should return
4499 : * wflists: result of find_window_functions
4500 : * activeWindows: result of select_active_windows
4501 : */
4502 : static void
4503 2534 : create_one_window_path(PlannerInfo *root,
4504 : RelOptInfo *window_rel,
4505 : Path *path,
4506 : PathTarget *input_target,
4507 : PathTarget *output_target,
4508 : WindowFuncLists *wflists,
4509 : List *activeWindows)
4510 : {
4511 : PathTarget *window_target;
4512 : ListCell *l;
4513 2534 : List *topqual = NIL;
4514 :
4515 : /*
4516 : * Since each window clause could require a different sort order, we stack
4517 : * up a WindowAgg node for each clause, with sort steps between them as
4518 : * needed. (We assume that select_active_windows chose a good order for
4519 : * executing the clauses in.)
4520 : *
4521 : * input_target should contain all Vars and Aggs needed for the result.
4522 : * (In some cases we wouldn't need to propagate all of these all the way
4523 : * to the top, since they might only be needed as inputs to WindowFuncs.
4524 : * It's probably not worth trying to optimize that though.) It must also
4525 : * contain all window partitioning and sorting expressions, to ensure
4526 : * they're computed only once at the bottom of the stack (that's critical
4527 : * for volatile functions). As we climb up the stack, we'll add outputs
4528 : * for the WindowFuncs computed at each level.
4529 : */
4530 2534 : window_target = input_target;
4531 :
4532 5218 : foreach(l, activeWindows)
4533 : {
4534 2684 : WindowClause *wc = lfirst_node(WindowClause, l);
4535 : List *window_pathkeys;
4536 2684 : List *runcondition = NIL;
4537 : int presorted_keys;
4538 : bool is_sorted;
4539 : bool topwindow;
4540 : ListCell *lc2;
4541 :
4542 2684 : window_pathkeys = make_pathkeys_for_window(root,
4543 : wc,
4544 : root->processed_tlist);
4545 :
4546 2684 : is_sorted = pathkeys_count_contained_in(window_pathkeys,
4547 : path->pathkeys,
4548 : &presorted_keys);
4549 :
4550 : /* Sort if necessary */
4551 2684 : if (!is_sorted)
4552 : {
4553 : /*
4554 : * No presorted keys or incremental sort disabled, just perform a
4555 : * complete sort.
4556 : */
4557 2072 : if (presorted_keys == 0 || !enable_incremental_sort)
4558 2006 : path = (Path *) create_sort_path(root, window_rel,
4559 : path,
4560 : window_pathkeys,
4561 : -1.0);
4562 : else
4563 : {
4564 : /*
4565 : * Since we have presorted keys and incremental sort is
4566 : * enabled, just use incremental sort.
4567 : */
4568 66 : path = (Path *) create_incremental_sort_path(root,
4569 : window_rel,
4570 : path,
4571 : window_pathkeys,
4572 : presorted_keys,
4573 : -1.0);
4574 : }
4575 : }
4576 :
4577 2684 : if (lnext(activeWindows, l))
4578 : {
4579 : /*
4580 : * Add the current WindowFuncs to the output target for this
4581 : * intermediate WindowAggPath. We must copy window_target to
4582 : * avoid changing the previous path's target.
4583 : *
4584 : * Note: a WindowFunc adds nothing to the target's eval costs; but
4585 : * we do need to account for the increase in tlist width.
4586 : */
4587 150 : int64 tuple_width = window_target->width;
4588 :
4589 150 : window_target = copy_pathtarget(window_target);
4590 342 : foreach(lc2, wflists->windowFuncs[wc->winref])
4591 : {
4592 192 : WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4593 :
4594 192 : add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4595 192 : tuple_width += get_typavgwidth(wfunc->wintype, -1);
4596 : }
4597 150 : window_target->width = clamp_width_est(tuple_width);
4598 : }
4599 : else
4600 : {
4601 : /* Install the goal target in the topmost WindowAgg */
4602 2534 : window_target = output_target;
4603 : }
4604 :
4605 : /* mark the final item in the list as the top-level window */
4606 2684 : topwindow = foreach_current_index(l) == list_length(activeWindows) - 1;
4607 :
4608 : /*
4609 : * Collect the WindowFuncRunConditions from each WindowFunc and
4610 : * convert them into OpExprs
4611 : */
4612 6076 : foreach(lc2, wflists->windowFuncs[wc->winref])
4613 : {
4614 : ListCell *lc3;
4615 3392 : WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4616 :
4617 3572 : foreach(lc3, wfunc->runCondition)
4618 : {
4619 180 : WindowFuncRunCondition *wfuncrc =
4620 : lfirst_node(WindowFuncRunCondition, lc3);
4621 : Expr *opexpr;
4622 : Expr *leftop;
4623 : Expr *rightop;
4624 :
4625 180 : if (wfuncrc->wfunc_left)
4626 : {
4627 162 : leftop = (Expr *) copyObject(wfunc);
4628 162 : rightop = copyObject(wfuncrc->arg);
4629 : }
4630 : else
4631 : {
4632 18 : leftop = copyObject(wfuncrc->arg);
4633 18 : rightop = (Expr *) copyObject(wfunc);
4634 : }
4635 :
4636 180 : opexpr = make_opclause(wfuncrc->opno,
4637 : BOOLOID,
4638 : false,
4639 : leftop,
4640 : rightop,
4641 : InvalidOid,
4642 : wfuncrc->inputcollid);
4643 :
4644 180 : runcondition = lappend(runcondition, opexpr);
4645 :
4646 180 : if (!topwindow)
4647 24 : topqual = lappend(topqual, opexpr);
4648 : }
4649 : }
4650 :
4651 : path = (Path *)
4652 2684 : create_windowagg_path(root, window_rel, path, window_target,
4653 2684 : wflists->windowFuncs[wc->winref],
4654 : runcondition, wc,
4655 : topwindow ? topqual : NIL, topwindow);
4656 : }
4657 :
4658 2534 : add_path(window_rel, path);
4659 2534 : }
4660 :
4661 : /*
4662 : * create_distinct_paths
4663 : *
4664 : * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4665 : *
4666 : * input_rel: contains the source-data Paths
4667 : * target: the pathtarget for the result Paths to compute
4668 : *
4669 : * Note: input paths should already compute the desired pathtarget, since
4670 : * Sort/Unique won't project anything.
4671 : */
4672 : static RelOptInfo *
4673 2454 : create_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4674 : PathTarget *target)
4675 : {
4676 : RelOptInfo *distinct_rel;
4677 :
4678 : /* For now, do all work in the (DISTINCT, NULL) upperrel */
4679 2454 : distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4680 :
4681 : /*
4682 : * We don't compute anything at this level, so distinct_rel will be
4683 : * parallel-safe if the input rel is parallel-safe. In particular, if
4684 : * there is a DISTINCT ON (...) clause, any path for the input_rel will
4685 : * output those expressions, and will not be parallel-safe unless those
4686 : * expressions are parallel-safe.
4687 : */
4688 2454 : distinct_rel->consider_parallel = input_rel->consider_parallel;
4689 :
4690 : /*
4691 : * If the input rel belongs to a single FDW, so does the distinct_rel.
4692 : */
4693 2454 : distinct_rel->serverid = input_rel->serverid;
4694 2454 : distinct_rel->userid = input_rel->userid;
4695 2454 : distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4696 2454 : distinct_rel->fdwroutine = input_rel->fdwroutine;
4697 :
4698 : /* build distinct paths based on input_rel's pathlist */
4699 2454 : create_final_distinct_paths(root, input_rel, distinct_rel);
4700 :
4701 : /* now build distinct paths based on input_rel's partial_pathlist */
4702 2454 : create_partial_distinct_paths(root, input_rel, distinct_rel, target);
4703 :
4704 : /* Give a helpful error if we failed to create any paths */
4705 2454 : if (distinct_rel->pathlist == NIL)
4706 0 : ereport(ERROR,
4707 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4708 : errmsg("could not implement DISTINCT"),
4709 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4710 :
4711 : /*
4712 : * If there is an FDW that's responsible for all baserels of the query,
4713 : * let it consider adding ForeignPaths.
4714 : */
4715 2454 : if (distinct_rel->fdwroutine &&
4716 16 : distinct_rel->fdwroutine->GetForeignUpperPaths)
4717 16 : distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4718 : UPPERREL_DISTINCT,
4719 : input_rel,
4720 : distinct_rel,
4721 : NULL);
4722 :
4723 : /* Let extensions possibly add some more paths */
4724 2454 : if (create_upper_paths_hook)
4725 0 : (*create_upper_paths_hook) (root, UPPERREL_DISTINCT, input_rel,
4726 : distinct_rel, NULL);
4727 :
4728 : /* Now choose the best path(s) */
4729 2454 : set_cheapest(distinct_rel);
4730 :
4731 2454 : return distinct_rel;
4732 : }
4733 :
4734 : /*
4735 : * create_partial_distinct_paths
4736 : *
4737 : * Process 'input_rel' partial paths and add unique/aggregate paths to the
4738 : * UPPERREL_PARTIAL_DISTINCT rel. For paths created, add Gather/GatherMerge
4739 : * paths on top and add a final unique/aggregate path to remove any duplicate
4740 : * produced from combining rows from parallel workers.
4741 : */
4742 : static void
4743 2454 : create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4744 : RelOptInfo *final_distinct_rel,
4745 : PathTarget *target)
4746 : {
4747 : RelOptInfo *partial_distinct_rel;
4748 : Query *parse;
4749 : List *distinctExprs;
4750 : double numDistinctRows;
4751 : Path *cheapest_partial_path;
4752 : ListCell *lc;
4753 :
4754 : /* nothing to do when there are no partial paths in the input rel */
4755 2454 : if (!input_rel->consider_parallel || input_rel->partial_pathlist == NIL)
4756 2346 : return;
4757 :
4758 108 : parse = root->parse;
4759 :
4760 : /* can't do parallel DISTINCT ON */
4761 108 : if (parse->hasDistinctOn)
4762 0 : return;
4763 :
4764 108 : partial_distinct_rel = fetch_upper_rel(root, UPPERREL_PARTIAL_DISTINCT,
4765 : NULL);
4766 108 : partial_distinct_rel->reltarget = target;
4767 108 : partial_distinct_rel->consider_parallel = input_rel->consider_parallel;
4768 :
4769 : /*
4770 : * If input_rel belongs to a single FDW, so does the partial_distinct_rel.
4771 : */
4772 108 : partial_distinct_rel->serverid = input_rel->serverid;
4773 108 : partial_distinct_rel->userid = input_rel->userid;
4774 108 : partial_distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4775 108 : partial_distinct_rel->fdwroutine = input_rel->fdwroutine;
4776 :
4777 108 : cheapest_partial_path = linitial(input_rel->partial_pathlist);
4778 :
4779 108 : distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
4780 : parse->targetList);
4781 :
4782 : /* estimate how many distinct rows we'll get from each worker */
4783 108 : numDistinctRows = estimate_num_groups(root, distinctExprs,
4784 : cheapest_partial_path->rows,
4785 : NULL, NULL);
4786 :
4787 : /*
4788 : * Try sorting the cheapest path and incrementally sorting any paths with
4789 : * presorted keys and put a unique paths atop of those. We'll also
4790 : * attempt to reorder the required pathkeys to match the input path's
4791 : * pathkeys as much as possible, in hopes of avoiding a possible need to
4792 : * re-sort.
4793 : */
4794 108 : if (grouping_is_sortable(root->processed_distinctClause))
4795 : {
4796 234 : foreach(lc, input_rel->partial_pathlist)
4797 : {
4798 126 : Path *input_path = (Path *) lfirst(lc);
4799 : Path *sorted_path;
4800 126 : List *useful_pathkeys_list = NIL;
4801 :
4802 : useful_pathkeys_list =
4803 126 : get_useful_pathkeys_for_distinct(root,
4804 : root->distinct_pathkeys,
4805 : input_path->pathkeys);
4806 : Assert(list_length(useful_pathkeys_list) > 0);
4807 :
4808 390 : foreach_node(List, useful_pathkeys, useful_pathkeys_list)
4809 : {
4810 138 : sorted_path = make_ordered_path(root,
4811 : partial_distinct_rel,
4812 : input_path,
4813 : cheapest_partial_path,
4814 : useful_pathkeys,
4815 : -1.0);
4816 :
4817 138 : if (sorted_path == NULL)
4818 12 : continue;
4819 :
4820 : /*
4821 : * An empty distinct_pathkeys means all tuples have the same
4822 : * value for the DISTINCT clause. See
4823 : * create_final_distinct_paths()
4824 : */
4825 126 : if (root->distinct_pathkeys == NIL)
4826 : {
4827 : Node *limitCount;
4828 :
4829 6 : limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
4830 : sizeof(int64),
4831 : Int64GetDatum(1), false,
4832 : FLOAT8PASSBYVAL);
4833 :
4834 : /*
4835 : * Apply a LimitPath onto the partial path to restrict the
4836 : * tuples from each worker to 1.
4837 : * create_final_distinct_paths will need to apply an
4838 : * additional LimitPath to restrict this to a single row
4839 : * after the Gather node. If the query already has a
4840 : * LIMIT clause, then we could end up with three Limit
4841 : * nodes in the final plan. Consolidating the top two of
4842 : * these could be done, but does not seem worth troubling
4843 : * over.
4844 : */
4845 6 : add_partial_path(partial_distinct_rel, (Path *)
4846 6 : create_limit_path(root, partial_distinct_rel,
4847 : sorted_path,
4848 : NULL,
4849 : limitCount,
4850 : LIMIT_OPTION_COUNT,
4851 : 0, 1));
4852 : }
4853 : else
4854 : {
4855 120 : add_partial_path(partial_distinct_rel, (Path *)
4856 120 : create_upper_unique_path(root, partial_distinct_rel,
4857 : sorted_path,
4858 120 : list_length(root->distinct_pathkeys),
4859 : numDistinctRows));
4860 : }
4861 : }
4862 : }
4863 : }
4864 :
4865 : /*
4866 : * Now try hash aggregate paths, if enabled and hashing is possible. Since
4867 : * we're not on the hook to ensure we do our best to create at least one
4868 : * path here, we treat enable_hashagg as a hard off-switch rather than the
4869 : * slightly softer variant in create_final_distinct_paths.
4870 : */
4871 108 : if (enable_hashagg && grouping_is_hashable(root->processed_distinctClause))
4872 : {
4873 78 : add_partial_path(partial_distinct_rel, (Path *)
4874 78 : create_agg_path(root,
4875 : partial_distinct_rel,
4876 : cheapest_partial_path,
4877 : cheapest_partial_path->pathtarget,
4878 : AGG_HASHED,
4879 : AGGSPLIT_SIMPLE,
4880 : root->processed_distinctClause,
4881 : NIL,
4882 : NULL,
4883 : numDistinctRows));
4884 : }
4885 :
4886 : /*
4887 : * If there is an FDW that's responsible for all baserels of the query,
4888 : * let it consider adding ForeignPaths.
4889 : */
4890 108 : if (partial_distinct_rel->fdwroutine &&
4891 0 : partial_distinct_rel->fdwroutine->GetForeignUpperPaths)
4892 0 : partial_distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4893 : UPPERREL_PARTIAL_DISTINCT,
4894 : input_rel,
4895 : partial_distinct_rel,
4896 : NULL);
4897 :
4898 : /* Let extensions possibly add some more partial paths */
4899 108 : if (create_upper_paths_hook)
4900 0 : (*create_upper_paths_hook) (root, UPPERREL_PARTIAL_DISTINCT,
4901 : input_rel, partial_distinct_rel, NULL);
4902 :
4903 108 : if (partial_distinct_rel->partial_pathlist != NIL)
4904 : {
4905 108 : generate_useful_gather_paths(root, partial_distinct_rel, true);
4906 108 : set_cheapest(partial_distinct_rel);
4907 :
4908 : /*
4909 : * Finally, create paths to distinctify the final result. This step
4910 : * is needed to remove any duplicates due to combining rows from
4911 : * parallel workers.
4912 : */
4913 108 : create_final_distinct_paths(root, partial_distinct_rel,
4914 : final_distinct_rel);
4915 : }
4916 : }
4917 :
4918 : /*
4919 : * create_final_distinct_paths
4920 : * Create distinct paths in 'distinct_rel' based on 'input_rel' pathlist
4921 : *
4922 : * input_rel: contains the source-data paths
4923 : * distinct_rel: destination relation for storing created paths
4924 : */
4925 : static RelOptInfo *
4926 2562 : create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4927 : RelOptInfo *distinct_rel)
4928 : {
4929 2562 : Query *parse = root->parse;
4930 2562 : Path *cheapest_input_path = input_rel->cheapest_total_path;
4931 : double numDistinctRows;
4932 : bool allow_hash;
4933 :
4934 : /* Estimate number of distinct rows there will be */
4935 2562 : if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
4936 2488 : root->hasHavingQual)
4937 : {
4938 : /*
4939 : * If there was grouping or aggregation, use the number of input rows
4940 : * as the estimated number of DISTINCT rows (ie, assume the input is
4941 : * already mostly unique).
4942 : */
4943 74 : numDistinctRows = cheapest_input_path->rows;
4944 : }
4945 : else
4946 : {
4947 : /*
4948 : * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
4949 : */
4950 : List *distinctExprs;
4951 :
4952 2488 : distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
4953 : parse->targetList);
4954 2488 : numDistinctRows = estimate_num_groups(root, distinctExprs,
4955 : cheapest_input_path->rows,
4956 : NULL, NULL);
4957 : }
4958 :
4959 : /*
4960 : * Consider sort-based implementations of DISTINCT, if possible.
4961 : */
4962 2562 : if (grouping_is_sortable(root->processed_distinctClause))
4963 : {
4964 : /*
4965 : * Firstly, if we have any adequately-presorted paths, just stick a
4966 : * Unique node on those. We also, consider doing an explicit sort of
4967 : * the cheapest input path and Unique'ing that. If any paths have
4968 : * presorted keys then we'll create an incremental sort atop of those
4969 : * before adding a unique node on the top. We'll also attempt to
4970 : * reorder the required pathkeys to match the input path's pathkeys as
4971 : * much as possible, in hopes of avoiding a possible need to re-sort.
4972 : *
4973 : * When we have DISTINCT ON, we must sort by the more rigorous of
4974 : * DISTINCT and ORDER BY, else it won't have the desired behavior.
4975 : * Also, if we do have to do an explicit sort, we might as well use
4976 : * the more rigorous ordering to avoid a second sort later. (Note
4977 : * that the parser will have ensured that one clause is a prefix of
4978 : * the other.)
4979 : */
4980 : List *needed_pathkeys;
4981 : ListCell *lc;
4982 2556 : double limittuples = root->distinct_pathkeys == NIL ? 1.0 : -1.0;
4983 :
4984 2796 : if (parse->hasDistinctOn &&
4985 240 : list_length(root->distinct_pathkeys) <
4986 240 : list_length(root->sort_pathkeys))
4987 54 : needed_pathkeys = root->sort_pathkeys;
4988 : else
4989 2502 : needed_pathkeys = root->distinct_pathkeys;
4990 :
4991 6542 : foreach(lc, input_rel->pathlist)
4992 : {
4993 3986 : Path *input_path = (Path *) lfirst(lc);
4994 : Path *sorted_path;
4995 3986 : List *useful_pathkeys_list = NIL;
4996 :
4997 : useful_pathkeys_list =
4998 3986 : get_useful_pathkeys_for_distinct(root,
4999 : needed_pathkeys,
5000 : input_path->pathkeys);
5001 : Assert(list_length(useful_pathkeys_list) > 0);
5002 :
5003 12430 : foreach_node(List, useful_pathkeys, useful_pathkeys_list)
5004 : {
5005 4458 : sorted_path = make_ordered_path(root,
5006 : distinct_rel,
5007 : input_path,
5008 : cheapest_input_path,
5009 : useful_pathkeys,
5010 : limittuples);
5011 :
5012 4458 : if (sorted_path == NULL)
5013 530 : continue;
5014 :
5015 : /*
5016 : * distinct_pathkeys may have become empty if all of the
5017 : * pathkeys were determined to be redundant. If all of the
5018 : * pathkeys are redundant then each DISTINCT target must only
5019 : * allow a single value, therefore all resulting tuples must
5020 : * be identical (or at least indistinguishable by an equality
5021 : * check). We can uniquify these tuples simply by just taking
5022 : * the first tuple. All we do here is add a path to do "LIMIT
5023 : * 1" atop of 'sorted_path'. When doing a DISTINCT ON we may
5024 : * still have a non-NIL sort_pathkeys list, so we must still
5025 : * only do this with paths which are correctly sorted by
5026 : * sort_pathkeys.
5027 : */
5028 3928 : if (root->distinct_pathkeys == NIL)
5029 : {
5030 : Node *limitCount;
5031 :
5032 102 : limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
5033 : sizeof(int64),
5034 : Int64GetDatum(1), false,
5035 : FLOAT8PASSBYVAL);
5036 :
5037 : /*
5038 : * If the query already has a LIMIT clause, then we could
5039 : * end up with a duplicate LimitPath in the final plan.
5040 : * That does not seem worth troubling over too much.
5041 : */
5042 102 : add_path(distinct_rel, (Path *)
5043 102 : create_limit_path(root, distinct_rel, sorted_path,
5044 : NULL, limitCount,
5045 : LIMIT_OPTION_COUNT, 0, 1));
5046 : }
5047 : else
5048 : {
5049 3826 : add_path(distinct_rel, (Path *)
5050 3826 : create_upper_unique_path(root, distinct_rel,
5051 : sorted_path,
5052 3826 : list_length(root->distinct_pathkeys),
5053 : numDistinctRows));
5054 : }
5055 : }
5056 : }
5057 : }
5058 :
5059 : /*
5060 : * Consider hash-based implementations of DISTINCT, if possible.
5061 : *
5062 : * If we were not able to make any other types of path, we *must* hash or
5063 : * die trying. If we do have other choices, there are two things that
5064 : * should prevent selection of hashing: if the query uses DISTINCT ON
5065 : * (because it won't really have the expected behavior if we hash), or if
5066 : * enable_hashagg is off.
5067 : *
5068 : * Note: grouping_is_hashable() is much more expensive to check than the
5069 : * other gating conditions, so we want to do it last.
5070 : */
5071 2562 : if (distinct_rel->pathlist == NIL)
5072 6 : allow_hash = true; /* we have no alternatives */
5073 2556 : else if (parse->hasDistinctOn || !enable_hashagg)
5074 390 : allow_hash = false; /* policy-based decision not to hash */
5075 : else
5076 2166 : allow_hash = true; /* default */
5077 :
5078 2562 : if (allow_hash && grouping_is_hashable(root->processed_distinctClause))
5079 : {
5080 : /* Generate hashed aggregate path --- no sort needed */
5081 2172 : add_path(distinct_rel, (Path *)
5082 2172 : create_agg_path(root,
5083 : distinct_rel,
5084 : cheapest_input_path,
5085 : cheapest_input_path->pathtarget,
5086 : AGG_HASHED,
5087 : AGGSPLIT_SIMPLE,
5088 : root->processed_distinctClause,
5089 : NIL,
5090 : NULL,
5091 : numDistinctRows));
5092 : }
5093 :
5094 2562 : return distinct_rel;
5095 : }
5096 :
5097 : /*
5098 : * get_useful_pathkeys_for_distinct
5099 : * Get useful orderings of pathkeys for distinctClause by reordering
5100 : * 'needed_pathkeys' to match the given 'path_pathkeys' as much as possible.
5101 : *
5102 : * This returns a list of pathkeys that can be useful for DISTINCT or DISTINCT
5103 : * ON clause. For convenience, it always includes the given 'needed_pathkeys'.
5104 : */
5105 : static List *
5106 4112 : get_useful_pathkeys_for_distinct(PlannerInfo *root, List *needed_pathkeys,
5107 : List *path_pathkeys)
5108 : {
5109 4112 : List *useful_pathkeys_list = NIL;
5110 4112 : List *useful_pathkeys = NIL;
5111 :
5112 : /* always include the given 'needed_pathkeys' */
5113 4112 : useful_pathkeys_list = lappend(useful_pathkeys_list,
5114 : needed_pathkeys);
5115 :
5116 4112 : if (!enable_distinct_reordering)
5117 0 : return useful_pathkeys_list;
5118 :
5119 : /*
5120 : * Scan the given 'path_pathkeys' and construct a list of PathKey nodes
5121 : * that match 'needed_pathkeys', but only up to the longest matching
5122 : * prefix.
5123 : *
5124 : * When we have DISTINCT ON, we must ensure that the resulting pathkey
5125 : * list matches initial distinctClause pathkeys; otherwise, it won't have
5126 : * the desired behavior.
5127 : */
5128 10058 : foreach_node(PathKey, pathkey, path_pathkeys)
5129 : {
5130 : /*
5131 : * The PathKey nodes are canonical, so they can be checked for
5132 : * equality by simple pointer comparison.
5133 : */
5134 1862 : if (!list_member_ptr(needed_pathkeys, pathkey))
5135 10 : break;
5136 1852 : if (root->parse->hasDistinctOn &&
5137 204 : !list_member_ptr(root->distinct_pathkeys, pathkey))
5138 18 : break;
5139 :
5140 1834 : useful_pathkeys = lappend(useful_pathkeys, pathkey);
5141 : }
5142 :
5143 : /* If no match at all, no point in reordering needed_pathkeys */
5144 4112 : if (useful_pathkeys == NIL)
5145 2542 : return useful_pathkeys_list;
5146 :
5147 : /*
5148 : * If not full match, the resulting pathkey list is not useful without
5149 : * incremental sort.
5150 : */
5151 1570 : if (list_length(useful_pathkeys) < list_length(needed_pathkeys) &&
5152 866 : !enable_incremental_sort)
5153 60 : return useful_pathkeys_list;
5154 :
5155 : /* Append the remaining PathKey nodes in needed_pathkeys */
5156 1510 : useful_pathkeys = list_concat_unique_ptr(useful_pathkeys,
5157 : needed_pathkeys);
5158 :
5159 : /*
5160 : * If the resulting pathkey list is the same as the 'needed_pathkeys',
5161 : * just drop it.
5162 : */
5163 1510 : if (compare_pathkeys(needed_pathkeys,
5164 : useful_pathkeys) == PATHKEYS_EQUAL)
5165 1026 : return useful_pathkeys_list;
5166 :
5167 484 : useful_pathkeys_list = lappend(useful_pathkeys_list,
5168 : useful_pathkeys);
5169 :
5170 484 : return useful_pathkeys_list;
5171 : }
5172 :
5173 : /*
5174 : * create_ordered_paths
5175 : *
5176 : * Build a new upperrel containing Paths for ORDER BY evaluation.
5177 : *
5178 : * All paths in the result must satisfy the ORDER BY ordering.
5179 : * The only new paths we need consider are an explicit full sort
5180 : * and incremental sort on the cheapest-total existing path.
5181 : *
5182 : * input_rel: contains the source-data Paths
5183 : * target: the output tlist the result Paths must emit
5184 : * limit_tuples: estimated bound on the number of output tuples,
5185 : * or -1 if no LIMIT or couldn't estimate
5186 : *
5187 : * XXX This only looks at sort_pathkeys. I wonder if it needs to look at the
5188 : * other pathkeys (grouping, ...) like generate_useful_gather_paths.
5189 : */
5190 : static RelOptInfo *
5191 77466 : create_ordered_paths(PlannerInfo *root,
5192 : RelOptInfo *input_rel,
5193 : PathTarget *target,
5194 : bool target_parallel_safe,
5195 : double limit_tuples)
5196 : {
5197 77466 : Path *cheapest_input_path = input_rel->cheapest_total_path;
5198 : RelOptInfo *ordered_rel;
5199 : ListCell *lc;
5200 :
5201 : /* For now, do all work in the (ORDERED, NULL) upperrel */
5202 77466 : ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
5203 :
5204 : /*
5205 : * If the input relation is not parallel-safe, then the ordered relation
5206 : * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
5207 : * target list is parallel-safe.
5208 : */
5209 77466 : if (input_rel->consider_parallel && target_parallel_safe)
5210 55772 : ordered_rel->consider_parallel = true;
5211 :
5212 : /*
5213 : * If the input rel belongs to a single FDW, so does the ordered_rel.
5214 : */
5215 77466 : ordered_rel->serverid = input_rel->serverid;
5216 77466 : ordered_rel->userid = input_rel->userid;
5217 77466 : ordered_rel->useridiscurrent = input_rel->useridiscurrent;
5218 77466 : ordered_rel->fdwroutine = input_rel->fdwroutine;
5219 :
5220 187686 : foreach(lc, input_rel->pathlist)
5221 : {
5222 110220 : Path *input_path = (Path *) lfirst(lc);
5223 : Path *sorted_path;
5224 : bool is_sorted;
5225 : int presorted_keys;
5226 :
5227 110220 : is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
5228 : input_path->pathkeys, &presorted_keys);
5229 :
5230 110220 : if (is_sorted)
5231 35504 : sorted_path = input_path;
5232 : else
5233 : {
5234 : /*
5235 : * Try at least sorting the cheapest path and also try
5236 : * incrementally sorting any path which is partially sorted
5237 : * already (no need to deal with paths which have presorted keys
5238 : * when incremental sort is disabled unless it's the cheapest
5239 : * input path).
5240 : */
5241 74716 : if (input_path != cheapest_input_path &&
5242 5500 : (presorted_keys == 0 || !enable_incremental_sort))
5243 1742 : continue;
5244 :
5245 : /*
5246 : * We've no need to consider both a sort and incremental sort.
5247 : * We'll just do a sort if there are no presorted keys and an
5248 : * incremental sort when there are presorted keys.
5249 : */
5250 72974 : if (presorted_keys == 0 || !enable_incremental_sort)
5251 68790 : sorted_path = (Path *) create_sort_path(root,
5252 : ordered_rel,
5253 : input_path,
5254 : root->sort_pathkeys,
5255 : limit_tuples);
5256 : else
5257 4184 : sorted_path = (Path *) create_incremental_sort_path(root,
5258 : ordered_rel,
5259 : input_path,
5260 : root->sort_pathkeys,
5261 : presorted_keys,
5262 : limit_tuples);
5263 : }
5264 :
5265 : /*
5266 : * If the pathtarget of the result path has different expressions from
5267 : * the target to be applied, a projection step is needed.
5268 : */
5269 108478 : if (!equal(sorted_path->pathtarget->exprs, target->exprs))
5270 306 : sorted_path = apply_projection_to_path(root, ordered_rel,
5271 : sorted_path, target);
5272 :
5273 108478 : add_path(ordered_rel, sorted_path);
5274 : }
5275 :
5276 : /*
5277 : * generate_gather_paths() will have already generated a simple Gather
5278 : * path for the best parallel path, if any, and the loop above will have
5279 : * considered sorting it. Similarly, generate_gather_paths() will also
5280 : * have generated order-preserving Gather Merge plans which can be used
5281 : * without sorting if they happen to match the sort_pathkeys, and the loop
5282 : * above will have handled those as well. However, there's one more
5283 : * possibility: it may make sense to sort the cheapest partial path or
5284 : * incrementally sort any partial path that is partially sorted according
5285 : * to the required output order and then use Gather Merge.
5286 : */
5287 77466 : if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
5288 55634 : input_rel->partial_pathlist != NIL)
5289 : {
5290 : Path *cheapest_partial_path;
5291 :
5292 2204 : cheapest_partial_path = linitial(input_rel->partial_pathlist);
5293 :
5294 4614 : foreach(lc, input_rel->partial_pathlist)
5295 : {
5296 2410 : Path *input_path = (Path *) lfirst(lc);
5297 : Path *sorted_path;
5298 : bool is_sorted;
5299 : int presorted_keys;
5300 : double total_groups;
5301 :
5302 2410 : is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
5303 : input_path->pathkeys,
5304 : &presorted_keys);
5305 :
5306 2410 : if (is_sorted)
5307 182 : continue;
5308 :
5309 : /*
5310 : * Try at least sorting the cheapest path and also try
5311 : * incrementally sorting any path which is partially sorted
5312 : * already (no need to deal with paths which have presorted keys
5313 : * when incremental sort is disabled unless it's the cheapest
5314 : * partial path).
5315 : */
5316 2228 : if (input_path != cheapest_partial_path &&
5317 42 : (presorted_keys == 0 || !enable_incremental_sort))
5318 0 : continue;
5319 :
5320 : /*
5321 : * We've no need to consider both a sort and incremental sort.
5322 : * We'll just do a sort if there are no presorted keys and an
5323 : * incremental sort when there are presorted keys.
5324 : */
5325 2228 : if (presorted_keys == 0 || !enable_incremental_sort)
5326 2168 : sorted_path = (Path *) create_sort_path(root,
5327 : ordered_rel,
5328 : input_path,
5329 : root->sort_pathkeys,
5330 : limit_tuples);
5331 : else
5332 60 : sorted_path = (Path *) create_incremental_sort_path(root,
5333 : ordered_rel,
5334 : input_path,
5335 : root->sort_pathkeys,
5336 : presorted_keys,
5337 : limit_tuples);
5338 2228 : total_groups = compute_gather_rows(sorted_path);
5339 : sorted_path = (Path *)
5340 2228 : create_gather_merge_path(root, ordered_rel,
5341 : sorted_path,
5342 : sorted_path->pathtarget,
5343 : root->sort_pathkeys, NULL,
5344 : &total_groups);
5345 :
5346 : /*
5347 : * If the pathtarget of the result path has different expressions
5348 : * from the target to be applied, a projection step is needed.
5349 : */
5350 2228 : if (!equal(sorted_path->pathtarget->exprs, target->exprs))
5351 6 : sorted_path = apply_projection_to_path(root, ordered_rel,
5352 : sorted_path, target);
5353 :
5354 2228 : add_path(ordered_rel, sorted_path);
5355 : }
5356 : }
5357 :
5358 : /*
5359 : * If there is an FDW that's responsible for all baserels of the query,
5360 : * let it consider adding ForeignPaths.
5361 : */
5362 77466 : if (ordered_rel->fdwroutine &&
5363 380 : ordered_rel->fdwroutine->GetForeignUpperPaths)
5364 366 : ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
5365 : input_rel, ordered_rel,
5366 : NULL);
5367 :
5368 : /* Let extensions possibly add some more paths */
5369 77466 : if (create_upper_paths_hook)
5370 0 : (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
5371 : input_rel, ordered_rel, NULL);
5372 :
5373 : /*
5374 : * No need to bother with set_cheapest here; grouping_planner does not
5375 : * need us to do it.
5376 : */
5377 : Assert(ordered_rel->pathlist != NIL);
5378 :
5379 77466 : return ordered_rel;
5380 : }
5381 :
5382 :
5383 : /*
5384 : * make_group_input_target
5385 : * Generate appropriate PathTarget for initial input to grouping nodes.
5386 : *
5387 : * If there is grouping or aggregation, the scan/join subplan cannot emit
5388 : * the query's final targetlist; for example, it certainly can't emit any
5389 : * aggregate function calls. This routine generates the correct target
5390 : * for the scan/join subplan.
5391 : *
5392 : * The query target list passed from the parser already contains entries
5393 : * for all ORDER BY and GROUP BY expressions, but it will not have entries
5394 : * for variables used only in HAVING clauses; so we need to add those
5395 : * variables to the subplan target list. Also, we flatten all expressions
5396 : * except GROUP BY items into their component variables; other expressions
5397 : * will be computed by the upper plan nodes rather than by the subplan.
5398 : * For example, given a query like
5399 : * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
5400 : * we want to pass this targetlist to the subplan:
5401 : * a+b,c,d
5402 : * where the a+b target will be used by the Sort/Group steps, and the
5403 : * other targets will be used for computing the final results.
5404 : *
5405 : * 'final_target' is the query's final target list (in PathTarget form)
5406 : *
5407 : * The result is the PathTarget to be computed by the Paths returned from
5408 : * query_planner().
5409 : */
5410 : static PathTarget *
5411 37596 : make_group_input_target(PlannerInfo *root, PathTarget *final_target)
5412 : {
5413 37596 : Query *parse = root->parse;
5414 : PathTarget *input_target;
5415 : List *non_group_cols;
5416 : List *non_group_vars;
5417 : int i;
5418 : ListCell *lc;
5419 :
5420 : /*
5421 : * We must build a target containing all grouping columns, plus any other
5422 : * Vars mentioned in the query's targetlist and HAVING qual.
5423 : */
5424 37596 : input_target = create_empty_pathtarget();
5425 37596 : non_group_cols = NIL;
5426 :
5427 37596 : i = 0;
5428 91860 : foreach(lc, final_target->exprs)
5429 : {
5430 54264 : Expr *expr = (Expr *) lfirst(lc);
5431 54264 : Index sgref = get_pathtarget_sortgroupref(final_target, i);
5432 :
5433 62914 : if (sgref && root->processed_groupClause &&
5434 8650 : get_sortgroupref_clause_noerr(sgref,
5435 : root->processed_groupClause) != NULL)
5436 : {
5437 : /*
5438 : * It's a grouping column, so add it to the input target as-is.
5439 : *
5440 : * Note that the target is logically below the grouping step. So
5441 : * with grouping sets we need to remove the RT index of the
5442 : * grouping step if there is any from the target expression.
5443 : */
5444 6914 : if (parse->hasGroupRTE && parse->groupingSets != NIL)
5445 : {
5446 : Assert(root->group_rtindex > 0);
5447 : expr = (Expr *)
5448 1752 : remove_nulling_relids((Node *) expr,
5449 1752 : bms_make_singleton(root->group_rtindex),
5450 : NULL);
5451 : }
5452 6914 : add_column_to_pathtarget(input_target, expr, sgref);
5453 : }
5454 : else
5455 : {
5456 : /*
5457 : * Non-grouping column, so just remember the expression for later
5458 : * call to pull_var_clause.
5459 : */
5460 47350 : non_group_cols = lappend(non_group_cols, expr);
5461 : }
5462 :
5463 54264 : i++;
5464 : }
5465 :
5466 : /*
5467 : * If there's a HAVING clause, we'll need the Vars it uses, too.
5468 : */
5469 37596 : if (parse->havingQual)
5470 938 : non_group_cols = lappend(non_group_cols, parse->havingQual);
5471 :
5472 : /*
5473 : * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
5474 : * add them to the input target if not already present. (A Var used
5475 : * directly as a GROUP BY item will be present already.) Note this
5476 : * includes Vars used in resjunk items, so we are covering the needs of
5477 : * ORDER BY and window specifications. Vars used within Aggrefs and
5478 : * WindowFuncs will be pulled out here, too.
5479 : *
5480 : * Note that the target is logically below the grouping step. So with
5481 : * grouping sets we need to remove the RT index of the grouping step if
5482 : * there is any from the non-group Vars.
5483 : */
5484 37596 : non_group_vars = pull_var_clause((Node *) non_group_cols,
5485 : PVC_RECURSE_AGGREGATES |
5486 : PVC_RECURSE_WINDOWFUNCS |
5487 : PVC_INCLUDE_PLACEHOLDERS);
5488 37596 : if (parse->hasGroupRTE && parse->groupingSets != NIL)
5489 : {
5490 : Assert(root->group_rtindex > 0);
5491 : non_group_vars = (List *)
5492 806 : remove_nulling_relids((Node *) non_group_vars,
5493 806 : bms_make_singleton(root->group_rtindex),
5494 : NULL);
5495 : }
5496 37596 : add_new_columns_to_pathtarget(input_target, non_group_vars);
5497 :
5498 : /* clean up cruft */
5499 37596 : list_free(non_group_vars);
5500 37596 : list_free(non_group_cols);
5501 :
5502 : /* XXX this causes some redundant cost calculation ... */
5503 37596 : return set_pathtarget_cost_width(root, input_target);
5504 : }
5505 :
5506 : /*
5507 : * make_partial_grouping_target
5508 : * Generate appropriate PathTarget for output of partial aggregate
5509 : * (or partial grouping, if there are no aggregates) nodes.
5510 : *
5511 : * A partial aggregation node needs to emit all the same aggregates that
5512 : * a regular aggregation node would, plus any aggregates used in HAVING;
5513 : * except that the Aggref nodes should be marked as partial aggregates.
5514 : *
5515 : * In addition, we'd better emit any Vars and PlaceHolderVars that are
5516 : * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
5517 : * these would be Vars that are grouped by or used in grouping expressions.)
5518 : *
5519 : * grouping_target is the tlist to be emitted by the topmost aggregation step.
5520 : * havingQual represents the HAVING clause.
5521 : */
5522 : static PathTarget *
5523 2198 : make_partial_grouping_target(PlannerInfo *root,
5524 : PathTarget *grouping_target,
5525 : Node *havingQual)
5526 : {
5527 : PathTarget *partial_target;
5528 : List *non_group_cols;
5529 : List *non_group_exprs;
5530 : int i;
5531 : ListCell *lc;
5532 :
5533 2198 : partial_target = create_empty_pathtarget();
5534 2198 : non_group_cols = NIL;
5535 :
5536 2198 : i = 0;
5537 7814 : foreach(lc, grouping_target->exprs)
5538 : {
5539 5616 : Expr *expr = (Expr *) lfirst(lc);
5540 5616 : Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
5541 :
5542 9436 : if (sgref && root->processed_groupClause &&
5543 3820 : get_sortgroupref_clause_noerr(sgref,
5544 : root->processed_groupClause) != NULL)
5545 : {
5546 : /*
5547 : * It's a grouping column, so add it to the partial_target as-is.
5548 : * (This allows the upper agg step to repeat the grouping calcs.)
5549 : */
5550 1906 : add_column_to_pathtarget(partial_target, expr, sgref);
5551 : }
5552 : else
5553 : {
5554 : /*
5555 : * Non-grouping column, so just remember the expression for later
5556 : * call to pull_var_clause.
5557 : */
5558 3710 : non_group_cols = lappend(non_group_cols, expr);
5559 : }
5560 :
5561 5616 : i++;
5562 : }
5563 :
5564 : /*
5565 : * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
5566 : */
5567 2198 : if (havingQual)
5568 824 : non_group_cols = lappend(non_group_cols, havingQual);
5569 :
5570 : /*
5571 : * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
5572 : * non-group cols (plus HAVING), and add them to the partial_target if not
5573 : * already present. (An expression used directly as a GROUP BY item will
5574 : * be present already.) Note this includes Vars used in resjunk items, so
5575 : * we are covering the needs of ORDER BY and window specifications.
5576 : */
5577 2198 : non_group_exprs = pull_var_clause((Node *) non_group_cols,
5578 : PVC_INCLUDE_AGGREGATES |
5579 : PVC_RECURSE_WINDOWFUNCS |
5580 : PVC_INCLUDE_PLACEHOLDERS);
5581 :
5582 2198 : add_new_columns_to_pathtarget(partial_target, non_group_exprs);
5583 :
5584 : /*
5585 : * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
5586 : * are at the top level of the target list, so we can just scan the list
5587 : * rather than recursing through the expression trees.
5588 : */
5589 8386 : foreach(lc, partial_target->exprs)
5590 : {
5591 6188 : Aggref *aggref = (Aggref *) lfirst(lc);
5592 :
5593 6188 : if (IsA(aggref, Aggref))
5594 : {
5595 : Aggref *newaggref;
5596 :
5597 : /*
5598 : * We shouldn't need to copy the substructure of the Aggref node,
5599 : * but flat-copy the node itself to avoid damaging other trees.
5600 : */
5601 4252 : newaggref = makeNode(Aggref);
5602 4252 : memcpy(newaggref, aggref, sizeof(Aggref));
5603 :
5604 : /* For now, assume serialization is required */
5605 4252 : mark_partial_aggref(newaggref, AGGSPLIT_INITIAL_SERIAL);
5606 :
5607 4252 : lfirst(lc) = newaggref;
5608 : }
5609 : }
5610 :
5611 : /* clean up cruft */
5612 2198 : list_free(non_group_exprs);
5613 2198 : list_free(non_group_cols);
5614 :
5615 : /* XXX this causes some redundant cost calculation ... */
5616 2198 : return set_pathtarget_cost_width(root, partial_target);
5617 : }
5618 :
5619 : /*
5620 : * mark_partial_aggref
5621 : * Adjust an Aggref to make it represent a partial-aggregation step.
5622 : *
5623 : * The Aggref node is modified in-place; caller must do any copying required.
5624 : */
5625 : void
5626 7072 : mark_partial_aggref(Aggref *agg, AggSplit aggsplit)
5627 : {
5628 : /* aggtranstype should be computed by this point */
5629 : Assert(OidIsValid(agg->aggtranstype));
5630 : /* ... but aggsplit should still be as the parser left it */
5631 : Assert(agg->aggsplit == AGGSPLIT_SIMPLE);
5632 :
5633 : /* Mark the Aggref with the intended partial-aggregation mode */
5634 7072 : agg->aggsplit = aggsplit;
5635 :
5636 : /*
5637 : * Adjust result type if needed. Normally, a partial aggregate returns
5638 : * the aggregate's transition type; but if that's INTERNAL and we're
5639 : * serializing, it returns BYTEA instead.
5640 : */
5641 7072 : if (DO_AGGSPLIT_SKIPFINAL(aggsplit))
5642 : {
5643 5662 : if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit))
5644 242 : agg->aggtype = BYTEAOID;
5645 : else
5646 5420 : agg->aggtype = agg->aggtranstype;
5647 : }
5648 7072 : }
5649 :
5650 : /*
5651 : * postprocess_setop_tlist
5652 : * Fix up targetlist returned by plan_set_operations().
5653 : *
5654 : * We need to transpose sort key info from the orig_tlist into new_tlist.
5655 : * NOTE: this would not be good enough if we supported resjunk sort keys
5656 : * for results of set operations --- then, we'd need to project a whole
5657 : * new tlist to evaluate the resjunk columns. For now, just ereport if we
5658 : * find any resjunk columns in orig_tlist.
5659 : */
5660 : static List *
5661 5736 : postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
5662 : {
5663 : ListCell *l;
5664 5736 : ListCell *orig_tlist_item = list_head(orig_tlist);
5665 :
5666 23038 : foreach(l, new_tlist)
5667 : {
5668 17302 : TargetEntry *new_tle = lfirst_node(TargetEntry, l);
5669 : TargetEntry *orig_tle;
5670 :
5671 : /* ignore resjunk columns in setop result */
5672 17302 : if (new_tle->resjunk)
5673 0 : continue;
5674 :
5675 : Assert(orig_tlist_item != NULL);
5676 17302 : orig_tle = lfirst_node(TargetEntry, orig_tlist_item);
5677 17302 : orig_tlist_item = lnext(orig_tlist, orig_tlist_item);
5678 17302 : if (orig_tle->resjunk) /* should not happen */
5679 0 : elog(ERROR, "resjunk output columns are not implemented");
5680 : Assert(new_tle->resno == orig_tle->resno);
5681 17302 : new_tle->ressortgroupref = orig_tle->ressortgroupref;
5682 : }
5683 5736 : if (orig_tlist_item != NULL)
5684 0 : elog(ERROR, "resjunk output columns are not implemented");
5685 5736 : return new_tlist;
5686 : }
5687 :
5688 : /*
5689 : * optimize_window_clauses
5690 : * Call each WindowFunc's prosupport function to see if we're able to
5691 : * make any adjustments to any of the WindowClause's so that the executor
5692 : * can execute the window functions in a more optimal way.
5693 : *
5694 : * Currently we only allow adjustments to the WindowClause's frameOptions. We
5695 : * may allow more things to be done here in the future.
5696 : */
5697 : static void
5698 2336 : optimize_window_clauses(PlannerInfo *root, WindowFuncLists *wflists)
5699 : {
5700 2336 : List *windowClause = root->parse->windowClause;
5701 : ListCell *lc;
5702 :
5703 4882 : foreach(lc, windowClause)
5704 : {
5705 2546 : WindowClause *wc = lfirst_node(WindowClause, lc);
5706 : ListCell *lc2;
5707 2546 : int optimizedFrameOptions = 0;
5708 :
5709 : Assert(wc->winref <= wflists->maxWinRef);
5710 :
5711 : /* skip any WindowClauses that have no WindowFuncs */
5712 2546 : if (wflists->windowFuncs[wc->winref] == NIL)
5713 24 : continue;
5714 :
5715 3062 : foreach(lc2, wflists->windowFuncs[wc->winref])
5716 : {
5717 : SupportRequestOptimizeWindowClause req;
5718 : SupportRequestOptimizeWindowClause *res;
5719 2564 : WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
5720 : Oid prosupport;
5721 :
5722 2564 : prosupport = get_func_support(wfunc->winfnoid);
5723 :
5724 : /* Check if there's a support function for 'wfunc' */
5725 2564 : if (!OidIsValid(prosupport))
5726 2024 : break; /* can't optimize this WindowClause */
5727 :
5728 760 : req.type = T_SupportRequestOptimizeWindowClause;
5729 760 : req.window_clause = wc;
5730 760 : req.window_func = wfunc;
5731 760 : req.frameOptions = wc->frameOptions;
5732 :
5733 : /* call the support function */
5734 : res = (SupportRequestOptimizeWindowClause *)
5735 760 : DatumGetPointer(OidFunctionCall1(prosupport,
5736 : PointerGetDatum(&req)));
5737 :
5738 : /*
5739 : * Skip to next WindowClause if the support function does not
5740 : * support this request type.
5741 : */
5742 760 : if (res == NULL)
5743 220 : break;
5744 :
5745 : /*
5746 : * Save these frameOptions for the first WindowFunc for this
5747 : * WindowClause.
5748 : */
5749 540 : if (foreach_current_index(lc2) == 0)
5750 516 : optimizedFrameOptions = res->frameOptions;
5751 :
5752 : /*
5753 : * On subsequent WindowFuncs, if the frameOptions are not the same
5754 : * then we're unable to optimize the frameOptions for this
5755 : * WindowClause.
5756 : */
5757 24 : else if (optimizedFrameOptions != res->frameOptions)
5758 0 : break; /* skip to the next WindowClause, if any */
5759 : }
5760 :
5761 : /* adjust the frameOptions if all WindowFunc's agree that it's ok */
5762 2522 : if (lc2 == NULL && wc->frameOptions != optimizedFrameOptions)
5763 : {
5764 : ListCell *lc3;
5765 :
5766 : /* apply the new frame options */
5767 498 : wc->frameOptions = optimizedFrameOptions;
5768 :
5769 : /*
5770 : * We now check to see if changing the frameOptions has caused
5771 : * this WindowClause to be a duplicate of some other WindowClause.
5772 : * This can only happen if we have multiple WindowClauses, so
5773 : * don't bother if there's only 1.
5774 : */
5775 498 : if (list_length(windowClause) == 1)
5776 408 : continue;
5777 :
5778 : /*
5779 : * Do the duplicate check and reuse the existing WindowClause if
5780 : * we find a duplicate.
5781 : */
5782 228 : foreach(lc3, windowClause)
5783 : {
5784 174 : WindowClause *existing_wc = lfirst_node(WindowClause, lc3);
5785 :
5786 : /* skip over the WindowClause we're currently editing */
5787 174 : if (existing_wc == wc)
5788 54 : continue;
5789 :
5790 : /*
5791 : * Perform the same duplicate check that is done in
5792 : * transformWindowFuncCall.
5793 : */
5794 240 : if (equal(wc->partitionClause, existing_wc->partitionClause) &&
5795 120 : equal(wc->orderClause, existing_wc->orderClause) &&
5796 120 : wc->frameOptions == existing_wc->frameOptions &&
5797 72 : equal(wc->startOffset, existing_wc->startOffset) &&
5798 36 : equal(wc->endOffset, existing_wc->endOffset))
5799 : {
5800 : ListCell *lc4;
5801 :
5802 : /*
5803 : * Now move each WindowFunc in 'wc' into 'existing_wc'.
5804 : * This required adjusting each WindowFunc's winref and
5805 : * moving the WindowFuncs in 'wc' to the list of
5806 : * WindowFuncs in 'existing_wc'.
5807 : */
5808 78 : foreach(lc4, wflists->windowFuncs[wc->winref])
5809 : {
5810 42 : WindowFunc *wfunc = lfirst_node(WindowFunc, lc4);
5811 :
5812 42 : wfunc->winref = existing_wc->winref;
5813 : }
5814 :
5815 : /* move list items */
5816 72 : wflists->windowFuncs[existing_wc->winref] = list_concat(wflists->windowFuncs[existing_wc->winref],
5817 36 : wflists->windowFuncs[wc->winref]);
5818 36 : wflists->windowFuncs[wc->winref] = NIL;
5819 :
5820 : /*
5821 : * transformWindowFuncCall() should have made sure there
5822 : * are no other duplicates, so we needn't bother looking
5823 : * any further.
5824 : */
5825 36 : break;
5826 : }
5827 : }
5828 : }
5829 : }
5830 2336 : }
5831 :
5832 : /*
5833 : * select_active_windows
5834 : * Create a list of the "active" window clauses (ie, those referenced
5835 : * by non-deleted WindowFuncs) in the order they are to be executed.
5836 : */
5837 : static List *
5838 2336 : select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
5839 : {
5840 2336 : List *windowClause = root->parse->windowClause;
5841 2336 : List *result = NIL;
5842 : ListCell *lc;
5843 2336 : int nActive = 0;
5844 2336 : WindowClauseSortData *actives = palloc(sizeof(WindowClauseSortData)
5845 2336 : * list_length(windowClause));
5846 :
5847 : /* First, construct an array of the active windows */
5848 4882 : foreach(lc, windowClause)
5849 : {
5850 2546 : WindowClause *wc = lfirst_node(WindowClause, lc);
5851 :
5852 : /* It's only active if wflists shows some related WindowFuncs */
5853 : Assert(wc->winref <= wflists->maxWinRef);
5854 2546 : if (wflists->windowFuncs[wc->winref] == NIL)
5855 60 : continue;
5856 :
5857 2486 : actives[nActive].wc = wc; /* original clause */
5858 :
5859 : /*
5860 : * For sorting, we want the list of partition keys followed by the
5861 : * list of sort keys. But pathkeys construction will remove duplicates
5862 : * between the two, so we can as well (even though we can't detect all
5863 : * of the duplicates, since some may come from ECs - that might mean
5864 : * we miss optimization chances here). We must, however, ensure that
5865 : * the order of entries is preserved with respect to the ones we do
5866 : * keep.
5867 : *
5868 : * partitionClause and orderClause had their own duplicates removed in
5869 : * parse analysis, so we're only concerned here with removing
5870 : * orderClause entries that also appear in partitionClause.
5871 : */
5872 4972 : actives[nActive].uniqueOrder =
5873 2486 : list_concat_unique(list_copy(wc->partitionClause),
5874 2486 : wc->orderClause);
5875 2486 : nActive++;
5876 : }
5877 :
5878 : /*
5879 : * Sort active windows by their partitioning/ordering clauses, ignoring
5880 : * any framing clauses, so that the windows that need the same sorting are
5881 : * adjacent in the list. When we come to generate paths, this will avoid
5882 : * inserting additional Sort nodes.
5883 : *
5884 : * This is how we implement a specific requirement from the SQL standard,
5885 : * which says that when two or more windows are order-equivalent (i.e.
5886 : * have matching partition and order clauses, even if their names or
5887 : * framing clauses differ), then all peer rows must be presented in the
5888 : * same order in all of them. If we allowed multiple sort nodes for such
5889 : * cases, we'd risk having the peer rows end up in different orders in
5890 : * equivalent windows due to sort instability. (See General Rule 4 of
5891 : * <window clause> in SQL2008 - SQL2016.)
5892 : *
5893 : * Additionally, if the entire list of clauses of one window is a prefix
5894 : * of another, put first the window with stronger sorting requirements.
5895 : * This way we will first sort for stronger window, and won't have to sort
5896 : * again for the weaker one.
5897 : */
5898 2336 : qsort(actives, nActive, sizeof(WindowClauseSortData), common_prefix_cmp);
5899 :
5900 : /* build ordered list of the original WindowClause nodes */
5901 4822 : for (int i = 0; i < nActive; i++)
5902 2486 : result = lappend(result, actives[i].wc);
5903 :
5904 2336 : pfree(actives);
5905 :
5906 2336 : return result;
5907 : }
5908 :
5909 : /*
5910 : * common_prefix_cmp
5911 : * QSort comparison function for WindowClauseSortData
5912 : *
5913 : * Sort the windows by the required sorting clauses. First, compare the sort
5914 : * clauses themselves. Second, if one window's clauses are a prefix of another
5915 : * one's clauses, put the window with more sort clauses first.
5916 : *
5917 : * We purposefully sort by the highest tleSortGroupRef first. Since
5918 : * tleSortGroupRefs are assigned for the query's DISTINCT and ORDER BY first
5919 : * and because here we sort the lowest tleSortGroupRefs last, if a
5920 : * WindowClause is sharing a tleSortGroupRef with the query's DISTINCT or
5921 : * ORDER BY clause, this makes it more likely that the final WindowAgg will
5922 : * provide presorted input for the query's DISTINCT or ORDER BY clause, thus
5923 : * reducing the total number of sorts required for the query.
5924 : */
5925 : static int
5926 162 : common_prefix_cmp(const void *a, const void *b)
5927 : {
5928 162 : const WindowClauseSortData *wcsa = a;
5929 162 : const WindowClauseSortData *wcsb = b;
5930 : ListCell *item_a;
5931 : ListCell *item_b;
5932 :
5933 276 : forboth(item_a, wcsa->uniqueOrder, item_b, wcsb->uniqueOrder)
5934 : {
5935 216 : SortGroupClause *sca = lfirst_node(SortGroupClause, item_a);
5936 216 : SortGroupClause *scb = lfirst_node(SortGroupClause, item_b);
5937 :
5938 216 : if (sca->tleSortGroupRef > scb->tleSortGroupRef)
5939 102 : return -1;
5940 204 : else if (sca->tleSortGroupRef < scb->tleSortGroupRef)
5941 66 : return 1;
5942 138 : else if (sca->sortop > scb->sortop)
5943 0 : return -1;
5944 138 : else if (sca->sortop < scb->sortop)
5945 24 : return 1;
5946 114 : else if (sca->nulls_first && !scb->nulls_first)
5947 0 : return -1;
5948 114 : else if (!sca->nulls_first && scb->nulls_first)
5949 0 : return 1;
5950 : /* no need to compare eqop, since it is fully determined by sortop */
5951 : }
5952 :
5953 60 : if (list_length(wcsa->uniqueOrder) > list_length(wcsb->uniqueOrder))
5954 6 : return -1;
5955 54 : else if (list_length(wcsa->uniqueOrder) < list_length(wcsb->uniqueOrder))
5956 12 : return 1;
5957 :
5958 42 : return 0;
5959 : }
5960 :
5961 : /*
5962 : * make_window_input_target
5963 : * Generate appropriate PathTarget for initial input to WindowAgg nodes.
5964 : *
5965 : * When the query has window functions, this function computes the desired
5966 : * target to be computed by the node just below the first WindowAgg.
5967 : * This tlist must contain all values needed to evaluate the window functions,
5968 : * compute the final target list, and perform any required final sort step.
5969 : * If multiple WindowAggs are needed, each intermediate one adds its window
5970 : * function results onto this base tlist; only the topmost WindowAgg computes
5971 : * the actual desired target list.
5972 : *
5973 : * This function is much like make_group_input_target, though not quite enough
5974 : * like it to share code. As in that function, we flatten most expressions
5975 : * into their component variables. But we do not want to flatten window
5976 : * PARTITION BY/ORDER BY clauses, since that might result in multiple
5977 : * evaluations of them, which would be bad (possibly even resulting in
5978 : * inconsistent answers, if they contain volatile functions).
5979 : * Also, we must not flatten GROUP BY clauses that were left unflattened by
5980 : * make_group_input_target, because we may no longer have access to the
5981 : * individual Vars in them.
5982 : *
5983 : * Another key difference from make_group_input_target is that we don't
5984 : * flatten Aggref expressions, since those are to be computed below the
5985 : * window functions and just referenced like Vars above that.
5986 : *
5987 : * 'final_target' is the query's final target list (in PathTarget form)
5988 : * 'activeWindows' is the list of active windows previously identified by
5989 : * select_active_windows.
5990 : *
5991 : * The result is the PathTarget to be computed by the plan node immediately
5992 : * below the first WindowAgg node.
5993 : */
5994 : static PathTarget *
5995 2336 : make_window_input_target(PlannerInfo *root,
5996 : PathTarget *final_target,
5997 : List *activeWindows)
5998 : {
5999 : PathTarget *input_target;
6000 : Bitmapset *sgrefs;
6001 : List *flattenable_cols;
6002 : List *flattenable_vars;
6003 : int i;
6004 : ListCell *lc;
6005 :
6006 : Assert(root->parse->hasWindowFuncs);
6007 :
6008 : /*
6009 : * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
6010 : * into a bitmapset for convenient reference below.
6011 : */
6012 2336 : sgrefs = NULL;
6013 4822 : foreach(lc, activeWindows)
6014 : {
6015 2486 : WindowClause *wc = lfirst_node(WindowClause, lc);
6016 : ListCell *lc2;
6017 :
6018 3198 : foreach(lc2, wc->partitionClause)
6019 : {
6020 712 : SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2);
6021 :
6022 712 : sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
6023 : }
6024 4630 : foreach(lc2, wc->orderClause)
6025 : {
6026 2144 : SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2);
6027 :
6028 2144 : sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
6029 : }
6030 : }
6031 :
6032 : /* Add in sortgroupref numbers of GROUP BY clauses, too */
6033 2522 : foreach(lc, root->processed_groupClause)
6034 : {
6035 186 : SortGroupClause *grpcl = lfirst_node(SortGroupClause, lc);
6036 :
6037 186 : sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef);
6038 : }
6039 :
6040 : /*
6041 : * Construct a target containing all the non-flattenable targetlist items,
6042 : * and save aside the others for a moment.
6043 : */
6044 2336 : input_target = create_empty_pathtarget();
6045 2336 : flattenable_cols = NIL;
6046 :
6047 2336 : i = 0;
6048 10114 : foreach(lc, final_target->exprs)
6049 : {
6050 7778 : Expr *expr = (Expr *) lfirst(lc);
6051 7778 : Index sgref = get_pathtarget_sortgroupref(final_target, i);
6052 :
6053 : /*
6054 : * Don't want to deconstruct window clauses or GROUP BY items. (Note
6055 : * that such items can't contain window functions, so it's okay to
6056 : * compute them below the WindowAgg nodes.)
6057 : */
6058 7778 : if (sgref != 0 && bms_is_member(sgref, sgrefs))
6059 : {
6060 : /*
6061 : * Don't want to deconstruct this value, so add it to the input
6062 : * target as-is.
6063 : */
6064 2746 : add_column_to_pathtarget(input_target, expr, sgref);
6065 : }
6066 : else
6067 : {
6068 : /*
6069 : * Column is to be flattened, so just remember the expression for
6070 : * later call to pull_var_clause.
6071 : */
6072 5032 : flattenable_cols = lappend(flattenable_cols, expr);
6073 : }
6074 :
6075 7778 : i++;
6076 : }
6077 :
6078 : /*
6079 : * Pull out all the Vars and Aggrefs mentioned in flattenable columns, and
6080 : * add them to the input target if not already present. (Some might be
6081 : * there already because they're used directly as window/group clauses.)
6082 : *
6083 : * Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that any
6084 : * Aggrefs are placed in the Agg node's tlist and not left to be computed
6085 : * at higher levels. On the other hand, we should recurse into
6086 : * WindowFuncs to make sure their input expressions are available.
6087 : */
6088 2336 : flattenable_vars = pull_var_clause((Node *) flattenable_cols,
6089 : PVC_INCLUDE_AGGREGATES |
6090 : PVC_RECURSE_WINDOWFUNCS |
6091 : PVC_INCLUDE_PLACEHOLDERS);
6092 2336 : add_new_columns_to_pathtarget(input_target, flattenable_vars);
6093 :
6094 : /* clean up cruft */
6095 2336 : list_free(flattenable_vars);
6096 2336 : list_free(flattenable_cols);
6097 :
6098 : /* XXX this causes some redundant cost calculation ... */
6099 2336 : return set_pathtarget_cost_width(root, input_target);
6100 : }
6101 :
6102 : /*
6103 : * make_pathkeys_for_window
6104 : * Create a pathkeys list describing the required input ordering
6105 : * for the given WindowClause.
6106 : *
6107 : * Modifies wc's partitionClause to remove any clauses which are deemed
6108 : * redundant by the pathkey logic.
6109 : *
6110 : * The required ordering is first the PARTITION keys, then the ORDER keys.
6111 : * In the future we might try to implement windowing using hashing, in which
6112 : * case the ordering could be relaxed, but for now we always sort.
6113 : */
6114 : static List *
6115 5020 : make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
6116 : List *tlist)
6117 : {
6118 5020 : List *window_pathkeys = NIL;
6119 :
6120 : /* Throw error if can't sort */
6121 5020 : if (!grouping_is_sortable(wc->partitionClause))
6122 0 : ereport(ERROR,
6123 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
6124 : errmsg("could not implement window PARTITION BY"),
6125 : errdetail("Window partitioning columns must be of sortable datatypes.")));
6126 5020 : if (!grouping_is_sortable(wc->orderClause))
6127 0 : ereport(ERROR,
6128 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
6129 : errmsg("could not implement window ORDER BY"),
6130 : errdetail("Window ordering columns must be of sortable datatypes.")));
6131 :
6132 : /*
6133 : * First fetch the pathkeys for the PARTITION BY clause. We can safely
6134 : * remove any clauses from the wc->partitionClause for redundant pathkeys.
6135 : */
6136 5020 : if (wc->partitionClause != NIL)
6137 : {
6138 : bool sortable;
6139 :
6140 1198 : window_pathkeys = make_pathkeys_for_sortclauses_extended(root,
6141 : &wc->partitionClause,
6142 : tlist,
6143 : true,
6144 : false,
6145 : &sortable,
6146 : false);
6147 :
6148 : Assert(sortable);
6149 : }
6150 :
6151 : /*
6152 : * In principle, we could also consider removing redundant ORDER BY items
6153 : * too as doing so does not alter the result of peer row checks done by
6154 : * the executor. However, we must *not* remove the ordering column for
6155 : * RANGE OFFSET cases, as the executor needs that for in_range tests even
6156 : * if it's known to be equal to some partitioning column.
6157 : */
6158 5020 : if (wc->orderClause != NIL)
6159 : {
6160 : List *orderby_pathkeys;
6161 :
6162 4210 : orderby_pathkeys = make_pathkeys_for_sortclauses(root,
6163 : wc->orderClause,
6164 : tlist);
6165 :
6166 : /* Okay, make the combined pathkeys */
6167 4210 : if (window_pathkeys != NIL)
6168 880 : window_pathkeys = append_pathkeys(window_pathkeys, orderby_pathkeys);
6169 : else
6170 3330 : window_pathkeys = orderby_pathkeys;
6171 : }
6172 :
6173 5020 : return window_pathkeys;
6174 : }
6175 :
6176 : /*
6177 : * make_sort_input_target
6178 : * Generate appropriate PathTarget for initial input to Sort step.
6179 : *
6180 : * If the query has ORDER BY, this function chooses the target to be computed
6181 : * by the node just below the Sort (and DISTINCT, if any, since Unique can't
6182 : * project) steps. This might or might not be identical to the query's final
6183 : * output target.
6184 : *
6185 : * The main argument for keeping the sort-input tlist the same as the final
6186 : * is that we avoid a separate projection node (which will be needed if
6187 : * they're different, because Sort can't project). However, there are also
6188 : * advantages to postponing tlist evaluation till after the Sort: it ensures
6189 : * a consistent order of evaluation for any volatile functions in the tlist,
6190 : * and if there's also a LIMIT, we can stop the query without ever computing
6191 : * tlist functions for later rows, which is beneficial for both volatile and
6192 : * expensive functions.
6193 : *
6194 : * Our current policy is to postpone volatile expressions till after the sort
6195 : * unconditionally (assuming that that's possible, ie they are in plain tlist
6196 : * columns and not ORDER BY/GROUP BY/DISTINCT columns). We also prefer to
6197 : * postpone set-returning expressions, because running them beforehand would
6198 : * bloat the sort dataset, and because it might cause unexpected output order
6199 : * if the sort isn't stable. However there's a constraint on that: all SRFs
6200 : * in the tlist should be evaluated at the same plan step, so that they can
6201 : * run in sync in nodeProjectSet. So if any SRFs are in sort columns, we
6202 : * mustn't postpone any SRFs. (Note that in principle that policy should
6203 : * probably get applied to the group/window input targetlists too, but we
6204 : * have not done that historically.) Lastly, expensive expressions are
6205 : * postponed if there is a LIMIT, or if root->tuple_fraction shows that
6206 : * partial evaluation of the query is possible (if neither is true, we expect
6207 : * to have to evaluate the expressions for every row anyway), or if there are
6208 : * any volatile or set-returning expressions (since once we've put in a
6209 : * projection at all, it won't cost any more to postpone more stuff).
6210 : *
6211 : * Another issue that could potentially be considered here is that
6212 : * evaluating tlist expressions could result in data that's either wider
6213 : * or narrower than the input Vars, thus changing the volume of data that
6214 : * has to go through the Sort. However, we usually have only a very bad
6215 : * idea of the output width of any expression more complex than a Var,
6216 : * so for now it seems too risky to try to optimize on that basis.
6217 : *
6218 : * Note that if we do produce a modified sort-input target, and then the
6219 : * query ends up not using an explicit Sort, no particular harm is done:
6220 : * we'll initially use the modified target for the preceding path nodes,
6221 : * but then change them to the final target with apply_projection_to_path.
6222 : * Moreover, in such a case the guarantees about evaluation order of
6223 : * volatile functions still hold, since the rows are sorted already.
6224 : *
6225 : * This function has some things in common with make_group_input_target and
6226 : * make_window_input_target, though the detailed rules for what to do are
6227 : * different. We never flatten/postpone any grouping or ordering columns;
6228 : * those are needed before the sort. If we do flatten a particular
6229 : * expression, we leave Aggref and WindowFunc nodes alone, since those were
6230 : * computed earlier.
6231 : *
6232 : * 'final_target' is the query's final target list (in PathTarget form)
6233 : * 'have_postponed_srfs' is an output argument, see below
6234 : *
6235 : * The result is the PathTarget to be computed by the plan node immediately
6236 : * below the Sort step (and the Distinct step, if any). This will be
6237 : * exactly final_target if we decide a projection step wouldn't be helpful.
6238 : *
6239 : * In addition, *have_postponed_srfs is set to true if we choose to postpone
6240 : * any set-returning functions to after the Sort.
6241 : */
6242 : static PathTarget *
6243 73712 : make_sort_input_target(PlannerInfo *root,
6244 : PathTarget *final_target,
6245 : bool *have_postponed_srfs)
6246 : {
6247 73712 : Query *parse = root->parse;
6248 : PathTarget *input_target;
6249 : int ncols;
6250 : bool *col_is_srf;
6251 : bool *postpone_col;
6252 : bool have_srf;
6253 : bool have_volatile;
6254 : bool have_expensive;
6255 : bool have_srf_sortcols;
6256 : bool postpone_srfs;
6257 : List *postponable_cols;
6258 : List *postponable_vars;
6259 : int i;
6260 : ListCell *lc;
6261 :
6262 : /* Shouldn't get here unless query has ORDER BY */
6263 : Assert(parse->sortClause);
6264 :
6265 73712 : *have_postponed_srfs = false; /* default result */
6266 :
6267 : /* Inspect tlist and collect per-column information */
6268 73712 : ncols = list_length(final_target->exprs);
6269 73712 : col_is_srf = (bool *) palloc0(ncols * sizeof(bool));
6270 73712 : postpone_col = (bool *) palloc0(ncols * sizeof(bool));
6271 73712 : have_srf = have_volatile = have_expensive = have_srf_sortcols = false;
6272 :
6273 73712 : i = 0;
6274 585188 : foreach(lc, final_target->exprs)
6275 : {
6276 511476 : Expr *expr = (Expr *) lfirst(lc);
6277 :
6278 : /*
6279 : * If the column has a sortgroupref, assume it has to be evaluated
6280 : * before sorting. Generally such columns would be ORDER BY, GROUP
6281 : * BY, etc targets. One exception is columns that were removed from
6282 : * GROUP BY by remove_useless_groupby_columns() ... but those would
6283 : * only be Vars anyway. There don't seem to be any cases where it
6284 : * would be worth the trouble to double-check.
6285 : */
6286 511476 : if (get_pathtarget_sortgroupref(final_target, i) == 0)
6287 : {
6288 : /*
6289 : * Check for SRF or volatile functions. Check the SRF case first
6290 : * because we must know whether we have any postponed SRFs.
6291 : */
6292 399242 : if (parse->hasTargetSRFs &&
6293 216 : expression_returns_set((Node *) expr))
6294 : {
6295 : /* We'll decide below whether these are postponable */
6296 96 : col_is_srf[i] = true;
6297 96 : have_srf = true;
6298 : }
6299 398930 : else if (contain_volatile_functions((Node *) expr))
6300 : {
6301 : /* Unconditionally postpone */
6302 160 : postpone_col[i] = true;
6303 160 : have_volatile = true;
6304 : }
6305 : else
6306 : {
6307 : /*
6308 : * Else check the cost. XXX it's annoying to have to do this
6309 : * when set_pathtarget_cost_width() just did it. Refactor to
6310 : * allow sharing the work?
6311 : */
6312 : QualCost cost;
6313 :
6314 398770 : cost_qual_eval_node(&cost, (Node *) expr, root);
6315 :
6316 : /*
6317 : * We arbitrarily define "expensive" as "more than 10X
6318 : * cpu_operator_cost". Note this will take in any PL function
6319 : * with default cost.
6320 : */
6321 398770 : if (cost.per_tuple > 10 * cpu_operator_cost)
6322 : {
6323 15702 : postpone_col[i] = true;
6324 15702 : have_expensive = true;
6325 : }
6326 : }
6327 : }
6328 : else
6329 : {
6330 : /* For sortgroupref cols, just check if any contain SRFs */
6331 112450 : if (!have_srf_sortcols &&
6332 112760 : parse->hasTargetSRFs &&
6333 310 : expression_returns_set((Node *) expr))
6334 124 : have_srf_sortcols = true;
6335 : }
6336 :
6337 511476 : i++;
6338 : }
6339 :
6340 : /*
6341 : * We can postpone SRFs if we have some but none are in sortgroupref cols.
6342 : */
6343 73712 : postpone_srfs = (have_srf && !have_srf_sortcols);
6344 :
6345 : /*
6346 : * If we don't need a post-sort projection, just return final_target.
6347 : */
6348 73712 : if (!(postpone_srfs || have_volatile ||
6349 73496 : (have_expensive &&
6350 9454 : (parse->limitCount || root->tuple_fraction > 0))))
6351 73460 : return final_target;
6352 :
6353 : /*
6354 : * Report whether the post-sort projection will contain set-returning
6355 : * functions. This is important because it affects whether the Sort can
6356 : * rely on the query's LIMIT (if any) to bound the number of rows it needs
6357 : * to return.
6358 : */
6359 252 : *have_postponed_srfs = postpone_srfs;
6360 :
6361 : /*
6362 : * Construct the sort-input target, taking all non-postponable columns and
6363 : * then adding Vars, PlaceHolderVars, Aggrefs, and WindowFuncs found in
6364 : * the postponable ones.
6365 : */
6366 252 : input_target = create_empty_pathtarget();
6367 252 : postponable_cols = NIL;
6368 :
6369 252 : i = 0;
6370 2044 : foreach(lc, final_target->exprs)
6371 : {
6372 1792 : Expr *expr = (Expr *) lfirst(lc);
6373 :
6374 1792 : if (postpone_col[i] || (postpone_srfs && col_is_srf[i]))
6375 310 : postponable_cols = lappend(postponable_cols, expr);
6376 : else
6377 1482 : add_column_to_pathtarget(input_target, expr,
6378 1482 : get_pathtarget_sortgroupref(final_target, i));
6379 :
6380 1792 : i++;
6381 : }
6382 :
6383 : /*
6384 : * Pull out all the Vars, Aggrefs, and WindowFuncs mentioned in
6385 : * postponable columns, and add them to the sort-input target if not
6386 : * already present. (Some might be there already.) We mustn't
6387 : * deconstruct Aggrefs or WindowFuncs here, since the projection node
6388 : * would be unable to recompute them.
6389 : */
6390 252 : postponable_vars = pull_var_clause((Node *) postponable_cols,
6391 : PVC_INCLUDE_AGGREGATES |
6392 : PVC_INCLUDE_WINDOWFUNCS |
6393 : PVC_INCLUDE_PLACEHOLDERS);
6394 252 : add_new_columns_to_pathtarget(input_target, postponable_vars);
6395 :
6396 : /* clean up cruft */
6397 252 : list_free(postponable_vars);
6398 252 : list_free(postponable_cols);
6399 :
6400 : /* XXX this represents even more redundant cost calculation ... */
6401 252 : return set_pathtarget_cost_width(root, input_target);
6402 : }
6403 :
6404 : /*
6405 : * get_cheapest_fractional_path
6406 : * Find the cheapest path for retrieving a specified fraction of all
6407 : * the tuples expected to be returned by the given relation.
6408 : *
6409 : * We interpret tuple_fraction the same way as grouping_planner.
6410 : *
6411 : * We assume set_cheapest() has been run on the given rel.
6412 : */
6413 : Path *
6414 516790 : get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction)
6415 : {
6416 516790 : Path *best_path = rel->cheapest_total_path;
6417 : ListCell *l;
6418 :
6419 : /* If all tuples will be retrieved, just return the cheapest-total path */
6420 516790 : if (tuple_fraction <= 0.0)
6421 511246 : return best_path;
6422 :
6423 : /* Convert absolute # of tuples to a fraction; no need to clamp to 0..1 */
6424 5544 : if (tuple_fraction >= 1.0 && best_path->rows > 0)
6425 2112 : tuple_fraction /= best_path->rows;
6426 :
6427 13700 : foreach(l, rel->pathlist)
6428 : {
6429 8156 : Path *path = (Path *) lfirst(l);
6430 :
6431 10768 : if (path == rel->cheapest_total_path ||
6432 2612 : compare_fractional_path_costs(best_path, path, tuple_fraction) <= 0)
6433 7936 : continue;
6434 :
6435 220 : best_path = path;
6436 : }
6437 :
6438 5544 : return best_path;
6439 : }
6440 :
6441 : /*
6442 : * adjust_paths_for_srfs
6443 : * Fix up the Paths of the given upperrel to handle tSRFs properly.
6444 : *
6445 : * The executor can only handle set-returning functions that appear at the
6446 : * top level of the targetlist of a ProjectSet plan node. If we have any SRFs
6447 : * that are not at top level, we need to split up the evaluation into multiple
6448 : * plan levels in which each level satisfies this constraint. This function
6449 : * modifies each Path of an upperrel that (might) compute any SRFs in its
6450 : * output tlist to insert appropriate projection steps.
6451 : *
6452 : * The given targets and targets_contain_srfs lists are from
6453 : * split_pathtarget_at_srfs(). We assume the existing Paths emit the first
6454 : * target in targets.
6455 : */
6456 : static void
6457 9380 : adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
6458 : List *targets, List *targets_contain_srfs)
6459 : {
6460 : ListCell *lc;
6461 :
6462 : Assert(list_length(targets) == list_length(targets_contain_srfs));
6463 : Assert(!linitial_int(targets_contain_srfs));
6464 :
6465 : /* If no SRFs appear at this plan level, nothing to do */
6466 9380 : if (list_length(targets) == 1)
6467 614 : return;
6468 :
6469 : /*
6470 : * Stack SRF-evaluation nodes atop each path for the rel.
6471 : *
6472 : * In principle we should re-run set_cheapest() here to identify the
6473 : * cheapest path, but it seems unlikely that adding the same tlist eval
6474 : * costs to all the paths would change that, so we don't bother. Instead,
6475 : * just assume that the cheapest-startup and cheapest-total paths remain
6476 : * so. (There should be no parameterized paths anymore, so we needn't
6477 : * worry about updating cheapest_parameterized_paths.)
6478 : */
6479 17558 : foreach(lc, rel->pathlist)
6480 : {
6481 8792 : Path *subpath = (Path *) lfirst(lc);
6482 8792 : Path *newpath = subpath;
6483 : ListCell *lc1,
6484 : *lc2;
6485 :
6486 : Assert(subpath->param_info == NULL);
6487 27508 : forboth(lc1, targets, lc2, targets_contain_srfs)
6488 : {
6489 18716 : PathTarget *thistarget = lfirst_node(PathTarget, lc1);
6490 18716 : bool contains_srfs = (bool) lfirst_int(lc2);
6491 :
6492 : /* If this level doesn't contain SRFs, do regular projection */
6493 18716 : if (contains_srfs)
6494 8852 : newpath = (Path *) create_set_projection_path(root,
6495 : rel,
6496 : newpath,
6497 : thistarget);
6498 : else
6499 9864 : newpath = (Path *) apply_projection_to_path(root,
6500 : rel,
6501 : newpath,
6502 : thistarget);
6503 : }
6504 8792 : lfirst(lc) = newpath;
6505 8792 : if (subpath == rel->cheapest_startup_path)
6506 358 : rel->cheapest_startup_path = newpath;
6507 8792 : if (subpath == rel->cheapest_total_path)
6508 358 : rel->cheapest_total_path = newpath;
6509 : }
6510 :
6511 : /* Likewise for partial paths, if any */
6512 8772 : foreach(lc, rel->partial_pathlist)
6513 : {
6514 6 : Path *subpath = (Path *) lfirst(lc);
6515 6 : Path *newpath = subpath;
6516 : ListCell *lc1,
6517 : *lc2;
6518 :
6519 : Assert(subpath->param_info == NULL);
6520 24 : forboth(lc1, targets, lc2, targets_contain_srfs)
6521 : {
6522 18 : PathTarget *thistarget = lfirst_node(PathTarget, lc1);
6523 18 : bool contains_srfs = (bool) lfirst_int(lc2);
6524 :
6525 : /* If this level doesn't contain SRFs, do regular projection */
6526 18 : if (contains_srfs)
6527 6 : newpath = (Path *) create_set_projection_path(root,
6528 : rel,
6529 : newpath,
6530 : thistarget);
6531 : else
6532 : {
6533 : /* avoid apply_projection_to_path, in case of multiple refs */
6534 12 : newpath = (Path *) create_projection_path(root,
6535 : rel,
6536 : newpath,
6537 : thistarget);
6538 : }
6539 : }
6540 6 : lfirst(lc) = newpath;
6541 : }
6542 : }
6543 :
6544 : /*
6545 : * expression_planner
6546 : * Perform planner's transformations on a standalone expression.
6547 : *
6548 : * Various utility commands need to evaluate expressions that are not part
6549 : * of a plannable query. They can do so using the executor's regular
6550 : * expression-execution machinery, but first the expression has to be fed
6551 : * through here to transform it from parser output to something executable.
6552 : *
6553 : * Currently, we disallow sublinks in standalone expressions, so there's no
6554 : * real "planning" involved here. (That might not always be true though.)
6555 : * What we must do is run eval_const_expressions to ensure that any function
6556 : * calls are converted to positional notation and function default arguments
6557 : * get inserted. The fact that constant subexpressions get simplified is a
6558 : * side-effect that is useful when the expression will get evaluated more than
6559 : * once. Also, we must fix operator function IDs.
6560 : *
6561 : * This does not return any information about dependencies of the expression.
6562 : * Hence callers should use the results only for the duration of the current
6563 : * query. Callers that would like to cache the results for longer should use
6564 : * expression_planner_with_deps, probably via the plancache.
6565 : *
6566 : * Note: this must not make any damaging changes to the passed-in expression
6567 : * tree. (It would actually be okay to apply fix_opfuncids to it, but since
6568 : * we first do an expression_tree_mutator-based walk, what is returned will
6569 : * be a new node tree.) The result is constructed in the current memory
6570 : * context; beware that this can leak a lot of additional stuff there, too.
6571 : */
6572 : Expr *
6573 237554 : expression_planner(Expr *expr)
6574 : {
6575 : Node *result;
6576 :
6577 : /*
6578 : * Convert named-argument function calls, insert default arguments and
6579 : * simplify constant subexprs
6580 : */
6581 237554 : result = eval_const_expressions(NULL, (Node *) expr);
6582 :
6583 : /* Fill in opfuncid values if missing */
6584 237536 : fix_opfuncids(result);
6585 :
6586 237536 : return (Expr *) result;
6587 : }
6588 :
6589 : /*
6590 : * expression_planner_with_deps
6591 : * Perform planner's transformations on a standalone expression,
6592 : * returning expression dependency information along with the result.
6593 : *
6594 : * This is identical to expression_planner() except that it also returns
6595 : * information about possible dependencies of the expression, ie identities of
6596 : * objects whose definitions affect the result. As in a PlannedStmt, these
6597 : * are expressed as a list of relation Oids and a list of PlanInvalItems.
6598 : */
6599 : Expr *
6600 340 : expression_planner_with_deps(Expr *expr,
6601 : List **relationOids,
6602 : List **invalItems)
6603 : {
6604 : Node *result;
6605 : PlannerGlobal glob;
6606 : PlannerInfo root;
6607 :
6608 : /* Make up dummy planner state so we can use setrefs machinery */
6609 7480 : MemSet(&glob, 0, sizeof(glob));
6610 340 : glob.type = T_PlannerGlobal;
6611 340 : glob.relationOids = NIL;
6612 340 : glob.invalItems = NIL;
6613 :
6614 30260 : MemSet(&root, 0, sizeof(root));
6615 340 : root.type = T_PlannerInfo;
6616 340 : root.glob = &glob;
6617 :
6618 : /*
6619 : * Convert named-argument function calls, insert default arguments and
6620 : * simplify constant subexprs. Collect identities of inlined functions
6621 : * and elided domains, too.
6622 : */
6623 340 : result = eval_const_expressions(&root, (Node *) expr);
6624 :
6625 : /* Fill in opfuncid values if missing */
6626 340 : fix_opfuncids(result);
6627 :
6628 : /*
6629 : * Now walk the finished expression to find anything else we ought to
6630 : * record as an expression dependency.
6631 : */
6632 340 : (void) extract_query_dependencies_walker(result, &root);
6633 :
6634 340 : *relationOids = glob.relationOids;
6635 340 : *invalItems = glob.invalItems;
6636 :
6637 340 : return (Expr *) result;
6638 : }
6639 :
6640 :
6641 : /*
6642 : * plan_cluster_use_sort
6643 : * Use the planner to decide how CLUSTER should implement sorting
6644 : *
6645 : * tableOid is the OID of a table to be clustered on its index indexOid
6646 : * (which is already known to be a btree index). Decide whether it's
6647 : * cheaper to do an indexscan or a seqscan-plus-sort to execute the CLUSTER.
6648 : * Return true to use sorting, false to use an indexscan.
6649 : *
6650 : * Note: caller had better already hold some type of lock on the table.
6651 : */
6652 : bool
6653 190 : plan_cluster_use_sort(Oid tableOid, Oid indexOid)
6654 : {
6655 : PlannerInfo *root;
6656 : Query *query;
6657 : PlannerGlobal *glob;
6658 : RangeTblEntry *rte;
6659 : RelOptInfo *rel;
6660 : IndexOptInfo *indexInfo;
6661 : QualCost indexExprCost;
6662 : Cost comparisonCost;
6663 : Path *seqScanPath;
6664 : Path seqScanAndSortPath;
6665 : IndexPath *indexScanPath;
6666 : ListCell *lc;
6667 :
6668 : /* We can short-circuit the cost comparison if indexscans are disabled */
6669 190 : if (!enable_indexscan)
6670 30 : return true; /* use sort */
6671 :
6672 : /* Set up mostly-dummy planner state */
6673 160 : query = makeNode(Query);
6674 160 : query->commandType = CMD_SELECT;
6675 :
6676 160 : glob = makeNode(PlannerGlobal);
6677 :
6678 160 : root = makeNode(PlannerInfo);
6679 160 : root->parse = query;
6680 160 : root->glob = glob;
6681 160 : root->query_level = 1;
6682 160 : root->planner_cxt = CurrentMemoryContext;
6683 160 : root->wt_param_id = -1;
6684 160 : root->join_domains = list_make1(makeNode(JoinDomain));
6685 :
6686 : /* Build a minimal RTE for the rel */
6687 160 : rte = makeNode(RangeTblEntry);
6688 160 : rte->rtekind = RTE_RELATION;
6689 160 : rte->relid = tableOid;
6690 160 : rte->relkind = RELKIND_RELATION; /* Don't be too picky. */
6691 160 : rte->rellockmode = AccessShareLock;
6692 160 : rte->lateral = false;
6693 160 : rte->inh = false;
6694 160 : rte->inFromCl = true;
6695 160 : query->rtable = list_make1(rte);
6696 160 : addRTEPermissionInfo(&query->rteperminfos, rte);
6697 :
6698 : /* Set up RTE/RelOptInfo arrays */
6699 160 : setup_simple_rel_arrays(root);
6700 :
6701 : /* Build RelOptInfo */
6702 160 : rel = build_simple_rel(root, 1, NULL);
6703 :
6704 : /* Locate IndexOptInfo for the target index */
6705 160 : indexInfo = NULL;
6706 198 : foreach(lc, rel->indexlist)
6707 : {
6708 198 : indexInfo = lfirst_node(IndexOptInfo, lc);
6709 198 : if (indexInfo->indexoid == indexOid)
6710 160 : break;
6711 : }
6712 :
6713 : /*
6714 : * It's possible that get_relation_info did not generate an IndexOptInfo
6715 : * for the desired index; this could happen if it's not yet reached its
6716 : * indcheckxmin usability horizon, or if it's a system index and we're
6717 : * ignoring system indexes. In such cases we should tell CLUSTER to not
6718 : * trust the index contents but use seqscan-and-sort.
6719 : */
6720 160 : if (lc == NULL) /* not in the list? */
6721 0 : return true; /* use sort */
6722 :
6723 : /*
6724 : * Rather than doing all the pushups that would be needed to use
6725 : * set_baserel_size_estimates, just do a quick hack for rows and width.
6726 : */
6727 160 : rel->rows = rel->tuples;
6728 160 : rel->reltarget->width = get_relation_data_width(tableOid, NULL);
6729 :
6730 160 : root->total_table_pages = rel->pages;
6731 :
6732 : /*
6733 : * Determine eval cost of the index expressions, if any. We need to
6734 : * charge twice that amount for each tuple comparison that happens during
6735 : * the sort, since tuplesort.c will have to re-evaluate the index
6736 : * expressions each time. (XXX that's pretty inefficient...)
6737 : */
6738 160 : cost_qual_eval(&indexExprCost, indexInfo->indexprs, root);
6739 160 : comparisonCost = 2.0 * (indexExprCost.startup + indexExprCost.per_tuple);
6740 :
6741 : /* Estimate the cost of seq scan + sort */
6742 160 : seqScanPath = create_seqscan_path(root, rel, NULL, 0);
6743 160 : cost_sort(&seqScanAndSortPath, root, NIL,
6744 : seqScanPath->disabled_nodes,
6745 160 : seqScanPath->total_cost, rel->tuples, rel->reltarget->width,
6746 : comparisonCost, maintenance_work_mem, -1.0);
6747 :
6748 : /* Estimate the cost of index scan */
6749 160 : indexScanPath = create_index_path(root, indexInfo,
6750 : NIL, NIL, NIL, NIL,
6751 : ForwardScanDirection, false,
6752 : NULL, 1.0, false);
6753 :
6754 160 : return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
6755 : }
6756 :
6757 : /*
6758 : * plan_create_index_workers
6759 : * Use the planner to decide how many parallel worker processes
6760 : * CREATE INDEX should request for use
6761 : *
6762 : * tableOid is the table on which the index is to be built. indexOid is the
6763 : * OID of an index to be created or reindexed (which must be an index with
6764 : * support for parallel builds - currently btree or BRIN).
6765 : *
6766 : * Return value is the number of parallel worker processes to request. It
6767 : * may be unsafe to proceed if this is 0. Note that this does not include the
6768 : * leader participating as a worker (value is always a number of parallel
6769 : * worker processes).
6770 : *
6771 : * Note: caller had better already hold some type of lock on the table and
6772 : * index.
6773 : */
6774 : int
6775 34026 : plan_create_index_workers(Oid tableOid, Oid indexOid)
6776 : {
6777 : PlannerInfo *root;
6778 : Query *query;
6779 : PlannerGlobal *glob;
6780 : RangeTblEntry *rte;
6781 : Relation heap;
6782 : Relation index;
6783 : RelOptInfo *rel;
6784 : int parallel_workers;
6785 : BlockNumber heap_blocks;
6786 : double reltuples;
6787 : double allvisfrac;
6788 :
6789 : /*
6790 : * We don't allow performing parallel operation in standalone backend or
6791 : * when parallelism is disabled.
6792 : */
6793 34026 : if (!IsUnderPostmaster || max_parallel_maintenance_workers == 0)
6794 466 : return 0;
6795 :
6796 : /* Set up largely-dummy planner state */
6797 33560 : query = makeNode(Query);
6798 33560 : query->commandType = CMD_SELECT;
6799 :
6800 33560 : glob = makeNode(PlannerGlobal);
6801 :
6802 33560 : root = makeNode(PlannerInfo);
6803 33560 : root->parse = query;
6804 33560 : root->glob = glob;
6805 33560 : root->query_level = 1;
6806 33560 : root->planner_cxt = CurrentMemoryContext;
6807 33560 : root->wt_param_id = -1;
6808 33560 : root->join_domains = list_make1(makeNode(JoinDomain));
6809 :
6810 : /*
6811 : * Build a minimal RTE.
6812 : *
6813 : * Mark the RTE with inh = true. This is a kludge to prevent
6814 : * get_relation_info() from fetching index info, which is necessary
6815 : * because it does not expect that any IndexOptInfo is currently
6816 : * undergoing REINDEX.
6817 : */
6818 33560 : rte = makeNode(RangeTblEntry);
6819 33560 : rte->rtekind = RTE_RELATION;
6820 33560 : rte->relid = tableOid;
6821 33560 : rte->relkind = RELKIND_RELATION; /* Don't be too picky. */
6822 33560 : rte->rellockmode = AccessShareLock;
6823 33560 : rte->lateral = false;
6824 33560 : rte->inh = true;
6825 33560 : rte->inFromCl = true;
6826 33560 : query->rtable = list_make1(rte);
6827 33560 : addRTEPermissionInfo(&query->rteperminfos, rte);
6828 :
6829 : /* Set up RTE/RelOptInfo arrays */
6830 33560 : setup_simple_rel_arrays(root);
6831 :
6832 : /* Build RelOptInfo */
6833 33560 : rel = build_simple_rel(root, 1, NULL);
6834 :
6835 : /* Rels are assumed already locked by the caller */
6836 33560 : heap = table_open(tableOid, NoLock);
6837 33560 : index = index_open(indexOid, NoLock);
6838 :
6839 : /*
6840 : * Determine if it's safe to proceed.
6841 : *
6842 : * Currently, parallel workers can't access the leader's temporary tables.
6843 : * Furthermore, any index predicate or index expressions must be parallel
6844 : * safe.
6845 : */
6846 33560 : if (heap->rd_rel->relpersistence == RELPERSISTENCE_TEMP ||
6847 31596 : !is_parallel_safe(root, (Node *) RelationGetIndexExpressions(index)) ||
6848 31476 : !is_parallel_safe(root, (Node *) RelationGetIndexPredicate(index)))
6849 : {
6850 2084 : parallel_workers = 0;
6851 2084 : goto done;
6852 : }
6853 :
6854 : /*
6855 : * If parallel_workers storage parameter is set for the table, accept that
6856 : * as the number of parallel worker processes to launch (though still cap
6857 : * at max_parallel_maintenance_workers). Note that we deliberately do not
6858 : * consider any other factor when parallel_workers is set. (e.g., memory
6859 : * use by workers.)
6860 : */
6861 31476 : if (rel->rel_parallel_workers != -1)
6862 : {
6863 14 : parallel_workers = Min(rel->rel_parallel_workers,
6864 : max_parallel_maintenance_workers);
6865 14 : goto done;
6866 : }
6867 :
6868 : /*
6869 : * Estimate heap relation size ourselves, since rel->pages cannot be
6870 : * trusted (heap RTE was marked as inheritance parent)
6871 : */
6872 31462 : estimate_rel_size(heap, NULL, &heap_blocks, &reltuples, &allvisfrac);
6873 :
6874 : /*
6875 : * Determine number of workers to scan the heap relation using generic
6876 : * model
6877 : */
6878 31462 : parallel_workers = compute_parallel_worker(rel, heap_blocks, -1,
6879 : max_parallel_maintenance_workers);
6880 :
6881 : /*
6882 : * Cap workers based on available maintenance_work_mem as needed.
6883 : *
6884 : * Note that each tuplesort participant receives an even share of the
6885 : * total maintenance_work_mem budget. Aim to leave participants
6886 : * (including the leader as a participant) with no less than 32MB of
6887 : * memory. This leaves cases where maintenance_work_mem is set to 64MB
6888 : * immediately past the threshold of being capable of launching a single
6889 : * parallel worker to sort.
6890 : */
6891 31618 : while (parallel_workers > 0 &&
6892 314 : maintenance_work_mem / (parallel_workers + 1) < 32 * 1024)
6893 156 : parallel_workers--;
6894 :
6895 31462 : done:
6896 33560 : index_close(index, NoLock);
6897 33560 : table_close(heap, NoLock);
6898 :
6899 33560 : return parallel_workers;
6900 : }
6901 :
6902 : /*
6903 : * add_paths_to_grouping_rel
6904 : *
6905 : * Add non-partial paths to grouping relation.
6906 : */
6907 : static void
6908 38454 : add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
6909 : RelOptInfo *grouped_rel,
6910 : RelOptInfo *partially_grouped_rel,
6911 : const AggClauseCosts *agg_costs,
6912 : grouping_sets_data *gd, double dNumGroups,
6913 : GroupPathExtraData *extra)
6914 : {
6915 38454 : Query *parse = root->parse;
6916 38454 : Path *cheapest_path = input_rel->cheapest_total_path;
6917 : ListCell *lc;
6918 38454 : bool can_hash = (extra->flags & GROUPING_CAN_USE_HASH) != 0;
6919 38454 : bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0;
6920 38454 : List *havingQual = (List *) extra->havingQual;
6921 38454 : AggClauseCosts *agg_final_costs = &extra->agg_final_costs;
6922 :
6923 38454 : if (can_sort)
6924 : {
6925 : /*
6926 : * Use any available suitably-sorted path as input, and also consider
6927 : * sorting the cheapest-total path and incremental sort on any paths
6928 : * with presorted keys.
6929 : */
6930 79544 : foreach(lc, input_rel->pathlist)
6931 : {
6932 : ListCell *lc2;
6933 41096 : Path *path = (Path *) lfirst(lc);
6934 41096 : Path *path_save = path;
6935 41096 : List *pathkey_orderings = NIL;
6936 :
6937 : /* generate alternative group orderings that might be useful */
6938 41096 : pathkey_orderings = get_useful_group_keys_orderings(root, path);
6939 :
6940 : Assert(list_length(pathkey_orderings) > 0);
6941 :
6942 82336 : foreach(lc2, pathkey_orderings)
6943 : {
6944 41240 : GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
6945 :
6946 : /* restore the path (we replace it in the loop) */
6947 41240 : path = path_save;
6948 :
6949 41240 : path = make_ordered_path(root,
6950 : grouped_rel,
6951 : path,
6952 : cheapest_path,
6953 : info->pathkeys,
6954 : -1.0);
6955 41240 : if (path == NULL)
6956 368 : continue;
6957 :
6958 : /* Now decide what to stick atop it */
6959 40872 : if (parse->groupingSets)
6960 : {
6961 914 : consider_groupingsets_paths(root, grouped_rel,
6962 : path, true, can_hash,
6963 : gd, agg_costs, dNumGroups);
6964 : }
6965 39958 : else if (parse->hasAggs)
6966 : {
6967 : /*
6968 : * We have aggregation, possibly with plain GROUP BY. Make
6969 : * an AggPath.
6970 : */
6971 39186 : add_path(grouped_rel, (Path *)
6972 39186 : create_agg_path(root,
6973 : grouped_rel,
6974 : path,
6975 39186 : grouped_rel->reltarget,
6976 39186 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
6977 : AGGSPLIT_SIMPLE,
6978 : info->clauses,
6979 : havingQual,
6980 : agg_costs,
6981 : dNumGroups));
6982 : }
6983 772 : else if (parse->groupClause)
6984 : {
6985 : /*
6986 : * We have GROUP BY without aggregation or grouping sets.
6987 : * Make a GroupPath.
6988 : */
6989 772 : add_path(grouped_rel, (Path *)
6990 772 : create_group_path(root,
6991 : grouped_rel,
6992 : path,
6993 : info->clauses,
6994 : havingQual,
6995 : dNumGroups));
6996 : }
6997 : else
6998 : {
6999 : /* Other cases should have been handled above */
7000 : Assert(false);
7001 : }
7002 : }
7003 : }
7004 :
7005 : /*
7006 : * Instead of operating directly on the input relation, we can
7007 : * consider finalizing a partially aggregated path.
7008 : */
7009 38448 : if (partially_grouped_rel != NULL)
7010 : {
7011 3990 : foreach(lc, partially_grouped_rel->pathlist)
7012 : {
7013 : ListCell *lc2;
7014 2410 : Path *path = (Path *) lfirst(lc);
7015 2410 : Path *path_save = path;
7016 2410 : List *pathkey_orderings = NIL;
7017 :
7018 : /* generate alternative group orderings that might be useful */
7019 2410 : pathkey_orderings = get_useful_group_keys_orderings(root, path);
7020 :
7021 : Assert(list_length(pathkey_orderings) > 0);
7022 :
7023 : /* process all potentially interesting grouping reorderings */
7024 4820 : foreach(lc2, pathkey_orderings)
7025 : {
7026 2410 : GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
7027 :
7028 : /* restore the path (we replace it in the loop) */
7029 2410 : path = path_save;
7030 :
7031 2410 : path = make_ordered_path(root,
7032 : grouped_rel,
7033 : path,
7034 2410 : partially_grouped_rel->cheapest_total_path,
7035 : info->pathkeys,
7036 : -1.0);
7037 :
7038 2410 : if (path == NULL)
7039 108 : continue;
7040 :
7041 2302 : if (parse->hasAggs)
7042 2054 : add_path(grouped_rel, (Path *)
7043 2054 : create_agg_path(root,
7044 : grouped_rel,
7045 : path,
7046 2054 : grouped_rel->reltarget,
7047 2054 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
7048 : AGGSPLIT_FINAL_DESERIAL,
7049 : info->clauses,
7050 : havingQual,
7051 : agg_final_costs,
7052 : dNumGroups));
7053 : else
7054 248 : add_path(grouped_rel, (Path *)
7055 248 : create_group_path(root,
7056 : grouped_rel,
7057 : path,
7058 : info->clauses,
7059 : havingQual,
7060 : dNumGroups));
7061 :
7062 : }
7063 : }
7064 : }
7065 : }
7066 :
7067 38454 : if (can_hash)
7068 : {
7069 4944 : if (parse->groupingSets)
7070 : {
7071 : /*
7072 : * Try for a hash-only groupingsets path over unsorted input.
7073 : */
7074 770 : consider_groupingsets_paths(root, grouped_rel,
7075 : cheapest_path, false, true,
7076 : gd, agg_costs, dNumGroups);
7077 : }
7078 : else
7079 : {
7080 : /*
7081 : * Generate a HashAgg Path. We just need an Agg over the
7082 : * cheapest-total input path, since input order won't matter.
7083 : */
7084 4174 : add_path(grouped_rel, (Path *)
7085 4174 : create_agg_path(root, grouped_rel,
7086 : cheapest_path,
7087 4174 : grouped_rel->reltarget,
7088 : AGG_HASHED,
7089 : AGGSPLIT_SIMPLE,
7090 : root->processed_groupClause,
7091 : havingQual,
7092 : agg_costs,
7093 : dNumGroups));
7094 : }
7095 :
7096 : /*
7097 : * Generate a Finalize HashAgg Path atop of the cheapest partially
7098 : * grouped path, assuming there is one
7099 : */
7100 4944 : if (partially_grouped_rel && partially_grouped_rel->pathlist)
7101 : {
7102 784 : Path *path = partially_grouped_rel->cheapest_total_path;
7103 :
7104 784 : add_path(grouped_rel, (Path *)
7105 784 : create_agg_path(root,
7106 : grouped_rel,
7107 : path,
7108 784 : grouped_rel->reltarget,
7109 : AGG_HASHED,
7110 : AGGSPLIT_FINAL_DESERIAL,
7111 : root->processed_groupClause,
7112 : havingQual,
7113 : agg_final_costs,
7114 : dNumGroups));
7115 : }
7116 : }
7117 :
7118 : /*
7119 : * When partitionwise aggregate is used, we might have fully aggregated
7120 : * paths in the partial pathlist, because add_paths_to_append_rel() will
7121 : * consider a path for grouped_rel consisting of a Parallel Append of
7122 : * non-partial paths from each child.
7123 : */
7124 38454 : if (grouped_rel->partial_pathlist != NIL)
7125 162 : gather_grouping_paths(root, grouped_rel);
7126 38454 : }
7127 :
7128 : /*
7129 : * create_partial_grouping_paths
7130 : *
7131 : * Create a new upper relation representing the result of partial aggregation
7132 : * and populate it with appropriate paths. Note that we don't finalize the
7133 : * lists of paths here, so the caller can add additional partial or non-partial
7134 : * paths and must afterward call gather_grouping_paths and set_cheapest on
7135 : * the returned upper relation.
7136 : *
7137 : * All paths for this new upper relation -- both partial and non-partial --
7138 : * have been partially aggregated but require a subsequent FinalizeAggregate
7139 : * step.
7140 : *
7141 : * NB: This function is allowed to return NULL if it determines that there is
7142 : * no real need to create a new RelOptInfo.
7143 : */
7144 : static RelOptInfo *
7145 34820 : create_partial_grouping_paths(PlannerInfo *root,
7146 : RelOptInfo *grouped_rel,
7147 : RelOptInfo *input_rel,
7148 : grouping_sets_data *gd,
7149 : GroupPathExtraData *extra,
7150 : bool force_rel_creation)
7151 : {
7152 34820 : Query *parse = root->parse;
7153 : RelOptInfo *partially_grouped_rel;
7154 34820 : AggClauseCosts *agg_partial_costs = &extra->agg_partial_costs;
7155 34820 : AggClauseCosts *agg_final_costs = &extra->agg_final_costs;
7156 34820 : Path *cheapest_partial_path = NULL;
7157 34820 : Path *cheapest_total_path = NULL;
7158 34820 : double dNumPartialGroups = 0;
7159 34820 : double dNumPartialPartialGroups = 0;
7160 : ListCell *lc;
7161 34820 : bool can_hash = (extra->flags & GROUPING_CAN_USE_HASH) != 0;
7162 34820 : bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0;
7163 :
7164 : /*
7165 : * Consider whether we should generate partially aggregated non-partial
7166 : * paths. We can only do this if we have a non-partial path, and only if
7167 : * the parent of the input rel is performing partial partitionwise
7168 : * aggregation. (Note that extra->patype is the type of partitionwise
7169 : * aggregation being used at the parent level, not this level.)
7170 : */
7171 34820 : if (input_rel->pathlist != NIL &&
7172 34820 : extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL)
7173 618 : cheapest_total_path = input_rel->cheapest_total_path;
7174 :
7175 : /*
7176 : * If parallelism is possible for grouped_rel, then we should consider
7177 : * generating partially-grouped partial paths. However, if the input rel
7178 : * has no partial paths, then we can't.
7179 : */
7180 34820 : if (grouped_rel->consider_parallel && input_rel->partial_pathlist != NIL)
7181 1782 : cheapest_partial_path = linitial(input_rel->partial_pathlist);
7182 :
7183 : /*
7184 : * If we can't partially aggregate partial paths, and we can't partially
7185 : * aggregate non-partial paths, then don't bother creating the new
7186 : * RelOptInfo at all, unless the caller specified force_rel_creation.
7187 : */
7188 34820 : if (cheapest_total_path == NULL &&
7189 32720 : cheapest_partial_path == NULL &&
7190 32720 : !force_rel_creation)
7191 32622 : return NULL;
7192 :
7193 : /*
7194 : * Build a new upper relation to represent the result of partially
7195 : * aggregating the rows from the input relation.
7196 : */
7197 2198 : partially_grouped_rel = fetch_upper_rel(root,
7198 : UPPERREL_PARTIAL_GROUP_AGG,
7199 : grouped_rel->relids);
7200 2198 : partially_grouped_rel->consider_parallel =
7201 2198 : grouped_rel->consider_parallel;
7202 2198 : partially_grouped_rel->reloptkind = grouped_rel->reloptkind;
7203 2198 : partially_grouped_rel->serverid = grouped_rel->serverid;
7204 2198 : partially_grouped_rel->userid = grouped_rel->userid;
7205 2198 : partially_grouped_rel->useridiscurrent = grouped_rel->useridiscurrent;
7206 2198 : partially_grouped_rel->fdwroutine = grouped_rel->fdwroutine;
7207 :
7208 : /*
7209 : * Build target list for partial aggregate paths. These paths cannot just
7210 : * emit the same tlist as regular aggregate paths, because (1) we must
7211 : * include Vars and Aggrefs needed in HAVING, which might not appear in
7212 : * the result tlist, and (2) the Aggrefs must be set in partial mode.
7213 : */
7214 2198 : partially_grouped_rel->reltarget =
7215 2198 : make_partial_grouping_target(root, grouped_rel->reltarget,
7216 : extra->havingQual);
7217 :
7218 2198 : if (!extra->partial_costs_set)
7219 : {
7220 : /*
7221 : * Collect statistics about aggregates for estimating costs of
7222 : * performing aggregation in parallel.
7223 : */
7224 7752 : MemSet(agg_partial_costs, 0, sizeof(AggClauseCosts));
7225 7752 : MemSet(agg_final_costs, 0, sizeof(AggClauseCosts));
7226 1292 : if (parse->hasAggs)
7227 : {
7228 : /* partial phase */
7229 1158 : get_agg_clause_costs(root, AGGSPLIT_INITIAL_SERIAL,
7230 : agg_partial_costs);
7231 :
7232 : /* final phase */
7233 1158 : get_agg_clause_costs(root, AGGSPLIT_FINAL_DESERIAL,
7234 : agg_final_costs);
7235 : }
7236 :
7237 1292 : extra->partial_costs_set = true;
7238 : }
7239 :
7240 : /* Estimate number of partial groups. */
7241 2198 : if (cheapest_total_path != NULL)
7242 : dNumPartialGroups =
7243 618 : get_number_of_groups(root,
7244 : cheapest_total_path->rows,
7245 : gd,
7246 : extra->targetList);
7247 2198 : if (cheapest_partial_path != NULL)
7248 : dNumPartialPartialGroups =
7249 1782 : get_number_of_groups(root,
7250 : cheapest_partial_path->rows,
7251 : gd,
7252 : extra->targetList);
7253 :
7254 2198 : if (can_sort && cheapest_total_path != NULL)
7255 : {
7256 : /* This should have been checked previously */
7257 : Assert(parse->hasAggs || parse->groupClause);
7258 :
7259 : /*
7260 : * Use any available suitably-sorted path as input, and also consider
7261 : * sorting the cheapest partial path.
7262 : */
7263 1236 : foreach(lc, input_rel->pathlist)
7264 : {
7265 : ListCell *lc2;
7266 618 : Path *path = (Path *) lfirst(lc);
7267 618 : Path *path_save = path;
7268 618 : List *pathkey_orderings = NIL;
7269 :
7270 : /* generate alternative group orderings that might be useful */
7271 618 : pathkey_orderings = get_useful_group_keys_orderings(root, path);
7272 :
7273 : Assert(list_length(pathkey_orderings) > 0);
7274 :
7275 : /* process all potentially interesting grouping reorderings */
7276 1236 : foreach(lc2, pathkey_orderings)
7277 : {
7278 618 : GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
7279 :
7280 : /* restore the path (we replace it in the loop) */
7281 618 : path = path_save;
7282 :
7283 618 : path = make_ordered_path(root,
7284 : partially_grouped_rel,
7285 : path,
7286 : cheapest_total_path,
7287 : info->pathkeys,
7288 : -1.0);
7289 :
7290 618 : if (path == NULL)
7291 0 : continue;
7292 :
7293 618 : if (parse->hasAggs)
7294 546 : add_path(partially_grouped_rel, (Path *)
7295 546 : create_agg_path(root,
7296 : partially_grouped_rel,
7297 : path,
7298 546 : partially_grouped_rel->reltarget,
7299 546 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
7300 : AGGSPLIT_INITIAL_SERIAL,
7301 : info->clauses,
7302 : NIL,
7303 : agg_partial_costs,
7304 : dNumPartialGroups));
7305 : else
7306 72 : add_path(partially_grouped_rel, (Path *)
7307 72 : create_group_path(root,
7308 : partially_grouped_rel,
7309 : path,
7310 : info->clauses,
7311 : NIL,
7312 : dNumPartialGroups));
7313 : }
7314 : }
7315 : }
7316 :
7317 2198 : if (can_sort && cheapest_partial_path != NULL)
7318 : {
7319 : /* Similar to above logic, but for partial paths. */
7320 3576 : foreach(lc, input_rel->partial_pathlist)
7321 : {
7322 : ListCell *lc2;
7323 1794 : Path *path = (Path *) lfirst(lc);
7324 1794 : Path *path_save = path;
7325 1794 : List *pathkey_orderings = NIL;
7326 :
7327 : /* generate alternative group orderings that might be useful */
7328 1794 : pathkey_orderings = get_useful_group_keys_orderings(root, path);
7329 :
7330 : Assert(list_length(pathkey_orderings) > 0);
7331 :
7332 : /* process all potentially interesting grouping reorderings */
7333 3588 : foreach(lc2, pathkey_orderings)
7334 : {
7335 1794 : GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
7336 :
7337 :
7338 : /* restore the path (we replace it in the loop) */
7339 1794 : path = path_save;
7340 :
7341 1794 : path = make_ordered_path(root,
7342 : partially_grouped_rel,
7343 : path,
7344 : cheapest_partial_path,
7345 : info->pathkeys,
7346 : -1.0);
7347 :
7348 1794 : if (path == NULL)
7349 6 : continue;
7350 :
7351 1788 : if (parse->hasAggs)
7352 1666 : add_partial_path(partially_grouped_rel, (Path *)
7353 1666 : create_agg_path(root,
7354 : partially_grouped_rel,
7355 : path,
7356 1666 : partially_grouped_rel->reltarget,
7357 1666 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
7358 : AGGSPLIT_INITIAL_SERIAL,
7359 : info->clauses,
7360 : NIL,
7361 : agg_partial_costs,
7362 : dNumPartialPartialGroups));
7363 : else
7364 122 : add_partial_path(partially_grouped_rel, (Path *)
7365 122 : create_group_path(root,
7366 : partially_grouped_rel,
7367 : path,
7368 : info->clauses,
7369 : NIL,
7370 : dNumPartialPartialGroups));
7371 : }
7372 : }
7373 : }
7374 :
7375 : /*
7376 : * Add a partially-grouped HashAgg Path where possible
7377 : */
7378 2198 : if (can_hash && cheapest_total_path != NULL)
7379 : {
7380 : /* Checked above */
7381 : Assert(parse->hasAggs || parse->groupClause);
7382 :
7383 618 : add_path(partially_grouped_rel, (Path *)
7384 618 : create_agg_path(root,
7385 : partially_grouped_rel,
7386 : cheapest_total_path,
7387 618 : partially_grouped_rel->reltarget,
7388 : AGG_HASHED,
7389 : AGGSPLIT_INITIAL_SERIAL,
7390 : root->processed_groupClause,
7391 : NIL,
7392 : agg_partial_costs,
7393 : dNumPartialGroups));
7394 : }
7395 :
7396 : /*
7397 : * Now add a partially-grouped HashAgg partial Path where possible
7398 : */
7399 2198 : if (can_hash && cheapest_partial_path != NULL)
7400 : {
7401 986 : add_partial_path(partially_grouped_rel, (Path *)
7402 986 : create_agg_path(root,
7403 : partially_grouped_rel,
7404 : cheapest_partial_path,
7405 986 : partially_grouped_rel->reltarget,
7406 : AGG_HASHED,
7407 : AGGSPLIT_INITIAL_SERIAL,
7408 : root->processed_groupClause,
7409 : NIL,
7410 : agg_partial_costs,
7411 : dNumPartialPartialGroups));
7412 : }
7413 :
7414 : /*
7415 : * If there is an FDW that's responsible for all baserels of the query,
7416 : * let it consider adding partially grouped ForeignPaths.
7417 : */
7418 2198 : if (partially_grouped_rel->fdwroutine &&
7419 6 : partially_grouped_rel->fdwroutine->GetForeignUpperPaths)
7420 : {
7421 6 : FdwRoutine *fdwroutine = partially_grouped_rel->fdwroutine;
7422 :
7423 6 : fdwroutine->GetForeignUpperPaths(root,
7424 : UPPERREL_PARTIAL_GROUP_AGG,
7425 : input_rel, partially_grouped_rel,
7426 : extra);
7427 : }
7428 :
7429 2198 : return partially_grouped_rel;
7430 : }
7431 :
7432 : /*
7433 : * make_ordered_path
7434 : * Return a path ordered by 'pathkeys' based on the given 'path'. May
7435 : * return NULL if it doesn't make sense to generate an ordered path in
7436 : * this case.
7437 : */
7438 : static Path *
7439 50658 : make_ordered_path(PlannerInfo *root, RelOptInfo *rel, Path *path,
7440 : Path *cheapest_path, List *pathkeys, double limit_tuples)
7441 : {
7442 : bool is_sorted;
7443 : int presorted_keys;
7444 :
7445 50658 : is_sorted = pathkeys_count_contained_in(pathkeys,
7446 : path->pathkeys,
7447 : &presorted_keys);
7448 :
7449 50658 : if (!is_sorted)
7450 : {
7451 : /*
7452 : * Try at least sorting the cheapest path and also try incrementally
7453 : * sorting any path which is partially sorted already (no need to deal
7454 : * with paths which have presorted keys when incremental sort is
7455 : * disabled unless it's the cheapest input path).
7456 : */
7457 12570 : if (path != cheapest_path &&
7458 1992 : (presorted_keys == 0 || !enable_incremental_sort))
7459 1024 : return NULL;
7460 :
7461 : /*
7462 : * We've no need to consider both a sort and incremental sort. We'll
7463 : * just do a sort if there are no presorted keys and an incremental
7464 : * sort when there are presorted keys.
7465 : */
7466 11546 : if (presorted_keys == 0 || !enable_incremental_sort)
7467 10410 : path = (Path *) create_sort_path(root,
7468 : rel,
7469 : path,
7470 : pathkeys,
7471 : limit_tuples);
7472 : else
7473 1136 : path = (Path *) create_incremental_sort_path(root,
7474 : rel,
7475 : path,
7476 : pathkeys,
7477 : presorted_keys,
7478 : limit_tuples);
7479 : }
7480 :
7481 49634 : return path;
7482 : }
7483 :
7484 : /*
7485 : * Generate Gather and Gather Merge paths for a grouping relation or partial
7486 : * grouping relation.
7487 : *
7488 : * generate_useful_gather_paths does most of the work, but we also consider a
7489 : * special case: we could try sorting the data by the group_pathkeys and then
7490 : * applying Gather Merge.
7491 : *
7492 : * NB: This function shouldn't be used for anything other than a grouped or
7493 : * partially grouped relation not only because of the fact that it explicitly
7494 : * references group_pathkeys but we pass "true" as the third argument to
7495 : * generate_useful_gather_paths().
7496 : */
7497 : static void
7498 1644 : gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
7499 : {
7500 : ListCell *lc;
7501 : Path *cheapest_partial_path;
7502 : List *groupby_pathkeys;
7503 :
7504 : /*
7505 : * This occurs after any partial aggregation has taken place, so trim off
7506 : * any pathkeys added for ORDER BY / DISTINCT aggregates.
7507 : */
7508 1644 : if (list_length(root->group_pathkeys) > root->num_groupby_pathkeys)
7509 18 : groupby_pathkeys = list_copy_head(root->group_pathkeys,
7510 : root->num_groupby_pathkeys);
7511 : else
7512 1626 : groupby_pathkeys = root->group_pathkeys;
7513 :
7514 : /* Try Gather for unordered paths and Gather Merge for ordered ones. */
7515 1644 : generate_useful_gather_paths(root, rel, true);
7516 :
7517 1644 : cheapest_partial_path = linitial(rel->partial_pathlist);
7518 :
7519 : /* XXX Shouldn't this also consider the group-key-reordering? */
7520 3894 : foreach(lc, rel->partial_pathlist)
7521 : {
7522 2250 : Path *path = (Path *) lfirst(lc);
7523 : bool is_sorted;
7524 : int presorted_keys;
7525 : double total_groups;
7526 :
7527 2250 : is_sorted = pathkeys_count_contained_in(groupby_pathkeys,
7528 : path->pathkeys,
7529 : &presorted_keys);
7530 :
7531 2250 : if (is_sorted)
7532 1470 : continue;
7533 :
7534 : /*
7535 : * Try at least sorting the cheapest path and also try incrementally
7536 : * sorting any path which is partially sorted already (no need to deal
7537 : * with paths which have presorted keys when incremental sort is
7538 : * disabled unless it's the cheapest input path).
7539 : */
7540 780 : if (path != cheapest_partial_path &&
7541 0 : (presorted_keys == 0 || !enable_incremental_sort))
7542 0 : continue;
7543 :
7544 : /*
7545 : * We've no need to consider both a sort and incremental sort. We'll
7546 : * just do a sort if there are no presorted keys and an incremental
7547 : * sort when there are presorted keys.
7548 : */
7549 780 : if (presorted_keys == 0 || !enable_incremental_sort)
7550 780 : path = (Path *) create_sort_path(root, rel, path,
7551 : groupby_pathkeys,
7552 : -1.0);
7553 : else
7554 0 : path = (Path *) create_incremental_sort_path(root,
7555 : rel,
7556 : path,
7557 : groupby_pathkeys,
7558 : presorted_keys,
7559 : -1.0);
7560 780 : total_groups = compute_gather_rows(path);
7561 : path = (Path *)
7562 780 : create_gather_merge_path(root,
7563 : rel,
7564 : path,
7565 780 : rel->reltarget,
7566 : groupby_pathkeys,
7567 : NULL,
7568 : &total_groups);
7569 :
7570 780 : add_path(rel, path);
7571 : }
7572 1644 : }
7573 :
7574 : /*
7575 : * can_partial_agg
7576 : *
7577 : * Determines whether or not partial grouping and/or aggregation is possible.
7578 : * Returns true when possible, false otherwise.
7579 : */
7580 : static bool
7581 37578 : can_partial_agg(PlannerInfo *root)
7582 : {
7583 37578 : Query *parse = root->parse;
7584 :
7585 37578 : if (!parse->hasAggs && parse->groupClause == NIL)
7586 : {
7587 : /*
7588 : * We don't know how to do parallel aggregation unless we have either
7589 : * some aggregates or a grouping clause.
7590 : */
7591 0 : return false;
7592 : }
7593 37578 : else if (parse->groupingSets)
7594 : {
7595 : /* We don't know how to do grouping sets in parallel. */
7596 848 : return false;
7597 : }
7598 36730 : else if (root->hasNonPartialAggs || root->hasNonSerialAggs)
7599 : {
7600 : /* Insufficient support for partial mode. */
7601 3332 : return false;
7602 : }
7603 :
7604 : /* Everything looks good. */
7605 33398 : return true;
7606 : }
7607 :
7608 : /*
7609 : * apply_scanjoin_target_to_paths
7610 : *
7611 : * Adjust the final scan/join relation, and recursively all of its children,
7612 : * to generate the final scan/join target. It would be more correct to model
7613 : * this as a separate planning step with a new RelOptInfo at the toplevel and
7614 : * for each child relation, but doing it this way is noticeably cheaper.
7615 : * Maybe that problem can be solved at some point, but for now we do this.
7616 : *
7617 : * If tlist_same_exprs is true, then the scan/join target to be applied has
7618 : * the same expressions as the existing reltarget, so we need only insert the
7619 : * appropriate sortgroupref information. By avoiding the creation of
7620 : * projection paths we save effort both immediately and at plan creation time.
7621 : */
7622 : static void
7623 572956 : apply_scanjoin_target_to_paths(PlannerInfo *root,
7624 : RelOptInfo *rel,
7625 : List *scanjoin_targets,
7626 : List *scanjoin_targets_contain_srfs,
7627 : bool scanjoin_target_parallel_safe,
7628 : bool tlist_same_exprs)
7629 : {
7630 572956 : bool rel_is_partitioned = IS_PARTITIONED_REL(rel);
7631 : PathTarget *scanjoin_target;
7632 : ListCell *lc;
7633 :
7634 : /* This recurses, so be paranoid. */
7635 572956 : check_stack_depth();
7636 :
7637 : /*
7638 : * If the rel is partitioned, we want to drop its existing paths and
7639 : * generate new ones. This function would still be correct if we kept the
7640 : * existing paths: we'd modify them to generate the correct target above
7641 : * the partitioning Append, and then they'd compete on cost with paths
7642 : * generating the target below the Append. However, in our current cost
7643 : * model the latter way is always the same or cheaper cost, so modifying
7644 : * the existing paths would just be useless work. Moreover, when the cost
7645 : * is the same, varying roundoff errors might sometimes allow an existing
7646 : * path to be picked, resulting in undesirable cross-platform plan
7647 : * variations. So we drop old paths and thereby force the work to be done
7648 : * below the Append, except in the case of a non-parallel-safe target.
7649 : *
7650 : * Some care is needed, because we have to allow
7651 : * generate_useful_gather_paths to see the old partial paths in the next
7652 : * stanza. Hence, zap the main pathlist here, then allow
7653 : * generate_useful_gather_paths to add path(s) to the main list, and
7654 : * finally zap the partial pathlist.
7655 : */
7656 572956 : if (rel_is_partitioned)
7657 12550 : rel->pathlist = NIL;
7658 :
7659 : /*
7660 : * If the scan/join target is not parallel-safe, partial paths cannot
7661 : * generate it.
7662 : */
7663 572956 : if (!scanjoin_target_parallel_safe)
7664 : {
7665 : /*
7666 : * Since we can't generate the final scan/join target in parallel
7667 : * workers, this is our last opportunity to use any partial paths that
7668 : * exist; so build Gather path(s) that use them and emit whatever the
7669 : * current reltarget is. We don't do this in the case where the
7670 : * target is parallel-safe, since we will be able to generate superior
7671 : * paths by doing it after the final scan/join target has been
7672 : * applied.
7673 : */
7674 82306 : generate_useful_gather_paths(root, rel, false);
7675 :
7676 : /* Can't use parallel query above this level. */
7677 82306 : rel->partial_pathlist = NIL;
7678 82306 : rel->consider_parallel = false;
7679 : }
7680 :
7681 : /* Finish dropping old paths for a partitioned rel, per comment above */
7682 572956 : if (rel_is_partitioned)
7683 12550 : rel->partial_pathlist = NIL;
7684 :
7685 : /* Extract SRF-free scan/join target. */
7686 572956 : scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
7687 :
7688 : /*
7689 : * Apply the SRF-free scan/join target to each existing path.
7690 : *
7691 : * If the tlist exprs are the same, we can just inject the sortgroupref
7692 : * information into the existing pathtargets. Otherwise, replace each
7693 : * path with a projection path that generates the SRF-free scan/join
7694 : * target. This can't change the ordering of paths within rel->pathlist,
7695 : * so we just modify the list in place.
7696 : */
7697 1177204 : foreach(lc, rel->pathlist)
7698 : {
7699 604248 : Path *subpath = (Path *) lfirst(lc);
7700 :
7701 : /* Shouldn't have any parameterized paths anymore */
7702 : Assert(subpath->param_info == NULL);
7703 :
7704 604248 : if (tlist_same_exprs)
7705 191468 : subpath->pathtarget->sortgrouprefs =
7706 191468 : scanjoin_target->sortgrouprefs;
7707 : else
7708 : {
7709 : Path *newpath;
7710 :
7711 412780 : newpath = (Path *) create_projection_path(root, rel, subpath,
7712 : scanjoin_target);
7713 412780 : lfirst(lc) = newpath;
7714 : }
7715 : }
7716 :
7717 : /* Likewise adjust the targets for any partial paths. */
7718 592480 : foreach(lc, rel->partial_pathlist)
7719 : {
7720 19524 : Path *subpath = (Path *) lfirst(lc);
7721 :
7722 : /* Shouldn't have any parameterized paths anymore */
7723 : Assert(subpath->param_info == NULL);
7724 :
7725 19524 : if (tlist_same_exprs)
7726 15902 : subpath->pathtarget->sortgrouprefs =
7727 15902 : scanjoin_target->sortgrouprefs;
7728 : else
7729 : {
7730 : Path *newpath;
7731 :
7732 3622 : newpath = (Path *) create_projection_path(root, rel, subpath,
7733 : scanjoin_target);
7734 3622 : lfirst(lc) = newpath;
7735 : }
7736 : }
7737 :
7738 : /*
7739 : * Now, if final scan/join target contains SRFs, insert ProjectSetPath(s)
7740 : * atop each existing path. (Note that this function doesn't look at the
7741 : * cheapest-path fields, which is a good thing because they're bogus right
7742 : * now.)
7743 : */
7744 572956 : if (root->parse->hasTargetSRFs)
7745 8766 : adjust_paths_for_srfs(root, rel,
7746 : scanjoin_targets,
7747 : scanjoin_targets_contain_srfs);
7748 :
7749 : /*
7750 : * Update the rel's target to be the final (with SRFs) scan/join target.
7751 : * This now matches the actual output of all the paths, and we might get
7752 : * confused in createplan.c if they don't agree. We must do this now so
7753 : * that any append paths made in the next part will use the correct
7754 : * pathtarget (cf. create_append_path).
7755 : *
7756 : * Note that this is also necessary if GetForeignUpperPaths() gets called
7757 : * on the final scan/join relation or on any of its children, since the
7758 : * FDW might look at the rel's target to create ForeignPaths.
7759 : */
7760 572956 : rel->reltarget = llast_node(PathTarget, scanjoin_targets);
7761 :
7762 : /*
7763 : * If the relation is partitioned, recursively apply the scan/join target
7764 : * to all partitions, and generate brand-new Append paths in which the
7765 : * scan/join target is computed below the Append rather than above it.
7766 : * Since Append is not projection-capable, that might save a separate
7767 : * Result node, and it also is important for partitionwise aggregate.
7768 : */
7769 572956 : if (rel_is_partitioned)
7770 : {
7771 12550 : List *live_children = NIL;
7772 : int i;
7773 :
7774 : /* Adjust each partition. */
7775 12550 : i = -1;
7776 35370 : while ((i = bms_next_member(rel->live_parts, i)) >= 0)
7777 : {
7778 22820 : RelOptInfo *child_rel = rel->part_rels[i];
7779 : AppendRelInfo **appinfos;
7780 : int nappinfos;
7781 22820 : List *child_scanjoin_targets = NIL;
7782 :
7783 : Assert(child_rel != NULL);
7784 :
7785 : /* Dummy children can be ignored. */
7786 22820 : if (IS_DUMMY_REL(child_rel))
7787 42 : continue;
7788 :
7789 : /* Translate scan/join targets for this child. */
7790 22778 : appinfos = find_appinfos_by_relids(root, child_rel->relids,
7791 : &nappinfos);
7792 45556 : foreach(lc, scanjoin_targets)
7793 : {
7794 22778 : PathTarget *target = lfirst_node(PathTarget, lc);
7795 :
7796 22778 : target = copy_pathtarget(target);
7797 22778 : target->exprs = (List *)
7798 22778 : adjust_appendrel_attrs(root,
7799 22778 : (Node *) target->exprs,
7800 : nappinfos, appinfos);
7801 22778 : child_scanjoin_targets = lappend(child_scanjoin_targets,
7802 : target);
7803 : }
7804 22778 : pfree(appinfos);
7805 :
7806 : /* Recursion does the real work. */
7807 22778 : apply_scanjoin_target_to_paths(root, child_rel,
7808 : child_scanjoin_targets,
7809 : scanjoin_targets_contain_srfs,
7810 : scanjoin_target_parallel_safe,
7811 : tlist_same_exprs);
7812 :
7813 : /* Save non-dummy children for Append paths. */
7814 22778 : if (!IS_DUMMY_REL(child_rel))
7815 22778 : live_children = lappend(live_children, child_rel);
7816 : }
7817 :
7818 : /* Build new paths for this relation by appending child paths. */
7819 12550 : add_paths_to_append_rel(root, rel, live_children);
7820 : }
7821 :
7822 : /*
7823 : * Consider generating Gather or Gather Merge paths. We must only do this
7824 : * if the relation is parallel safe, and we don't do it for child rels to
7825 : * avoid creating multiple Gather nodes within the same plan. We must do
7826 : * this after all paths have been generated and before set_cheapest, since
7827 : * one of the generated paths may turn out to be the cheapest one.
7828 : */
7829 572956 : if (rel->consider_parallel && !IS_OTHER_REL(rel))
7830 191482 : generate_useful_gather_paths(root, rel, false);
7831 :
7832 : /*
7833 : * Reassess which paths are the cheapest, now that we've potentially added
7834 : * new Gather (or Gather Merge) and/or Append (or MergeAppend) paths to
7835 : * this relation.
7836 : */
7837 572956 : set_cheapest(rel);
7838 572956 : }
7839 :
7840 : /*
7841 : * create_partitionwise_grouping_paths
7842 : *
7843 : * If the partition keys of input relation are part of the GROUP BY clause, all
7844 : * the rows belonging to a given group come from a single partition. This
7845 : * allows aggregation/grouping over a partitioned relation to be broken down
7846 : * into aggregation/grouping on each partition. This should be no worse, and
7847 : * often better, than the normal approach.
7848 : *
7849 : * However, if the GROUP BY clause does not contain all the partition keys,
7850 : * rows from a given group may be spread across multiple partitions. In that
7851 : * case, we perform partial aggregation for each group, append the results,
7852 : * and then finalize aggregation. This is less certain to win than the
7853 : * previous case. It may win if the PartialAggregate stage greatly reduces
7854 : * the number of groups, because fewer rows will pass through the Append node.
7855 : * It may lose if we have lots of small groups.
7856 : */
7857 : static void
7858 562 : create_partitionwise_grouping_paths(PlannerInfo *root,
7859 : RelOptInfo *input_rel,
7860 : RelOptInfo *grouped_rel,
7861 : RelOptInfo *partially_grouped_rel,
7862 : const AggClauseCosts *agg_costs,
7863 : grouping_sets_data *gd,
7864 : PartitionwiseAggregateType patype,
7865 : GroupPathExtraData *extra)
7866 : {
7867 562 : List *grouped_live_children = NIL;
7868 562 : List *partially_grouped_live_children = NIL;
7869 562 : PathTarget *target = grouped_rel->reltarget;
7870 562 : bool partial_grouping_valid = true;
7871 : int i;
7872 :
7873 : Assert(patype != PARTITIONWISE_AGGREGATE_NONE);
7874 : Assert(patype != PARTITIONWISE_AGGREGATE_PARTIAL ||
7875 : partially_grouped_rel != NULL);
7876 :
7877 : /* Add paths for partitionwise aggregation/grouping. */
7878 562 : i = -1;
7879 2056 : while ((i = bms_next_member(input_rel->live_parts, i)) >= 0)
7880 : {
7881 1494 : RelOptInfo *child_input_rel = input_rel->part_rels[i];
7882 : PathTarget *child_target;
7883 : AppendRelInfo **appinfos;
7884 : int nappinfos;
7885 : GroupPathExtraData child_extra;
7886 : RelOptInfo *child_grouped_rel;
7887 : RelOptInfo *child_partially_grouped_rel;
7888 :
7889 : Assert(child_input_rel != NULL);
7890 :
7891 : /* Dummy children can be ignored. */
7892 1494 : if (IS_DUMMY_REL(child_input_rel))
7893 0 : continue;
7894 :
7895 1494 : child_target = copy_pathtarget(target);
7896 :
7897 : /*
7898 : * Copy the given "extra" structure as is and then override the
7899 : * members specific to this child.
7900 : */
7901 1494 : memcpy(&child_extra, extra, sizeof(child_extra));
7902 :
7903 1494 : appinfos = find_appinfos_by_relids(root, child_input_rel->relids,
7904 : &nappinfos);
7905 :
7906 1494 : child_target->exprs = (List *)
7907 1494 : adjust_appendrel_attrs(root,
7908 1494 : (Node *) target->exprs,
7909 : nappinfos, appinfos);
7910 :
7911 : /* Translate havingQual and targetList. */
7912 1494 : child_extra.havingQual = (Node *)
7913 : adjust_appendrel_attrs(root,
7914 : extra->havingQual,
7915 : nappinfos, appinfos);
7916 1494 : child_extra.targetList = (List *)
7917 1494 : adjust_appendrel_attrs(root,
7918 1494 : (Node *) extra->targetList,
7919 : nappinfos, appinfos);
7920 :
7921 : /*
7922 : * extra->patype was the value computed for our parent rel; patype is
7923 : * the value for this relation. For the child, our value is its
7924 : * parent rel's value.
7925 : */
7926 1494 : child_extra.patype = patype;
7927 :
7928 : /*
7929 : * Create grouping relation to hold fully aggregated grouping and/or
7930 : * aggregation paths for the child.
7931 : */
7932 1494 : child_grouped_rel = make_grouping_rel(root, child_input_rel,
7933 : child_target,
7934 1494 : extra->target_parallel_safe,
7935 : child_extra.havingQual);
7936 :
7937 : /* Create grouping paths for this child relation. */
7938 1494 : create_ordinary_grouping_paths(root, child_input_rel,
7939 : child_grouped_rel,
7940 : agg_costs, gd, &child_extra,
7941 : &child_partially_grouped_rel);
7942 :
7943 1494 : if (child_partially_grouped_rel)
7944 : {
7945 : partially_grouped_live_children =
7946 906 : lappend(partially_grouped_live_children,
7947 : child_partially_grouped_rel);
7948 : }
7949 : else
7950 588 : partial_grouping_valid = false;
7951 :
7952 1494 : if (patype == PARTITIONWISE_AGGREGATE_FULL)
7953 : {
7954 876 : set_cheapest(child_grouped_rel);
7955 876 : grouped_live_children = lappend(grouped_live_children,
7956 : child_grouped_rel);
7957 : }
7958 :
7959 1494 : pfree(appinfos);
7960 : }
7961 :
7962 : /*
7963 : * Try to create append paths for partially grouped children. For full
7964 : * partitionwise aggregation, we might have paths in the partial_pathlist
7965 : * if parallel aggregation is possible. For partial partitionwise
7966 : * aggregation, we may have paths in both pathlist and partial_pathlist.
7967 : *
7968 : * NB: We must have a partially grouped path for every child in order to
7969 : * generate a partially grouped path for this relation.
7970 : */
7971 562 : if (partially_grouped_rel && partial_grouping_valid)
7972 : {
7973 : Assert(partially_grouped_live_children != NIL);
7974 :
7975 350 : add_paths_to_append_rel(root, partially_grouped_rel,
7976 : partially_grouped_live_children);
7977 :
7978 : /*
7979 : * We need call set_cheapest, since the finalization step will use the
7980 : * cheapest path from the rel.
7981 : */
7982 350 : if (partially_grouped_rel->pathlist)
7983 350 : set_cheapest(partially_grouped_rel);
7984 : }
7985 :
7986 : /* If possible, create append paths for fully grouped children. */
7987 562 : if (patype == PARTITIONWISE_AGGREGATE_FULL)
7988 : {
7989 : Assert(grouped_live_children != NIL);
7990 :
7991 320 : add_paths_to_append_rel(root, grouped_rel, grouped_live_children);
7992 : }
7993 562 : }
7994 :
7995 : /*
7996 : * group_by_has_partkey
7997 : *
7998 : * Returns true if all the partition keys of the given relation are part of
7999 : * the GROUP BY clauses, including having matching collation, false otherwise.
8000 : */
8001 : static bool
8002 556 : group_by_has_partkey(RelOptInfo *input_rel,
8003 : List *targetList,
8004 : List *groupClause)
8005 : {
8006 556 : List *groupexprs = get_sortgrouplist_exprs(groupClause, targetList);
8007 556 : int cnt = 0;
8008 : int partnatts;
8009 :
8010 : /* Input relation should be partitioned. */
8011 : Assert(input_rel->part_scheme);
8012 :
8013 : /* Rule out early, if there are no partition keys present. */
8014 556 : if (!input_rel->partexprs)
8015 0 : return false;
8016 :
8017 556 : partnatts = input_rel->part_scheme->partnatts;
8018 :
8019 912 : for (cnt = 0; cnt < partnatts; cnt++)
8020 : {
8021 592 : List *partexprs = input_rel->partexprs[cnt];
8022 : ListCell *lc;
8023 592 : bool found = false;
8024 :
8025 810 : foreach(lc, partexprs)
8026 : {
8027 : ListCell *lg;
8028 586 : Expr *partexpr = lfirst(lc);
8029 586 : Oid partcoll = input_rel->part_scheme->partcollation[cnt];
8030 :
8031 924 : foreach(lg, groupexprs)
8032 : {
8033 706 : Expr *groupexpr = lfirst(lg);
8034 706 : Oid groupcoll = exprCollation((Node *) groupexpr);
8035 :
8036 : /*
8037 : * Note: we can assume there is at most one RelabelType node;
8038 : * eval_const_expressions() will have simplified if more than
8039 : * one.
8040 : */
8041 706 : if (IsA(groupexpr, RelabelType))
8042 24 : groupexpr = ((RelabelType *) groupexpr)->arg;
8043 :
8044 706 : if (equal(groupexpr, partexpr))
8045 : {
8046 : /*
8047 : * Reject a match if the grouping collation does not match
8048 : * the partitioning collation.
8049 : */
8050 368 : if (OidIsValid(partcoll) && OidIsValid(groupcoll) &&
8051 : partcoll != groupcoll)
8052 12 : return false;
8053 :
8054 356 : found = true;
8055 356 : break;
8056 : }
8057 : }
8058 :
8059 574 : if (found)
8060 356 : break;
8061 : }
8062 :
8063 : /*
8064 : * If none of the partition key expressions match with any of the
8065 : * GROUP BY expression, return false.
8066 : */
8067 580 : if (!found)
8068 224 : return false;
8069 : }
8070 :
8071 320 : return true;
8072 : }
8073 :
8074 : /*
8075 : * generate_setop_child_grouplist
8076 : * Build a SortGroupClause list defining the sort/grouping properties
8077 : * of the child of a set operation.
8078 : *
8079 : * This is similar to generate_setop_grouplist() but differs as the setop
8080 : * child query's targetlist entries may already have a tleSortGroupRef
8081 : * assigned for other purposes, such as GROUP BYs. Here we keep the
8082 : * SortGroupClause list in the same order as 'op' groupClauses and just adjust
8083 : * the tleSortGroupRef to reference the TargetEntry's 'ressortgroupref'. If
8084 : * any of the columns in the targetlist don't match to the setop's colTypes
8085 : * then we return an empty list. This may leave some TLEs with unreferenced
8086 : * ressortgroupref markings, but that's harmless.
8087 : */
8088 : static List *
8089 12158 : generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist)
8090 : {
8091 12158 : List *grouplist = copyObject(op->groupClauses);
8092 : ListCell *lg;
8093 : ListCell *lt;
8094 : ListCell *ct;
8095 :
8096 12158 : lg = list_head(grouplist);
8097 12158 : ct = list_head(op->colTypes);
8098 48734 : foreach(lt, targetlist)
8099 : {
8100 37006 : TargetEntry *tle = (TargetEntry *) lfirst(lt);
8101 : SortGroupClause *sgc;
8102 : Oid coltype;
8103 :
8104 : /* resjunk columns could have sortgrouprefs. Leave these alone */
8105 37006 : if (tle->resjunk)
8106 0 : continue;
8107 :
8108 : /*
8109 : * We expect every non-resjunk target to have a SortGroupClause and
8110 : * colTypes.
8111 : */
8112 : Assert(lg != NULL);
8113 : Assert(ct != NULL);
8114 37006 : sgc = (SortGroupClause *) lfirst(lg);
8115 37006 : coltype = lfirst_oid(ct);
8116 :
8117 : /* reject if target type isn't the same as the setop target type */
8118 37006 : if (coltype != exprType((Node *) tle->expr))
8119 430 : return NIL;
8120 :
8121 36576 : lg = lnext(grouplist, lg);
8122 36576 : ct = lnext(op->colTypes, ct);
8123 :
8124 : /* assign a tleSortGroupRef, or reuse the existing one */
8125 36576 : sgc->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
8126 : }
8127 :
8128 : Assert(lg == NULL);
8129 : Assert(ct == NULL);
8130 :
8131 11728 : return grouplist;
8132 : }
|