Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * planner.c
4 : * The query optimizer external interface.
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/plan/planner.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include <limits.h>
19 : #include <math.h>
20 :
21 : #include "access/genam.h"
22 : #include "access/parallel.h"
23 : #include "access/sysattr.h"
24 : #include "access/table.h"
25 : #include "catalog/pg_aggregate.h"
26 : #include "catalog/pg_constraint.h"
27 : #include "catalog/pg_inherits.h"
28 : #include "catalog/pg_proc.h"
29 : #include "catalog/pg_type.h"
30 : #include "executor/executor.h"
31 : #include "foreign/fdwapi.h"
32 : #include "jit/jit.h"
33 : #include "lib/bipartite_match.h"
34 : #include "lib/knapsack.h"
35 : #include "miscadmin.h"
36 : #include "nodes/makefuncs.h"
37 : #include "nodes/nodeFuncs.h"
38 : #ifdef OPTIMIZER_DEBUG
39 : #include "nodes/print.h"
40 : #endif
41 : #include "nodes/supportnodes.h"
42 : #include "optimizer/appendinfo.h"
43 : #include "optimizer/clauses.h"
44 : #include "optimizer/cost.h"
45 : #include "optimizer/optimizer.h"
46 : #include "optimizer/paramassign.h"
47 : #include "optimizer/pathnode.h"
48 : #include "optimizer/paths.h"
49 : #include "optimizer/plancat.h"
50 : #include "optimizer/planmain.h"
51 : #include "optimizer/planner.h"
52 : #include "optimizer/prep.h"
53 : #include "optimizer/subselect.h"
54 : #include "optimizer/tlist.h"
55 : #include "parser/analyze.h"
56 : #include "parser/parse_agg.h"
57 : #include "parser/parse_clause.h"
58 : #include "parser/parse_relation.h"
59 : #include "parser/parsetree.h"
60 : #include "partitioning/partdesc.h"
61 : #include "rewrite/rewriteManip.h"
62 : #include "utils/lsyscache.h"
63 : #include "utils/rel.h"
64 : #include "utils/selfuncs.h"
65 :
66 : /* GUC parameters */
67 : double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
68 : int debug_parallel_query = DEBUG_PARALLEL_OFF;
69 : bool parallel_leader_participation = true;
70 :
71 : /* Hook for plugins to get control in planner() */
72 : planner_hook_type planner_hook = NULL;
73 :
74 : /* Hook for plugins to get control when grouping_planner() plans upper rels */
75 : create_upper_paths_hook_type create_upper_paths_hook = NULL;
76 :
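/*
 * Editorial illustration, not part of planner.c: a minimal sketch of how an
 * extension module might use create_upper_paths_hook, assuming the usual
 * module boilerplate (PG_MODULE_MAGIC, #include "optimizer/planner.h", and
 * so on).  The callback name below is hypothetical.  A well-behaved hook
 * always chains to any previously installed hook.
 */
static create_upper_paths_hook_type prev_create_upper_paths_hook = NULL;

static void
example_upper_paths_hook(PlannerInfo *root, UpperRelationKind stage,
						 RelOptInfo *input_rel, RelOptInfo *output_rel,
						 void *extra)
{
	if (prev_create_upper_paths_hook)
		prev_create_upper_paths_hook(root, stage, input_rel,
									 output_rel, extra);

	/* For example, act only when the grouping/aggregation rel is planned */
	if (stage == UPPERREL_GROUP_AGG)
	{
		/* inspect input_rel->pathlist, or add custom paths to output_rel */
	}
}

/*
 * Installed from the module's _PG_init(), e.g.:
 *		prev_create_upper_paths_hook = create_upper_paths_hook;
 *		create_upper_paths_hook = example_upper_paths_hook;
 */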
77 :
78 : /* Expression kind codes for preprocess_expression */
79 : #define EXPRKIND_QUAL 0
80 : #define EXPRKIND_TARGET 1
81 : #define EXPRKIND_RTFUNC 2
82 : #define EXPRKIND_RTFUNC_LATERAL 3
83 : #define EXPRKIND_VALUES 4
84 : #define EXPRKIND_VALUES_LATERAL 5
85 : #define EXPRKIND_LIMIT 6
86 : #define EXPRKIND_APPINFO 7
87 : #define EXPRKIND_PHV 8
88 : #define EXPRKIND_TABLESAMPLE 9
89 : #define EXPRKIND_ARBITER_ELEM 10
90 : #define EXPRKIND_TABLEFUNC 11
91 : #define EXPRKIND_TABLEFUNC_LATERAL 12
92 : #define EXPRKIND_GROUPEXPR 13
93 :
94 : /*
95 : * Data specific to grouping sets
96 : */
97 : typedef struct
98 : {
99 : List *rollups;
100 : List *hash_sets_idx;
101 : double dNumHashGroups;
102 : bool any_hashable;
103 : Bitmapset *unsortable_refs;
104 : Bitmapset *unhashable_refs;
105 : List *unsortable_sets;
106 : int *tleref_to_colnum_map;
107 : } grouping_sets_data;
108 :
109 : /*
110 : * Temporary structure for use during WindowClause reordering in order to be
111 : * able to sort WindowClauses on partitioning/ordering prefix.
112 : */
113 : typedef struct
114 : {
115 : WindowClause *wc;
116 : List *uniqueOrder; /* A List of unique ordering/partitioning
117 : * clauses per Window */
118 : } WindowClauseSortData;
119 :
120 : /* Passthrough data for standard_qp_callback */
121 : typedef struct
122 : {
123 : List *activeWindows; /* active windows, if any */
124 : grouping_sets_data *gset_data; /* grouping sets data, if any */
125 : SetOperationStmt *setop; /* parent set operation or NULL if not a
126 : * subquery belonging to a set operation */
127 : } standard_qp_extra;
128 :
129 : /* Local functions */
130 : static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
131 : static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
132 : static void grouping_planner(PlannerInfo *root, double tuple_fraction,
133 : SetOperationStmt *setops);
134 : static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
135 : static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
136 : int *tleref_to_colnum_map);
137 : static void preprocess_rowmarks(PlannerInfo *root);
138 : static double preprocess_limit(PlannerInfo *root,
139 : double tuple_fraction,
140 : int64 *offset_est, int64 *count_est);
141 : static void remove_useless_groupby_columns(PlannerInfo *root);
142 : static List *preprocess_groupclause(PlannerInfo *root, List *force);
143 : static List *extract_rollup_sets(List *groupingSets);
144 : static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
145 : static void standard_qp_callback(PlannerInfo *root, void *extra);
146 : static double get_number_of_groups(PlannerInfo *root,
147 : double path_rows,
148 : grouping_sets_data *gd,
149 : List *target_list);
150 : static RelOptInfo *create_grouping_paths(PlannerInfo *root,
151 : RelOptInfo *input_rel,
152 : PathTarget *target,
153 : bool target_parallel_safe,
154 : grouping_sets_data *gd);
155 : static bool is_degenerate_grouping(PlannerInfo *root);
156 : static void create_degenerate_grouping_paths(PlannerInfo *root,
157 : RelOptInfo *input_rel,
158 : RelOptInfo *grouped_rel);
159 : static RelOptInfo *make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
160 : PathTarget *target, bool target_parallel_safe,
161 : Node *havingQual);
162 : static void create_ordinary_grouping_paths(PlannerInfo *root,
163 : RelOptInfo *input_rel,
164 : RelOptInfo *grouped_rel,
165 : const AggClauseCosts *agg_costs,
166 : grouping_sets_data *gd,
167 : GroupPathExtraData *extra,
168 : RelOptInfo **partially_grouped_rel_p);
169 : static void consider_groupingsets_paths(PlannerInfo *root,
170 : RelOptInfo *grouped_rel,
171 : Path *path,
172 : bool is_sorted,
173 : bool can_hash,
174 : grouping_sets_data *gd,
175 : const AggClauseCosts *agg_costs,
176 : double dNumGroups);
177 : static RelOptInfo *create_window_paths(PlannerInfo *root,
178 : RelOptInfo *input_rel,
179 : PathTarget *input_target,
180 : PathTarget *output_target,
181 : bool output_target_parallel_safe,
182 : WindowFuncLists *wflists,
183 : List *activeWindows);
184 : static void create_one_window_path(PlannerInfo *root,
185 : RelOptInfo *window_rel,
186 : Path *path,
187 : PathTarget *input_target,
188 : PathTarget *output_target,
189 : WindowFuncLists *wflists,
190 : List *activeWindows);
191 : static RelOptInfo *create_distinct_paths(PlannerInfo *root,
192 : RelOptInfo *input_rel,
193 : PathTarget *target);
194 : static void create_partial_distinct_paths(PlannerInfo *root,
195 : RelOptInfo *input_rel,
196 : RelOptInfo *final_distinct_rel,
197 : PathTarget *target);
198 : static RelOptInfo *create_final_distinct_paths(PlannerInfo *root,
199 : RelOptInfo *input_rel,
200 : RelOptInfo *distinct_rel);
201 : static RelOptInfo *create_ordered_paths(PlannerInfo *root,
202 : RelOptInfo *input_rel,
203 : PathTarget *target,
204 : bool target_parallel_safe,
205 : double limit_tuples);
206 : static PathTarget *make_group_input_target(PlannerInfo *root,
207 : PathTarget *final_target);
208 : static PathTarget *make_partial_grouping_target(PlannerInfo *root,
209 : PathTarget *grouping_target,
210 : Node *havingQual);
211 : static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
212 : static void optimize_window_clauses(PlannerInfo *root,
213 : WindowFuncLists *wflists);
214 : static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
215 : static PathTarget *make_window_input_target(PlannerInfo *root,
216 : PathTarget *final_target,
217 : List *activeWindows);
218 : static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
219 : List *tlist);
220 : static PathTarget *make_sort_input_target(PlannerInfo *root,
221 : PathTarget *final_target,
222 : bool *have_postponed_srfs);
223 : static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
224 : List *targets, List *targets_contain_srfs);
225 : static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
226 : RelOptInfo *grouped_rel,
227 : RelOptInfo *partially_grouped_rel,
228 : const AggClauseCosts *agg_costs,
229 : grouping_sets_data *gd,
230 : double dNumGroups,
231 : GroupPathExtraData *extra);
232 : static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root,
233 : RelOptInfo *grouped_rel,
234 : RelOptInfo *input_rel,
235 : grouping_sets_data *gd,
236 : GroupPathExtraData *extra,
237 : bool force_rel_creation);
238 : static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel);
239 : static bool can_partial_agg(PlannerInfo *root);
240 : static void apply_scanjoin_target_to_paths(PlannerInfo *root,
241 : RelOptInfo *rel,
242 : List *scanjoin_targets,
243 : List *scanjoin_targets_contain_srfs,
244 : bool scanjoin_target_parallel_safe,
245 : bool tlist_same_exprs);
246 : static void create_partitionwise_grouping_paths(PlannerInfo *root,
247 : RelOptInfo *input_rel,
248 : RelOptInfo *grouped_rel,
249 : RelOptInfo *partially_grouped_rel,
250 : const AggClauseCosts *agg_costs,
251 : grouping_sets_data *gd,
252 : PartitionwiseAggregateType patype,
253 : GroupPathExtraData *extra);
254 : static bool group_by_has_partkey(RelOptInfo *input_rel,
255 : List *targetList,
256 : List *groupClause);
257 : static int common_prefix_cmp(const void *a, const void *b);
258 : static List *generate_setop_child_grouplist(SetOperationStmt *op,
259 : List *targetlist);
260 :
261 :
262 : /*****************************************************************************
263 : *
264 : * Query optimizer entry point
265 : *
266 : * To support loadable plugins that monitor or modify planner behavior,
267 : * we provide a hook variable that lets a plugin get control before and
268 : * after the standard planning process. The plugin would normally call
269 : * standard_planner().
270 : *
271 : * Note to plugin authors: standard_planner() scribbles on its Query input,
272 : * so you'd better copy that data structure if you want to plan more than once.
273 : *
274 : *****************************************************************************/
275 : PlannedStmt *
276 454026 : planner(Query *parse, const char *query_string, int cursorOptions,
277 : ParamListInfo boundParams)
278 : {
279 : PlannedStmt *result;
280 :
281 454026 : if (planner_hook)
282 90980 : result = (*planner_hook) (parse, query_string, cursorOptions, boundParams);
283 : else
284 363046 : result = standard_planner(parse, query_string, cursorOptions, boundParams);
285 449920 : return result;
286 : }
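/*
 * Editorial illustration, not part of planner.c: a minimal sketch of the
 * planner_hook protocol described above, assuming ordinary extension
 * boilerplate and hypothetical names (example_planner).  A plugin saves any
 * previously installed hook, does its own work, and normally finishes by
 * calling the previous hook or standard_planner().  Because standard_planner()
 * scribbles on the Query it is given, a plugin that wants to plan the same
 * query more than once should work on a copy, e.g. copyObject(parse).
 */
static planner_hook_type prev_planner_hook = NULL;

static PlannedStmt *
example_planner(Query *parse, const char *query_string, int cursorOptions,
				ParamListInfo boundParams)
{
	PlannedStmt *result;

	/* ... examine or instrument the incoming query here ... */

	if (prev_planner_hook)
		result = prev_planner_hook(parse, query_string, cursorOptions,
								   boundParams);
	else
		result = standard_planner(parse, query_string, cursorOptions,
								  boundParams);

	/* ... examine or adjust the finished PlannedStmt here ... */

	return result;
}

void
_PG_init(void)
{
	prev_planner_hook = planner_hook;
	planner_hook = example_planner;
}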
287 :
288 : PlannedStmt *
289 454026 : standard_planner(Query *parse, const char *query_string, int cursorOptions,
290 : ParamListInfo boundParams)
291 : {
292 : PlannedStmt *result;
293 : PlannerGlobal *glob;
294 : double tuple_fraction;
295 : PlannerInfo *root;
296 : RelOptInfo *final_rel;
297 : Path *best_path;
298 : Plan *top_plan;
299 : ListCell *lp,
300 : *lr;
301 :
302 : /*
303 : * Set up global state for this planner invocation. This data is needed
304 : * across all levels of sub-Query that might exist in the given command,
305 : * so we keep it in a separate struct that's linked to by each per-Query
306 : * PlannerInfo.
307 : */
308 454026 : glob = makeNode(PlannerGlobal);
309 :
310 454026 : glob->boundParams = boundParams;
311 454026 : glob->subplans = NIL;
312 454026 : glob->subpaths = NIL;
313 454026 : glob->subroots = NIL;
314 454026 : glob->rewindPlanIDs = NULL;
315 454026 : glob->finalrtable = NIL;
316 454026 : glob->finalrteperminfos = NIL;
317 454026 : glob->finalrowmarks = NIL;
318 454026 : glob->resultRelations = NIL;
319 454026 : glob->appendRelations = NIL;
320 454026 : glob->relationOids = NIL;
321 454026 : glob->invalItems = NIL;
322 454026 : glob->paramExecTypes = NIL;
323 454026 : glob->lastPHId = 0;
324 454026 : glob->lastRowMarkId = 0;
325 454026 : glob->lastPlanNodeId = 0;
326 454026 : glob->transientPlan = false;
327 454026 : glob->dependsOnRole = false;
328 :
329 : /*
330 : * Assess whether it's feasible to use parallel mode for this query. We
331 : * can't do this in a standalone backend, or if the command will try to
332 : * modify any data, or if this is a cursor operation, or if GUCs are set
333 : * to values that don't permit parallelism, or if parallel-unsafe
334 : * functions are present in the query tree.
335 : *
336 : * (Note that we do allow CREATE TABLE AS, SELECT INTO, and CREATE
337 : * MATERIALIZED VIEW to use parallel plans, but this is safe only because
338 : * the command is writing into a completely new table which workers won't
339 : * be able to see. If the workers could see the table, the fact that
340 : * group locking would cause them to ignore the leader's heavyweight GIN
341 : * page locks would make this unsafe. We'll have to fix that somehow if
342 : * we want to allow parallel inserts in general; updates and deletes have
343 : * additional problems especially around combo CIDs.)
344 : *
345 : * For now, we don't try to use parallel mode if we're running inside a
346 : * parallel worker. We might eventually be able to relax this
347 : * restriction, but for now it seems best not to have parallel workers
348 : * trying to create their own parallel workers.
349 : */
350 454026 : if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
351 428444 : IsUnderPostmaster &&
352 428444 : parse->commandType == CMD_SELECT &&
353 344142 : !parse->hasModifyingCTE &&
354 344002 : max_parallel_workers_per_gather > 0 &&
355 343416 : !IsParallelWorker())
356 : {
357 : /* all the cheap tests pass, so scan the query tree */
358 343340 : glob->maxParallelHazard = max_parallel_hazard(parse);
359 343340 : glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE);
360 : }
361 : else
362 : {
363 : /* skip the query tree scan, just assume it's unsafe */
364 110686 : glob->maxParallelHazard = PROPARALLEL_UNSAFE;
365 110686 : glob->parallelModeOK = false;
366 : }
367 :
368 : /*
369 : * glob->parallelModeNeeded is normally set to false here and changed to
370 : * true during plan creation if a Gather or Gather Merge plan is actually
371 : * created (cf. create_gather_plan, create_gather_merge_plan).
372 : *
373 : * However, if debug_parallel_query = on or debug_parallel_query =
374 : * regress, then we impose parallel mode whenever it's safe to do so, even
375 : * if the final plan doesn't use parallelism. It's not safe to do so if
376 : * the query contains anything parallel-unsafe; parallelModeOK will be
377 : * false in that case. Note that parallelModeOK can't change after this
378 : * point. Otherwise, everything in the query is either parallel-safe or
379 : * parallel-restricted, and in either case it should be OK to impose
380 : * parallel-mode restrictions. If that ends up breaking something, then
381 : * either some function the user included in the query is incorrectly
382 : * labeled as parallel-safe or parallel-restricted when in reality it's
383 : * parallel-unsafe, or else the query planner itself has a bug.
384 : */
385 740642 : glob->parallelModeNeeded = glob->parallelModeOK &&
386 286616 : (debug_parallel_query != DEBUG_PARALLEL_OFF);
387 :
388 : /* Determine what fraction of the plan is likely to be scanned */
389 454026 : if (cursorOptions & CURSOR_OPT_FAST_PLAN)
390 : {
391 : /*
392 : * We have no real idea how many tuples the user will ultimately FETCH
393 : * from a cursor, but it is often the case that he doesn't want 'em
394 : * all, or would prefer a fast-start plan anyway so that he can
395 : * process some of the tuples sooner. Use a GUC parameter to decide
396 : * what fraction to optimize for.
397 : */
398 2892 : tuple_fraction = cursor_tuple_fraction;
399 :
400 : /*
401 : * We document cursor_tuple_fraction as simply being a fraction, which
402 : * means the edge cases 0 and 1 have to be treated specially here. We
403 : * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
404 : */
405 2892 : if (tuple_fraction >= 1.0)
406 0 : tuple_fraction = 0.0;
407 2892 : else if (tuple_fraction <= 0.0)
408 0 : tuple_fraction = 1e-10;
409 : }
410 : else
411 : {
412 : /* Default assumption is we need all the tuples */
413 451134 : tuple_fraction = 0.0;
414 : }
415 :
416 : /* primary planning entry point (may recurse for subqueries) */
417 454026 : root = subquery_planner(glob, parse, NULL, false, tuple_fraction, NULL);
418 :
419 : /* Select best Path and turn it into a Plan */
420 450316 : final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
421 450316 : best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
422 :
423 450316 : top_plan = create_plan(root, best_path);
424 :
425 : /*
426 : * If creating a plan for a scrollable cursor, make sure it can run
427 : * backwards on demand. Add a Material node at the top at need.
428 : */
429 449920 : if (cursorOptions & CURSOR_OPT_SCROLL)
430 : {
431 264 : if (!ExecSupportsBackwardScan(top_plan))
432 30 : top_plan = materialize_finished_plan(top_plan);
433 : }
434 :
435 : /*
436 : * Optionally add a Gather node for testing purposes, provided this is
437 : * actually a safe thing to do.
438 : *
439 : * We can add Gather even when top_plan has parallel-safe initPlans, but
440 : * then we have to move the initPlans to the Gather node because of
441 : * SS_finalize_plan's limitations. That would cause cosmetic breakage of
442 : * regression tests when debug_parallel_query = regress, because initPlans
443 : * that would normally appear on the top_plan move to the Gather, causing
444 : * them to disappear from EXPLAIN output. That doesn't seem worth kluging
445 : * EXPLAIN to hide, so skip it when debug_parallel_query = regress.
446 : */
447 449920 : if (debug_parallel_query != DEBUG_PARALLEL_OFF &&
448 182 : top_plan->parallel_safe &&
449 84 : (top_plan->initPlan == NIL ||
450 0 : debug_parallel_query != DEBUG_PARALLEL_REGRESS))
451 : {
452 84 : Gather *gather = makeNode(Gather);
453 : Cost initplan_cost;
454 : bool unsafe_initplans;
455 :
456 84 : gather->plan.targetlist = top_plan->targetlist;
457 84 : gather->plan.qual = NIL;
458 84 : gather->plan.lefttree = top_plan;
459 84 : gather->plan.righttree = NULL;
460 84 : gather->num_workers = 1;
461 84 : gather->single_copy = true;
462 84 : gather->invisible = (debug_parallel_query == DEBUG_PARALLEL_REGRESS);
463 :
464 : /* Transfer any initPlans to the new top node */
465 84 : gather->plan.initPlan = top_plan->initPlan;
466 84 : top_plan->initPlan = NIL;
467 :
468 : /*
469 : * Since this Gather has no parallel-aware descendants to signal to,
470 : * we don't need a rescan Param.
471 : */
472 84 : gather->rescan_param = -1;
473 :
474 : /*
475 : * Ideally we'd use cost_gather here, but setting up dummy path data
476 : * to satisfy it doesn't seem much cleaner than knowing what it does.
477 : */
478 84 : gather->plan.startup_cost = top_plan->startup_cost +
479 : parallel_setup_cost;
480 84 : gather->plan.total_cost = top_plan->total_cost +
481 84 : parallel_setup_cost + parallel_tuple_cost * top_plan->plan_rows;
482 84 : gather->plan.plan_rows = top_plan->plan_rows;
483 84 : gather->plan.plan_width = top_plan->plan_width;
484 84 : gather->plan.parallel_aware = false;
485 84 : gather->plan.parallel_safe = false;
486 :
487 : /*
488 : * Delete the initplans' cost from top_plan. We needn't add it to the
489 : * Gather node, since the above coding already included it there.
490 : */
491 84 : SS_compute_initplan_cost(gather->plan.initPlan,
492 : &initplan_cost, &unsafe_initplans);
493 84 : top_plan->startup_cost -= initplan_cost;
494 84 : top_plan->total_cost -= initplan_cost;
495 :
496 : /* use parallel mode for parallel plans. */
497 84 : root->glob->parallelModeNeeded = true;
498 :
499 84 : top_plan = &gather->plan;
500 : }
501 :
502 : /*
503 : * If any Params were generated, run through the plan tree and compute
504 : * each plan node's extParam/allParam sets. Ideally we'd merge this into
505 : * set_plan_references' tree traversal, but for now it has to be separate
506 : * because we need to visit subplans before, not after, the main plan.
507 : */
508 449920 : if (glob->paramExecTypes != NIL)
509 : {
510 : Assert(list_length(glob->subplans) == list_length(glob->subroots));
511 189094 : forboth(lp, glob->subplans, lr, glob->subroots)
512 : {
513 38610 : Plan *subplan = (Plan *) lfirst(lp);
514 38610 : PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
515 :
516 38610 : SS_finalize_plan(subroot, subplan);
517 : }
518 150484 : SS_finalize_plan(root, top_plan);
519 : }
520 :
521 : /* final cleanup of the plan */
522 : Assert(glob->finalrtable == NIL);
523 : Assert(glob->finalrteperminfos == NIL);
524 : Assert(glob->finalrowmarks == NIL);
525 : Assert(glob->resultRelations == NIL);
526 : Assert(glob->appendRelations == NIL);
527 449920 : top_plan = set_plan_references(root, top_plan);
528 : /* ... and the subplans (both regular subplans and initplans) */
529 : Assert(list_length(glob->subplans) == list_length(glob->subroots));
530 488530 : forboth(lp, glob->subplans, lr, glob->subroots)
531 : {
532 38610 : Plan *subplan = (Plan *) lfirst(lp);
533 38610 : PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
534 :
535 38610 : lfirst(lp) = set_plan_references(subroot, subplan);
536 : }
537 :
538 : /* build the PlannedStmt result */
539 449920 : result = makeNode(PlannedStmt);
540 :
541 449920 : result->commandType = parse->commandType;
542 449920 : result->queryId = parse->queryId;
543 449920 : result->hasReturning = (parse->returningList != NIL);
544 449920 : result->hasModifyingCTE = parse->hasModifyingCTE;
545 449920 : result->canSetTag = parse->canSetTag;
546 449920 : result->transientPlan = glob->transientPlan;
547 449920 : result->dependsOnRole = glob->dependsOnRole;
548 449920 : result->parallelModeNeeded = glob->parallelModeNeeded;
549 449920 : result->planTree = top_plan;
550 449920 : result->rtable = glob->finalrtable;
551 449920 : result->permInfos = glob->finalrteperminfos;
552 449920 : result->resultRelations = glob->resultRelations;
553 449920 : result->appendRelations = glob->appendRelations;
554 449920 : result->subplans = glob->subplans;
555 449920 : result->rewindPlanIDs = glob->rewindPlanIDs;
556 449920 : result->rowMarks = glob->finalrowmarks;
557 449920 : result->relationOids = glob->relationOids;
558 449920 : result->invalItems = glob->invalItems;
559 449920 : result->paramExecTypes = glob->paramExecTypes;
560 : /* utilityStmt should be null, but we might as well copy it */
561 449920 : result->utilityStmt = parse->utilityStmt;
562 449920 : result->stmt_location = parse->stmt_location;
563 449920 : result->stmt_len = parse->stmt_len;
564 :
565 449920 : result->jitFlags = PGJIT_NONE;
566 449920 : if (jit_enabled && jit_above_cost >= 0 &&
567 449376 : top_plan->total_cost > jit_above_cost)
568 : {
569 932 : result->jitFlags |= PGJIT_PERFORM;
570 :
571 : /*
572 : * Decide how much effort should be put into generating better code.
573 : */
574 932 : if (jit_optimize_above_cost >= 0 &&
575 932 : top_plan->total_cost > jit_optimize_above_cost)
576 432 : result->jitFlags |= PGJIT_OPT3;
577 932 : if (jit_inline_above_cost >= 0 &&
578 932 : top_plan->total_cost > jit_inline_above_cost)
579 432 : result->jitFlags |= PGJIT_INLINE;
580 :
581 : /*
582 : * Decide which operations should be JITed.
583 : */
584 932 : if (jit_expressions)
585 932 : result->jitFlags |= PGJIT_EXPR;
586 932 : if (jit_tuple_deforming)
587 932 : result->jitFlags |= PGJIT_DEFORM;
588 : }
589 :
590 449920 : if (glob->partition_directory != NULL)
591 11314 : DestroyPartitionDirectory(glob->partition_directory);
592 :
593 449920 : return result;
594 : }
595 :
596 :
597 : /*--------------------
598 : * subquery_planner
599 : * Invokes the planner on a subquery. We recurse to here for each
600 : * sub-SELECT found in the query tree.
601 : *
602 : * glob is the global state for the current planner run.
603 : * parse is the querytree produced by the parser & rewriter.
604 : * parent_root is the immediate parent Query's info (NULL at the top level).
605 : * hasRecursion is true if this is a recursive WITH query.
606 : * tuple_fraction is the fraction of tuples we expect will be retrieved.
607 : * tuple_fraction is interpreted as explained for grouping_planner, below.
608 : * setops is used for set operation subqueries to provide the subquery with
609 : * the context in which it's being used so that Paths correctly sorted for the
610 : * set operation can be generated. NULL when not planning a set operation
611 : * child.
612 : *
613 : * Basically, this routine does the stuff that should only be done once
614 : * per Query object. It then calls grouping_planner. At one time,
615 : * grouping_planner could be invoked recursively on the same Query object;
616 : * that's not currently true, but we keep the separation between the two
617 : * routines anyway, in case we need it again someday.
618 : *
619 : * subquery_planner will be called recursively to handle sub-Query nodes
620 : * found within the query's expressions and rangetable.
621 : *
622 : * Returns the PlannerInfo struct ("root") that contains all data generated
623 : * while planning the subquery. In particular, the Path(s) attached to
624 : * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
625 : * cheapest way(s) to implement the query. The top level will select the
626 : * best Path and pass it through createplan.c to produce a finished Plan.
627 : *--------------------
628 : */
629 : PlannerInfo *
630 514732 : subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
631 : bool hasRecursion, double tuple_fraction,
632 : SetOperationStmt *setops)
633 : {
634 : PlannerInfo *root;
635 : List *newWithCheckOptions;
636 : List *newHaving;
637 : bool hasOuterJoins;
638 : bool hasResultRTEs;
639 : RelOptInfo *final_rel;
640 : ListCell *l;
641 :
642 : /* Create a PlannerInfo data structure for this subquery */
643 514732 : root = makeNode(PlannerInfo);
644 514732 : root->parse = parse;
645 514732 : root->glob = glob;
646 514732 : root->query_level = parent_root ? parent_root->query_level + 1 : 1;
647 514732 : root->parent_root = parent_root;
648 514732 : root->plan_params = NIL;
649 514732 : root->outer_params = NULL;
650 514732 : root->planner_cxt = CurrentMemoryContext;
651 514732 : root->init_plans = NIL;
652 514732 : root->cte_plan_ids = NIL;
653 514732 : root->multiexpr_params = NIL;
654 514732 : root->join_domains = NIL;
655 514732 : root->eq_classes = NIL;
656 514732 : root->ec_merging_done = false;
657 514732 : root->last_rinfo_serial = 0;
658 514732 : root->all_result_relids =
659 514732 : parse->resultRelation ? bms_make_singleton(parse->resultRelation) : NULL;
660 514732 : root->leaf_result_relids = NULL; /* we'll find out leaf-ness later */
661 514732 : root->append_rel_list = NIL;
662 514732 : root->row_identity_vars = NIL;
663 514732 : root->rowMarks = NIL;
664 514732 : memset(root->upper_rels, 0, sizeof(root->upper_rels));
665 514732 : memset(root->upper_targets, 0, sizeof(root->upper_targets));
666 514732 : root->processed_groupClause = NIL;
667 514732 : root->processed_distinctClause = NIL;
668 514732 : root->processed_tlist = NIL;
669 514732 : root->update_colnos = NIL;
670 514732 : root->grouping_map = NULL;
671 514732 : root->minmax_aggs = NIL;
672 514732 : root->qual_security_level = 0;
673 514732 : root->hasPseudoConstantQuals = false;
674 514732 : root->hasAlternativeSubPlans = false;
675 514732 : root->placeholdersFrozen = false;
676 514732 : root->hasRecursion = hasRecursion;
677 514732 : if (hasRecursion)
678 812 : root->wt_param_id = assign_special_exec_param(root);
679 : else
680 513920 : root->wt_param_id = -1;
681 514732 : root->non_recursive_path = NULL;
682 514732 : root->partColsUpdated = false;
683 :
684 : /*
685 : * Create the top-level join domain. This won't have valid contents until
686 : * deconstruct_jointree fills it in, but the node needs to exist before
687 : * that so we can build EquivalenceClasses referencing it.
688 : */
689 514732 : root->join_domains = list_make1(makeNode(JoinDomain));
690 :
691 : /*
692 : * If there is a WITH list, process each WITH query and either convert it
693 : * to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it.
694 : */
695 514732 : if (parse->cteList)
696 2458 : SS_process_ctes(root);
697 :
698 : /*
699 : * If it's a MERGE command, transform the joinlist as appropriate.
700 : */
701 514726 : transform_MERGE_to_join(parse);
702 :
703 : /*
704 : * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so
705 : * that we don't need so many special cases to deal with that situation.
706 : */
707 514726 : replace_empty_jointree(parse);
708 :
709 : /*
710 : * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
711 : * to transform them into joins. Note that this step does not descend
712 : * into subqueries; if we pull up any subqueries below, their SubLinks are
713 : * processed just before pulling them up.
714 : */
715 514726 : if (parse->hasSubLinks)
716 30244 : pull_up_sublinks(root);
717 :
718 : /*
719 : * Scan the rangetable for function RTEs, do const-simplification on them,
720 : * and then inline them if possible (producing subqueries that might get
721 : * pulled up next). Recursion issues here are handled in the same way as
722 : * for SubLinks.
723 : */
724 514726 : preprocess_function_rtes(root);
725 :
726 : /*
727 : * Check to see if any subqueries in the jointree can be merged into this
728 : * query.
729 : */
730 514720 : pull_up_subqueries(root);
731 :
732 : /*
733 : * If this is a simple UNION ALL query, flatten it into an appendrel. We
734 : * do this now because it requires applying pull_up_subqueries to the leaf
735 : * queries of the UNION ALL, which weren't touched above because they
736 : * weren't referenced by the jointree (they will be after we do this).
737 : */
738 514714 : if (parse->setOperations)
739 5768 : flatten_simple_union_all(root);
740 :
741 : /*
742 : * Survey the rangetable to see what kinds of entries are present. We can
743 : * skip some later processing if relevant SQL features are not used; for
744 : * example if there are no JOIN RTEs we can avoid the expense of doing
745 : * flatten_join_alias_vars(). This must be done after we have finished
746 : * adding rangetable entries, of course. (Note: actually, processing of
747 : * inherited or partitioned rels can cause RTEs for their child tables to
748 : * get added later; but those must all be RTE_RELATION entries, so they
749 : * don't invalidate the conclusions drawn here.)
750 : */
751 514714 : root->hasJoinRTEs = false;
752 514714 : root->hasLateralRTEs = false;
753 514714 : root->group_rtindex = 0;
754 514714 : hasOuterJoins = false;
755 514714 : hasResultRTEs = false;
756 1361638 : foreach(l, parse->rtable)
757 : {
758 846924 : RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
759 :
760 846924 : switch (rte->rtekind)
761 : {
762 433548 : case RTE_RELATION:
763 433548 : if (rte->inh)
764 : {
765 : /*
766 : * Check to see if the relation actually has any children;
767 : * if not, clear the inh flag so we can treat it as a
768 : * plain base relation.
769 : *
770 : * Note: this could give a false-positive result, if the
771 : * rel once had children but no longer does. We used to
772 : * be able to clear rte->inh later on when we discovered
773 : * that, but no more; we have to handle such cases as
774 : * full-fledged inheritance.
775 : */
776 348752 : rte->inh = has_subclass(rte->relid);
777 : }
778 433548 : break;
779 76430 : case RTE_JOIN:
780 76430 : root->hasJoinRTEs = true;
781 76430 : if (IS_OUTER_JOIN(rte->jointype))
782 42878 : hasOuterJoins = true;
783 76430 : break;
784 221140 : case RTE_RESULT:
785 221140 : hasResultRTEs = true;
786 221140 : break;
787 4354 : case RTE_GROUP:
788 : Assert(parse->hasGroupRTE);
789 4354 : root->group_rtindex = list_cell_number(parse->rtable, l) + 1;
790 4354 : break;
791 111452 : default:
792 : /* No work here for other RTE types */
793 111452 : break;
794 : }
795 :
796 846924 : if (rte->lateral)
797 10482 : root->hasLateralRTEs = true;
798 :
799 : /*
800 : * We can also determine the maximum security level required for any
801 : * securityQuals now. Addition of inheritance-child RTEs won't affect
802 : * this, because child tables don't have their own securityQuals; see
803 : * expand_single_inheritance_child().
804 : */
805 846924 : if (rte->securityQuals)
806 2454 : root->qual_security_level = Max(root->qual_security_level,
807 : list_length(rte->securityQuals));
808 : }
809 :
810 : /*
811 : * If we have now verified that the query target relation is
812 : * non-inheriting, mark it as a leaf target.
813 : */
814 514714 : if (parse->resultRelation)
815 : {
816 90584 : RangeTblEntry *rte = rt_fetch(parse->resultRelation, parse->rtable);
817 :
818 90584 : if (!rte->inh)
819 87960 : root->leaf_result_relids =
820 87960 : bms_make_singleton(parse->resultRelation);
821 : }
822 :
823 : /*
824 : * Preprocess RowMark information. We need to do this after subquery
825 : * pullup, so that all base relations are present.
826 : */
827 514714 : preprocess_rowmarks(root);
828 :
829 : /*
830 : * Set hasHavingQual to remember if HAVING clause is present. Needed
831 : * because preprocess_expression will reduce a constant-true condition to
832 : * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
833 : */
834 514714 : root->hasHavingQual = (parse->havingQual != NULL);
835 :
836 : /*
837 : * Do expression preprocessing on targetlist and quals, as well as other
838 : * random expressions in the querytree. Note that we do not need to
839 : * handle sort/group expressions explicitly, because they are actually
840 : * part of the targetlist.
841 : */
842 511082 : parse->targetList = (List *)
843 514714 : preprocess_expression(root, (Node *) parse->targetList,
844 : EXPRKIND_TARGET);
845 :
846 511082 : newWithCheckOptions = NIL;
847 513414 : foreach(l, parse->withCheckOptions)
848 : {
849 2332 : WithCheckOption *wco = lfirst_node(WithCheckOption, l);
850 :
851 2332 : wco->qual = preprocess_expression(root, wco->qual,
852 : EXPRKIND_QUAL);
853 2332 : if (wco->qual != NULL)
854 1932 : newWithCheckOptions = lappend(newWithCheckOptions, wco);
855 : }
856 511082 : parse->withCheckOptions = newWithCheckOptions;
857 :
858 511082 : parse->returningList = (List *)
859 511082 : preprocess_expression(root, (Node *) parse->returningList,
860 : EXPRKIND_TARGET);
861 :
862 511082 : preprocess_qual_conditions(root, (Node *) parse->jointree);
863 :
864 511082 : parse->havingQual = preprocess_expression(root, parse->havingQual,
865 : EXPRKIND_QUAL);
866 :
867 513634 : foreach(l, parse->windowClause)
868 : {
869 2552 : WindowClause *wc = lfirst_node(WindowClause, l);
870 :
871 : /* partitionClause/orderClause are sort/group expressions */
872 2552 : wc->startOffset = preprocess_expression(root, wc->startOffset,
873 : EXPRKIND_LIMIT);
874 2552 : wc->endOffset = preprocess_expression(root, wc->endOffset,
875 : EXPRKIND_LIMIT);
876 : }
877 :
878 511082 : parse->limitOffset = preprocess_expression(root, parse->limitOffset,
879 : EXPRKIND_LIMIT);
880 511082 : parse->limitCount = preprocess_expression(root, parse->limitCount,
881 : EXPRKIND_LIMIT);
882 :
883 511082 : if (parse->onConflict)
884 : {
885 3604 : parse->onConflict->arbiterElems = (List *)
886 1802 : preprocess_expression(root,
887 1802 : (Node *) parse->onConflict->arbiterElems,
888 : EXPRKIND_ARBITER_ELEM);
889 3604 : parse->onConflict->arbiterWhere =
890 1802 : preprocess_expression(root,
891 1802 : parse->onConflict->arbiterWhere,
892 : EXPRKIND_QUAL);
893 3604 : parse->onConflict->onConflictSet = (List *)
894 1802 : preprocess_expression(root,
895 1802 : (Node *) parse->onConflict->onConflictSet,
896 : EXPRKIND_TARGET);
897 1802 : parse->onConflict->onConflictWhere =
898 1802 : preprocess_expression(root,
899 1802 : parse->onConflict->onConflictWhere,
900 : EXPRKIND_QUAL);
901 : /* exclRelTlist contains only Vars, so no preprocessing needed */
902 : }
903 :
904 513776 : foreach(l, parse->mergeActionList)
905 : {
906 2694 : MergeAction *action = (MergeAction *) lfirst(l);
907 :
908 2694 : action->targetList = (List *)
909 2694 : preprocess_expression(root,
910 2694 : (Node *) action->targetList,
911 : EXPRKIND_TARGET);
912 2694 : action->qual =
913 2694 : preprocess_expression(root,
914 : (Node *) action->qual,
915 : EXPRKIND_QUAL);
916 : }
917 :
918 511082 : parse->mergeJoinCondition =
919 511082 : preprocess_expression(root, parse->mergeJoinCondition, EXPRKIND_QUAL);
920 :
921 511082 : root->append_rel_list = (List *)
922 511082 : preprocess_expression(root, (Node *) root->append_rel_list,
923 : EXPRKIND_APPINFO);
924 :
925 : /* Also need to preprocess expressions within RTEs */
926 1354096 : foreach(l, parse->rtable)
927 : {
928 843014 : RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
929 : int kind;
930 : ListCell *lcsq;
931 :
932 843014 : if (rte->rtekind == RTE_RELATION)
933 : {
934 433282 : if (rte->tablesample)
935 222 : rte->tablesample = (TableSampleClause *)
936 222 : preprocess_expression(root,
937 222 : (Node *) rte->tablesample,
938 : EXPRKIND_TABLESAMPLE);
939 : }
940 409732 : else if (rte->rtekind == RTE_SUBQUERY)
941 : {
942 : /*
943 : * We don't want to do all preprocessing yet on the subquery's
944 : * expressions, since that will happen when we plan it. But if it
945 : * contains any join aliases of our level, those have to get
946 : * expanded now, because planning of the subquery won't do it.
947 : * That's only possible if the subquery is LATERAL.
948 : */
949 54456 : if (rte->lateral && root->hasJoinRTEs)
950 878 : rte->subquery = (Query *)
951 878 : flatten_join_alias_vars(root, root->parse,
952 878 : (Node *) rte->subquery);
953 : }
954 355276 : else if (rte->rtekind == RTE_FUNCTION)
955 : {
956 : /* Preprocess the function expression(s) fully */
957 43978 : kind = rte->lateral ? EXPRKIND_RTFUNC_LATERAL : EXPRKIND_RTFUNC;
958 43978 : rte->functions = (List *)
959 43978 : preprocess_expression(root, (Node *) rte->functions, kind);
960 : }
961 311298 : else if (rte->rtekind == RTE_TABLEFUNC)
962 : {
963 : /* Preprocess the function expression(s) fully */
964 626 : kind = rte->lateral ? EXPRKIND_TABLEFUNC_LATERAL : EXPRKIND_TABLEFUNC;
965 626 : rte->tablefunc = (TableFunc *)
966 626 : preprocess_expression(root, (Node *) rte->tablefunc, kind);
967 : }
968 310672 : else if (rte->rtekind == RTE_VALUES)
969 : {
970 : /* Preprocess the values lists fully */
971 7908 : kind = rte->lateral ? EXPRKIND_VALUES_LATERAL : EXPRKIND_VALUES;
972 7908 : rte->values_lists = (List *)
973 7908 : preprocess_expression(root, (Node *) rte->values_lists, kind);
974 : }
975 302764 : else if (rte->rtekind == RTE_GROUP)
976 : {
977 : /* Preprocess the groupexprs list fully */
978 4354 : rte->groupexprs = (List *)
979 4354 : preprocess_expression(root, (Node *) rte->groupexprs,
980 : EXPRKIND_GROUPEXPR);
981 : }
982 :
983 : /*
984 : * Process each element of the securityQuals list as if it were a
985 : * separate qual expression (as indeed it is). We need to do it this
986 : * way to get proper canonicalization of AND/OR structure. Note that
987 : * this converts each element into an implicit-AND sublist.
988 : */
989 845814 : foreach(lcsq, rte->securityQuals)
990 : {
991 2800 : lfirst(lcsq) = preprocess_expression(root,
992 2800 : (Node *) lfirst(lcsq),
993 : EXPRKIND_QUAL);
994 : }
995 : }
996 :
997 : /*
998 : * Now that we are done preprocessing expressions, and in particular done
999 : * flattening join alias variables, get rid of the joinaliasvars lists.
1000 : * They no longer match what expressions in the rest of the tree look
1001 : * like, because we have not preprocessed expressions in those lists (and
1002 : * do not want to; for example, expanding a SubLink there would result in
1003 : * a useless unreferenced subplan). Leaving them in place simply creates
1004 : * a hazard for later scans of the tree. We could try to prevent that by
1005 : * using QTW_IGNORE_JOINALIASES in every tree scan done after this point,
1006 : * but that doesn't sound very reliable.
1007 : */
1008 511082 : if (root->hasJoinRTEs)
1009 : {
1010 264830 : foreach(l, parse->rtable)
1011 : {
1012 218112 : RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
1013 :
1014 218112 : rte->joinaliasvars = NIL;
1015 : }
1016 : }
1017 :
1018 : /*
1019 : * Replace any Vars in the subquery's targetlist and havingQual that
1020 : * reference GROUP outputs with the underlying grouping expressions.
1021 : *
1022 : * Note that we need to perform this replacement after we've preprocessed
1023 : * the grouping expressions. This is to ensure that there is only one
1024 : * instance of SubPlan for each SubLink contained within the grouping
1025 : * expressions.
1026 : */
1027 511082 : if (parse->hasGroupRTE)
1028 : {
1029 4354 : parse->targetList = (List *)
1030 4354 : flatten_group_exprs(root, root->parse, (Node *) parse->targetList);
1031 4354 : parse->havingQual =
1032 4354 : flatten_group_exprs(root, root->parse, parse->havingQual);
1033 : }
1034 :
1035 : /* Constant-folding might have removed all set-returning functions */
1036 511082 : if (parse->hasTargetSRFs)
1037 8662 : parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
1038 :
1039 : /*
1040 : * In some cases we may want to transfer a HAVING clause into WHERE. We
1041 : * cannot do so if the HAVING clause contains aggregates (obviously) or
1042 : * volatile functions (since a HAVING clause is supposed to be executed
1043 : * only once per group). We also can't do this if there are any nonempty
1044 : * grouping sets and the clause references any columns that are nullable
1045 : * by the grouping sets; moving such a clause into WHERE would potentially
1046 : * change the results. (If there are only empty grouping sets, then the
1047 : * HAVING clause must be degenerate as discussed below.)
1048 : *
1049 : * Also, it may be that the clause is so expensive to execute that we're
1050 : * better off doing it only once per group, despite the loss of
1051 : * selectivity. This is hard to estimate short of doing the entire
1052 : * planning process twice, so we use a heuristic: clauses containing
1053 : * subplans are left in HAVING. Otherwise, we move or copy the HAVING
1054 : * clause into WHERE, in hopes of eliminating tuples before aggregation
1055 : * instead of after.
1056 : *
1057 : * If the query has explicit grouping then we can simply move such a
1058 : * clause into WHERE; any group that fails the clause will not be in the
1059 : * output because none of its tuples will reach the grouping or
1060 : * aggregation stage. Otherwise we must have a degenerate (variable-free)
1061 : * HAVING clause, which we put in WHERE so that query_planner() can use it
1062 : * in a gating Result node, but also keep in HAVING to ensure that we
1063 : * don't emit a bogus aggregated row. (This could be done better, but it
1064 : * seems not worth optimizing.)
1065 : *
1066 : * Note that a HAVING clause may contain expressions that are not fully
1067 : * preprocessed. This can happen if these expressions are part of
1068 : * grouping items. In such cases, they are replaced with GROUP Vars in
1069 : * the parser and then replaced back after we're done with expression
1070 : * preprocessing on havingQual. This is not an issue if the clause
1071 : * remains in HAVING, because these expressions will be matched to lower
1072 : * target items in setrefs.c. However, if the clause is moved or copied
1073 : * into WHERE, we need to ensure that these expressions are fully
1074 : * preprocessed.
1075 : *
1076 : * Note that both havingQual and parse->jointree->quals are in
1077 : * implicitly-ANDed-list form at this point, even though they are declared
1078 : * as Node *.
1079 : */
1080 511082 : newHaving = NIL;
1081 512528 : foreach(l, (List *) parse->havingQual)
1082 : {
1083 1446 : Node *havingclause = (Node *) lfirst(l);
1084 :
1085 1762 : if (contain_agg_clause(havingclause) ||
1086 632 : contain_volatile_functions(havingclause) ||
1087 316 : contain_subplans(havingclause) ||
1088 382 : (parse->groupClause && parse->groupingSets &&
1089 66 : bms_is_member(root->group_rtindex, pull_varnos(root, havingclause))))
1090 : {
1091 : /* keep it in HAVING */
1092 1184 : newHaving = lappend(newHaving, havingclause);
1093 : }
1094 262 : else if (parse->groupClause)
1095 : {
1096 : Node *whereclause;
1097 :
1098 : /* Preprocess the HAVING clause fully */
1099 244 : whereclause = preprocess_expression(root, havingclause,
1100 : EXPRKIND_QUAL);
1101 : /* ... and move it to WHERE */
1102 244 : parse->jointree->quals = (Node *)
1103 244 : list_concat((List *) parse->jointree->quals,
1104 : (List *) whereclause);
1105 : }
1106 : else
1107 : {
1108 : Node *whereclause;
1109 :
1110 : /* Preprocess the HAVING clause fully */
1111 18 : whereclause = preprocess_expression(root, copyObject(havingclause),
1112 : EXPRKIND_QUAL);
1113 : /* ... and put a copy in WHERE */
1114 36 : parse->jointree->quals = (Node *)
1115 18 : list_concat((List *) parse->jointree->quals,
1116 : (List *) whereclause);
1117 : /* ... and also keep it in HAVING */
1118 18 : newHaving = lappend(newHaving, havingclause);
1119 : }
1120 : }
1121 511082 : parse->havingQual = (Node *) newHaving;
1122 :
1123 : /*
1124 : * If we have any outer joins, try to reduce them to plain inner joins.
1125 : * This step is most easily done after we've done expression
1126 : * preprocessing.
1127 : */
1128 511082 : if (hasOuterJoins)
1129 30190 : reduce_outer_joins(root);
1130 :
1131 : /*
1132 : * If we have any RTE_RESULT relations, see if they can be deleted from
1133 : * the jointree. We also rely on this processing to flatten single-child
1134 : * FromExprs underneath outer joins. This step is most effectively done
1135 : * after we've done expression preprocessing and outer join reduction.
1136 : */
1137 511082 : if (hasResultRTEs || hasOuterJoins)
1138 246658 : remove_useless_result_rtes(root);
1139 :
1140 : /*
1141 : * Do the main planning.
1142 : */
1143 511082 : grouping_planner(root, tuple_fraction, setops);
1144 :
1145 : /*
1146 : * Capture the set of outer-level param IDs we have access to, for use in
1147 : * extParam/allParam calculations later.
1148 : */
1149 511016 : SS_identify_outer_params(root);
1150 :
1151 : /*
1152 : * If any initPlans were created in this query level, adjust the surviving
1153 : * Paths' costs and parallel-safety flags to account for them. The
1154 : * initPlans won't actually get attached to the plan tree till
1155 : * create_plan() runs, but we must include their effects now.
1156 : */
1157 511016 : final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1158 511016 : SS_charge_for_initplans(root, final_rel);
1159 :
1160 : /*
1161 : * Make sure we've identified the cheapest Path for the final rel. (By
1162 : * doing this here not in grouping_planner, we include initPlan costs in
1163 : * the decision, though it's unlikely that will change anything.)
1164 : */
1165 511016 : set_cheapest(final_rel);
1166 :
1167 511016 : return root;
1168 : }
1169 :
1170 : /*
1171 : * preprocess_expression
1172 : * Do subquery_planner's preprocessing work for an expression,
1173 : * which can be a targetlist, a WHERE clause (including JOIN/ON
1174 : * conditions), a HAVING clause, or a few other things.
1175 : */
1176 : static Node *
1177 4264466 : preprocess_expression(PlannerInfo *root, Node *expr, int kind)
1178 : {
1179 : /*
1180 : * Fall out quickly if expression is empty. This occurs often enough to
1181 : * be worth checking. Note that null->null is the correct conversion for
1182 : * implicit-AND result format, too.
1183 : */
1184 4264466 : if (expr == NULL)
1185 3403134 : return NULL;
1186 :
1187 : /*
1188 : * If the query has any join RTEs, replace join alias variables with
1189 : * base-relation variables. We must do this first, since any expressions
1190 : * we may extract from the joinaliasvars lists have not been preprocessed.
1191 : * For example, if we did this after sublink processing, sublinks expanded
1192 : * out from join aliases would not get processed. But we can skip this in
1193 : * non-lateral RTE functions, VALUES lists, and TABLESAMPLE clauses, since
1194 : * they can't contain any Vars of the current query level.
1195 : */
1196 861332 : if (root->hasJoinRTEs &&
1197 336612 : !(kind == EXPRKIND_RTFUNC ||
1198 168108 : kind == EXPRKIND_VALUES ||
1199 : kind == EXPRKIND_TABLESAMPLE ||
1200 : kind == EXPRKIND_TABLEFUNC))
1201 168096 : expr = flatten_join_alias_vars(root, root->parse, expr);
1202 :
1203 : /*
1204 : * Simplify constant expressions. For function RTEs, this was already
1205 : * done by preprocess_function_rtes. (But note we must do it again for
1206 : * EXPRKIND_RTFUNC_LATERAL, because those might by now contain
1207 : * un-simplified subexpressions inserted by flattening of subqueries or
1208 : * join alias variables.)
1209 : *
1210 : * Note: an essential effect of this is to convert named-argument function
1211 : * calls to positional notation and insert the current actual values of
1212 : * any default arguments for functions. To ensure that happens, we *must*
1213 : * process all expressions here. Previous PG versions sometimes skipped
1214 : * const-simplification if it didn't seem worth the trouble, but we can't
1215 : * do that anymore.
1216 : *
1217 : * Note: this also flattens nested AND and OR expressions into N-argument
1218 : * form. All processing of a qual expression after this point must be
1219 : * careful to maintain AND/OR flatness --- that is, do not generate a tree
1220 : * with AND directly under AND, nor OR directly under OR.
1221 : */
1222 861332 : if (kind != EXPRKIND_RTFUNC)
1223 826094 : expr = eval_const_expressions(root, expr);
1224 :
1225 : /*
1226 : * If it's a qual or havingQual, canonicalize it.
1227 : */
1228 857700 : if (kind == EXPRKIND_QUAL)
1229 : {
1230 292214 : expr = (Node *) canonicalize_qual((Expr *) expr, false);
1231 :
1232 : #ifdef OPTIMIZER_DEBUG
1233 : printf("After canonicalize_qual()\n");
1234 : pprint(expr);
1235 : #endif
1236 : }
1237 :
1238 : /*
1239 : * Check for ANY ScalarArrayOpExpr with Const arrays and set the
1240 : * hashfuncid of any that might execute more quickly by using hash lookups
1241 : * instead of a linear search.
1242 : */
1243 857700 : if (kind == EXPRKIND_QUAL || kind == EXPRKIND_TARGET)
1244 : {
1245 790672 : convert_saop_to_hashed_saop(expr);
1246 : }
1247 :
1248 : /* Expand SubLinks to SubPlans */
1249 857700 : if (root->parse->hasSubLinks)
1250 83940 : expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
1251 :
1252 : /*
1253 : * XXX do not insert anything here unless you have grokked the comments in
1254 : * SS_replace_correlation_vars ...
1255 : */
1256 :
1257 : /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
1258 857700 : if (root->query_level > 1)
1259 135930 : expr = SS_replace_correlation_vars(root, expr);
1260 :
1261 : /*
1262 : * If it's a qual or havingQual, convert it to implicit-AND format. (We
1263 : * don't want to do this before eval_const_expressions, since the latter
1264 : * would be unable to simplify a top-level AND correctly. Also,
1265 : * SS_process_sublinks expects explicit-AND format.)
1266 : */
1267 857700 : if (kind == EXPRKIND_QUAL)
1268 292214 : expr = (Node *) make_ands_implicit((Expr *) expr);
1269 :
1270 857700 : return expr;
1271 : }
1272 :
1273 : /*
1274 : * preprocess_qual_conditions
1275 : * Recursively scan the query's jointree and do subquery_planner's
1276 : * preprocessing work on each qual condition found therein.
1277 : */
1278 : static void
1279 1236988 : preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
1280 : {
1281 1236988 : if (jtnode == NULL)
1282 0 : return;
1283 1236988 : if (IsA(jtnode, RangeTblRef))
1284 : {
1285 : /* nothing to do here */
1286 : }
1287 603006 : else if (IsA(jtnode, FromExpr))
1288 : {
1289 522154 : FromExpr *f = (FromExpr *) jtnode;
1290 : ListCell *l;
1291 :
1292 1086356 : foreach(l, f->fromlist)
1293 564202 : preprocess_qual_conditions(root, lfirst(l));
1294 :
1295 522154 : f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
1296 : }
1297 80852 : else if (IsA(jtnode, JoinExpr))
1298 : {
1299 80852 : JoinExpr *j = (JoinExpr *) jtnode;
1300 :
1301 80852 : preprocess_qual_conditions(root, j->larg);
1302 80852 : preprocess_qual_conditions(root, j->rarg);
1303 :
1304 80852 : j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
1305 : }
1306 : else
1307 0 : elog(ERROR, "unrecognized node type: %d",
1308 : (int) nodeTag(jtnode));
1309 : }
1310 :
1311 : /*
1312 : * preprocess_phv_expression
1313 : * Do preprocessing on a PlaceHolderVar expression that's been pulled up.
1314 : *
1315 : * If a LATERAL subquery references an output of another subquery, and that
1316 : * output must be wrapped in a PlaceHolderVar because of an intermediate outer
1317 : * join, then we'll push the PlaceHolderVar expression down into the subquery
1318 : * and later pull it back up during find_lateral_references, which runs after
1319 : * subquery_planner has preprocessed all the expressions that were in the
1320 : * current query level to start with. So we need to preprocess it then.
1321 : */
1322 : Expr *
1323 72 : preprocess_phv_expression(PlannerInfo *root, Expr *expr)
1324 : {
1325 72 : return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
1326 : }
1327 :
1328 : /*--------------------
1329 : * grouping_planner
1330 : * Perform planning steps related to grouping, aggregation, etc.
1331 : *
1332 : * This function adds all required top-level processing to the scan/join
1333 : * Path(s) produced by query_planner.
1334 : *
1335 : * tuple_fraction is the fraction of tuples we expect will be retrieved.
1336 : * tuple_fraction is interpreted as follows:
1337 : * 0: expect all tuples to be retrieved (normal case)
1338 : * 0 < tuple_fraction < 1: expect the given fraction of tuples available
1339 : * from the plan to be retrieved
1340 : * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1341 : * expected to be retrieved (ie, a LIMIT specification).
1342 : * setops is used for set operation subqueries to provide the subquery with
1343 : * the context in which it's being used so that Paths correctly sorted for the
1344 : * set operation can be generated. NULL when not planning a set operation
1345 : * child.
1346 : *
1347 : * Returns nothing; the useful output is in the Paths we attach to the
1348 : * (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
1349 : * root->processed_tlist contains the final processed targetlist.
1350 : *
1351 : * Note that we have not done set_cheapest() on the final rel; it's convenient
1352 : * to leave this to the caller.
1353 : *--------------------
1354 : */
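/*
 * Editorial illustration, not part of planner.c: concrete cases of the
 * tuple_fraction convention above, as set up by the code shown earlier.  An
 * ordinary statement arrives here with tuple_fraction = 0 ("fetch all rows").
 * A cursor planned with CURSOR_OPT_FAST_PLAN arrives with tuple_fraction set
 * to cursor_tuple_fraction (0.1 by default), i.e. a fraction of the result.
 * A query with a known "LIMIT 10" still arrives with 0, but preprocess_limit,
 * called just below, then replaces that with 10, an absolute row count.
 */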
1355 : static void
1356 511082 : grouping_planner(PlannerInfo *root, double tuple_fraction,
1357 : SetOperationStmt *setops)
1358 : {
1359 511082 : Query *parse = root->parse;
1360 511082 : int64 offset_est = 0;
1361 511082 : int64 count_est = 0;
1362 511082 : double limit_tuples = -1.0;
1363 511082 : bool have_postponed_srfs = false;
1364 : PathTarget *final_target;
1365 : List *final_targets;
1366 : List *final_targets_contain_srfs;
1367 : bool final_target_parallel_safe;
1368 : RelOptInfo *current_rel;
1369 : RelOptInfo *final_rel;
1370 : FinalPathExtraData extra;
1371 : ListCell *lc;
1372 :
1373 : /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1374 511082 : if (parse->limitCount || parse->limitOffset)
1375 : {
1376 4672 : tuple_fraction = preprocess_limit(root, tuple_fraction,
1377 : &offset_est, &count_est);
1378 :
1379 : /*
1380 : * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1381 : * estimate the effects of using a bounded sort.
1382 : */
1383 4672 : if (count_est > 0 && offset_est >= 0)
1384 4208 : limit_tuples = (double) count_est + (double) offset_est;
1385 : }
1386 :
1387 : /* Make tuple_fraction accessible to lower-level routines */
1388 511082 : root->tuple_fraction = tuple_fraction;
1389 :
1390 511082 : if (parse->setOperations)
1391 : {
1392 : /*
1393 : * Construct Paths for set operations. The results will not need any
1394 : * work except perhaps a top-level sort and/or LIMIT. Note that any
1395 : * special work for recursive unions is the responsibility of
1396 : * plan_set_operations.
1397 : */
1398 5444 : current_rel = plan_set_operations(root);
1399 :
1400 : /*
1401 : * We should not need to call preprocess_targetlist, since we must be
1402 : * in a SELECT query node. Instead, use the processed_tlist returned
1403 : * by plan_set_operations (since this tells whether it returned any
1404 : * resjunk columns!), and transfer any sort key information from the
1405 : * original tlist.
1406 : */
1407 : Assert(parse->commandType == CMD_SELECT);
1408 :
1409 : /* for safety, copy processed_tlist instead of modifying in-place */
1410 5438 : root->processed_tlist =
1411 5438 : postprocess_setop_tlist(copyObject(root->processed_tlist),
1412 : parse->targetList);
1413 :
1414 : /* Also extract the PathTarget form of the setop result tlist */
1415 5438 : final_target = current_rel->cheapest_total_path->pathtarget;
1416 :
1417 : /* And check whether it's parallel safe */
1418 : final_target_parallel_safe =
1419 5438 : is_parallel_safe(root, (Node *) final_target->exprs);
1420 :
1421 : /* The setop result tlist couldn't contain any SRFs */
1422 : Assert(!parse->hasTargetSRFs);
1423 5438 : final_targets = final_targets_contain_srfs = NIL;
1424 :
1425 : /*
1426 : * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1427 : * checked already, but let's make sure).
1428 : */
1429 5438 : if (parse->rowMarks)
1430 0 : ereport(ERROR,
1431 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1432 : /*------
1433 : translator: %s is a SQL row locking clause such as FOR UPDATE */
1434 : errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1435 : LCS_asString(linitial_node(RowMarkClause,
1436 : parse->rowMarks)->strength))));
1437 :
1438 : /*
1439 : * Calculate pathkeys that represent result ordering requirements
1440 : */
1441 : Assert(parse->distinctClause == NIL);
1442 5438 : root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
1443 : parse->sortClause,
1444 : root->processed_tlist);
1445 : }
1446 : else
1447 : {
1448 : /* No set operations, do regular planning */
1449 : PathTarget *sort_input_target;
1450 : List *sort_input_targets;
1451 : List *sort_input_targets_contain_srfs;
1452 : bool sort_input_target_parallel_safe;
1453 : PathTarget *grouping_target;
1454 : List *grouping_targets;
1455 : List *grouping_targets_contain_srfs;
1456 : bool grouping_target_parallel_safe;
1457 : PathTarget *scanjoin_target;
1458 : List *scanjoin_targets;
1459 : List *scanjoin_targets_contain_srfs;
1460 : bool scanjoin_target_parallel_safe;
1461 : bool scanjoin_target_same_exprs;
1462 : bool have_grouping;
1463 505638 : WindowFuncLists *wflists = NULL;
1464 505638 : List *activeWindows = NIL;
1465 505638 : grouping_sets_data *gset_data = NULL;
1466 : standard_qp_extra qp_extra;
1467 :
1468 : /* A recursive query should always have setOperations */
1469 : Assert(!root->hasRecursion);
1470 :
1471 : /* Preprocess grouping sets and GROUP BY clause, if any */
1472 505638 : if (parse->groupingSets)
1473 : {
1474 854 : gset_data = preprocess_grouping_sets(root);
1475 : }
1476 504784 : else if (parse->groupClause)
1477 : {
1478 : /* Preprocess regular GROUP BY clause, if any */
1479 3542 : root->processed_groupClause = preprocess_groupclause(root, NIL);
1480 : /* Remove any redundant GROUP BY columns */
1481 3542 : remove_useless_groupby_columns(root);
1482 : }
1483 :
1484 : /*
1485 : * Preprocess targetlist. Note that much of the remaining planning
1486 : * work will be done with the PathTarget representation of tlists, but
1487 : * we must also maintain the full representation of the final tlist so
1488 : * that we can transfer its decoration (resnames etc) to the topmost
1489 : * tlist of the finished Plan. This is kept in processed_tlist.
1490 : */
1491 505632 : preprocess_targetlist(root);
1492 :
1493 : /*
1494 : * Mark all the aggregates with resolved aggtranstypes, and detect
1495 : * aggregates that are duplicates or can share transition state. We
1496 : * must do this before slicing and dicing the tlist into various
1497 : * pathtargets, else some copies of the Aggref nodes might escape
1498 : * being marked.
1499 : */
1500 505632 : if (parse->hasAggs)
1501 : {
1502 38924 : preprocess_aggrefs(root, (Node *) root->processed_tlist);
1503 38924 : preprocess_aggrefs(root, (Node *) parse->havingQual);
1504 : }
1505 :
1506 : /*
1507 : * Locate any window functions in the tlist. (We don't need to look
1508 : * anywhere else, since expressions used in ORDER BY will be in there
1509 : * too.) Note that they could all have been eliminated by constant
1510 : * folding, in which case we don't need to do any more work.
1511 : */
1512 505632 : if (parse->hasWindowFuncs)
1513 : {
1514 2336 : wflists = find_window_functions((Node *) root->processed_tlist,
1515 2336 : list_length(parse->windowClause));
1516 2336 : if (wflists->numWindowFuncs > 0)
1517 : {
1518 : /*
1519 : * See if any modifications can be made to each WindowClause
1520 : * to allow the executor to execute the WindowFuncs more
1521 : * quickly.
1522 : */
1523 2330 : optimize_window_clauses(root, wflists);
1524 :
1525 2330 : activeWindows = select_active_windows(root, wflists);
1526 : }
1527 : else
1528 6 : parse->hasWindowFuncs = false;
1529 : }
1530 :
1531 : /*
1532 : * Preprocess MIN/MAX aggregates, if any. Note: be careful about
1533 : * adding logic between here and the query_planner() call. Anything
1534 : * that is needed in MIN/MAX-optimizable cases will have to be
1535 : * duplicated in planagg.c.
1536 : */
1537 505632 : if (parse->hasAggs)
1538 38924 : preprocess_minmax_aggregates(root);
1539 :
1540 : /*
1541 : * Figure out whether there's a hard limit on the number of rows that
1542 : * query_planner's result subplan needs to return. Even if we know a
1543 : * hard limit overall, it doesn't apply if the query has any
1544 : * grouping/aggregation operations, or SRFs in the tlist.
1545 : */
1546 505632 : if (parse->groupClause ||
1547 501284 : parse->groupingSets ||
1548 501242 : parse->distinctClause ||
1549 499010 : parse->hasAggs ||
1550 463942 : parse->hasWindowFuncs ||
1551 461750 : parse->hasTargetSRFs ||
1552 453520 : root->hasHavingQual)
1553 52130 : root->limit_tuples = -1.0;
1554 : else
1555 453502 : root->limit_tuples = limit_tuples;
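/*
 * Editor's illustration (not part of planner.c): for a query such as
 *     SELECT * FROM t ORDER BY a LIMIT 10;
 * this leaves root->limit_tuples at 10, so sort costing may assume a
 * bounded (top-N) sort.  Adding e.g. DISTINCT or an aggregate resets it
 * to -1, since those steps must consume their whole input before any
 * limit can be applied.
 */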
1556 :
1557 : /* Set up data needed by standard_qp_callback */
1558 505632 : qp_extra.activeWindows = activeWindows;
1559 505632 : qp_extra.gset_data = gset_data;
1560 :
1561 : /*
1562 : * If we're a subquery for a set operation, store the SetOperationStmt
1563 : * in qp_extra.
1564 : */
1565 505632 : qp_extra.setop = setops;
1566 :
1567 : /*
1568 : * Generate the best unsorted and presorted paths for the scan/join
1569 : * portion of this Query, ie the processing represented by the
1570 : * FROM/WHERE clauses. (Note there may not be any presorted paths.)
1571 : * We also generate (in standard_qp_callback) pathkey representations
1572 : * of the query's sort clause, distinct clause, etc.
1573 : */
1574 505632 : current_rel = query_planner(root, standard_qp_callback, &qp_extra);
1575 :
1576 : /*
1577 : * Convert the query's result tlist into PathTarget format.
1578 : *
1579 : * Note: this cannot be done before query_planner() has performed
1580 : * appendrel expansion, because that might add resjunk entries to
1581 : * root->processed_tlist. Waiting till afterwards is also helpful
1582 : * because the target width estimates can use per-Var width numbers
1583 : * that were obtained within query_planner().
1584 : */
1585 505584 : final_target = create_pathtarget(root, root->processed_tlist);
1586 : final_target_parallel_safe =
1587 505584 : is_parallel_safe(root, (Node *) final_target->exprs);
1588 :
1589 : /*
1590 : * If ORDER BY was given, consider whether we should use a post-sort
1591 : * projection, and compute the adjusted target for preceding steps if
1592 : * so.
1593 : */
1594 505584 : if (parse->sortClause)
1595 : {
1596 59228 : sort_input_target = make_sort_input_target(root,
1597 : final_target,
1598 : &have_postponed_srfs);
1599 : sort_input_target_parallel_safe =
1600 59228 : is_parallel_safe(root, (Node *) sort_input_target->exprs);
1601 : }
1602 : else
1603 : {
1604 446356 : sort_input_target = final_target;
1605 446356 : sort_input_target_parallel_safe = final_target_parallel_safe;
1606 : }
1607 :
1608 : /*
1609 : * If we have window functions to deal with, the output from any
1610 : * grouping step needs to be what the window functions want;
1611 : * otherwise, it should be sort_input_target.
1612 : */
1613 505584 : if (activeWindows)
1614 : {
1615 2330 : grouping_target = make_window_input_target(root,
1616 : final_target,
1617 : activeWindows);
1618 : grouping_target_parallel_safe =
1619 2330 : is_parallel_safe(root, (Node *) grouping_target->exprs);
1620 : }
1621 : else
1622 : {
1623 503254 : grouping_target = sort_input_target;
1624 503254 : grouping_target_parallel_safe = sort_input_target_parallel_safe;
1625 : }
1626 :
1627 : /*
1628 : * If we have grouping or aggregation to do, the topmost scan/join
1629 : * plan node must emit what the grouping step wants; otherwise, it
1630 : * should emit grouping_target.
1631 : */
1632 501236 : have_grouping = (parse->groupClause || parse->groupingSets ||
1633 1006820 : parse->hasAggs || root->hasHavingQual);
1634 505584 : if (have_grouping)
1635 : {
1636 39514 : scanjoin_target = make_group_input_target(root, final_target);
1637 : scanjoin_target_parallel_safe =
1638 39514 : is_parallel_safe(root, (Node *) scanjoin_target->exprs);
1639 : }
1640 : else
1641 : {
1642 466070 : scanjoin_target = grouping_target;
1643 466070 : scanjoin_target_parallel_safe = grouping_target_parallel_safe;
1644 : }
1645 :
1646 : /*
1647 : * If there are any SRFs in the targetlist, we must separate each of
1648 : * these PathTargets into SRF-computing and SRF-free targets. Replace
1649 : * each of the named targets with a SRF-free version, and remember the
1650 : * list of additional projection steps we need to add afterwards.
1651 : */
1652 505584 : if (parse->hasTargetSRFs)
1653 : {
1654 : /* final_target doesn't recompute any SRFs in sort_input_target */
1655 8662 : split_pathtarget_at_srfs(root, final_target, sort_input_target,
1656 : &final_targets,
1657 : &final_targets_contain_srfs);
1658 8662 : final_target = linitial_node(PathTarget, final_targets);
1659 : Assert(!linitial_int(final_targets_contain_srfs));
1660 : /* likewise for sort_input_target vs. grouping_target */
1661 8662 : split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
1662 : &sort_input_targets,
1663 : &sort_input_targets_contain_srfs);
1664 8662 : sort_input_target = linitial_node(PathTarget, sort_input_targets);
1665 : Assert(!linitial_int(sort_input_targets_contain_srfs));
1666 : /* likewise for grouping_target vs. scanjoin_target */
1667 8662 : split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
1668 : &grouping_targets,
1669 : &grouping_targets_contain_srfs);
1670 8662 : grouping_target = linitial_node(PathTarget, grouping_targets);
1671 : Assert(!linitial_int(grouping_targets_contain_srfs));
1672 : /* scanjoin_target will not have any SRFs precomputed for it */
1673 8662 : split_pathtarget_at_srfs(root, scanjoin_target, NULL,
1674 : &scanjoin_targets,
1675 : &scanjoin_targets_contain_srfs);
1676 8662 : scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
1677 : Assert(!linitial_int(scanjoin_targets_contain_srfs));
1678 : }
1679 : else
1680 : {
1681 : /* initialize lists; for most of these, dummy values are OK */
1682 496922 : final_targets = final_targets_contain_srfs = NIL;
1683 496922 : sort_input_targets = sort_input_targets_contain_srfs = NIL;
1684 496922 : grouping_targets = grouping_targets_contain_srfs = NIL;
1685 496922 : scanjoin_targets = list_make1(scanjoin_target);
1686 496922 : scanjoin_targets_contain_srfs = NIL;
1687 : }
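/*
 * Editor's worked example (not part of planner.c, details hedged): given
 *     SELECT a, generate_series(1, 3) FROM t ORDER BY a;
 * make_sort_input_target postpones the SRF, so sort_input_target is {a}
 * while final_target is {a, generate_series(1, 3)}.  The split above then
 * records that the SRF-free {a} feeds the sort and that a ProjectSet
 * evaluating the SRF must be stacked on top afterwards (see the
 * adjust_paths_for_srfs calls further down).
 */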
1688 :
1689 : /* Apply scan/join target. */
1690 505584 : scanjoin_target_same_exprs = list_length(scanjoin_targets) == 1
1691 505584 : && equal(scanjoin_target->exprs, current_rel->reltarget->exprs);
1692 505584 : apply_scanjoin_target_to_paths(root, current_rel, scanjoin_targets,
1693 : scanjoin_targets_contain_srfs,
1694 : scanjoin_target_parallel_safe,
1695 : scanjoin_target_same_exprs);
1696 :
1697 : /*
1698 : * Save the various upper-rel PathTargets we just computed into
1699 : * root->upper_targets[]. The core code doesn't use this, but it
1700 : * provides a convenient place for extensions to get at the info. For
1701 : * consistency, we save all the intermediate targets, even though some
1702 : * of the corresponding upperrels might not be needed for this query.
1703 : */
1704 505584 : root->upper_targets[UPPERREL_FINAL] = final_target;
1705 505584 : root->upper_targets[UPPERREL_ORDERED] = final_target;
1706 505584 : root->upper_targets[UPPERREL_DISTINCT] = sort_input_target;
1707 505584 : root->upper_targets[UPPERREL_PARTIAL_DISTINCT] = sort_input_target;
1708 505584 : root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
1709 505584 : root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
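/*
 * Editor's worked example (not part of planner.c, details hedged): for
 *     SELECT a, sum(b) AS s FROM t GROUP BY a ORDER BY s;
 * there are no SRFs or window functions, so sort_input_target and
 * grouping_target are the same as final_target, namely {a, sum(b)},
 * while scanjoin_target is the grouping input {a, b}.  Hence
 * UPPERREL_GROUP_AGG and the rels above it all see {a, sum(b)}, and only
 * the scan/join paths are asked to emit {a, b}.
 */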
1710 :
1711 : /*
1712 : * If we have grouping and/or aggregation, consider ways to implement
1713 : * that. We build a new upperrel representing the output of this
1714 : * phase.
1715 : */
1716 505584 : if (have_grouping)
1717 : {
1718 39514 : current_rel = create_grouping_paths(root,
1719 : current_rel,
1720 : grouping_target,
1721 : grouping_target_parallel_safe,
1722 : gset_data);
1723 : /* Fix things up if grouping_target contains SRFs */
1724 39508 : if (parse->hasTargetSRFs)
1725 390 : adjust_paths_for_srfs(root, current_rel,
1726 : grouping_targets,
1727 : grouping_targets_contain_srfs);
1728 : }
1729 :
1730 : /*
1731 : * If we have window functions, consider ways to implement those. We
1732 : * build a new upperrel representing the output of this phase.
1733 : */
1734 505578 : if (activeWindows)
1735 : {
1736 2330 : current_rel = create_window_paths(root,
1737 : current_rel,
1738 : grouping_target,
1739 : sort_input_target,
1740 : sort_input_target_parallel_safe,
1741 : wflists,
1742 : activeWindows);
1743 : /* Fix things up if sort_input_target contains SRFs */
1744 2330 : if (parse->hasTargetSRFs)
1745 12 : adjust_paths_for_srfs(root, current_rel,
1746 : sort_input_targets,
1747 : sort_input_targets_contain_srfs);
1748 : }
1749 :
1750 : /*
1751 : * If there is a DISTINCT clause, consider ways to implement that. We
1752 : * build a new upperrel representing the output of this phase.
1753 : */
1754 505578 : if (parse->distinctClause)
1755 : {
1756 2266 : current_rel = create_distinct_paths(root,
1757 : current_rel,
1758 : sort_input_target);
1759 : }
1760 : } /* end of if (setOperations) */
1761 :
1762 : /*
1763 : * If ORDER BY was given, consider ways to implement that, and generate a
1764 : * new upperrel containing only paths that emit the correct ordering and
1765 : * project the correct final_target. We can apply the original
1766 : * limit_tuples limit in sort costing here, but only if there are no
1767 : * postponed SRFs.
1768 : */
1769 511016 : if (parse->sortClause)
1770 : {
1771 62744 : current_rel = create_ordered_paths(root,
1772 : current_rel,
1773 : final_target,
1774 : final_target_parallel_safe,
1775 : have_postponed_srfs ? -1.0 :
1776 : limit_tuples);
1777 : /* Fix things up if final_target contains SRFs */
1778 62744 : if (parse->hasTargetSRFs)
1779 196 : adjust_paths_for_srfs(root, current_rel,
1780 : final_targets,
1781 : final_targets_contain_srfs);
1782 : }
1783 :
1784 : /*
1785 : * Now we are prepared to build the final-output upperrel.
1786 : */
1787 511016 : final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1788 :
1789 : /*
1790 : * If the input rel is marked consider_parallel and there's nothing that's
1791 : * not parallel-safe in the LIMIT clause, then the final_rel can be marked
1792 : * consider_parallel as well. Note that if the query has rowMarks or is
1793 : * not a SELECT, consider_parallel will be false for every relation in the
1794 : * query.
1795 : */
1796 664238 : if (current_rel->consider_parallel &&
1797 306420 : is_parallel_safe(root, parse->limitOffset) &&
1798 153198 : is_parallel_safe(root, parse->limitCount))
1799 153192 : final_rel->consider_parallel = true;
1800 :
1801 : /*
1802 : * If the current_rel belongs to a single FDW, so does the final_rel.
1803 : */
1804 511016 : final_rel->serverid = current_rel->serverid;
1805 511016 : final_rel->userid = current_rel->userid;
1806 511016 : final_rel->useridiscurrent = current_rel->useridiscurrent;
1807 511016 : final_rel->fdwroutine = current_rel->fdwroutine;
1808 :
1809 : /*
1810 : * Generate paths for the final_rel. Insert all surviving paths, with
1811 : * LockRows, Limit, and/or ModifyTable steps added if needed.
1812 : */
1813 1038562 : foreach(lc, current_rel->pathlist)
1814 : {
1815 527546 : Path *path = (Path *) lfirst(lc);
1816 :
1817 : /*
1818 : * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
1819 : * (Note: we intentionally test parse->rowMarks not root->rowMarks
1820 : * here. If there are only non-locking rowmarks, they should be
1821 : * handled by the ModifyTable node instead. However, root->rowMarks
1822 : * is what goes into the LockRows node.)
1823 : */
1824 527546 : if (parse->rowMarks)
1825 : {
1826 8194 : path = (Path *) create_lockrows_path(root, final_rel, path,
1827 : root->rowMarks,
1828 : assign_special_exec_param(root));
1829 : }
1830 :
1831 : /*
1832 : * If there is a LIMIT/OFFSET clause, add the LIMIT node.
1833 : */
1834 527546 : if (limit_needed(parse))
1835 : {
1836 5602 : path = (Path *) create_limit_path(root, final_rel, path,
1837 : parse->limitOffset,
1838 : parse->limitCount,
1839 : parse->limitOption,
1840 : offset_est, count_est);
1841 : }
1842 :
1843 : /*
1844 : * If this is an INSERT/UPDATE/DELETE/MERGE, add the ModifyTable node.
1845 : */
1846 527546 : if (parse->commandType != CMD_SELECT)
1847 : {
1848 : Index rootRelation;
1849 90332 : List *resultRelations = NIL;
1850 90332 : List *updateColnosLists = NIL;
1851 90332 : List *withCheckOptionLists = NIL;
1852 90332 : List *returningLists = NIL;
1853 90332 : List *mergeActionLists = NIL;
1854 90332 : List *mergeJoinConditions = NIL;
1855 : List *rowMarks;
1856 :
1857 90332 : if (bms_membership(root->all_result_relids) == BMS_MULTIPLE)
1858 : {
1859 : /* Inherited UPDATE/DELETE/MERGE */
1860 2594 : RelOptInfo *top_result_rel = find_base_rel(root,
1861 : parse->resultRelation);
1862 2594 : int resultRelation = -1;
1863 :
1864 : /* Pass the root result rel forward to the executor. */
1865 2594 : rootRelation = parse->resultRelation;
1866 :
1867 : /* Add only leaf children to ModifyTable. */
1868 7588 : while ((resultRelation = bms_next_member(root->leaf_result_relids,
1869 : resultRelation)) >= 0)
1870 : {
1871 4994 : RelOptInfo *this_result_rel = find_base_rel(root,
1872 : resultRelation);
1873 :
1874 : /*
1875 : * Also exclude any leaf rels that have turned dummy since
1876 : * being added to the list, for example, by being excluded
1877 : * by constraint exclusion.
1878 : */
1879 4994 : if (IS_DUMMY_REL(this_result_rel))
1880 84 : continue;
1881 :
1882 : /* Build per-target-rel lists needed by ModifyTable */
1883 4910 : resultRelations = lappend_int(resultRelations,
1884 : resultRelation);
1885 4910 : if (parse->commandType == CMD_UPDATE)
1886 : {
1887 3480 : List *update_colnos = root->update_colnos;
1888 :
1889 3480 : if (this_result_rel != top_result_rel)
1890 : update_colnos =
1891 3480 : adjust_inherited_attnums_multilevel(root,
1892 : update_colnos,
1893 : this_result_rel->relid,
1894 : top_result_rel->relid);
1895 3480 : updateColnosLists = lappend(updateColnosLists,
1896 : update_colnos);
1897 : }
1898 4910 : if (parse->withCheckOptions)
1899 : {
1900 426 : List *withCheckOptions = parse->withCheckOptions;
1901 :
1902 426 : if (this_result_rel != top_result_rel)
1903 : withCheckOptions = (List *)
1904 426 : adjust_appendrel_attrs_multilevel(root,
1905 : (Node *) withCheckOptions,
1906 : this_result_rel,
1907 : top_result_rel);
1908 426 : withCheckOptionLists = lappend(withCheckOptionLists,
1909 : withCheckOptions);
1910 : }
1911 4910 : if (parse->returningList)
1912 : {
1913 552 : List *returningList = parse->returningList;
1914 :
1915 552 : if (this_result_rel != top_result_rel)
1916 : returningList = (List *)
1917 552 : adjust_appendrel_attrs_multilevel(root,
1918 : (Node *) returningList,
1919 : this_result_rel,
1920 : top_result_rel);
1921 552 : returningLists = lappend(returningLists,
1922 : returningList);
1923 : }
1924 4910 : if (parse->mergeActionList)
1925 : {
1926 : ListCell *l;
1927 390 : List *mergeActionList = NIL;
1928 :
1929 : /*
1930 : * Copy MergeActions and translate stuff that
1931 : * references attribute numbers.
1932 : */
1933 1344 : foreach(l, parse->mergeActionList)
1934 : {
1935 954 : MergeAction *action = lfirst(l),
1936 954 : *leaf_action = copyObject(action);
1937 :
1938 954 : leaf_action->qual =
1939 954 : adjust_appendrel_attrs_multilevel(root,
1940 : (Node *) action->qual,
1941 : this_result_rel,
1942 : top_result_rel);
1943 954 : leaf_action->targetList = (List *)
1944 954 : adjust_appendrel_attrs_multilevel(root,
1945 954 : (Node *) action->targetList,
1946 : this_result_rel,
1947 : top_result_rel);
1948 954 : if (leaf_action->commandType == CMD_UPDATE)
1949 592 : leaf_action->updateColnos =
1950 592 : adjust_inherited_attnums_multilevel(root,
1951 : action->updateColnos,
1952 : this_result_rel->relid,
1953 : top_result_rel->relid);
1954 954 : mergeActionList = lappend(mergeActionList,
1955 : leaf_action);
1956 : }
1957 :
1958 390 : mergeActionLists = lappend(mergeActionLists,
1959 : mergeActionList);
1960 : }
1961 4910 : if (parse->commandType == CMD_MERGE)
1962 : {
1963 390 : Node *mergeJoinCondition = parse->mergeJoinCondition;
1964 :
1965 390 : if (this_result_rel != top_result_rel)
1966 : mergeJoinCondition =
1967 390 : adjust_appendrel_attrs_multilevel(root,
1968 : mergeJoinCondition,
1969 : this_result_rel,
1970 : top_result_rel);
1971 390 : mergeJoinConditions = lappend(mergeJoinConditions,
1972 : mergeJoinCondition);
1973 : }
1974 : }
1975 :
1976 2594 : if (resultRelations == NIL)
1977 : {
1978 : /*
1979 : * We managed to exclude every child rel, so generate a
1980 : * dummy one-relation plan using info for the top target
1981 : * rel (even though that may not be a leaf target).
1982 : * Although it's clear that no data will be updated or
1983 : * deleted, we still need to have a ModifyTable node so
1984 : * that any statement triggers will be executed. (This
1985 : * could be cleaner if we fixed nodeModifyTable.c to allow
1986 : * zero target relations, but that probably wouldn't be a
1987 : * net win.)
1988 : */
1989 30 : resultRelations = list_make1_int(parse->resultRelation);
1990 30 : if (parse->commandType == CMD_UPDATE)
1991 30 : updateColnosLists = list_make1(root->update_colnos);
1992 30 : if (parse->withCheckOptions)
1993 0 : withCheckOptionLists = list_make1(parse->withCheckOptions);
1994 30 : if (parse->returningList)
1995 18 : returningLists = list_make1(parse->returningList);
1996 30 : if (parse->mergeActionList)
1997 0 : mergeActionLists = list_make1(parse->mergeActionList);
1998 30 : if (parse->commandType == CMD_MERGE)
1999 0 : mergeJoinConditions = list_make1(parse->mergeJoinCondition);
2000 : }
2001 : }
2002 : else
2003 : {
2004 : /* Single-relation INSERT/UPDATE/DELETE/MERGE. */
2005 87738 : rootRelation = 0; /* there's no separate root rel */
2006 87738 : resultRelations = list_make1_int(parse->resultRelation);
2007 87738 : if (parse->commandType == CMD_UPDATE)
2008 11210 : updateColnosLists = list_make1(root->update_colnos);
2009 87738 : if (parse->withCheckOptions)
2010 908 : withCheckOptionLists = list_make1(parse->withCheckOptions);
2011 87738 : if (parse->returningList)
2012 2210 : returningLists = list_make1(parse->returningList);
2013 87738 : if (parse->mergeActionList)
2014 1602 : mergeActionLists = list_make1(parse->mergeActionList);
2015 87738 : if (parse->commandType == CMD_MERGE)
2016 1602 : mergeJoinConditions = list_make1(parse->mergeJoinCondition);
2017 : }
2018 :
2019 : /*
2020 : * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
2021 : * will have dealt with fetching non-locked marked rows, else we
2022 : * need to have ModifyTable do that.
2023 : */
2024 90332 : if (parse->rowMarks)
2025 0 : rowMarks = NIL;
2026 : else
2027 90332 : rowMarks = root->rowMarks;
2028 :
2029 : path = (Path *)
2030 90332 : create_modifytable_path(root, final_rel,
2031 : path,
2032 : parse->commandType,
2033 90332 : parse->canSetTag,
2034 90332 : parse->resultRelation,
2035 : rootRelation,
2036 90332 : root->partColsUpdated,
2037 : resultRelations,
2038 : updateColnosLists,
2039 : withCheckOptionLists,
2040 : returningLists,
2041 : rowMarks,
2042 : parse->onConflict,
2043 : mergeActionLists,
2044 : mergeJoinConditions,
2045 : assign_special_exec_param(root));
2046 : }
2047 :
2048 : /* And shove it into final_rel */
2049 527546 : add_path(final_rel, path);
2050 : }
2051 :
2052 : /*
2053 : * Generate partial paths for final_rel, too, if outer query levels might
2054 : * be able to make use of them.
2055 : */
2056 511016 : if (final_rel->consider_parallel && root->query_level > 1 &&
2057 19206 : !limit_needed(parse))
2058 : {
2059 : Assert(!parse->rowMarks && parse->commandType == CMD_SELECT);
2060 19152 : foreach(lc, current_rel->partial_pathlist)
2061 : {
2062 102 : Path *partial_path = (Path *) lfirst(lc);
2063 :
2064 102 : add_partial_path(final_rel, partial_path);
2065 : }
2066 : }
2067 :
2068 511016 : extra.limit_needed = limit_needed(parse);
2069 511016 : extra.limit_tuples = limit_tuples;
2070 511016 : extra.count_est = count_est;
2071 511016 : extra.offset_est = offset_est;
2072 :
2073 : /*
2074 : * If there is an FDW that's responsible for all baserels of the query,
2075 : * let it consider adding ForeignPaths.
2076 : */
2077 511016 : if (final_rel->fdwroutine &&
2078 1232 : final_rel->fdwroutine->GetForeignUpperPaths)
2079 1164 : final_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_FINAL,
2080 : current_rel, final_rel,
2081 : &extra);
2082 :
2083 : /* Let extensions possibly add some more paths */
2084 511016 : if (create_upper_paths_hook)
2085 0 : (*create_upper_paths_hook) (root, UPPERREL_FINAL,
2086 : current_rel, final_rel, &extra);
2087 :
2088 : /* Note: currently, we leave it to callers to do set_cheapest() */
2089 511016 : }
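/*
 * Editor's sketch (not part of planner.c): the conventional way an extension
 * chains into create_upper_paths_hook, which grouping_planner invokes just
 * above for UPPERREL_FINAL (the hook is also reached at other upper-rel
 * stages elsewhere).  This is an illustrative skeleton written against the
 * hook signature as the editor understands it, not shipped code.
 */
#include "postgres.h"
#include "fmgr.h"
#include "optimizer/planner.h"

PG_MODULE_MAGIC;

void		_PG_init(void);

static create_upper_paths_hook_type prev_create_upper_paths_hook = NULL;

static void
my_create_upper_paths(PlannerInfo *root, UpperRelationKind stage,
					  RelOptInfo *input_rel, RelOptInfo *output_rel,
					  void *extra)
{
	/* always chain to any previously installed hook */
	if (prev_create_upper_paths_hook)
		prev_create_upper_paths_hook(root, stage, input_rel, output_rel, extra);

	if (stage == UPPERREL_FINAL)
	{
		/* inspect output_rel->pathlist, or add_path() extra Paths, here */
	}
}

void
_PG_init(void)
{
	prev_create_upper_paths_hook = create_upper_paths_hook;
	create_upper_paths_hook = my_create_upper_paths;
}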
2090 :
2091 : /*
2092 : * Do preprocessing for groupingSets clause and related data. This handles the
2093 : * preliminary steps of expanding the grouping sets, organizing them into lists
2094 : * of rollups, and preparing annotations which will later be filled in with
2095 : * size estimates.
2096 : */
2097 : static grouping_sets_data *
2098 854 : preprocess_grouping_sets(PlannerInfo *root)
2099 : {
2100 854 : Query *parse = root->parse;
2101 : List *sets;
2102 854 : int maxref = 0;
2103 : ListCell *lc_set;
2104 854 : grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
2105 :
2106 854 : parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1);
2107 :
2108 854 : gd->any_hashable = false;
2109 854 : gd->unhashable_refs = NULL;
2110 854 : gd->unsortable_refs = NULL;
2111 854 : gd->unsortable_sets = NIL;
2112 :
2113 : /*
2114 : * We don't currently make any attempt to optimize the groupClause when
2115 : * there are grouping sets, so just duplicate it in processed_groupClause.
2116 : */
2117 854 : root->processed_groupClause = parse->groupClause;
2118 :
2119 854 : if (parse->groupClause)
2120 : {
2121 : ListCell *lc;
2122 :
2123 2576 : foreach(lc, parse->groupClause)
2124 : {
2125 1764 : SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2126 1764 : Index ref = gc->tleSortGroupRef;
2127 :
2128 1764 : if (ref > maxref)
2129 1728 : maxref = ref;
2130 :
2131 1764 : if (!gc->hashable)
2132 30 : gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
2133 :
2134 1764 : if (!OidIsValid(gc->sortop))
2135 42 : gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
2136 : }
2137 : }
2138 :
2139 : /* Allocate workspace array for remapping */
2140 854 : gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
2141 :
2142 : /*
2143 : * If we have any unsortable sets, we must extract them before trying to
2144 : * prepare rollups. Unsortable sets don't go through
2145 : * reorder_grouping_sets, so we must apply the GroupingSetData annotation
2146 : * here.
2147 : */
2148 854 : if (!bms_is_empty(gd->unsortable_refs))
2149 : {
2150 42 : List *sortable_sets = NIL;
2151 : ListCell *lc;
2152 :
2153 126 : foreach(lc, parse->groupingSets)
2154 : {
2155 90 : List *gset = (List *) lfirst(lc);
2156 :
2157 90 : if (bms_overlap_list(gd->unsortable_refs, gset))
2158 : {
2159 48 : GroupingSetData *gs = makeNode(GroupingSetData);
2160 :
2161 48 : gs->set = gset;
2162 48 : gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
2163 :
2164 : /*
2165 : * We must enforce here that an unsortable set is hashable;
2166 : * later code assumes this. Parse analysis only checks that
2167 : * every individual column is either hashable or sortable.
2168 : *
2169 : * Note that passing this test doesn't guarantee we can
2170 : * generate a plan; there might be other showstoppers.
2171 : */
2172 48 : if (bms_overlap_list(gd->unhashable_refs, gset))
2173 6 : ereport(ERROR,
2174 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2175 : errmsg("could not implement GROUP BY"),
2176 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2177 : }
2178 : else
2179 42 : sortable_sets = lappend(sortable_sets, gset);
2180 : }
2181 :
2182 36 : if (sortable_sets)
2183 30 : sets = extract_rollup_sets(sortable_sets);
2184 : else
2185 6 : sets = NIL;
2186 : }
2187 : else
2188 812 : sets = extract_rollup_sets(parse->groupingSets);
2189 :
2190 2202 : foreach(lc_set, sets)
2191 : {
2192 1354 : List *current_sets = (List *) lfirst(lc_set);
2193 1354 : RollupData *rollup = makeNode(RollupData);
2194 : GroupingSetData *gs;
2195 :
2196 : /*
2197 : * Reorder the current list of grouping sets into correct prefix
2198 : * order. If only one aggregation pass is needed, try to make the
2199 : * list match the ORDER BY clause; if more than one pass is needed, we
2200 : * don't bother with that.
2201 : *
2202 : * Note that this reorders the sets from smallest-member-first to
2203 : * largest-member-first, and applies the GroupingSetData annotations,
2204 : * though the data will be filled in later.
2205 : */
2206 1354 : current_sets = reorder_grouping_sets(current_sets,
2207 1354 : (list_length(sets) == 1
2208 : ? parse->sortClause
2209 : : NIL));
2210 :
2211 : /*
2212 : * Get the initial (and therefore largest) grouping set.
2213 : */
2214 1354 : gs = linitial_node(GroupingSetData, current_sets);
2215 :
2216 : /*
2217 : * Order the groupClause appropriately. If the first grouping set is
2218 : * empty, then the groupClause must also be empty; otherwise we have
2219 : * to force the groupClause to match that grouping set's order.
2220 : *
2221 : * (The first grouping set can be empty even though parse->groupClause
2222 : * is not empty only if all non-empty grouping sets are unsortable.
2223 : * The groupClauses for hashed grouping sets are built later on.)
2224 : */
2225 1354 : if (gs->set)
2226 1312 : rollup->groupClause = preprocess_groupclause(root, gs->set);
2227 : else
2228 42 : rollup->groupClause = NIL;
2229 :
2230 : /*
2231 : * Is it hashable? We pretend empty sets are hashable even though we
2232 : * actually force them not to be hashed later. But don't bother if
2233 : * there's nothing but empty sets (since in that case we can't hash
2234 : * anything).
2235 : */
2236 1354 : if (gs->set &&
2237 1312 : !bms_overlap_list(gd->unhashable_refs, gs->set))
2238 : {
2239 1288 : rollup->hashable = true;
2240 1288 : gd->any_hashable = true;
2241 : }
2242 :
2243 : /*
2244 : * Now that we've pinned down an order for the groupClause for this
2245 : * list of grouping sets, we need to remap the entries in the grouping
2246 : * sets from sortgrouprefs to plain indices (0-based) into the
2247 : * groupClause for this collection of grouping sets. We keep the
2248 : * original form for later use, though.
2249 : */
2250 1354 : rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
2251 : current_sets,
2252 : gd->tleref_to_colnum_map);
2253 1354 : rollup->gsets_data = current_sets;
2254 :
2255 1354 : gd->rollups = lappend(gd->rollups, rollup);
2256 : }
2257 :
2258 848 : if (gd->unsortable_sets)
2259 : {
2260 : /*
2261 : * We have not yet pinned down a groupclause for this, but we will
2262 : * need index-based lists for estimation purposes. Construct
2263 : * hash_sets_idx based on the entire original groupclause for now.
2264 : */
2265 36 : gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
2266 : gd->unsortable_sets,
2267 : gd->tleref_to_colnum_map);
2268 36 : gd->any_hashable = true;
2269 : }
2270 :
2271 848 : return gd;
2272 : }
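/*
 * Editor's illustration (not part of planner.c): GROUP BY ROLLUP (a, b)
 * expands to the grouping sets {(a, b), (a), ()}, which form a single
 * inclusion chain and hence one rollup (one sorted aggregation pass).
 * GROUP BY GROUPING SETS ((a), (b)) contains two incomparable sets, so it
 * yields two rollups unless hashing later allows them to be combined into
 * a single pass.
 */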
2273 :
2274 : /*
2275 : * Given a groupclause and a list of GroupingSetData, return equivalent sets
2276 : * (without annotation) mapped to indexes into the given groupclause.
2277 : */
2278 : static List *
2279 3972 : remap_to_groupclause_idx(List *groupClause,
2280 : List *gsets,
2281 : int *tleref_to_colnum_map)
2282 : {
2283 3972 : int ref = 0;
2284 3972 : List *result = NIL;
2285 : ListCell *lc;
2286 :
2287 9808 : foreach(lc, groupClause)
2288 : {
2289 5836 : SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2290 :
2291 5836 : tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
2292 : }
2293 :
2294 9246 : foreach(lc, gsets)
2295 : {
2296 5274 : List *set = NIL;
2297 : ListCell *lc2;
2298 5274 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
2299 :
2300 11948 : foreach(lc2, gs->set)
2301 : {
2302 6674 : set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
2303 : }
2304 :
2305 5274 : result = lappend(result, set);
2306 : }
2307 :
2308 3972 : return result;
2309 : }
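/*
 * Editor's sketch (not part of planner.c): the same remapping done with
 * plain arrays.  If the group clause lists sortgrouprefs {7, 3, 9} in that
 * order, then ref 7 maps to column 0, ref 3 to column 1, ref 9 to column 2,
 * and a grouping set {9, 7} becomes the index list {2, 0}.
 */
#include <stdio.h>

int
main(void)
{
	int			group_refs[] = {7, 3, 9};	/* tleSortGroupRefs, in clause order */
	int			map[16];					/* analogue of tleref_to_colnum_map */
	int			gset[] = {9, 7};			/* one grouping set, by sortgroupref */

	for (int i = 0; i < 3; i++)
		map[group_refs[i]] = i;

	for (int i = 0; i < 2; i++)
		printf("ref %d -> col %d\n", gset[i], map[gset[i]]);	/* 2, then 0 */
	return 0;
}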
2310 :
2311 :
2312 : /*
2313 : * preprocess_rowmarks - set up PlanRowMarks if needed
2314 : */
2315 : static void
2316 514714 : preprocess_rowmarks(PlannerInfo *root)
2317 : {
2318 514714 : Query *parse = root->parse;
2319 : Bitmapset *rels;
2320 : List *prowmarks;
2321 : ListCell *l;
2322 : int i;
2323 :
2324 514714 : if (parse->rowMarks)
2325 : {
2326 : /*
2327 : * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2328 : * grouping, since grouping renders a reference to individual tuple
2329 : * CTIDs invalid. This is also checked at parse time, but that's
2330 : * insufficient because of rule substitution, query pullup, etc.
2331 : */
2332 7732 : CheckSelectLocking(parse, linitial_node(RowMarkClause,
2333 : parse->rowMarks)->strength);
2334 : }
2335 : else
2336 : {
2337 : /*
2338 : * We only need rowmarks for UPDATE, DELETE, MERGE, or FOR [KEY]
2339 : * UPDATE/SHARE.
2340 : */
2341 506982 : if (parse->commandType != CMD_UPDATE &&
2342 493892 : parse->commandType != CMD_DELETE &&
2343 489588 : parse->commandType != CMD_MERGE)
2344 487830 : return;
2345 : }
2346 :
2347 : /*
2348 : * We need to have rowmarks for all base relations except the target. We
2349 : * make a bitmapset of all base rels and then remove the items we don't
2350 : * need or have FOR [KEY] UPDATE/SHARE marks for.
2351 : */
2352 26884 : rels = get_relids_in_jointree((Node *) parse->jointree, false, false);
2353 26884 : if (parse->resultRelation)
2354 19152 : rels = bms_del_member(rels, parse->resultRelation);
2355 :
2356 : /*
2357 : * Convert RowMarkClauses to PlanRowMark representation.
2358 : */
2359 26884 : prowmarks = NIL;
2360 34872 : foreach(l, parse->rowMarks)
2361 : {
2362 7988 : RowMarkClause *rc = lfirst_node(RowMarkClause, l);
2363 7988 : RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2364 : PlanRowMark *newrc;
2365 :
2366 : /*
2367 : * Currently, it is syntactically impossible to have FOR UPDATE et al
2368 : * applied to an update/delete target rel. If that ever becomes
2369 : * possible, we should drop the target from the PlanRowMark list.
2370 : */
2371 : Assert(rc->rti != parse->resultRelation);
2372 :
2373 : /*
2374 : * Ignore RowMarkClauses for subqueries; they aren't real tables and
2375 : * can't support true locking. Subqueries that got flattened into the
2376 : * main query should be ignored completely. Any that didn't will get
2377 : * ROW_MARK_COPY items in the next loop.
2378 : */
2379 7988 : if (rte->rtekind != RTE_RELATION)
2380 108 : continue;
2381 :
2382 7880 : rels = bms_del_member(rels, rc->rti);
2383 :
2384 7880 : newrc = makeNode(PlanRowMark);
2385 7880 : newrc->rti = newrc->prti = rc->rti;
2386 7880 : newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2387 7880 : newrc->markType = select_rowmark_type(rte, rc->strength);
2388 7880 : newrc->allMarkTypes = (1 << newrc->markType);
2389 7880 : newrc->strength = rc->strength;
2390 7880 : newrc->waitPolicy = rc->waitPolicy;
2391 7880 : newrc->isParent = false;
2392 :
2393 7880 : prowmarks = lappend(prowmarks, newrc);
2394 : }
2395 :
2396 : /*
2397 : * Now, add rowmarks for any non-target, non-locked base relations.
2398 : */
2399 26884 : i = 0;
2400 64288 : foreach(l, parse->rtable)
2401 : {
2402 37404 : RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
2403 : PlanRowMark *newrc;
2404 :
2405 37404 : i++;
2406 37404 : if (!bms_is_member(i, rels))
2407 33986 : continue;
2408 :
2409 3418 : newrc = makeNode(PlanRowMark);
2410 3418 : newrc->rti = newrc->prti = i;
2411 3418 : newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2412 3418 : newrc->markType = select_rowmark_type(rte, LCS_NONE);
2413 3418 : newrc->allMarkTypes = (1 << newrc->markType);
2414 3418 : newrc->strength = LCS_NONE;
2415 3418 : newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
2416 3418 : newrc->isParent = false;
2417 :
2418 3418 : prowmarks = lappend(prowmarks, newrc);
2419 : }
2420 :
2421 26884 : root->rowMarks = prowmarks;
2422 : }
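/*
 * Editor's illustration (not part of planner.c): for
 *     SELECT * FROM a JOIN b ON ... FOR UPDATE OF a;
 * the loop over parse->rowMarks gives "a" a PlanRowMark with
 * ROW_MARK_EXCLUSIVE, and the second loop gives the unlocked base rel "b"
 * a PlanRowMark with ROW_MARK_REFERENCE, so its rows can be re-fetched
 * during EvalPlanQual rechecks.
 */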
2423 :
2424 : /*
2425 : * Select RowMarkType to use for a given table
2426 : */
2427 : RowMarkType
2428 13612 : select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
2429 : {
2430 13612 : if (rte->rtekind != RTE_RELATION)
2431 : {
2432 : /* If it's not a table at all, use ROW_MARK_COPY */
2433 1378 : return ROW_MARK_COPY;
2434 : }
2435 12234 : else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2436 : {
2437 : /* Let the FDW select the rowmark type, if it wants to */
2438 192 : FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2439 :
2440 192 : if (fdwroutine->GetForeignRowMarkType != NULL)
2441 0 : return fdwroutine->GetForeignRowMarkType(rte, strength);
2442 : /* Otherwise, use ROW_MARK_COPY by default */
2443 192 : return ROW_MARK_COPY;
2444 : }
2445 : else
2446 : {
2447 : /* Regular table, apply the appropriate lock type */
2448 12042 : switch (strength)
2449 : {
2450 2236 : case LCS_NONE:
2451 :
2452 : /*
2453 : * We don't need a tuple lock, only the ability to re-fetch
2454 : * the row.
2455 : */
2456 2236 : return ROW_MARK_REFERENCE;
2457 : break;
2458 7982 : case LCS_FORKEYSHARE:
2459 7982 : return ROW_MARK_KEYSHARE;
2460 : break;
2461 300 : case LCS_FORSHARE:
2462 300 : return ROW_MARK_SHARE;
2463 : break;
2464 72 : case LCS_FORNOKEYUPDATE:
2465 72 : return ROW_MARK_NOKEYEXCLUSIVE;
2466 : break;
2467 1452 : case LCS_FORUPDATE:
2468 1452 : return ROW_MARK_EXCLUSIVE;
2469 : break;
2470 : }
2471 0 : elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2472 : return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */
2473 : }
2474 : }
2475 :
2476 : /*
2477 : * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2478 : *
2479 : * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2480 : * results back in *count_est and *offset_est. These variables are set to
2481 : * 0 if the corresponding clause is not present, and -1 if it's present
2482 : * but we couldn't estimate the value for it. (The "0" convention is OK
2483 : * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2484 : * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
2485 : * usual practice of never estimating less than one row.) These values will
2486 : * be passed to create_limit_path, which see if you change this code.
2487 : *
2488 : * The return value is the suitably adjusted tuple_fraction to use for
2489 : * planning the query. This adjustment is not overridable, since it reflects
2490 : * plan actions that grouping_planner() will certainly take, not assumptions
2491 : * about context.
2492 : */
2493 : static double
2494 4672 : preprocess_limit(PlannerInfo *root, double tuple_fraction,
2495 : int64 *offset_est, int64 *count_est)
2496 : {
2497 4672 : Query *parse = root->parse;
2498 : Node *est;
2499 : double limit_fraction;
2500 :
2501 : /* Should not be called unless LIMIT or OFFSET */
2502 : Assert(parse->limitCount || parse->limitOffset);
2503 :
2504 : /*
2505 : * Try to obtain the clause values. We use estimate_expression_value
2506 : * primarily because it can sometimes do something useful with Params.
2507 : */
2508 4672 : if (parse->limitCount)
2509 : {
2510 4232 : est = estimate_expression_value(root, parse->limitCount);
2511 4232 : if (est && IsA(est, Const))
2512 : {
2513 4226 : if (((Const *) est)->constisnull)
2514 : {
2515 : /* NULL indicates LIMIT ALL, ie, no limit */
2516 0 : *count_est = 0; /* treat as not present */
2517 : }
2518 : else
2519 : {
2520 4226 : *count_est = DatumGetInt64(((Const *) est)->constvalue);
2521 4226 : if (*count_est <= 0)
2522 150 : *count_est = 1; /* force to at least 1 */
2523 : }
2524 : }
2525 : else
2526 6 : *count_est = -1; /* can't estimate */
2527 : }
2528 : else
2529 440 : *count_est = 0; /* not present */
2530 :
2531 4672 : if (parse->limitOffset)
2532 : {
2533 800 : est = estimate_expression_value(root, parse->limitOffset);
2534 800 : if (est && IsA(est, Const))
2535 : {
2536 776 : if (((Const *) est)->constisnull)
2537 : {
2538 : /* Treat NULL as no offset; the executor will too */
2539 0 : *offset_est = 0; /* treat as not present */
2540 : }
2541 : else
2542 : {
2543 776 : *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2544 776 : if (*offset_est < 0)
2545 0 : *offset_est = 0; /* treat as not present */
2546 : }
2547 : }
2548 : else
2549 24 : *offset_est = -1; /* can't estimate */
2550 : }
2551 : else
2552 3872 : *offset_est = 0; /* not present */
2553 :
2554 4672 : if (*count_est != 0)
2555 : {
2556 : /*
2557 : * A LIMIT clause limits the absolute number of tuples returned.
2558 : * However, if it's not a constant LIMIT then we have to guess; for
2559 : * lack of a better idea, assume 10% of the plan's result is wanted.
2560 : */
2561 4232 : if (*count_est < 0 || *offset_est < 0)
2562 : {
2563 : /* LIMIT or OFFSET is an expression ... punt ... */
2564 24 : limit_fraction = 0.10;
2565 : }
2566 : else
2567 : {
2568 : /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2569 4208 : limit_fraction = (double) *count_est + (double) *offset_est;
2570 : }
2571 :
2572 : /*
2573 : * If we have absolute limits from both caller and LIMIT, use the
2574 : * smaller value; likewise if they are both fractional. If one is
2575 : * fractional and the other absolute, we can't easily determine which
2576 : * is smaller, but we use the heuristic that the absolute will usually
2577 : * be smaller.
2578 : */
2579 4232 : if (tuple_fraction >= 1.0)
2580 : {
2581 6 : if (limit_fraction >= 1.0)
2582 : {
2583 : /* both absolute */
2584 6 : tuple_fraction = Min(tuple_fraction, limit_fraction);
2585 : }
2586 : else
2587 : {
2588 : /* caller absolute, limit fractional; use caller's value */
2589 : }
2590 : }
2591 4226 : else if (tuple_fraction > 0.0)
2592 : {
2593 146 : if (limit_fraction >= 1.0)
2594 : {
2595 : /* caller fractional, limit absolute; use limit */
2596 146 : tuple_fraction = limit_fraction;
2597 : }
2598 : else
2599 : {
2600 : /* both fractional */
2601 0 : tuple_fraction = Min(tuple_fraction, limit_fraction);
2602 : }
2603 : }
2604 : else
2605 : {
2606 : /* no info from caller, just use limit */
2607 4080 : tuple_fraction = limit_fraction;
2608 : }
2609 : }
2610 440 : else if (*offset_est != 0 && tuple_fraction > 0.0)
2611 : {
2612 : /*
2613 : * We have an OFFSET but no LIMIT. This acts entirely differently
2614 : * from the LIMIT case: here, we need to increase rather than decrease
2615 : * the caller's tuple_fraction, because the OFFSET acts to cause more
2616 : * tuples to be fetched instead of fewer. This only matters if we got
2617 : * a tuple_fraction > 0, however.
2618 : *
2619 : * As above, use 10% if OFFSET is present but unestimatable.
2620 : */
2621 12 : if (*offset_est < 0)
2622 0 : limit_fraction = 0.10;
2623 : else
2624 12 : limit_fraction = (double) *offset_est;
2625 :
2626 : /*
2627 : * If we have absolute counts from both caller and OFFSET, add them
2628 : * together; likewise if they are both fractional. If one is
2629 : * fractional and the other absolute, we want to take the larger, and
2630 : * we heuristically assume that's the fractional one.
2631 : */
2632 12 : if (tuple_fraction >= 1.0)
2633 : {
2634 0 : if (limit_fraction >= 1.0)
2635 : {
2636 : /* both absolute, so add them together */
2637 0 : tuple_fraction += limit_fraction;
2638 : }
2639 : else
2640 : {
2641 : /* caller absolute, limit fractional; use limit */
2642 0 : tuple_fraction = limit_fraction;
2643 : }
2644 : }
2645 : else
2646 : {
2647 12 : if (limit_fraction >= 1.0)
2648 : {
2649 : /* caller fractional, limit absolute; use caller's value */
2650 : }
2651 : else
2652 : {
2653 : /* both fractional, so add them together */
2654 0 : tuple_fraction += limit_fraction;
2655 0 : if (tuple_fraction >= 1.0)
2656 0 : tuple_fraction = 0.0; /* assume fetch all */
2657 : }
2658 : }
2659 : }
2660 :
2661 4672 : return tuple_fraction;
2662 : }
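/*
 * Editor's sketch (not part of planner.c): the LIMIT-case combination rules
 * above, restated over plain doubles so the absolute-vs-fractional cases are
 * easy to trace.  combine_limit_fraction() is an invented name used only for
 * this illustration.
 */
#include <stdio.h>

static double
combine_limit_fraction(double tuple_fraction, double limit_fraction)
{
	if (tuple_fraction >= 1.0)
	{
		if (limit_fraction >= 1.0)		/* both absolute: take the smaller */
			return tuple_fraction < limit_fraction ? tuple_fraction : limit_fraction;
		return tuple_fraction;			/* caller absolute, limit fractional */
	}
	if (tuple_fraction > 0.0)
	{
		if (limit_fraction >= 1.0)		/* caller fractional, limit absolute */
			return limit_fraction;
		return tuple_fraction < limit_fraction ? tuple_fraction : limit_fraction;
	}
	return limit_fraction;				/* no info from caller, just use limit */
}

int
main(void)
{
	/* cursor wants ~10% of the result, but the query says LIMIT 20 OFFSET 5 */
	printf("%g\n", combine_limit_fraction(0.10, 20.0 + 5.0));	/* prints 25 */
	/* no caller hint, LIMIT 100 */
	printf("%g\n", combine_limit_fraction(0.0, 100.0));		/* prints 100 */
	return 0;
}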
2663 :
2664 : /*
2665 : * limit_needed - do we actually need a Limit plan node?
2666 : *
2667 : * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2668 : * a Limit node. This is worth checking for because "OFFSET 0" is a common
2669 : * locution for an optimization fence. (Because other places in the planner
2670 : * merely check whether parse->limitOffset isn't NULL, it will still work as
2671 : * an optimization fence --- we're just suppressing unnecessary run-time
2672 : * overhead.)
2673 : *
2674 : * This might look like it could be merged into preprocess_limit, but there's
2675 : * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
2676 : * in preprocess_limit it's good enough to consider estimated values.
2677 : */
2678 : bool
2679 1064258 : limit_needed(Query *parse)
2680 : {
2681 : Node *node;
2682 :
2683 1064258 : node = parse->limitCount;
2684 1064258 : if (node)
2685 : {
2686 10112 : if (IsA(node, Const))
2687 : {
2688 : /* NULL indicates LIMIT ALL, ie, no limit */
2689 9918 : if (!((Const *) node)->constisnull)
2690 9918 : return true; /* LIMIT with a constant value */
2691 : }
2692 : else
2693 194 : return true; /* non-constant LIMIT */
2694 : }
2695 :
2696 1054146 : node = parse->limitOffset;
2697 1054146 : if (node)
2698 : {
2699 1298 : if (IsA(node, Const))
2700 : {
2701 : /* Treat NULL as no offset; the executor would too */
2702 1032 : if (!((Const *) node)->constisnull)
2703 : {
2704 1032 : int64 offset = DatumGetInt64(((Const *) node)->constvalue);
2705 :
2706 1032 : if (offset != 0)
2707 84 : return true; /* OFFSET with a nonzero value */
2708 : }
2709 : }
2710 : else
2711 266 : return true; /* non-constant OFFSET */
2712 : }
2713 :
2714 1053796 : return false; /* don't need a Limit plan node */
2715 : }
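/*
 * Editor's sketch (not part of planner.c): the same decision expressed over
 * a toy clause representation, where a clause is absent, a constant
 * (possibly NULL, i.e. LIMIT ALL), or an arbitrary expression.  All names
 * below are invented for illustration.
 */
#include <stdbool.h>
#include <stdio.h>

typedef enum { CLAUSE_ABSENT, CLAUSE_CONST, CLAUSE_EXPR } ToyClauseKind;

typedef struct
{
	ToyClauseKind kind;
	bool		isnull;			/* meaningful only for CLAUSE_CONST */
	long		value;			/* meaningful only for non-null CLAUSE_CONST */
} ToyClause;

static bool
toy_limit_needed(ToyClause limit, ToyClause offset)
{
	if (limit.kind == CLAUSE_EXPR)
		return true;			/* non-constant LIMIT */
	if (limit.kind == CLAUSE_CONST && !limit.isnull)
		return true;			/* LIMIT with a constant value */
	if (offset.kind == CLAUSE_EXPR)
		return true;			/* non-constant OFFSET */
	if (offset.kind == CLAUSE_CONST && !offset.isnull && offset.value != 0)
		return true;			/* OFFSET with a nonzero value */
	return false;				/* e.g. the common "OFFSET 0" fence */
}

int
main(void)
{
	ToyClause	absent = {CLAUSE_ABSENT, false, 0};
	ToyClause	offset0 = {CLAUSE_CONST, false, 0};

	printf("%d\n", toy_limit_needed(absent, offset0));	/* 0: OFFSET 0 alone needs no Limit */
	return 0;
}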
2716 :
2717 :
2718 : /*
2719 : * remove_useless_groupby_columns
2720 : * Remove any columns in the GROUP BY clause that are redundant due to
2721 : * being functionally dependent on other GROUP BY columns.
2722 : *
2723 : * Since some other DBMSes do not allow references to ungrouped columns, it's
2724 : * not unusual to find all columns listed in GROUP BY even though listing the
2725 : * primary-key columns would be sufficient. Deleting such excess columns
2726 : * avoids redundant sorting work, so it's worth doing.
2727 : *
2728 : * Relcache invalidations will ensure that cached plans become invalidated
2729 : * when the underlying index of the pkey constraint is dropped.
2730 : *
2731 : * Currently, we only make use of pkey constraints for this; however, we may
2732 : * wish to take this further in the future and also use unique constraints
2733 : * which have NOT NULL columns. In that case, plan invalidation will still
2734 : * work since relations will receive a relcache invalidation when a NOT NULL
2735 : * constraint is dropped.
2736 : */
2737 : static void
2738 3542 : remove_useless_groupby_columns(PlannerInfo *root)
2739 : {
2740 3542 : Query *parse = root->parse;
2741 : Bitmapset **groupbyattnos;
2742 : Bitmapset **surplusvars;
2743 : ListCell *lc;
2744 : int relid;
2745 :
2746 : /* No chance to do anything if there are fewer than two GROUP BY items */
2747 3542 : if (list_length(root->processed_groupClause) < 2)
2748 2230 : return;
2749 :
2750 : /* Don't fiddle with the GROUP BY clause if the query has grouping sets */
2751 1312 : if (parse->groupingSets)
2752 0 : return;
2753 :
2754 : /*
2755 : * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
2756 : * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
2757 : * that are GROUP BY items.
2758 : */
2759 1312 : groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2760 1312 : (list_length(parse->rtable) + 1));
2761 4700 : foreach(lc, root->processed_groupClause)
2762 : {
2763 3388 : SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
2764 3388 : TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2765 3388 : Var *var = (Var *) tle->expr;
2766 :
2767 : /*
2768 : * Ignore non-Vars and Vars from other query levels.
2769 : *
2770 : * XXX in principle, stable expressions containing Vars could also be
2771 : * removed, if all the Vars are functionally dependent on other GROUP
2772 : * BY items. But it's not clear that such cases occur often enough to
2773 : * be worth troubling over.
2774 : */
2775 3388 : if (!IsA(var, Var) ||
2776 2516 : var->varlevelsup > 0)
2777 872 : continue;
2778 :
2779 : /* OK, remember we have this Var */
2780 2516 : relid = var->varno;
2781 : Assert(relid <= list_length(parse->rtable));
2782 2516 : groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
2783 2516 : var->varattno - FirstLowInvalidHeapAttributeNumber);
2784 : }
2785 :
2786 : /*
2787 : * Consider each relation and see if it is possible to remove some of its
2788 : * Vars from GROUP BY. For simplicity and speed, we do the actual removal
2789 : * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
2790 : * of the column attnos of RTE k that are removable GROUP BY items.
2791 : */
2792 1312 : surplusvars = NULL; /* don't allocate array unless required */
2793 1312 : relid = 0;
2794 5006 : foreach(lc, parse->rtable)
2795 : {
2796 3694 : RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
2797 : Bitmapset *relattnos;
2798 : Bitmapset *pkattnos;
2799 : Oid constraintOid;
2800 :
2801 3694 : relid++;
2802 :
2803 : /* Only plain relations could have primary-key constraints */
2804 3694 : if (rte->rtekind != RTE_RELATION)
2805 3488 : continue;
2806 :
2807 : /*
2808 : * We must skip inheritance parent tables as some of the child rels
2809 : * may cause duplicate rows. This cannot happen with partitioned
2810 : * tables, however.
2811 : */
2812 1748 : if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
2813 18 : continue;
2814 :
2815 : /* Nothing to do unless this rel has multiple Vars in GROUP BY */
2816 1730 : relattnos = groupbyattnos[relid];
2817 1730 : if (bms_membership(relattnos) != BMS_MULTIPLE)
2818 934 : continue;
2819 :
2820 : /*
2821 : * Can't remove any columns for this rel if there is no suitable
2822 : * (i.e., nondeferrable) primary key constraint.
2823 : */
2824 796 : pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
2825 796 : if (pkattnos == NULL)
2826 590 : continue;
2827 :
2828 : /*
2829 : * If the primary key is a proper subset of relattnos then we have
2830 : * some items in the GROUP BY that can be removed.
2831 : */
2832 206 : if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
2833 : {
2834 : /*
2835 : * To easily remember whether we've found anything to do, we don't
2836 : * allocate the surplusvars[] array until we find something.
2837 : */
2838 188 : if (surplusvars == NULL)
2839 182 : surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2840 182 : (list_length(parse->rtable) + 1));
2841 :
2842 : /* Remember the attnos of the removable columns */
2843 188 : surplusvars[relid] = bms_difference(relattnos, pkattnos);
2844 : }
2845 : }
2846 :
2847 : /*
2848 : * If we found any surplus Vars, build a new GROUP BY clause without them.
2849 : * (Note: this may leave some TLEs with unreferenced ressortgroupref
2850 : * markings, but that's harmless.)
2851 : */
2852 1312 : if (surplusvars != NULL)
2853 : {
2854 182 : List *new_groupby = NIL;
2855 :
2856 774 : foreach(lc, root->processed_groupClause)
2857 : {
2858 592 : SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
2859 592 : TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2860 592 : Var *var = (Var *) tle->expr;
2861 :
2862 : /*
2863 : * New list must include non-Vars, outer Vars, and anything not
2864 : * marked as surplus.
2865 : */
2866 592 : if (!IsA(var, Var) ||
2867 592 : var->varlevelsup > 0 ||
2868 592 : !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
2869 592 : surplusvars[var->varno]))
2870 374 : new_groupby = lappend(new_groupby, sgc);
2871 : }
2872 :
2873 182 : root->processed_groupClause = new_groupby;
2874 : }
2875 : }
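/*
 * Editor's sketch (not part of planner.c): the core subset test above with
 * plain bitmasks standing in for Bitmapsets.  When the primary-key columns
 * are a proper subset of a relation's GROUP BY columns, the remaining
 * grouped columns are functionally dependent on the key and can be dropped.
 */
#include <stdio.h>

int
main(void)
{
	unsigned	groupbyattnos = (1u << 1) | (1u << 2) | (1u << 5);	/* GROUP BY cols 1, 2, 5 */
	unsigned	pkattnos = (1u << 1);								/* primary key is col 1 */

	if ((pkattnos & ~groupbyattnos) == 0 && pkattnos != groupbyattnos)
	{
		unsigned	surplus = groupbyattnos & ~pkattnos;	/* cols 2 and 5 are removable */

		printf("removable column mask: 0x%x\n", surplus);
	}
	return 0;
}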
2876 :
2877 : /*
2878 : * preprocess_groupclause - do preparatory work on GROUP BY clause
2879 : *
2880 : * The idea here is to adjust the ordering of the GROUP BY elements
2881 : * (which in itself is semantically insignificant) to match ORDER BY,
2882 : * thereby allowing a single sort operation to both implement the ORDER BY
2883 : * requirement and set up for a Unique step that implements GROUP BY.
2884 : * We also consider partial match between GROUP BY and ORDER BY elements,
2885 : * which could allow ORDER BY to be implemented using an incremental sort.
2886 : *
2887 : * We also consider other orderings of the GROUP BY elements, which could
2888 : * match the sort ordering of other possible plans (eg an indexscan) and
2889 : * thereby reduce cost. This is implemented during the generation of grouping
2890 : * paths. See get_useful_group_keys_orderings() for details.
2891 : *
2892 : * Note: we need no comparable processing of the distinctClause because
2893 : * the parser already enforced that that matches ORDER BY.
2894 : *
2895 : * Note: we return a fresh List, but its elements are the same
2896 : * SortGroupClauses appearing in parse->groupClause. This is important
2897 : * because later processing may modify the processed_groupClause list.
2898 : *
2899 : * For grouping sets, the order of items is instead forced to agree with that
2900 : * of the grouping set (and items not in the grouping set are skipped). The
2901 : * work of sorting the order of grouping set elements to match the ORDER BY if
2902 : * possible is done elsewhere.
2903 : */
2904 : static List *
2905 7436 : preprocess_groupclause(PlannerInfo *root, List *force)
2906 : {
2907 7436 : Query *parse = root->parse;
2908 7436 : List *new_groupclause = NIL;
2909 : ListCell *sl;
2910 : ListCell *gl;
2911 :
2912 : /* For grouping sets, we need to force the ordering */
2913 7436 : if (force)
2914 : {
2915 9652 : foreach(sl, force)
2916 : {
2917 5758 : Index ref = lfirst_int(sl);
2918 5758 : SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
2919 :
2920 5758 : new_groupclause = lappend(new_groupclause, cl);
2921 : }
2922 :
2923 3894 : return new_groupclause;
2924 : }
2925 :
2926 : /* If no ORDER BY, nothing useful to do here */
2927 3542 : if (parse->sortClause == NIL)
2928 1972 : return list_copy(parse->groupClause);
2929 :
2930 : /*
2931 : * Scan the ORDER BY clause and construct a list of matching GROUP BY
2932 : * items, but only as far as we can make a matching prefix.
2933 : *
2934 : * This code assumes that the sortClause contains no duplicate items.
2935 : */
2936 3046 : foreach(sl, parse->sortClause)
2937 : {
2938 2122 : SortGroupClause *sc = lfirst_node(SortGroupClause, sl);
2939 :
2940 3218 : foreach(gl, parse->groupClause)
2941 : {
2942 2572 : SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
2943 :
2944 2572 : if (equal(gc, sc))
2945 : {
2946 1476 : new_groupclause = lappend(new_groupclause, gc);
2947 1476 : break;
2948 : }
2949 : }
2950 2122 : if (gl == NULL)
2951 646 : break; /* no match, so stop scanning */
2952 : }
2953 :
2954 :
2955 : /* If no match at all, no point in reordering GROUP BY */
2956 1570 : if (new_groupclause == NIL)
2957 298 : return list_copy(parse->groupClause);
2958 :
2959 : /*
2960 : * Add any remaining GROUP BY items to the new list. We don't require a
2961 : * complete match, because even partial match allows ORDER BY to be
2962 : * implemented using incremental sort. Also, give up if there are any
2963 : * non-sortable GROUP BY items, since then there's no hope anyway.
2964 : */
2965 2914 : foreach(gl, parse->groupClause)
2966 : {
2967 1642 : SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
2968 :
2969 1642 : if (list_member_ptr(new_groupclause, gc))
2970 1476 : continue; /* it matched an ORDER BY item */
2971 166 : if (!OidIsValid(gc->sortop)) /* give up, GROUP BY can't be sorted */
2972 0 : return list_copy(parse->groupClause);
2973 166 : new_groupclause = lappend(new_groupclause, gc);
2974 : }
2975 :
2976 : /* Success --- install the rearranged GROUP BY list */
2977 : Assert(list_length(parse->groupClause) == list_length(new_groupclause));
2978 1272 : return new_groupclause;
2979 : }
2980 :
2981 : /*
2982 : * Extract lists of grouping sets that can be implemented using a single
2983 : * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
2984 : *
2985 : * Input must be sorted with smallest sets first. Result has each sublist
2986 : * sorted with smallest sets first.
2987 : *
2988 : * We want to produce the absolute minimum possible number of lists here to
2989 : * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
2990 : * of finding the minimal partition of a partially-ordered set into chains
2991 : * (which is what we need, taking the list of grouping sets as a poset ordered
2992 : * by set inclusion) can be mapped to the problem of finding the maximum
2993 : * cardinality matching on a bipartite graph, which is solvable in polynomial
2994 : * time with a worst case of no worse than O(n^2.5) and usually much
 2995 : * time, with a worst case no worse than O(n^2.5) and usually much
2996 : * heuristic or approximate methods. (Planning time for a 12-d cube is under
2997 : * half a second on my modest system even with optimization off and assertions
2998 : * on.)
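 : *
 : * For example, the four sets produced by CUBE(a, b), namely (a,b), (a),
 : * (b) and (), can be covered by two chains: () < (a) < (a,b), plus (b)
 : * on its own. So two rollup passes (and at most two sorts) suffice.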
2999 : */
3000 : static List *
3001 842 : extract_rollup_sets(List *groupingSets)
3002 : {
3003 842 : int num_sets_raw = list_length(groupingSets);
3004 842 : int num_empty = 0;
3005 842 : int num_sets = 0; /* distinct sets */
3006 842 : int num_chains = 0;
3007 842 : List *result = NIL;
3008 : List **results;
3009 : List **orig_sets;
3010 : Bitmapset **set_masks;
3011 : int *chains;
3012 : short **adjacency;
3013 : short *adjacency_buf;
3014 : BipartiteMatchState *state;
3015 : int i;
3016 : int j;
3017 : int j_size;
3018 842 : ListCell *lc1 = list_head(groupingSets);
3019 : ListCell *lc;
3020 :
3021 : /*
3022 : * Start by stripping out empty sets. The algorithm doesn't require this,
3023 : * but the planner currently needs all empty sets to be returned in the
3024 : * first list, so we strip them here and add them back after.
3025 : */
3026 1452 : while (lc1 && lfirst(lc1) == NIL)
3027 : {
3028 610 : ++num_empty;
3029 610 : lc1 = lnext(groupingSets, lc1);
3030 : }
3031 :
3032 : /* bail out now if it turns out that all we had were empty sets. */
3033 842 : if (!lc1)
3034 42 : return list_make1(groupingSets);
3035 :
3036 : /*----------
3037 : * We don't strictly need to remove duplicate sets here, but if we don't,
3038 : * they tend to become scattered through the result, which is a bit
3039 : * confusing (and irritating if we ever decide to optimize them out).
3040 : * So we remove them here and add them back after.
3041 : *
3042 : * For each non-duplicate set, we fill in the following:
3043 : *
3044 : * orig_sets[i] = list of the original set lists
3045 : * set_masks[i] = bitmapset for testing inclusion
3046 : * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
3047 : *
3048 : * chains[i] will be the result group this set is assigned to.
3049 : *
3050 : * We index all of these from 1 rather than 0 because it is convenient
3051 : * to leave 0 free for the NIL node in the graph algorithm.
3052 : *----------
3053 : */
3054 800 : orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
3055 800 : set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
3056 800 : adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
3057 800 : adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
3058 :
3059 800 : j_size = 0;
3060 800 : j = 0;
3061 800 : i = 1;
3062 :
3063 2840 : for_each_cell(lc, groupingSets, lc1)
3064 : {
3065 2040 : List *candidate = (List *) lfirst(lc);
3066 2040 : Bitmapset *candidate_set = NULL;
3067 : ListCell *lc2;
3068 2040 : int dup_of = 0;
3069 :
3070 4974 : foreach(lc2, candidate)
3071 : {
3072 2934 : candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
3073 : }
3074 :
3075 : /* we can only be a dup if we're the same length as a previous set */
3076 2040 : if (j_size == list_length(candidate))
3077 : {
3078 : int k;
3079 :
3080 1712 : for (k = j; k < i; ++k)
3081 : {
3082 1104 : if (bms_equal(set_masks[k], candidate_set))
3083 : {
3084 158 : dup_of = k;
3085 158 : break;
3086 : }
3087 : }
3088 : }
3089 1274 : else if (j_size < list_length(candidate))
3090 : {
3091 1274 : j_size = list_length(candidate);
3092 1274 : j = i;
3093 : }
3094 :
3095 2040 : if (dup_of > 0)
3096 : {
3097 158 : orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
3098 158 : bms_free(candidate_set);
3099 : }
3100 : else
3101 : {
3102 : int k;
3103 1882 : int n_adj = 0;
3104 :
3105 1882 : orig_sets[i] = list_make1(candidate);
3106 1882 : set_masks[i] = candidate_set;
3107 :
3108 : /* fill in adjacency list; no need to compare equal-size sets */
3109 :
3110 3154 : for (k = j - 1; k > 0; --k)
3111 : {
3112 1272 : if (bms_is_subset(set_masks[k], candidate_set))
3113 1110 : adjacency_buf[++n_adj] = k;
3114 : }
3115 :
3116 1882 : if (n_adj > 0)
3117 : {
3118 598 : adjacency_buf[0] = n_adj;
3119 598 : adjacency[i] = palloc((n_adj + 1) * sizeof(short));
3120 598 : memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
3121 : }
3122 : else
3123 1284 : adjacency[i] = NULL;
3124 :
3125 1882 : ++i;
3126 : }
3127 : }
3128 :
3129 800 : num_sets = i - 1;
3130 :
3131 : /*
3132 : * Apply the graph matching algorithm to do the work.
3133 : */
3134 800 : state = BipartiteMatch(num_sets, num_sets, adjacency);
3135 :
3136 : /*
3137 : * Now, the state->pair* fields have the info we need to assign sets to
3138 : * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
3139 : * pair_vu[v] = u (both will be true, but we check both so that we can do
3140 : * it in one pass)
3141 : */
3142 800 : chains = palloc0((num_sets + 1) * sizeof(int));
3143 :
3144 2682 : for (i = 1; i <= num_sets; ++i)
3145 : {
3146 1882 : int u = state->pair_vu[i];
3147 1882 : int v = state->pair_uv[i];
3148 :
3149 1882 : if (u > 0 && u < i)
3150 0 : chains[i] = chains[u];
3151 1882 : else if (v > 0 && v < i)
3152 570 : chains[i] = chains[v];
3153 : else
3154 1312 : chains[i] = ++num_chains;
3155 : }
3156 :
3157 : /* build result lists. */
3158 800 : results = palloc0((num_chains + 1) * sizeof(List *));
3159 :
3160 2682 : for (i = 1; i <= num_sets; ++i)
3161 : {
3162 1882 : int c = chains[i];
3163 :
3164 : Assert(c > 0);
3165 :
3166 1882 : results[c] = list_concat(results[c], orig_sets[i]);
3167 : }
3168 :
3169 : /* push any empty sets back on the first list. */
3170 1320 : while (num_empty-- > 0)
3171 520 : results[1] = lcons(NIL, results[1]);
3172 :
3173 : /* make result list */
3174 2112 : for (i = 1; i <= num_chains; ++i)
3175 1312 : result = lappend(result, results[i]);
3176 :
3177 : /*
3178 : * Free all the things.
3179 : *
3180 : * (This is over-fussy for small sets but for large sets we could have
3181 : * tied up a nontrivial amount of memory.)
3182 : */
3183 800 : BipartiteMatchFree(state);
3184 800 : pfree(results);
3185 800 : pfree(chains);
3186 2682 : for (i = 1; i <= num_sets; ++i)
3187 1882 : if (adjacency[i])
3188 598 : pfree(adjacency[i]);
3189 800 : pfree(adjacency);
3190 800 : pfree(adjacency_buf);
3191 800 : pfree(orig_sets);
3192 2682 : for (i = 1; i <= num_sets; ++i)
3193 1882 : bms_free(set_masks[i]);
3194 800 : pfree(set_masks);
3195 :
3196 800 : return result;
3197 : }
3198 :
3199 : /*
3200 : * Reorder the elements of a list of grouping sets such that they have correct
3201 : * prefix relationships. Also inserts the GroupingSetData annotations.
3202 : *
3203 : * The input must be ordered with smallest sets first; the result is returned
3204 : * with largest sets first. Note that the result shares no list substructure
3205 : * with the input, so it's safe for the caller to modify it later.
3206 : *
3207 : * If we're passed in a sortclause, we follow its order of columns to the
3208 : * extent possible, to minimize the chance that we add unnecessary sorts.
3209 : * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3210 : * gets implemented in one pass.)
3211 : */
3212 : static List *
3213 1354 : reorder_grouping_sets(List *groupingSets, List *sortclause)
3214 : {
3215 : ListCell *lc;
3216 1354 : List *previous = NIL;
3217 1354 : List *result = NIL;
3218 :
3219 4004 : foreach(lc, groupingSets)
3220 : {
3221 2650 : List *candidate = (List *) lfirst(lc);
3222 2650 : List *new_elems = list_difference_int(candidate, previous);
3223 2650 : GroupingSetData *gs = makeNode(GroupingSetData);
3224 :
3225 2814 : while (list_length(sortclause) > list_length(previous) &&
3226 : new_elems != NIL)
3227 : {
3228 272 : SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3229 272 : int ref = sc->tleSortGroupRef;
3230 :
3231 272 : if (list_member_int(new_elems, ref))
3232 : {
3233 164 : previous = lappend_int(previous, ref);
3234 164 : new_elems = list_delete_int(new_elems, ref);
3235 : }
3236 : else
3237 : {
3238 : /* diverged from the sortclause; give up on it */
3239 108 : sortclause = NIL;
3240 108 : break;
3241 : }
3242 : }
3243 :
3244 2650 : previous = list_concat(previous, new_elems);
3245 :
3246 2650 : gs->set = list_copy(previous);
3247 2650 : result = lcons(gs, result);
3248 : }
3249 :
3250 1354 : list_free(previous);
3251 :
3252 1354 : return result;
3253 : }
3254 :
3255 : /*
3256 : * has_volatile_pathkey
3257 : * Returns true if any PathKey in 'keys' has an EquivalenceClass
3258 : * containing a volatile function. Otherwise returns false.
3259 : */
3260 : static bool
3261 2240 : has_volatile_pathkey(List *keys)
3262 : {
3263 : ListCell *lc;
3264 :
3265 4636 : foreach(lc, keys)
3266 : {
3267 2414 : PathKey *pathkey = lfirst_node(PathKey, lc);
3268 :
3269 2414 : if (pathkey->pk_eclass->ec_has_volatile)
3270 18 : return true;
3271 : }
3272 :
3273 2222 : return false;
3274 : }
3275 :
3276 : /*
3277 : * adjust_group_pathkeys_for_groupagg
3278 : * Add pathkeys to root->group_pathkeys to reflect the best set of
3279 : * pre-ordered input for ordered aggregates.
3280 : *
3281 : * We define "best" as the pathkeys that suit the largest number of
3282 : * aggregate functions. We find these by looking at the first ORDER BY /
 3283 : * DISTINCT aggregate, taking the pathkeys for that before searching for
 3284 : * other aggregates that require the same or a stricter variation of the
3285 : * same pathkeys. We then repeat that process for any remaining aggregates
3286 : * with different pathkeys and if we find another set of pathkeys that suits a
3287 : * larger number of aggregates then we select those pathkeys instead.
3288 : *
3289 : * When the best pathkeys are found we also mark each Aggref that can use
3290 : * those pathkeys as aggpresorted = true.
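 : *
 : * For example, with two aggregate calls such as agg1(x ORDER BY x) and
 : * agg2(y ORDER BY x, y) in the same query, the second call's pathkeys
 : * (x, y) are a stricter version of the first's (x), so sorting the input
 : * once on (x, y) allows both aggregates to skip their own sorts.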
3291 : *
3292 : * Note: When an aggregate function's ORDER BY / DISTINCT clause contains any
3293 : * volatile functions, we never make use of these pathkeys. We want to ensure
3294 : * that sorts using volatile functions are done independently in each Aggref
3295 : * rather than once at the query level. If we were to allow this then Aggrefs
3296 : * with compatible sort orders would all transition their rows in the same
3297 : * order if those pathkeys were deemed to be the best pathkeys to sort on.
3298 : * Whereas, if some other set of Aggref's pathkeys happened to be deemed
3299 : * better pathkeys to sort on, then the volatile function Aggrefs would be
3300 : * left to perform their sorts individually. To avoid this inconsistent
3301 : * behavior which could make Aggref results depend on what other Aggrefs the
3302 : * query contains, we always force Aggrefs with volatile functions to perform
3303 : * their own sorts.
3304 : */
3305 : static void
3306 1820 : adjust_group_pathkeys_for_groupagg(PlannerInfo *root)
3307 : {
3308 1820 : List *grouppathkeys = root->group_pathkeys;
3309 : List *bestpathkeys;
3310 : Bitmapset *bestaggs;
3311 : Bitmapset *unprocessed_aggs;
3312 : ListCell *lc;
3313 : int i;
3314 :
3315 : /* Shouldn't be here if there are grouping sets */
3316 : Assert(root->parse->groupingSets == NIL);
3317 : /* Shouldn't be here unless there are some ordered aggregates */
3318 : Assert(root->numOrderedAggs > 0);
3319 :
3320 : /* Do nothing if disabled */
3321 1820 : if (!enable_presorted_aggregate)
3322 6 : return;
3323 :
3324 : /*
3325 : * Make a first pass over all AggInfos to collect a Bitmapset containing
3326 : * the indexes of all AggInfos to be processed below.
3327 : */
3328 1814 : unprocessed_aggs = NULL;
3329 4312 : foreach(lc, root->agginfos)
3330 : {
3331 2498 : AggInfo *agginfo = lfirst_node(AggInfo, lc);
3332 2498 : Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3333 :
3334 2498 : if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
3335 264 : continue;
3336 :
3337 : /* only add aggregates with a DISTINCT or ORDER BY */
3338 2234 : if (aggref->aggdistinct != NIL || aggref->aggorder != NIL)
3339 1934 : unprocessed_aggs = bms_add_member(unprocessed_aggs,
3340 : foreach_current_index(lc));
3341 : }
3342 :
3343 : /*
3344 : * Now process all the unprocessed_aggs to find the best set of pathkeys
3345 : * for the given set of aggregates.
3346 : *
3347 : * On the first outer loop here 'bestaggs' will be empty. We'll populate
3348 : * this during the first loop using the pathkeys for the very first
 3349 : * AggInfo, then taking any stronger pathkeys from any other AggInfos with
 3350 : * a stricter set of compatible pathkeys. Once the outer loop is
3351 : * complete, we mark off all the aggregates with compatible pathkeys then
3352 : * remove those from the unprocessed_aggs and repeat the process to try to
3353 : * find another set of pathkeys that are suitable for a larger number of
3354 : * aggregates. The outer loop will stop when there are not enough
3355 : * unprocessed aggregates for it to be possible to find a set of pathkeys
3356 : * to suit a larger number of aggregates.
3357 : */
3358 1814 : bestpathkeys = NIL;
3359 1814 : bestaggs = NULL;
3360 3580 : while (bms_num_members(unprocessed_aggs) > bms_num_members(bestaggs))
3361 : {
3362 1766 : Bitmapset *aggindexes = NULL;
3363 1766 : List *currpathkeys = NIL;
3364 :
3365 1766 : i = -1;
3366 5772 : while ((i = bms_next_member(unprocessed_aggs, i)) >= 0)
3367 : {
3368 2240 : AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3369 2240 : Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3370 : List *sortlist;
3371 : List *pathkeys;
3372 :
3373 2240 : if (aggref->aggdistinct != NIL)
3374 706 : sortlist = aggref->aggdistinct;
3375 : else
3376 1534 : sortlist = aggref->aggorder;
3377 :
3378 2240 : pathkeys = make_pathkeys_for_sortclauses(root, sortlist,
3379 : aggref->args);
3380 :
3381 : /*
3382 : * Ignore Aggrefs which have volatile functions in their ORDER BY
3383 : * or DISTINCT clause.
3384 : */
3385 2240 : if (has_volatile_pathkey(pathkeys))
3386 : {
3387 18 : unprocessed_aggs = bms_del_member(unprocessed_aggs, i);
3388 18 : continue;
3389 : }
3390 :
3391 : /*
3392 : * When not set yet, take the pathkeys from the first unprocessed
3393 : * aggregate.
3394 : */
3395 2222 : if (currpathkeys == NIL)
3396 : {
3397 1760 : currpathkeys = pathkeys;
3398 :
3399 : /* include the GROUP BY pathkeys, if they exist */
3400 1760 : if (grouppathkeys != NIL)
3401 270 : currpathkeys = append_pathkeys(list_copy(grouppathkeys),
3402 : currpathkeys);
3403 :
3404 : /* record that we found pathkeys for this aggregate */
3405 1760 : aggindexes = bms_add_member(aggindexes, i);
3406 : }
3407 : else
3408 : {
3409 : /* now look for a stronger set of matching pathkeys */
3410 :
3411 : /* include the GROUP BY pathkeys, if they exist */
3412 462 : if (grouppathkeys != NIL)
3413 288 : pathkeys = append_pathkeys(list_copy(grouppathkeys),
3414 : pathkeys);
3415 :
3416 : /* are 'pathkeys' compatible or better than 'currpathkeys'? */
3417 462 : switch (compare_pathkeys(currpathkeys, pathkeys))
3418 : {
3419 12 : case PATHKEYS_BETTER2:
3420 : /* 'pathkeys' are stronger, use these ones instead */
3421 12 : currpathkeys = pathkeys;
3422 : /* FALLTHROUGH */
3423 :
3424 72 : case PATHKEYS_BETTER1:
3425 : /* 'pathkeys' are less strict */
3426 : /* FALLTHROUGH */
3427 :
3428 : case PATHKEYS_EQUAL:
3429 : /* mark this aggregate as covered by 'currpathkeys' */
3430 72 : aggindexes = bms_add_member(aggindexes, i);
3431 72 : break;
3432 :
3433 390 : case PATHKEYS_DIFFERENT:
3434 390 : break;
3435 : }
3436 4006 : }
3437 : }
3438 :
3439 : /* remove the aggregates that we've just processed */
3440 1766 : unprocessed_aggs = bms_del_members(unprocessed_aggs, aggindexes);
3441 :
3442 : /*
3443 : * If this pass included more aggregates than the previous best then
3444 : * use these ones as the best set.
3445 : */
3446 1766 : if (bms_num_members(aggindexes) > bms_num_members(bestaggs))
3447 : {
3448 1658 : bestaggs = aggindexes;
3449 1658 : bestpathkeys = currpathkeys;
3450 : }
3451 : }
3452 :
3453 : /*
3454 : * If we found any ordered aggregates, update root->group_pathkeys to add
3455 : * the best set of aggregate pathkeys. Note that bestpathkeys includes
3456 : * the original GROUP BY pathkeys already.
3457 : */
3458 1814 : if (bestpathkeys != NIL)
3459 1610 : root->group_pathkeys = bestpathkeys;
3460 :
3461 : /*
3462 : * Now that we've found the best set of aggregates we can set the
3463 : * presorted flag to indicate to the executor that it needn't bother
3464 : * performing a sort for these Aggrefs. We're able to do this now as
3465 : * there's no chance of a Hash Aggregate plan as create_grouping_paths
3466 : * will not mark the GROUP BY as GROUPING_CAN_USE_HASH due to the presence
3467 : * of ordered aggregates.
3468 : */
3469 1814 : i = -1;
3470 3514 : while ((i = bms_next_member(bestaggs, i)) >= 0)
3471 : {
3472 1700 : AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3473 :
3474 3418 : foreach(lc, agginfo->aggrefs)
3475 : {
3476 1718 : Aggref *aggref = lfirst_node(Aggref, lc);
3477 :
3478 1718 : aggref->aggpresorted = true;
3479 : }
3480 : }
3481 : }
3482 :
3483 : /*
3484 : * Compute query_pathkeys and other pathkeys during plan generation
3485 : */
3486 : static void
3487 505614 : standard_qp_callback(PlannerInfo *root, void *extra)
3488 : {
3489 505614 : Query *parse = root->parse;
3490 505614 : standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3491 505614 : List *tlist = root->processed_tlist;
3492 505614 : List *activeWindows = qp_extra->activeWindows;
3493 :
3494 : /*
3495 : * Calculate pathkeys that represent grouping/ordering and/or ordered
3496 : * aggregate requirements.
3497 : */
3498 505614 : if (qp_extra->gset_data)
3499 : {
3500 : /*
3501 : * With grouping sets, just use the first RollupData's groupClause. We
3502 : * don't make any effort to optimize grouping clauses when there are
3503 : * grouping sets, nor can we combine aggregate ordering keys with
3504 : * grouping.
3505 : */
3506 848 : List *rollups = qp_extra->gset_data->rollups;
3507 848 : List *groupClause = (rollups ? linitial_node(RollupData, rollups)->groupClause : NIL);
3508 :
3509 848 : if (grouping_is_sortable(groupClause))
3510 : {
3511 : bool sortable;
3512 :
3513 : /*
3514 : * The groupClause is logically below the grouping step. So if
3515 : * there is an RTE entry for the grouping step, we need to remove
3516 : * its RT index from the sort expressions before we make PathKeys
3517 : * for them.
3518 : */
3519 848 : root->group_pathkeys =
3520 848 : make_pathkeys_for_sortclauses_extended(root,
3521 : &groupClause,
3522 : tlist,
3523 : false,
3524 848 : parse->hasGroupRTE,
3525 : &sortable,
3526 : false);
3527 : Assert(sortable);
3528 848 : root->num_groupby_pathkeys = list_length(root->group_pathkeys);
3529 : }
3530 : else
3531 : {
3532 0 : root->group_pathkeys = NIL;
3533 0 : root->num_groupby_pathkeys = 0;
3534 : }
3535 : }
3536 504766 : else if (parse->groupClause || root->numOrderedAggs > 0)
3537 5124 : {
3538 : /*
3539 : * With a plain GROUP BY list, we can remove any grouping items that
3540 : * are proven redundant by EquivalenceClass processing. For example,
3541 : * we can remove y given "WHERE x = y GROUP BY x, y". These aren't
3542 : * especially common cases, but they're nearly free to detect. Note
3543 : * that we remove redundant items from processed_groupClause but not
3544 : * the original parse->groupClause.
3545 : */
3546 : bool sortable;
3547 :
3548 : /*
3549 : * Convert group clauses into pathkeys. Set the ec_sortref field of
3550 : * EquivalenceClass'es if it's not set yet.
3551 : */
3552 5124 : root->group_pathkeys =
3553 5124 : make_pathkeys_for_sortclauses_extended(root,
3554 : &root->processed_groupClause,
3555 : tlist,
3556 : true,
3557 : false,
3558 : &sortable,
3559 : true);
3560 5124 : if (!sortable)
3561 : {
3562 : /* Can't sort; no point in considering aggregate ordering either */
3563 0 : root->group_pathkeys = NIL;
3564 0 : root->num_groupby_pathkeys = 0;
3565 : }
3566 : else
3567 : {
3568 5124 : root->num_groupby_pathkeys = list_length(root->group_pathkeys);
3569 : /* If we have ordered aggs, consider adding onto group_pathkeys */
3570 5124 : if (root->numOrderedAggs > 0)
3571 1820 : adjust_group_pathkeys_for_groupagg(root);
3572 : }
3573 : }
3574 : else
3575 : {
3576 499642 : root->group_pathkeys = NIL;
3577 499642 : root->num_groupby_pathkeys = 0;
3578 : }
3579 :
3580 : /* We consider only the first (bottom) window in pathkeys logic */
3581 505614 : if (activeWindows != NIL)
3582 : {
3583 2330 : WindowClause *wc = linitial_node(WindowClause, activeWindows);
3584 :
3585 2330 : root->window_pathkeys = make_pathkeys_for_window(root,
3586 : wc,
3587 : tlist);
3588 : }
3589 : else
3590 503284 : root->window_pathkeys = NIL;
3591 :
3592 : /*
3593 : * As with GROUP BY, we can discard any DISTINCT items that are proven
3594 : * redundant by EquivalenceClass processing. The non-redundant list is
3595 : * kept in root->processed_distinctClause, leaving the original
3596 : * parse->distinctClause alone.
3597 : */
3598 505614 : if (parse->distinctClause)
3599 : {
3600 : bool sortable;
3601 :
3602 : /* Make a copy since pathkey processing can modify the list */
3603 2266 : root->processed_distinctClause = list_copy(parse->distinctClause);
3604 2266 : root->distinct_pathkeys =
3605 2266 : make_pathkeys_for_sortclauses_extended(root,
3606 : &root->processed_distinctClause,
3607 : tlist,
3608 : true,
3609 : false,
3610 : &sortable,
3611 : false);
3612 2266 : if (!sortable)
3613 6 : root->distinct_pathkeys = NIL;
3614 : }
3615 : else
3616 503348 : root->distinct_pathkeys = NIL;
3617 :
3618 505614 : root->sort_pathkeys =
3619 505614 : make_pathkeys_for_sortclauses(root,
3620 : parse->sortClause,
3621 : tlist);
3622 :
3623 : /* setting setop_pathkeys might be useful to the union planner */
3624 520018 : if (qp_extra->setop != NULL &&
3625 14404 : set_operation_ordered_results_useful(qp_extra->setop))
3626 10836 : {
3627 : List *groupClauses;
3628 : bool sortable;
3629 :
3630 10836 : groupClauses = generate_setop_child_grouplist(qp_extra->setop, tlist);
3631 :
3632 10836 : root->setop_pathkeys =
3633 10836 : make_pathkeys_for_sortclauses_extended(root,
3634 : &groupClauses,
3635 : tlist,
3636 : false,
3637 : false,
3638 : &sortable,
3639 : false);
3640 10836 : if (!sortable)
3641 184 : root->setop_pathkeys = NIL;
3642 : }
3643 : else
3644 494778 : root->setop_pathkeys = NIL;
3645 :
3646 : /*
3647 : * Figure out whether we want a sorted result from query_planner.
3648 : *
3649 : * If we have a sortable GROUP BY clause, then we want a result sorted
3650 : * properly for grouping. Otherwise, if we have window functions to
3651 : * evaluate, we try to sort for the first window. Otherwise, if there's a
3652 : * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3653 : * we try to produce output that's sufficiently well sorted for the
3654 : * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3655 : * by the ORDER BY clause. Otherwise, if we're a subquery being planned
3656 : * for a set operation which can benefit from presorted results and have a
3657 : * sortable targetlist, we want to sort by the target list.
3658 : *
3659 : * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3660 : * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3661 : * that might just leave us failing to exploit an available sort order at
3662 : * all. Needs more thought. The choice for DISTINCT versus ORDER BY is
3663 : * much easier, since we know that the parser ensured that one is a
3664 : * superset of the other.
3665 : */
3666 505614 : if (root->group_pathkeys)
3667 5646 : root->query_pathkeys = root->group_pathkeys;
3668 499968 : else if (root->window_pathkeys)
3669 1988 : root->query_pathkeys = root->window_pathkeys;
3670 995960 : else if (list_length(root->distinct_pathkeys) >
3671 497980 : list_length(root->sort_pathkeys))
3672 1830 : root->query_pathkeys = root->distinct_pathkeys;
3673 496150 : else if (root->sort_pathkeys)
3674 56850 : root->query_pathkeys = root->sort_pathkeys;
3675 439300 : else if (root->setop_pathkeys != NIL)
3676 10030 : root->query_pathkeys = root->setop_pathkeys;
3677 : else
3678 429270 : root->query_pathkeys = NIL;
3679 505614 : }
3680 :
3681 : /*
3682 : * Estimate number of groups produced by grouping clauses (1 if not grouping)
3683 : *
3684 : * path_rows: number of output rows from scan/join step
3685 : * gd: grouping sets data including list of grouping sets and their clauses
3686 : * target_list: target list containing group clause references
3687 : *
3688 : * If doing grouping sets, we also annotate the gsets data with the estimates
3689 : * for each set and each individual rollup list, with a view to later
3690 : * determining whether some combination of them could be hashed instead.
3691 : */
3692 : static double
3693 42760 : get_number_of_groups(PlannerInfo *root,
3694 : double path_rows,
3695 : grouping_sets_data *gd,
3696 : List *target_list)
3697 : {
3698 42760 : Query *parse = root->parse;
3699 : double dNumGroups;
3700 :
3701 42760 : if (parse->groupClause)
3702 : {
3703 : List *groupExprs;
3704 :
3705 6822 : if (parse->groupingSets)
3706 : {
3707 : /* Add up the estimates for each grouping set */
3708 : ListCell *lc;
3709 :
3710 : Assert(gd); /* keep Coverity happy */
3711 :
3712 806 : dNumGroups = 0;
3713 :
3714 2118 : foreach(lc, gd->rollups)
3715 : {
3716 1312 : RollupData *rollup = lfirst_node(RollupData, lc);
3717 : ListCell *lc2;
3718 : ListCell *lc3;
3719 :
3720 1312 : groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
3721 : target_list);
3722 :
3723 1312 : rollup->numGroups = 0.0;
3724 :
3725 3872 : forboth(lc2, rollup->gsets, lc3, rollup->gsets_data)
3726 : {
3727 2560 : List *gset = (List *) lfirst(lc2);
3728 2560 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc3);
3729 2560 : double numGroups = estimate_num_groups(root,
3730 : groupExprs,
3731 : path_rows,
3732 : &gset,
3733 : NULL);
3734 :
3735 2560 : gs->numGroups = numGroups;
3736 2560 : rollup->numGroups += numGroups;
3737 : }
3738 :
3739 1312 : dNumGroups += rollup->numGroups;
3740 : }
3741 :
3742 806 : if (gd->hash_sets_idx)
3743 : {
3744 : ListCell *lc2;
3745 :
3746 36 : gd->dNumHashGroups = 0;
3747 :
3748 36 : groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3749 : target_list);
3750 :
3751 78 : forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
3752 : {
3753 42 : List *gset = (List *) lfirst(lc);
3754 42 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc2);
3755 42 : double numGroups = estimate_num_groups(root,
3756 : groupExprs,
3757 : path_rows,
3758 : &gset,
3759 : NULL);
3760 :
3761 42 : gs->numGroups = numGroups;
3762 42 : gd->dNumHashGroups += numGroups;
3763 : }
3764 :
3765 36 : dNumGroups += gd->dNumHashGroups;
3766 : }
3767 : }
3768 : else
3769 : {
3770 : /* Plain GROUP BY -- estimate based on optimized groupClause */
3771 6016 : groupExprs = get_sortgrouplist_exprs(root->processed_groupClause,
3772 : target_list);
3773 :
3774 6016 : dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3775 : NULL, NULL);
3776 : }
3777 : }
3778 35938 : else if (parse->groupingSets)
3779 : {
3780 : /* Empty grouping sets ... one result row for each one */
3781 42 : dNumGroups = list_length(parse->groupingSets);
3782 : }
3783 35896 : else if (parse->hasAggs || root->hasHavingQual)
3784 : {
3785 : /* Plain aggregation, one result row */
3786 35896 : dNumGroups = 1;
3787 : }
3788 : else
3789 : {
3790 : /* Not grouping */
3791 0 : dNumGroups = 1;
3792 : }
3793 :
3794 42760 : return dNumGroups;
3795 : }
3796 :
3797 : /*
3798 : * create_grouping_paths
3799 : *
3800 : * Build a new upperrel containing Paths for grouping and/or aggregation.
3801 : * Along the way, we also build an upperrel for Paths which are partially
3802 : * grouped and/or aggregated. A partially grouped and/or aggregated path
3803 : * needs a FinalizeAggregate node to complete the aggregation. Currently,
3804 : * the only partially grouped paths we build are also partial paths; that
3805 : * is, they need a Gather and then a FinalizeAggregate.
3806 : *
3807 : * input_rel: contains the source-data Paths
3808 : * target: the pathtarget for the result Paths to compute
3809 : * gd: grouping sets data including list of grouping sets and their clauses
3810 : *
3811 : * Note: all Paths in input_rel are expected to return the target computed
3812 : * by make_group_input_target.
3813 : */
3814 : static RelOptInfo *
3815 39514 : create_grouping_paths(PlannerInfo *root,
3816 : RelOptInfo *input_rel,
3817 : PathTarget *target,
3818 : bool target_parallel_safe,
3819 : grouping_sets_data *gd)
3820 : {
3821 39514 : Query *parse = root->parse;
3822 : RelOptInfo *grouped_rel;
3823 : RelOptInfo *partially_grouped_rel;
3824 : AggClauseCosts agg_costs;
3825 :
3826 237084 : MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
3827 39514 : get_agg_clause_costs(root, AGGSPLIT_SIMPLE, &agg_costs);
3828 :
3829 : /*
3830 : * Create grouping relation to hold fully aggregated grouping and/or
3831 : * aggregation paths.
3832 : */
3833 39514 : grouped_rel = make_grouping_rel(root, input_rel, target,
3834 : target_parallel_safe, parse->havingQual);
3835 :
3836 : /*
3837 : * Create either paths for a degenerate grouping or paths for ordinary
3838 : * grouping, as appropriate.
3839 : */
3840 39514 : if (is_degenerate_grouping(root))
3841 18 : create_degenerate_grouping_paths(root, input_rel, grouped_rel);
3842 : else
3843 : {
3844 39496 : int flags = 0;
3845 : GroupPathExtraData extra;
3846 :
3847 : /*
3848 : * Determine whether it's possible to perform sort-based
3849 : * implementations of grouping. (Note that if processed_groupClause
3850 : * is empty, grouping_is_sortable() is trivially true, and all the
3851 : * pathkeys_contained_in() tests will succeed too, so that we'll
3852 : * consider every surviving input path.)
3853 : *
3854 : * If we have grouping sets, we might be able to sort some but not all
3855 : * of them; in this case, we need can_sort to be true as long as we
3856 : * must consider any sorted-input plan.
3857 : */
3858 39496 : if ((gd && gd->rollups != NIL)
3859 38654 : || grouping_is_sortable(root->processed_groupClause))
3860 39490 : flags |= GROUPING_CAN_USE_SORT;
3861 :
3862 : /*
3863 : * Determine whether we should consider hash-based implementations of
3864 : * grouping.
3865 : *
3866 : * Hashed aggregation only applies if we're grouping. If we have
3867 : * grouping sets, some groups might be hashable but others not; in
3868 : * this case we set can_hash true as long as there is nothing globally
3869 : * preventing us from hashing (and we should therefore consider plans
3870 : * with hashes).
3871 : *
3872 : * Executor doesn't support hashed aggregation with DISTINCT or ORDER
3873 : * BY aggregates. (Doing so would imply storing *all* the input
3874 : * values in the hash table, and/or running many sorts in parallel,
3875 : * either of which seems like a certain loser.) We similarly don't
3876 : * support ordered-set aggregates in hashed aggregation, but that case
3877 : * is also included in the numOrderedAggs count.
3878 : *
3879 : * Note: grouping_is_hashable() is much more expensive to check than
3880 : * the other gating conditions, so we want to do it last.
3881 : */
3882 39496 : if ((parse->groupClause != NIL &&
3883 8422 : root->numOrderedAggs == 0 &&
3884 4074 : (gd ? gd->any_hashable : grouping_is_hashable(root->processed_groupClause))))
3885 4070 : flags |= GROUPING_CAN_USE_HASH;
3886 :
3887 : /*
3888 : * Determine whether partial aggregation is possible.
3889 : */
3890 39496 : if (can_partial_agg(root))
3891 35414 : flags |= GROUPING_CAN_PARTIAL_AGG;
3892 :
3893 39496 : extra.flags = flags;
3894 39496 : extra.target_parallel_safe = target_parallel_safe;
3895 39496 : extra.havingQual = parse->havingQual;
3896 39496 : extra.targetList = parse->targetList;
3897 39496 : extra.partial_costs_set = false;
3898 :
3899 : /*
3900 : * Determine whether partitionwise aggregation is in theory possible.
3901 : * It can be disabled by the user, and for now, we don't try to
3902 : * support grouping sets. create_ordinary_grouping_paths() will check
3903 : * additional conditions, such as whether input_rel is partitioned.
3904 : */
3905 39496 : if (enable_partitionwise_aggregate && !parse->groupingSets)
3906 556 : extra.patype = PARTITIONWISE_AGGREGATE_FULL;
3907 : else
3908 38940 : extra.patype = PARTITIONWISE_AGGREGATE_NONE;
3909 :
3910 39496 : create_ordinary_grouping_paths(root, input_rel, grouped_rel,
3911 : &agg_costs, gd, &extra,
3912 : &partially_grouped_rel);
3913 : }
3914 :
3915 39508 : set_cheapest(grouped_rel);
3916 39508 : return grouped_rel;
3917 : }
3918 :
3919 : /*
3920 : * make_grouping_rel
3921 : *
3922 : * Create a new grouping rel and set basic properties.
3923 : *
3924 : * input_rel represents the underlying scan/join relation.
3925 : * target is the output expected from the grouping relation.
3926 : */
3927 : static RelOptInfo *
3928 41008 : make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
3929 : PathTarget *target, bool target_parallel_safe,
3930 : Node *havingQual)
3931 : {
3932 : RelOptInfo *grouped_rel;
3933 :
3934 41008 : if (IS_OTHER_REL(input_rel))
3935 : {
3936 1494 : grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG,
3937 : input_rel->relids);
3938 1494 : grouped_rel->reloptkind = RELOPT_OTHER_UPPER_REL;
3939 : }
3940 : else
3941 : {
3942 : /*
3943 : * By tradition, the relids set for the main grouping relation is
3944 : * NULL. (This could be changed, but might require adjustments
3945 : * elsewhere.)
3946 : */
3947 39514 : grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3948 : }
3949 :
3950 : /* Set target. */
3951 41008 : grouped_rel->reltarget = target;
3952 :
3953 : /*
3954 : * If the input relation is not parallel-safe, then the grouped relation
3955 : * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
3956 : * target list and HAVING quals are parallel-safe.
3957 : */
3958 71096 : if (input_rel->consider_parallel && target_parallel_safe &&
3959 30088 : is_parallel_safe(root, (Node *) havingQual))
3960 30070 : grouped_rel->consider_parallel = true;
3961 :
3962 : /*
3963 : * If the input rel belongs to a single FDW, so does the grouped rel.
3964 : */
3965 41008 : grouped_rel->serverid = input_rel->serverid;
3966 41008 : grouped_rel->userid = input_rel->userid;
3967 41008 : grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3968 41008 : grouped_rel->fdwroutine = input_rel->fdwroutine;
3969 :
3970 41008 : return grouped_rel;
3971 : }
3972 :
3973 : /*
3974 : * is_degenerate_grouping
3975 : *
3976 : * A degenerate grouping is one in which the query has a HAVING qual and/or
3977 : * grouping sets, but no aggregates and no GROUP BY (which implies that the
3978 : * grouping sets are all empty).
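 : *
 : * For example, "SELECT 1 FROM tab HAVING random() < 0.5" has a HAVING qual
 : * but no aggregates and no GROUP BY, so it groups the whole table into one
 : * (empty) group and emits either zero rows or one row.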
3979 : */
3980 : static bool
3981 39514 : is_degenerate_grouping(PlannerInfo *root)
3982 : {
3983 39514 : Query *parse = root->parse;
3984 :
3985 38206 : return (root->hasHavingQual || parse->groupingSets) &&
3986 77720 : !parse->hasAggs && parse->groupClause == NIL;
3987 : }
3988 :
3989 : /*
3990 : * create_degenerate_grouping_paths
3991 : *
3992 : * When the grouping is degenerate (see is_degenerate_grouping), we are
3993 : * supposed to emit either zero or one row for each grouping set depending on
3994 : * whether HAVING succeeds. Furthermore, there cannot be any variables in
3995 : * either HAVING or the targetlist, so we actually do not need the FROM table
3996 : * at all! We can just throw away the plan-so-far and generate a Result node.
3997 : * This is a sufficiently unusual corner case that it's not worth contorting
3998 : * the structure of this module to avoid having to generate the earlier paths
3999 : * in the first place.
4000 : */
4001 : static void
4002 18 : create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
4003 : RelOptInfo *grouped_rel)
4004 : {
4005 18 : Query *parse = root->parse;
4006 : int nrows;
4007 : Path *path;
4008 :
4009 18 : nrows = list_length(parse->groupingSets);
4010 18 : if (nrows > 1)
4011 : {
4012 : /*
4013 : * Doesn't seem worthwhile writing code to cons up a generate_series
4014 : * or a values scan to emit multiple rows. Instead just make N clones
4015 : * and append them. (With a volatile HAVING clause, this means you
4016 : * might get between 0 and N output rows. Offhand I think that's
4017 : * desired.)
4018 : */
4019 0 : List *paths = NIL;
4020 :
4021 0 : while (--nrows >= 0)
4022 : {
4023 : path = (Path *)
4024 0 : create_group_result_path(root, grouped_rel,
4025 0 : grouped_rel->reltarget,
4026 0 : (List *) parse->havingQual);
4027 0 : paths = lappend(paths, path);
4028 : }
4029 : path = (Path *)
4030 0 : create_append_path(root,
4031 : grouped_rel,
4032 : paths,
4033 : NIL,
4034 : NIL,
4035 : NULL,
4036 : 0,
4037 : false,
4038 : -1);
4039 : }
4040 : else
4041 : {
4042 : /* No grouping sets, or just one, so one output row */
4043 : path = (Path *)
4044 18 : create_group_result_path(root, grouped_rel,
4045 18 : grouped_rel->reltarget,
4046 18 : (List *) parse->havingQual);
4047 : }
4048 :
4049 18 : add_path(grouped_rel, path);
4050 18 : }
4051 :
4052 : /*
4053 : * create_ordinary_grouping_paths
4054 : *
4055 : * Create grouping paths for the ordinary (that is, non-degenerate) case.
4056 : *
4057 : * We need to consider sorted and hashed aggregation in the same function,
4058 : * because otherwise (1) it would be harder to throw an appropriate error
4059 : * message if neither way works, and (2) we should not allow hashtable size
4060 : * considerations to dissuade us from using hashing if sorting is not possible.
4061 : *
4062 : * *partially_grouped_rel_p will be set to the partially grouped rel which this
4063 : * function creates, or to NULL if it doesn't create one.
4064 : */
4065 : static void
4066 40990 : create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
4067 : RelOptInfo *grouped_rel,
4068 : const AggClauseCosts *agg_costs,
4069 : grouping_sets_data *gd,
4070 : GroupPathExtraData *extra,
4071 : RelOptInfo **partially_grouped_rel_p)
4072 : {
4073 40990 : Path *cheapest_path = input_rel->cheapest_total_path;
4074 40990 : RelOptInfo *partially_grouped_rel = NULL;
4075 : double dNumGroups;
4076 40990 : PartitionwiseAggregateType patype = PARTITIONWISE_AGGREGATE_NONE;
4077 :
4078 : /*
4079 : * If this is the topmost grouping relation or if the parent relation is
4080 : * doing some form of partitionwise aggregation, then we may be able to do
4081 : * it at this level also. However, if the input relation is not
4082 : * partitioned, partitionwise aggregate is impossible.
4083 : */
4084 40990 : if (extra->patype != PARTITIONWISE_AGGREGATE_NONE &&
4085 2050 : IS_PARTITIONED_REL(input_rel))
4086 : {
4087 : /*
4088 : * If this is the topmost relation or if the parent relation is doing
4089 : * full partitionwise aggregation, then we can do full partitionwise
4090 : * aggregation provided that the GROUP BY clause contains all of the
4091 : * partitioning columns at this level and the collation used by GROUP
4092 : * BY matches the partitioning collation. Otherwise, we can do at
4093 : * most partial partitionwise aggregation. But if partial aggregation
4094 : * is not supported in general then we can't use it for partitionwise
4095 : * aggregation either.
4096 : *
4097 : * Check parse->groupClause not processed_groupClause, because it's
4098 : * okay if some of the partitioning columns were proved redundant.
4099 : */
4100 1160 : if (extra->patype == PARTITIONWISE_AGGREGATE_FULL &&
4101 556 : group_by_has_partkey(input_rel, extra->targetList,
4102 556 : root->parse->groupClause))
4103 320 : patype = PARTITIONWISE_AGGREGATE_FULL;
4104 284 : else if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
4105 242 : patype = PARTITIONWISE_AGGREGATE_PARTIAL;
4106 : else
4107 42 : patype = PARTITIONWISE_AGGREGATE_NONE;
4108 : }
4109 :
4110 : /*
4111 : * Before generating paths for grouped_rel, we first generate any possible
4112 : * partially grouped paths; that way, later code can easily consider both
4113 : * parallel and non-parallel approaches to grouping.
4114 : */
4115 40990 : if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
4116 : {
4117 : bool force_rel_creation;
4118 :
4119 : /*
4120 : * If we're doing partitionwise aggregation at this level, force
4121 : * creation of a partially_grouped_rel so we can add partitionwise
4122 : * paths to it.
4123 : */
4124 36836 : force_rel_creation = (patype == PARTITIONWISE_AGGREGATE_PARTIAL);
4125 :
4126 : partially_grouped_rel =
4127 36836 : create_partial_grouping_paths(root,
4128 : grouped_rel,
4129 : input_rel,
4130 : gd,
4131 : extra,
4132 : force_rel_creation);
4133 : }
4134 :
4135 : /* Set out parameter. */
4136 40990 : *partially_grouped_rel_p = partially_grouped_rel;
4137 :
4138 : /* Apply partitionwise aggregation technique, if possible. */
4139 40990 : if (patype != PARTITIONWISE_AGGREGATE_NONE)
4140 562 : create_partitionwise_grouping_paths(root, input_rel, grouped_rel,
4141 : partially_grouped_rel, agg_costs,
4142 : gd, patype, extra);
4143 :
4144 : /* If we are doing partial aggregation only, return. */
4145 40990 : if (extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL)
4146 : {
4147 : Assert(partially_grouped_rel);
4148 :
4149 618 : if (partially_grouped_rel->pathlist)
4150 618 : set_cheapest(partially_grouped_rel);
4151 :
4152 618 : return;
4153 : }
4154 :
4155 : /* Gather any partially grouped partial paths. */
4156 40372 : if (partially_grouped_rel && partially_grouped_rel->partial_pathlist)
4157 : {
4158 1470 : gather_grouping_paths(root, partially_grouped_rel);
4159 1470 : set_cheapest(partially_grouped_rel);
4160 : }
4161 :
4162 : /*
4163 : * Estimate number of groups.
4164 : */
4165 40372 : dNumGroups = get_number_of_groups(root,
4166 : cheapest_path->rows,
4167 : gd,
4168 : extra->targetList);
4169 :
4170 : /* Build final grouping paths */
4171 40372 : add_paths_to_grouping_rel(root, input_rel, grouped_rel,
4172 : partially_grouped_rel, agg_costs, gd,
4173 : dNumGroups, extra);
4174 :
4175 : /* Give a helpful error if we failed to find any implementation */
4176 40372 : if (grouped_rel->pathlist == NIL)
4177 6 : ereport(ERROR,
4178 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4179 : errmsg("could not implement GROUP BY"),
4180 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4181 :
4182 : /*
4183 : * If there is an FDW that's responsible for all baserels of the query,
4184 : * let it consider adding ForeignPaths.
4185 : */
4186 40366 : if (grouped_rel->fdwroutine &&
4187 328 : grouped_rel->fdwroutine->GetForeignUpperPaths)
4188 328 : grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
4189 : input_rel, grouped_rel,
4190 : extra);
4191 :
4192 : /* Let extensions possibly add some more paths */
4193 40366 : if (create_upper_paths_hook)
4194 0 : (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
4195 : input_rel, grouped_rel,
4196 : extra);
4197 : }
4198 :
4199 : /*
4200 : * For a given input path, consider the possible ways of doing grouping sets on
4201 : * it, by combinations of hashing and sorting. This can be called multiple
4202 : * times, so it's important that it not scribble on input. No result is
4203 : * returned, but any generated paths are added to grouped_rel.
4204 : */
4205 : static void
4206 1684 : consider_groupingsets_paths(PlannerInfo *root,
4207 : RelOptInfo *grouped_rel,
4208 : Path *path,
4209 : bool is_sorted,
4210 : bool can_hash,
4211 : grouping_sets_data *gd,
4212 : const AggClauseCosts *agg_costs,
4213 : double dNumGroups)
4214 : {
4215 1684 : Query *parse = root->parse;
4216 1684 : Size hash_mem_limit = get_hash_memory_limit();
4217 :
4218 : /*
4219 : * If we're not being offered sorted input, then only consider plans that
4220 : * can be done entirely by hashing.
4221 : *
4222 : * We can hash everything if it looks like it'll fit in hash_mem. But if
4223 : * the input is actually sorted despite not being advertised as such, we
4224 : * prefer to make use of that in order to use less memory.
4225 : *
4226 : * If none of the grouping sets are sortable, then ignore the hash_mem
4227 : * limit and generate a path anyway, since otherwise we'll just fail.
4228 : */
4229 1684 : if (!is_sorted)
4230 : {
4231 770 : List *new_rollups = NIL;
4232 770 : RollupData *unhashed_rollup = NULL;
4233 : List *sets_data;
4234 770 : List *empty_sets_data = NIL;
4235 770 : List *empty_sets = NIL;
4236 : ListCell *lc;
4237 770 : ListCell *l_start = list_head(gd->rollups);
4238 770 : AggStrategy strat = AGG_HASHED;
4239 : double hashsize;
4240 770 : double exclude_groups = 0.0;
4241 :
4242 : Assert(can_hash);
4243 :
4244 : /*
4245 : * If the input is coincidentally sorted usefully (which can happen
 4246 : * even if is_sorted is false, since is_sorted only says whether our caller
4247 : * has set up the sorting for us), then save some hashtable space by
4248 : * making use of that. But we need to watch out for degenerate cases:
4249 : *
4250 : * 1) If there are any empty grouping sets, then group_pathkeys might
4251 : * be NIL if all non-empty grouping sets are unsortable. In this case,
4252 : * there will be a rollup containing only empty groups, and the
4253 : * pathkeys_contained_in test is vacuously true; this is ok.
4254 : *
4255 : * XXX: the above relies on the fact that group_pathkeys is generated
4256 : * from the first rollup. If we add the ability to consider multiple
4257 : * sort orders for grouping input, this assumption might fail.
4258 : *
4259 : * 2) If there are no empty sets and only unsortable sets, then the
4260 : * rollups list will be empty (and thus l_start == NULL), and
4261 : * group_pathkeys will be NIL; we must ensure that the vacuously-true
4262 : * pathkeys_contained_in test doesn't cause us to crash.
4263 : */
4264 1534 : if (l_start != NULL &&
4265 764 : pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
4266 : {
4267 12 : unhashed_rollup = lfirst_node(RollupData, l_start);
4268 12 : exclude_groups = unhashed_rollup->numGroups;
4269 12 : l_start = lnext(gd->rollups, l_start);
4270 : }
4271 :
4272 770 : hashsize = estimate_hashagg_tablesize(root,
4273 : path,
4274 : agg_costs,
4275 : dNumGroups - exclude_groups);
4276 :
4277 : /*
4278 : * gd->rollups is empty if we have only unsortable columns to work
4279 : * with. Override hash_mem in that case; otherwise, we'll rely on the
4280 : * sorted-input case to generate usable mixed paths.
4281 : */
4282 770 : if (hashsize > hash_mem_limit && gd->rollups)
4283 18 : return; /* nope, won't fit */
4284 :
4285 : /*
4286 : * We need to burst the existing rollups list into individual grouping
4287 : * sets and recompute a groupClause for each set.
4288 : */
4289 752 : sets_data = list_copy(gd->unsortable_sets);
4290 :
4291 1872 : for_each_cell(lc, gd->rollups, l_start)
4292 : {
4293 1144 : RollupData *rollup = lfirst_node(RollupData, lc);
4294 :
4295 : /*
4296 : * If we find an unhashable rollup that's not been skipped by the
4297 : * "actually sorted" check above, we can't cope; we'd need sorted
4298 : * input (with a different sort order) but we can't get that here.
4299 : * So bail out; we'll get a valid path from the is_sorted case
4300 : * instead.
4301 : *
4302 : * The mere presence of empty grouping sets doesn't make a rollup
 4303 : * unhashable (see preprocess_grouping_sets); we handle those
4304 : * specially below.
4305 : */
4306 1144 : if (!rollup->hashable)
4307 24 : return;
4308 :
4309 1120 : sets_data = list_concat(sets_data, rollup->gsets_data);
4310 : }
4311 3054 : foreach(lc, sets_data)
4312 : {
4313 2326 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
4314 2326 : List *gset = gs->set;
4315 : RollupData *rollup;
4316 :
4317 2326 : if (gset == NIL)
4318 : {
4319 : /* Empty grouping sets can't be hashed. */
4320 484 : empty_sets_data = lappend(empty_sets_data, gs);
4321 484 : empty_sets = lappend(empty_sets, NIL);
4322 : }
4323 : else
4324 : {
4325 1842 : rollup = makeNode(RollupData);
4326 :
4327 1842 : rollup->groupClause = preprocess_groupclause(root, gset);
4328 1842 : rollup->gsets_data = list_make1(gs);
4329 1842 : rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4330 : rollup->gsets_data,
4331 : gd->tleref_to_colnum_map);
4332 1842 : rollup->numGroups = gs->numGroups;
4333 1842 : rollup->hashable = true;
4334 1842 : rollup->is_hashed = true;
4335 1842 : new_rollups = lappend(new_rollups, rollup);
4336 : }
4337 : }
4338 :
4339 : /*
4340 : * If we didn't find anything nonempty to hash, then bail. We'll
4341 : * generate a path from the is_sorted case.
4342 : */
4343 728 : if (new_rollups == NIL)
4344 0 : return;
4345 :
4346 : /*
4347 : * If there were empty grouping sets they should have been in the
4348 : * first rollup.
4349 : */
4350 : Assert(!unhashed_rollup || !empty_sets);
4351 :
4352 728 : if (unhashed_rollup)
4353 : {
4354 12 : new_rollups = lappend(new_rollups, unhashed_rollup);
4355 12 : strat = AGG_MIXED;
4356 : }
4357 716 : else if (empty_sets)
4358 : {
4359 436 : RollupData *rollup = makeNode(RollupData);
4360 :
4361 436 : rollup->groupClause = NIL;
4362 436 : rollup->gsets_data = empty_sets_data;
4363 436 : rollup->gsets = empty_sets;
4364 436 : rollup->numGroups = list_length(empty_sets);
4365 436 : rollup->hashable = false;
4366 436 : rollup->is_hashed = false;
4367 436 : new_rollups = lappend(new_rollups, rollup);
4368 436 : strat = AGG_MIXED;
4369 : }
4370 :
4371 728 : add_path(grouped_rel, (Path *)
4372 728 : create_groupingsets_path(root,
4373 : grouped_rel,
4374 : path,
4375 728 : (List *) parse->havingQual,
4376 : strat,
4377 : new_rollups,
4378 : agg_costs));
4379 728 : return;
4380 : }
4381 :
4382 : /*
4383 : * If we have sorted input but nothing we can do with it, bail.
4384 : */
4385 914 : if (gd->rollups == NIL)
4386 0 : return;
4387 :
4388 : /*
 4389 : * Given sorted input, we try to make two paths: one sorted and one mixed
4390 : * sort/hash. (We need to try both because hashagg might be disabled, or
4391 : * some columns might not be sortable.)
4392 : *
4393 : * can_hash is passed in as false if some obstacle elsewhere (such as
4394 : * ordered aggs) means that we shouldn't consider hashing at all.
4395 : */
4396 914 : if (can_hash && gd->any_hashable)
4397 : {
4398 836 : List *rollups = NIL;
4399 836 : List *hash_sets = list_copy(gd->unsortable_sets);
4400 836 : double availspace = hash_mem_limit;
4401 : ListCell *lc;
4402 :
4403 : /*
4404 : * Account first for space needed for groups we can't sort at all.
4405 : */
4406 836 : availspace -= estimate_hashagg_tablesize(root,
4407 : path,
4408 : agg_costs,
4409 : gd->dNumHashGroups);
4410 :
4411 836 : if (availspace > 0 && list_length(gd->rollups) > 1)
4412 : {
4413 : double scale;
4414 420 : int num_rollups = list_length(gd->rollups);
4415 : int k_capacity;
4416 420 : int *k_weights = palloc(num_rollups * sizeof(int));
4417 420 : Bitmapset *hash_items = NULL;
4418 : int i;
4419 :
4420 : /*
4421 : * We treat this as a knapsack problem: the knapsack capacity
4422 : * represents hash_mem, the item weights are the estimated memory
4423 : * usage of the hashtables needed to implement a single rollup,
4424 : * and we really ought to use the cost saving as the item value;
4425 : * however, currently the costs assigned to sort nodes don't
4426 : * reflect the comparison costs well, and so we treat all items as
4427 : * of equal value (each rollup we hash instead saves us one sort).
4428 : *
4429 : * To use the discrete knapsack, we need to scale the values to a
4430 : * reasonably small bounded range. We choose to allow a 5% error
4431 : * margin; we have no more than 4096 rollups in the worst possible
4432 : * case, which with a 5% error margin will require a bit over 42MB
4433 : * of workspace. (Anyone wanting to plan queries that complex had
4434 : * better have the memory for it. In more reasonable cases, with
4435 : * no more than a couple of dozen rollups, the memory usage will
4436 : * be negligible.)
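 : *
 : * (To spell out that arithmetic: a 5% error margin corresponds to about
 : * 20 weight units per rollup, so 4096 rollups give a knapsack capacity
 : * of roughly 20 * 4096 = 81920 units, which is what drives the memory
 : * estimate above.)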
4437 : *
4438 : * k_capacity is naturally bounded, but we clamp the values for
4439 : * scale and weight (below) to avoid overflows or underflows (or
4440 : * uselessly trying to use a scale factor less than 1 byte).
4441 : */
4442 420 : scale = Max(availspace / (20.0 * num_rollups), 1.0);
4443 420 : k_capacity = (int) floor(availspace / scale);
4444 :
4445 : /*
4446 : * We leave the first rollup out of consideration since it's the
4447 : * one that matches the input sort order. We assign indexes "i"
4448 : * to only those entries considered for hashing; the second loop,
4449 : * below, must use the same condition.
4450 : */
4451 420 : i = 0;
4452 1056 : for_each_from(lc, gd->rollups, 1)
4453 : {
4454 636 : RollupData *rollup = lfirst_node(RollupData, lc);
4455 :
4456 636 : if (rollup->hashable)
4457 : {
4458 636 : double sz = estimate_hashagg_tablesize(root,
4459 : path,
4460 : agg_costs,
4461 : rollup->numGroups);
4462 :
4463 : /*
4464 : * If sz is enormous, but hash_mem (and hence scale) is
4465 : * small, avoid integer overflow here.
4466 : */
4467 636 : k_weights[i] = (int) Min(floor(sz / scale),
4468 : k_capacity + 1.0);
4469 636 : ++i;
4470 : }
4471 : }
4472 :
4473 : /*
4474 : * Apply knapsack algorithm; compute the set of items which
4475 : * maximizes the value stored (in this case the number of sorts
4476 : * saved) while keeping the total size (approximately) within
4477 : * capacity.
4478 : */
4479 420 : if (i > 0)
4480 420 : hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
4481 :
4482 420 : if (!bms_is_empty(hash_items))
4483 : {
4484 420 : rollups = list_make1(linitial(gd->rollups));
4485 :
4486 420 : i = 0;
4487 1056 : for_each_from(lc, gd->rollups, 1)
4488 : {
4489 636 : RollupData *rollup = lfirst_node(RollupData, lc);
4490 :
4491 636 : if (rollup->hashable)
4492 : {
4493 636 : if (bms_is_member(i, hash_items))
4494 600 : hash_sets = list_concat(hash_sets,
4495 600 : rollup->gsets_data);
4496 : else
4497 36 : rollups = lappend(rollups, rollup);
4498 636 : ++i;
4499 : }
4500 : else
4501 0 : rollups = lappend(rollups, rollup);
4502 : }
4503 : }
4504 : }
4505 :
4506 836 : if (!rollups && hash_sets)
4507 24 : rollups = list_copy(gd->rollups);
4508 :
4509 1576 : foreach(lc, hash_sets)
4510 : {
4511 740 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
4512 740 : RollupData *rollup = makeNode(RollupData);
4513 :
4514 : Assert(gs->set != NIL);
4515 :
4516 740 : rollup->groupClause = preprocess_groupclause(root, gs->set);
4517 740 : rollup->gsets_data = list_make1(gs);
4518 740 : rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4519 : rollup->gsets_data,
4520 : gd->tleref_to_colnum_map);
4521 740 : rollup->numGroups = gs->numGroups;
4522 740 : rollup->hashable = true;
4523 740 : rollup->is_hashed = true;
4524 740 : rollups = lcons(rollup, rollups);
4525 : }
4526 :
4527 836 : if (rollups)
4528 : {
4529 444 : add_path(grouped_rel, (Path *)
4530 444 : create_groupingsets_path(root,
4531 : grouped_rel,
4532 : path,
4533 444 : (List *) parse->havingQual,
4534 : AGG_MIXED,
4535 : rollups,
4536 : agg_costs));
4537 : }
4538 : }
4539 :
4540 : /*
4541 : * Now try the simple sorted case.
4542 : */
4543 914 : if (!gd->unsortable_sets)
4544 884 : add_path(grouped_rel, (Path *)
4545 884 : create_groupingsets_path(root,
4546 : grouped_rel,
4547 : path,
4548 884 : (List *) parse->havingQual,
4549 : AGG_SORTED,
4550 : gd->rollups,
4551 : agg_costs));
4552 : }
4553 :
4554 : /*
4555 : * create_window_paths
4556 : *
4557 : * Build a new upperrel containing Paths for window-function evaluation.
4558 : *
4559 : * input_rel: contains the source-data Paths
4560 : * input_target: result of make_window_input_target
4561 : * output_target: what the topmost WindowAggPath should return
4562 : * wflists: result of find_window_functions
4563 : * activeWindows: result of select_active_windows
4564 : *
4565 : * Note: all Paths in input_rel are expected to return input_target.
4566 : */
4567 : static RelOptInfo *
4568 2330 : create_window_paths(PlannerInfo *root,
4569 : RelOptInfo *input_rel,
4570 : PathTarget *input_target,
4571 : PathTarget *output_target,
4572 : bool output_target_parallel_safe,
4573 : WindowFuncLists *wflists,
4574 : List *activeWindows)
4575 : {
4576 : RelOptInfo *window_rel;
4577 : ListCell *lc;
4578 :
4579 : /* For now, do all work in the (WINDOW, NULL) upperrel */
4580 2330 : window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
4581 :
4582 : /*
4583 : * If the input relation is not parallel-safe, then the window relation
4584 : * can't be parallel-safe, either. Otherwise, we need to examine the
4585 : * target list and active windows for non-parallel-safe constructs.
4586 : */
4587 2330 : if (input_rel->consider_parallel && output_target_parallel_safe &&
4588 0 : is_parallel_safe(root, (Node *) activeWindows))
4589 0 : window_rel->consider_parallel = true;
4590 :
4591 : /*
4592 : * If the input rel belongs to a single FDW, so does the window rel.
4593 : */
4594 2330 : window_rel->serverid = input_rel->serverid;
4595 2330 : window_rel->userid = input_rel->userid;
4596 2330 : window_rel->useridiscurrent = input_rel->useridiscurrent;
4597 2330 : window_rel->fdwroutine = input_rel->fdwroutine;
4598 :
4599 : /*
4600 : * Consider computing window functions starting from the existing
4601 : * cheapest-total path (which will likely require a sort) as well as any
4602 : * existing paths that satisfy or partially satisfy root->window_pathkeys.
4603 : */
4604 4972 : foreach(lc, input_rel->pathlist)
4605 : {
4606 2642 : Path *path = (Path *) lfirst(lc);
4607 : int presorted_keys;
4608 :
4609 2954 : if (path == input_rel->cheapest_total_path ||
4610 312 : pathkeys_count_contained_in(root->window_pathkeys, path->pathkeys,
4611 144 : &presorted_keys) ||
4612 144 : presorted_keys > 0)
4613 2528 : create_one_window_path(root,
4614 : window_rel,
4615 : path,
4616 : input_target,
4617 : output_target,
4618 : wflists,
4619 : activeWindows);
4620 : }
4621 :
4622 : /*
4623 : * If there is an FDW that's responsible for all baserels of the query,
4624 : * let it consider adding ForeignPaths.
4625 : */
4626 2330 : if (window_rel->fdwroutine &&
4627 12 : window_rel->fdwroutine->GetForeignUpperPaths)
4628 12 : window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
4629 : input_rel, window_rel,
4630 : NULL);
4631 :
4632 : /* Let extensions possibly add some more paths */
4633 2330 : if (create_upper_paths_hook)
4634 0 : (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
4635 : input_rel, window_rel, NULL);
4636 :
4637 : /* Now choose the best path(s) */
4638 2330 : set_cheapest(window_rel);
4639 :
4640 2330 : return window_rel;
4641 : }
4642 :
4643 : /*
4644 : * Stack window-function implementation steps atop the given Path, and
4645 : * add the result to window_rel.
4646 : *
4647 : * window_rel: upperrel to contain result
4648 : * path: input Path to use (must return input_target)
4649 : * input_target: result of make_window_input_target
4650 : * output_target: what the topmost WindowAggPath should return
4651 : * wflists: result of find_window_functions
4652 : * activeWindows: result of select_active_windows
4653 : */
4654 : static void
4655 2528 : create_one_window_path(PlannerInfo *root,
4656 : RelOptInfo *window_rel,
4657 : Path *path,
4658 : PathTarget *input_target,
4659 : PathTarget *output_target,
4660 : WindowFuncLists *wflists,
4661 : List *activeWindows)
4662 : {
4663 : PathTarget *window_target;
4664 : ListCell *l;
4665 2528 : List *topqual = NIL;
4666 :
4667 : /*
4668 : * Since each window clause could require a different sort order, we stack
4669 : * up a WindowAgg node for each clause, with sort steps between them as
4670 : * needed. (We assume that select_active_windows chose a good order for
4671 : * executing the clauses in.)
4672 : *
4673 : * input_target should contain all Vars and Aggs needed for the result.
4674 : * (In some cases we wouldn't need to propagate all of these all the way
4675 : * to the top, since they might only be needed as inputs to WindowFuncs.
4676 : * It's probably not worth trying to optimize that though.) It must also
4677 : * contain all window partitioning and sorting expressions, to ensure
4678 : * they're computed only once at the bottom of the stack (that's critical
4679 : * for volatile functions). As we climb up the stack, we'll add outputs
4680 : * for the WindowFuncs computed at each level.
4681 : */
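	/*
	 * Illustrative example (editorial note; table and columns hypothetical):
	 * for
	 *		SELECT sum(x) OVER (ORDER BY a),
	 *			   count(*) OVER (PARTITION BY b ORDER BY c)
	 *		FROM t;
	 * one possible stack built below is
	 *		WindowAgg (PARTITION BY b ORDER BY c)
	 *		  -> Sort (b, c)
	 *			 -> WindowAgg (ORDER BY a)
	 *				-> Sort (a)
	 *				   -> scan of t
	 * where only the topmost WindowAgg computes output_target, and a Sort
	 * (or Incremental Sort) is inserted only where the input ordering does
	 * not already match.
	 */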
4682 2528 : window_target = input_target;
4683 :
4684 5206 : foreach(l, activeWindows)
4685 : {
4686 2678 : WindowClause *wc = lfirst_node(WindowClause, l);
4687 : List *window_pathkeys;
4688 2678 : List *runcondition = NIL;
4689 : int presorted_keys;
4690 : bool is_sorted;
4691 : bool topwindow;
4692 : ListCell *lc2;
4693 :
4694 2678 : window_pathkeys = make_pathkeys_for_window(root,
4695 : wc,
4696 : root->processed_tlist);
4697 :
4698 2678 : is_sorted = pathkeys_count_contained_in(window_pathkeys,
4699 : path->pathkeys,
4700 : &presorted_keys);
4701 :
4702 : /* Sort if necessary */
4703 2678 : if (!is_sorted)
4704 : {
4705 : /*
4706 : * No presorted keys or incremental sort disabled, just perform a
4707 : * complete sort.
4708 : */
4709 2066 : if (presorted_keys == 0 || !enable_incremental_sort)
4710 2000 : path = (Path *) create_sort_path(root, window_rel,
4711 : path,
4712 : window_pathkeys,
4713 : -1.0);
4714 : else
4715 : {
4716 : /*
4717 : * Since we have presorted keys and incremental sort is
4718 : * enabled, just use incremental sort.
4719 : */
4720 66 : path = (Path *) create_incremental_sort_path(root,
4721 : window_rel,
4722 : path,
4723 : window_pathkeys,
4724 : presorted_keys,
4725 : -1.0);
4726 : }
4727 : }
4728 :
4729 2678 : if (lnext(activeWindows, l))
4730 : {
4731 : /*
4732 : * Add the current WindowFuncs to the output target for this
4733 : * intermediate WindowAggPath. We must copy window_target to
4734 : * avoid changing the previous path's target.
4735 : *
4736 : * Note: a WindowFunc adds nothing to the target's eval costs; but
4737 : * we do need to account for the increase in tlist width.
4738 : */
4739 150 : int64 tuple_width = window_target->width;
4740 :
4741 150 : window_target = copy_pathtarget(window_target);
4742 342 : foreach(lc2, wflists->windowFuncs[wc->winref])
4743 : {
4744 192 : WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4745 :
4746 192 : add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4747 192 : tuple_width += get_typavgwidth(wfunc->wintype, -1);
4748 : }
4749 150 : window_target->width = clamp_width_est(tuple_width);
4750 : }
4751 : else
4752 : {
4753 : /* Install the goal target in the topmost WindowAgg */
4754 2528 : window_target = output_target;
4755 : }
4756 :
4757 : /* mark the final item in the list as the top-level window */
4758 2678 : topwindow = foreach_current_index(l) == list_length(activeWindows) - 1;
4759 :
4760 : /*
4761 : * Collect the WindowFuncRunConditions from each WindowFunc and
4762 : * convert them into OpExprs
4763 : */
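		/*
		 * Editorial illustration (hypothetical query): for
		 *		SELECT * FROM (SELECT a, row_number() OVER (ORDER BY a) rn
		 *					   FROM t) s
		 *		WHERE rn <= 10;
		 * earlier planning steps may have attached a run condition "<= 10"
		 * to the row_number() WindowFunc; the loop below rebuilds it as the
		 * OpExpr "row_number() OVER (...) <= 10" so the WindowAgg can stop
		 * producing rows once the condition becomes false for good.
		 */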
4764 6058 : foreach(lc2, wflists->windowFuncs[wc->winref])
4765 : {
4766 : ListCell *lc3;
4767 3380 : WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4768 :
4769 3554 : foreach(lc3, wfunc->runCondition)
4770 : {
4771 174 : WindowFuncRunCondition *wfuncrc =
4772 : lfirst_node(WindowFuncRunCondition, lc3);
4773 : Expr *opexpr;
4774 : Expr *leftop;
4775 : Expr *rightop;
4776 :
4777 174 : if (wfuncrc->wfunc_left)
4778 : {
4779 156 : leftop = (Expr *) copyObject(wfunc);
4780 156 : rightop = copyObject(wfuncrc->arg);
4781 : }
4782 : else
4783 : {
4784 18 : leftop = copyObject(wfuncrc->arg);
4785 18 : rightop = (Expr *) copyObject(wfunc);
4786 : }
4787 :
4788 174 : opexpr = make_opclause(wfuncrc->opno,
4789 : BOOLOID,
4790 : false,
4791 : leftop,
4792 : rightop,
4793 : InvalidOid,
4794 : wfuncrc->inputcollid);
4795 :
4796 174 : runcondition = lappend(runcondition, opexpr);
4797 :
4798 174 : if (!topwindow)
4799 24 : topqual = lappend(topqual, opexpr);
4800 : }
4801 : }
4802 :
4803 : path = (Path *)
4804 2678 : create_windowagg_path(root, window_rel, path, window_target,
4805 2678 : wflists->windowFuncs[wc->winref],
4806 : runcondition, wc,
4807 : topwindow ? topqual : NIL, topwindow);
4808 : }
4809 :
4810 2528 : add_path(window_rel, path);
4811 2528 : }
4812 :
4813 : /*
4814 : * create_distinct_paths
4815 : *
4816 : * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4817 : *
4818 : * input_rel: contains the source-data Paths
4819 : * target: the pathtarget for the result Paths to compute
4820 : *
4821 : * Note: input paths should already compute the desired pathtarget, since
4822 : * Sort/Unique won't project anything.
4823 : */
4824 : static RelOptInfo *
4825 2266 : create_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4826 : PathTarget *target)
4827 : {
4828 : RelOptInfo *distinct_rel;
4829 :
4830 : /* For now, do all work in the (DISTINCT, NULL) upperrel */
4831 2266 : distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4832 :
4833 : /*
4834 : * We don't compute anything at this level, so distinct_rel will be
4835 : * parallel-safe if the input rel is parallel-safe. In particular, if
4836 : * there is a DISTINCT ON (...) clause, any path for the input_rel will
4837 : * output those expressions, and will not be parallel-safe unless those
4838 : * expressions are parallel-safe.
4839 : */
4840 2266 : distinct_rel->consider_parallel = input_rel->consider_parallel;
4841 :
4842 : /*
4843 : * If the input rel belongs to a single FDW, so does the distinct_rel.
4844 : */
4845 2266 : distinct_rel->serverid = input_rel->serverid;
4846 2266 : distinct_rel->userid = input_rel->userid;
4847 2266 : distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4848 2266 : distinct_rel->fdwroutine = input_rel->fdwroutine;
4849 :
4850 : /* build distinct paths based on input_rel's pathlist */
4851 2266 : create_final_distinct_paths(root, input_rel, distinct_rel);
4852 :
4853 : /* now build distinct paths based on input_rel's partial_pathlist */
4854 2266 : create_partial_distinct_paths(root, input_rel, distinct_rel, target);
4855 :
4856 : /* Give a helpful error if we failed to create any paths */
4857 2266 : if (distinct_rel->pathlist == NIL)
4858 0 : ereport(ERROR,
4859 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4860 : errmsg("could not implement DISTINCT"),
4861 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4862 :
4863 : /*
4864 : * If there is an FDW that's responsible for all baserels of the query,
4865 : * let it consider adding ForeignPaths.
4866 : */
4867 2266 : if (distinct_rel->fdwroutine &&
4868 16 : distinct_rel->fdwroutine->GetForeignUpperPaths)
4869 16 : distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4870 : UPPERREL_DISTINCT,
4871 : input_rel,
4872 : distinct_rel,
4873 : NULL);
4874 :
4875 : /* Let extensions possibly add some more paths */
4876 2266 : if (create_upper_paths_hook)
4877 0 : (*create_upper_paths_hook) (root, UPPERREL_DISTINCT, input_rel,
4878 : distinct_rel, NULL);
4879 :
4880 : /* Now choose the best path(s) */
4881 2266 : set_cheapest(distinct_rel);
4882 :
4883 2266 : return distinct_rel;
4884 : }
4885 :
4886 : /*
4887 : * create_partial_distinct_paths
4888 : *
4889 : * Process 'input_rel' partial paths and add unique/aggregate paths to the
4890 : * UPPERREL_PARTIAL_DISTINCT rel. For paths created, add Gather/GatherMerge
4891 :  * paths on top and add a final unique/aggregate path to remove any
4892 :  * duplicates produced by combining rows from parallel workers.
4893 : */
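/*
 * Editorial sketch (illustrative only): for a parallel-safe
 *		SELECT DISTINCT a FROM t;
 * the overall shape aimed for here is roughly
 *		Unique (or HashAggregate)
 *		  -> Gather Merge (or Gather)
 *			 -> Sort + Unique (or partial HashAggregate)
 *				-> Parallel Seq Scan on t
 * where the per-worker step removes most duplicates early and the final
 * step above the Gather removes duplicates that span workers.
 */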
4894 : static void
4895 2266 : create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4896 : RelOptInfo *final_distinct_rel,
4897 : PathTarget *target)
4898 : {
4899 : RelOptInfo *partial_distinct_rel;
4900 : Query *parse;
4901 : List *distinctExprs;
4902 : double numDistinctRows;
4903 : Path *cheapest_partial_path;
4904 : ListCell *lc;
4905 :
4906 : /* nothing to do when there are no partial paths in the input rel */
4907 2266 : if (!input_rel->consider_parallel || input_rel->partial_pathlist == NIL)
4908 2170 : return;
4909 :
4910 96 : parse = root->parse;
4911 :
4912 : /* can't do parallel DISTINCT ON */
4913 96 : if (parse->hasDistinctOn)
4914 0 : return;
4915 :
4916 96 : partial_distinct_rel = fetch_upper_rel(root, UPPERREL_PARTIAL_DISTINCT,
4917 : NULL);
4918 96 : partial_distinct_rel->reltarget = target;
4919 96 : partial_distinct_rel->consider_parallel = input_rel->consider_parallel;
4920 :
4921 : /*
4922 : * If input_rel belongs to a single FDW, so does the partial_distinct_rel.
4923 : */
4924 96 : partial_distinct_rel->serverid = input_rel->serverid;
4925 96 : partial_distinct_rel->userid = input_rel->userid;
4926 96 : partial_distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4927 96 : partial_distinct_rel->fdwroutine = input_rel->fdwroutine;
4928 :
4929 96 : cheapest_partial_path = linitial(input_rel->partial_pathlist);
4930 :
4931 96 : distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
4932 : parse->targetList);
4933 :
4934 : /* estimate how many distinct rows we'll get from each worker */
4935 96 : numDistinctRows = estimate_num_groups(root, distinctExprs,
4936 : cheapest_partial_path->rows,
4937 : NULL, NULL);
4938 :
4939 : /*
4940 : * Try sorting the cheapest path and incrementally sorting any paths with
4941 :      * presorted keys, and put a Unique path atop each of those.
4942 : */
4943 96 : if (grouping_is_sortable(root->processed_distinctClause))
4944 : {
4945 198 : foreach(lc, input_rel->partial_pathlist)
4946 : {
4947 102 : Path *input_path = (Path *) lfirst(lc);
4948 : Path *sorted_path;
4949 : bool is_sorted;
4950 : int presorted_keys;
4951 :
4952 102 : is_sorted = pathkeys_count_contained_in(root->distinct_pathkeys,
4953 : input_path->pathkeys,
4954 : &presorted_keys);
4955 :
4956 102 : if (is_sorted)
4957 6 : sorted_path = input_path;
4958 : else
4959 : {
4960 : /*
4961 : * Try at least sorting the cheapest path and also try
4962 : * incrementally sorting any path which is partially sorted
4963 : * already (no need to deal with paths which have presorted
4964 : * keys when incremental sort is disabled unless it's the
4965 : * cheapest partial path).
4966 : */
4967 96 : if (input_path != cheapest_partial_path &&
4968 6 : (presorted_keys == 0 || !enable_incremental_sort))
4969 0 : continue;
4970 :
4971 : /*
4972 : * We've no need to consider both a sort and incremental sort.
4973 : * We'll just do a sort if there are no presorted keys and an
4974 : * incremental sort when there are presorted keys.
4975 : */
4976 96 : if (presorted_keys == 0 || !enable_incremental_sort)
4977 90 : sorted_path = (Path *) create_sort_path(root,
4978 : partial_distinct_rel,
4979 : input_path,
4980 : root->distinct_pathkeys,
4981 : -1.0);
4982 : else
4983 6 : sorted_path = (Path *) create_incremental_sort_path(root,
4984 : partial_distinct_rel,
4985 : input_path,
4986 : root->distinct_pathkeys,
4987 : presorted_keys,
4988 : -1.0);
4989 : }
4990 :
4991 : /*
4992 : * An empty distinct_pathkeys means all tuples have the same value
4993 : * for the DISTINCT clause. See create_final_distinct_paths()
4994 : */
4995 102 : if (root->distinct_pathkeys == NIL)
4996 : {
4997 : Node *limitCount;
4998 :
4999 6 : limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
5000 : sizeof(int64),
5001 : Int64GetDatum(1), false,
5002 : FLOAT8PASSBYVAL);
5003 :
5004 : /*
5005 : * Apply a LimitPath onto the partial path to restrict the
5006 : * tuples from each worker to 1. create_final_distinct_paths
5007 : * will need to apply an additional LimitPath to restrict this
5008 : * to a single row after the Gather node. If the query
5009 : * already has a LIMIT clause, then we could end up with three
5010 : * Limit nodes in the final plan. Consolidating the top two
5011 : * of these could be done, but does not seem worth troubling
5012 : * over.
5013 : */
5014 6 : add_partial_path(partial_distinct_rel, (Path *)
5015 6 : create_limit_path(root, partial_distinct_rel,
5016 : sorted_path,
5017 : NULL,
5018 : limitCount,
5019 : LIMIT_OPTION_COUNT,
5020 : 0, 1));
5021 : }
5022 : else
5023 : {
5024 96 : add_partial_path(partial_distinct_rel, (Path *)
5025 96 : create_upper_unique_path(root, partial_distinct_rel,
5026 : sorted_path,
5027 96 : list_length(root->distinct_pathkeys),
5028 : numDistinctRows));
5029 : }
5030 : }
5031 : }
5032 :
5033 : /*
5034 : * Now try hash aggregate paths, if enabled and hashing is possible. Since
5035 : * we're not on the hook to ensure we do our best to create at least one
5036 : * path here, we treat enable_hashagg as a hard off-switch rather than the
5037 : * slightly softer variant in create_final_distinct_paths.
5038 : */
5039 96 : if (enable_hashagg && grouping_is_hashable(root->processed_distinctClause))
5040 : {
5041 78 : add_partial_path(partial_distinct_rel, (Path *)
5042 78 : create_agg_path(root,
5043 : partial_distinct_rel,
5044 : cheapest_partial_path,
5045 : cheapest_partial_path->pathtarget,
5046 : AGG_HASHED,
5047 : AGGSPLIT_SIMPLE,
5048 : root->processed_distinctClause,
5049 : NIL,
5050 : NULL,
5051 : numDistinctRows));
5052 : }
5053 :
5054 : /*
5055 : * If there is an FDW that's responsible for all baserels of the query,
5056 : * let it consider adding ForeignPaths.
5057 : */
5058 96 : if (partial_distinct_rel->fdwroutine &&
5059 0 : partial_distinct_rel->fdwroutine->GetForeignUpperPaths)
5060 0 : partial_distinct_rel->fdwroutine->GetForeignUpperPaths(root,
5061 : UPPERREL_PARTIAL_DISTINCT,
5062 : input_rel,
5063 : partial_distinct_rel,
5064 : NULL);
5065 :
5066 : /* Let extensions possibly add some more partial paths */
5067 96 : if (create_upper_paths_hook)
5068 0 : (*create_upper_paths_hook) (root, UPPERREL_PARTIAL_DISTINCT,
5069 : input_rel, partial_distinct_rel, NULL);
5070 :
5071 96 : if (partial_distinct_rel->partial_pathlist != NIL)
5072 : {
5073 96 : generate_useful_gather_paths(root, partial_distinct_rel, true);
5074 96 : set_cheapest(partial_distinct_rel);
5075 :
5076 : /*
5077 : * Finally, create paths to distinctify the final result. This step
5078 : * is needed to remove any duplicates due to combining rows from
5079 : * parallel workers.
5080 : */
5081 96 : create_final_distinct_paths(root, partial_distinct_rel,
5082 : final_distinct_rel);
5083 : }
5084 : }
5085 :
5086 : /*
5087 : * create_final_distinct_paths
5088 : * Create distinct paths in 'distinct_rel' based on 'input_rel' pathlist
5089 : *
5090 : * input_rel: contains the source-data paths
5091 : * distinct_rel: destination relation for storing created paths
5092 : */
5093 : static RelOptInfo *
5094 2362 : create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
5095 : RelOptInfo *distinct_rel)
5096 : {
5097 2362 : Query *parse = root->parse;
5098 2362 : Path *cheapest_input_path = input_rel->cheapest_total_path;
5099 : double numDistinctRows;
5100 : bool allow_hash;
5101 :
5102 : /* Estimate number of distinct rows there will be */
5103 2362 : if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
5104 2288 : root->hasHavingQual)
5105 : {
5106 : /*
5107 : * If there was grouping or aggregation, use the number of input rows
5108 : * as the estimated number of DISTINCT rows (ie, assume the input is
5109 : * already mostly unique).
5110 : */
5111 74 : numDistinctRows = cheapest_input_path->rows;
5112 : }
5113 : else
5114 : {
5115 : /*
5116 : * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
5117 : */
5118 : List *distinctExprs;
5119 :
5120 2288 : distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
5121 : parse->targetList);
5122 2288 : numDistinctRows = estimate_num_groups(root, distinctExprs,
5123 : cheapest_input_path->rows,
5124 : NULL, NULL);
5125 : }
5126 :
5127 : /*
5128 : * Consider sort-based implementations of DISTINCT, if possible.
5129 : */
5130 2362 : if (grouping_is_sortable(root->processed_distinctClause))
5131 : {
5132 : /*
5133 : * Firstly, if we have any adequately-presorted paths, just stick a
5134 :      * Unique node on those.  We also consider doing an explicit sort of
5135 : * the cheapest input path and Unique'ing that. If any paths have
5136 : * presorted keys then we'll create an incremental sort atop of those
5137 : * before adding a unique node on the top.
5138 : *
5139 : * When we have DISTINCT ON, we must sort by the more rigorous of
5140 : * DISTINCT and ORDER BY, else it won't have the desired behavior.
5141 : * Also, if we do have to do an explicit sort, we might as well use
5142 : * the more rigorous ordering to avoid a second sort later. (Note
5143 : * that the parser will have ensured that one clause is a prefix of
5144 : * the other.)
5145 : */
5146 : List *needed_pathkeys;
5147 : ListCell *lc;
5148 2356 : double limittuples = root->distinct_pathkeys == NIL ? 1.0 : -1.0;
5149 :
5150 2550 : if (parse->hasDistinctOn &&
5151 194 : list_length(root->distinct_pathkeys) <
5152 194 : list_length(root->sort_pathkeys))
5153 42 : needed_pathkeys = root->sort_pathkeys;
5154 : else
5155 2314 : needed_pathkeys = root->distinct_pathkeys;
5156 :
5157 5640 : foreach(lc, input_rel->pathlist)
5158 : {
5159 3284 : Path *input_path = (Path *) lfirst(lc);
5160 : Path *sorted_path;
5161 : bool is_sorted;
5162 : int presorted_keys;
5163 :
5164 3284 : is_sorted = pathkeys_count_contained_in(needed_pathkeys,
5165 : input_path->pathkeys,
5166 : &presorted_keys);
5167 :
5168 3284 : if (is_sorted)
5169 674 : sorted_path = input_path;
5170 : else
5171 : {
5172 : /*
5173 : * Try at least sorting the cheapest path and also try
5174 : * incrementally sorting any path which is partially sorted
5175 : * already (no need to deal with paths which have presorted
5176 : * keys when incremental sort is disabled unless it's the
5177 : * cheapest input path).
5178 : */
5179 2610 : if (input_path != cheapest_input_path &&
5180 442 : (presorted_keys == 0 || !enable_incremental_sort))
5181 82 : continue;
5182 :
5183 : /*
5184 : * We've no need to consider both a sort and incremental sort.
5185 : * We'll just do a sort if there are no presorted keys and an
5186 : * incremental sort when there are presorted keys.
5187 : */
5188 2528 : if (presorted_keys == 0 || !enable_incremental_sort)
5189 2150 : sorted_path = (Path *) create_sort_path(root,
5190 : distinct_rel,
5191 : input_path,
5192 : needed_pathkeys,
5193 : limittuples);
5194 : else
5195 378 : sorted_path = (Path *) create_incremental_sort_path(root,
5196 : distinct_rel,
5197 : input_path,
5198 : needed_pathkeys,
5199 : presorted_keys,
5200 : limittuples);
5201 : }
5202 :
5203 : /*
5204 : * distinct_pathkeys may have become empty if all of the pathkeys
5205 : * were determined to be redundant. If all of the pathkeys are
5206 : * redundant then each DISTINCT target must only allow a single
5207 : * value, therefore all resulting tuples must be identical (or at
5208 : * least indistinguishable by an equality check). We can uniquify
5209 : * these tuples simply by just taking the first tuple. All we do
5210 :      * these tuples simply by taking the first tuple.  All we do
5211 : * doing a DISTINCT ON we may still have a non-NIL sort_pathkeys
5212 : * list, so we must still only do this with paths which are
5213 : * correctly sorted by sort_pathkeys.
5214 : */
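			/*
			 * Editorial example (hypothetical): in
			 *		SELECT DISTINCT a FROM t WHERE a = 1;
			 * the pathkey for "a" is redundant because the WHERE clause pins
			 * it to a single value, so distinct_pathkeys is NIL and the
			 * branch below puts a "LIMIT 1" atop sorted_path instead of a
			 * Unique node.
			 */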
5215 3202 : if (root->distinct_pathkeys == NIL)
5216 : {
5217 : Node *limitCount;
5218 :
5219 98 : limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
5220 : sizeof(int64),
5221 : Int64GetDatum(1), false,
5222 : FLOAT8PASSBYVAL);
5223 :
5224 : /*
5225 : * If the query already has a LIMIT clause, then we could end
5226 : * up with a duplicate LimitPath in the final plan. That does
5227 : * not seem worth troubling over too much.
5228 : */
5229 98 : add_path(distinct_rel, (Path *)
5230 98 : create_limit_path(root, distinct_rel, sorted_path,
5231 : NULL, limitCount,
5232 : LIMIT_OPTION_COUNT, 0, 1));
5233 : }
5234 : else
5235 : {
5236 3104 : add_path(distinct_rel, (Path *)
5237 3104 : create_upper_unique_path(root, distinct_rel,
5238 : sorted_path,
5239 3104 : list_length(root->distinct_pathkeys),
5240 : numDistinctRows));
5241 : }
5242 : }
5243 : }
5244 :
5245 : /*
5246 : * Consider hash-based implementations of DISTINCT, if possible.
5247 : *
5248 : * If we were not able to make any other types of path, we *must* hash or
5249 : * die trying. If we do have other choices, there are two things that
5250 : * should prevent selection of hashing: if the query uses DISTINCT ON
5251 : * (because it won't really have the expected behavior if we hash), or if
5252 : * enable_hashagg is off.
5253 : *
5254 : * Note: grouping_is_hashable() is much more expensive to check than the
5255 : * other gating conditions, so we want to do it last.
5256 : */
5257 2362 : if (distinct_rel->pathlist == NIL)
5258 6 : allow_hash = true; /* we have no alternatives */
5259 2356 : else if (parse->hasDistinctOn || !enable_hashagg)
5260 284 : allow_hash = false; /* policy-based decision not to hash */
5261 : else
5262 2072 : allow_hash = true; /* default */
5263 :
5264 2362 : if (allow_hash && grouping_is_hashable(root->processed_distinctClause))
5265 : {
5266 : /* Generate hashed aggregate path --- no sort needed */
5267 2078 : add_path(distinct_rel, (Path *)
5268 2078 : create_agg_path(root,
5269 : distinct_rel,
5270 : cheapest_input_path,
5271 : cheapest_input_path->pathtarget,
5272 : AGG_HASHED,
5273 : AGGSPLIT_SIMPLE,
5274 : root->processed_distinctClause,
5275 : NIL,
5276 : NULL,
5277 : numDistinctRows));
5278 : }
5279 :
5280 2362 : return distinct_rel;
5281 : }
5282 :
5283 : /*
5284 : * create_ordered_paths
5285 : *
5286 : * Build a new upperrel containing Paths for ORDER BY evaluation.
5287 : *
5288 : * All paths in the result must satisfy the ORDER BY ordering.
5289 : * The only new paths we need consider are an explicit full sort
5290 : * and incremental sort on the cheapest-total existing path.
5291 : *
5292 : * input_rel: contains the source-data Paths
5293 : * target: the output tlist the result Paths must emit
5294 : * limit_tuples: estimated bound on the number of output tuples,
5295 : * or -1 if no LIMIT or couldn't estimate
5296 : *
5297 : * XXX This only looks at sort_pathkeys. I wonder if it needs to look at the
5298 : * other pathkeys (grouping, ...) like generate_useful_gather_paths.
5299 : */
5300 : static RelOptInfo *
5301 62744 : create_ordered_paths(PlannerInfo *root,
5302 : RelOptInfo *input_rel,
5303 : PathTarget *target,
5304 : bool target_parallel_safe,
5305 : double limit_tuples)
5306 : {
5307 62744 : Path *cheapest_input_path = input_rel->cheapest_total_path;
5308 : RelOptInfo *ordered_rel;
5309 : ListCell *lc;
5310 :
5311 : /* For now, do all work in the (ORDERED, NULL) upperrel */
5312 62744 : ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
5313 :
5314 : /*
5315 : * If the input relation is not parallel-safe, then the ordered relation
5316 : * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
5317 : * target list is parallel-safe.
5318 : */
5319 62744 : if (input_rel->consider_parallel && target_parallel_safe)
5320 42056 : ordered_rel->consider_parallel = true;
5321 :
5322 : /*
5323 : * If the input rel belongs to a single FDW, so does the ordered_rel.
5324 : */
5325 62744 : ordered_rel->serverid = input_rel->serverid;
5326 62744 : ordered_rel->userid = input_rel->userid;
5327 62744 : ordered_rel->useridiscurrent = input_rel->useridiscurrent;
5328 62744 : ordered_rel->fdwroutine = input_rel->fdwroutine;
5329 :
5330 156580 : foreach(lc, input_rel->pathlist)
5331 : {
5332 93836 : Path *input_path = (Path *) lfirst(lc);
5333 : Path *sorted_path;
5334 : bool is_sorted;
5335 : int presorted_keys;
5336 :
5337 93836 : is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
5338 : input_path->pathkeys, &presorted_keys);
5339 :
5340 93836 : if (is_sorted)
5341 33680 : sorted_path = input_path;
5342 : else
5343 : {
5344 : /*
5345 : * Try at least sorting the cheapest path and also try
5346 : * incrementally sorting any path which is partially sorted
5347 : * already (no need to deal with paths which have presorted keys
5348 : * when incremental sort is disabled unless it's the cheapest
5349 : * input path).
5350 : */
5351 60156 : if (input_path != cheapest_input_path &&
5352 5336 : (presorted_keys == 0 || !enable_incremental_sort))
5353 1698 : continue;
5354 :
5355 : /*
5356 : * We've no need to consider both a sort and incremental sort.
5357 : * We'll just do a sort if there are no presorted keys and an
5358 : * incremental sort when there are presorted keys.
5359 : */
5360 58458 : if (presorted_keys == 0 || !enable_incremental_sort)
5361 54424 : sorted_path = (Path *) create_sort_path(root,
5362 : ordered_rel,
5363 : input_path,
5364 : root->sort_pathkeys,
5365 : limit_tuples);
5366 : else
5367 4034 : sorted_path = (Path *) create_incremental_sort_path(root,
5368 : ordered_rel,
5369 : input_path,
5370 : root->sort_pathkeys,
5371 : presorted_keys,
5372 : limit_tuples);
5373 : }
5374 :
5375 : /*
5376 : * If the pathtarget of the result path has different expressions from
5377 : * the target to be applied, a projection step is needed.
5378 : */
5379 92138 : if (!equal(sorted_path->pathtarget->exprs, target->exprs))
5380 294 : sorted_path = apply_projection_to_path(root, ordered_rel,
5381 : sorted_path, target);
5382 :
5383 92138 : add_path(ordered_rel, sorted_path);
5384 : }
5385 :
5386 : /*
5387 : * generate_gather_paths() will have already generated a simple Gather
5388 : * path for the best parallel path, if any, and the loop above will have
5389 : * considered sorting it. Similarly, generate_gather_paths() will also
5390 : * have generated order-preserving Gather Merge plans which can be used
5391 : * without sorting if they happen to match the sort_pathkeys, and the loop
5392 : * above will have handled those as well. However, there's one more
5393 : * possibility: it may make sense to sort the cheapest partial path or
5394 : * incrementally sort any partial path that is partially sorted according
5395 : * to the required output order and then use Gather Merge.
5396 : */
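	/*
	 * Editorial sketch (illustrative): for "SELECT * FROM t ORDER BY a" over
	 * a parallel-safe scan, the extra shape considered here is
	 *		Gather Merge
	 *		  -> Sort (a)				-- or Incremental Sort
	 *			 -> Parallel Seq Scan on t
	 * i.e. each worker sorts its own share of the rows and the leader merges
	 * the already-sorted streams.
	 */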
5397 62744 : if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
5398 41918 : input_rel->partial_pathlist != NIL)
5399 : {
5400 : Path *cheapest_partial_path;
5401 :
5402 2150 : cheapest_partial_path = linitial(input_rel->partial_pathlist);
5403 :
5404 4506 : foreach(lc, input_rel->partial_pathlist)
5405 : {
5406 2356 : Path *input_path = (Path *) lfirst(lc);
5407 : Path *sorted_path;
5408 : bool is_sorted;
5409 : int presorted_keys;
5410 : double total_groups;
5411 :
5412 2356 : is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
5413 : input_path->pathkeys,
5414 : &presorted_keys);
5415 :
5416 2356 : if (is_sorted)
5417 182 : continue;
5418 :
5419 : /*
5420 : * Try at least sorting the cheapest path and also try
5421 : * incrementally sorting any path which is partially sorted
5422 : * already (no need to deal with paths which have presorted keys
5423 : * when incremental sort is disabled unless it's the cheapest
5424 : * partial path).
5425 : */
5426 2174 : if (input_path != cheapest_partial_path &&
5427 42 : (presorted_keys == 0 || !enable_incremental_sort))
5428 0 : continue;
5429 :
5430 : /*
5431 : * We've no need to consider both a sort and incremental sort.
5432 : * We'll just do a sort if there are no presorted keys and an
5433 : * incremental sort when there are presorted keys.
5434 : */
5435 2174 : if (presorted_keys == 0 || !enable_incremental_sort)
5436 2114 : sorted_path = (Path *) create_sort_path(root,
5437 : ordered_rel,
5438 : input_path,
5439 : root->sort_pathkeys,
5440 : limit_tuples);
5441 : else
5442 60 : sorted_path = (Path *) create_incremental_sort_path(root,
5443 : ordered_rel,
5444 : input_path,
5445 : root->sort_pathkeys,
5446 : presorted_keys,
5447 : limit_tuples);
5448 2174 : total_groups = compute_gather_rows(sorted_path);
5449 : sorted_path = (Path *)
5450 2174 : create_gather_merge_path(root, ordered_rel,
5451 : sorted_path,
5452 : sorted_path->pathtarget,
5453 : root->sort_pathkeys, NULL,
5454 : &total_groups);
5455 :
5456 : /*
5457 : * If the pathtarget of the result path has different expressions
5458 : * from the target to be applied, a projection step is needed.
5459 : */
5460 2174 : if (!equal(sorted_path->pathtarget->exprs, target->exprs))
5461 6 : sorted_path = apply_projection_to_path(root, ordered_rel,
5462 : sorted_path, target);
5463 :
5464 2174 : add_path(ordered_rel, sorted_path);
5465 : }
5466 : }
5467 :
5468 : /*
5469 : * If there is an FDW that's responsible for all baserels of the query,
5470 : * let it consider adding ForeignPaths.
5471 : */
5472 62744 : if (ordered_rel->fdwroutine &&
5473 380 : ordered_rel->fdwroutine->GetForeignUpperPaths)
5474 366 : ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
5475 : input_rel, ordered_rel,
5476 : NULL);
5477 :
5478 : /* Let extensions possibly add some more paths */
5479 62744 : if (create_upper_paths_hook)
5480 0 : (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
5481 : input_rel, ordered_rel, NULL);
5482 :
5483 : /*
5484 : * No need to bother with set_cheapest here; grouping_planner does not
5485 : * need us to do it.
5486 : */
5487 : Assert(ordered_rel->pathlist != NIL);
5488 :
5489 62744 : return ordered_rel;
5490 : }
5491 :
5492 :
5493 : /*
5494 : * make_group_input_target
5495 : * Generate appropriate PathTarget for initial input to grouping nodes.
5496 : *
5497 : * If there is grouping or aggregation, the scan/join subplan cannot emit
5498 : * the query's final targetlist; for example, it certainly can't emit any
5499 : * aggregate function calls. This routine generates the correct target
5500 : * for the scan/join subplan.
5501 : *
5502 : * The query target list passed from the parser already contains entries
5503 : * for all ORDER BY and GROUP BY expressions, but it will not have entries
5504 : * for variables used only in HAVING clauses; so we need to add those
5505 : * variables to the subplan target list. Also, we flatten all expressions
5506 : * except GROUP BY items into their component variables; other expressions
5507 : * will be computed by the upper plan nodes rather than by the subplan.
5508 : * For example, given a query like
5509 : * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
5510 : * we want to pass this targetlist to the subplan:
5511 : * a+b,c,d
5512 : * where the a+b target will be used by the Sort/Group steps, and the
5513 : * other targets will be used for computing the final results.
5514 : *
5515 : * 'final_target' is the query's final target list (in PathTarget form)
5516 : *
5517 : * The result is the PathTarget to be computed by the Paths returned from
5518 : * query_planner().
5519 : */
5520 : static PathTarget *
5521 39514 : make_group_input_target(PlannerInfo *root, PathTarget *final_target)
5522 : {
5523 39514 : Query *parse = root->parse;
5524 : PathTarget *input_target;
5525 : List *non_group_cols;
5526 : List *non_group_vars;
5527 : int i;
5528 : ListCell *lc;
5529 :
5530 : /*
5531 : * We must build a target containing all grouping columns, plus any other
5532 : * Vars mentioned in the query's targetlist and HAVING qual.
5533 : */
5534 39514 : input_target = create_empty_pathtarget();
5535 39514 : non_group_cols = NIL;
5536 :
5537 39514 : i = 0;
5538 95004 : foreach(lc, final_target->exprs)
5539 : {
5540 55490 : Expr *expr = (Expr *) lfirst(lc);
5541 55490 : Index sgref = get_pathtarget_sortgroupref(final_target, i);
5542 :
5543 64016 : if (sgref && root->processed_groupClause &&
5544 8526 : get_sortgroupref_clause_noerr(sgref,
5545 : root->processed_groupClause) != NULL)
5546 : {
5547 : /*
5548 : * It's a grouping column, so add it to the input target as-is.
5549 : *
5550 : * Note that the target is logically below the grouping step. So
5551 : * with grouping sets we need to remove the RT index of the
5552 :          * grouping step, if any, from the target expression.
5553 : */
5554 6826 : if (parse->hasGroupRTE && parse->groupingSets != NIL)
5555 : {
5556 : Assert(root->group_rtindex > 0);
5557 : expr = (Expr *)
5558 1752 : remove_nulling_relids((Node *) expr,
5559 1752 : bms_make_singleton(root->group_rtindex),
5560 : NULL);
5561 : }
5562 6826 : add_column_to_pathtarget(input_target, expr, sgref);
5563 : }
5564 : else
5565 : {
5566 : /*
5567 : * Non-grouping column, so just remember the expression for later
5568 : * call to pull_var_clause.
5569 : */
5570 48664 : non_group_cols = lappend(non_group_cols, expr);
5571 : }
5572 :
5573 55490 : i++;
5574 : }
5575 :
5576 : /*
5577 : * If there's a HAVING clause, we'll need the Vars it uses, too.
5578 : */
5579 39514 : if (parse->havingQual)
5580 1172 : non_group_cols = lappend(non_group_cols, parse->havingQual);
5581 :
5582 : /*
5583 : * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
5584 : * add them to the input target if not already present. (A Var used
5585 : * directly as a GROUP BY item will be present already.) Note this
5586 : * includes Vars used in resjunk items, so we are covering the needs of
5587 : * ORDER BY and window specifications. Vars used within Aggrefs and
5588 : * WindowFuncs will be pulled out here, too.
5589 : *
5590 : * Note that the target is logically below the grouping step. So with
5591 :      * grouping sets we need to remove the RT index of the grouping step,
5592 :      * if any, from the non-group Vars.
5593 : */
5594 39514 : non_group_vars = pull_var_clause((Node *) non_group_cols,
5595 : PVC_RECURSE_AGGREGATES |
5596 : PVC_RECURSE_WINDOWFUNCS |
5597 : PVC_INCLUDE_PLACEHOLDERS);
5598 39514 : if (parse->hasGroupRTE && parse->groupingSets != NIL)
5599 : {
5600 : Assert(root->group_rtindex > 0);
5601 : non_group_vars = (List *)
5602 806 : remove_nulling_relids((Node *) non_group_vars,
5603 806 : bms_make_singleton(root->group_rtindex),
5604 : NULL);
5605 : }
5606 39514 : add_new_columns_to_pathtarget(input_target, non_group_vars);
5607 :
5608 : /* clean up cruft */
5609 39514 : list_free(non_group_vars);
5610 39514 : list_free(non_group_cols);
5611 :
5612 : /* XXX this causes some redundant cost calculation ... */
5613 39514 : return set_pathtarget_cost_width(root, input_target);
5614 : }
5615 :
5616 : /*
5617 : * make_partial_grouping_target
5618 : * Generate appropriate PathTarget for output of partial aggregate
5619 : * (or partial grouping, if there are no aggregates) nodes.
5620 : *
5621 : * A partial aggregation node needs to emit all the same aggregates that
5622 : * a regular aggregation node would, plus any aggregates used in HAVING;
5623 : * except that the Aggref nodes should be marked as partial aggregates.
5624 : *
5625 : * In addition, we'd better emit any Vars and PlaceHolderVars that are
5626 : * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
5627 : * these would be Vars that are grouped by or used in grouping expressions.)
5628 : *
5629 : * grouping_target is the tlist to be emitted by the topmost aggregation step.
5630 : * havingQual represents the HAVING clause.
5631 : */
5632 : static PathTarget *
5633 2186 : make_partial_grouping_target(PlannerInfo *root,
5634 : PathTarget *grouping_target,
5635 : Node *havingQual)
5636 : {
5637 : PathTarget *partial_target;
5638 : List *non_group_cols;
5639 : List *non_group_exprs;
5640 : int i;
5641 : ListCell *lc;
5642 :
5643 2186 : partial_target = create_empty_pathtarget();
5644 2186 : non_group_cols = NIL;
5645 :
5646 2186 : i = 0;
5647 7790 : foreach(lc, grouping_target->exprs)
5648 : {
5649 5604 : Expr *expr = (Expr *) lfirst(lc);
5650 5604 : Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
5651 :
5652 9418 : if (sgref && root->processed_groupClause &&
5653 3814 : get_sortgroupref_clause_noerr(sgref,
5654 : root->processed_groupClause) != NULL)
5655 : {
5656 : /*
5657 : * It's a grouping column, so add it to the partial_target as-is.
5658 : * (This allows the upper agg step to repeat the grouping calcs.)
5659 : */
5660 1900 : add_column_to_pathtarget(partial_target, expr, sgref);
5661 : }
5662 : else
5663 : {
5664 : /*
5665 : * Non-grouping column, so just remember the expression for later
5666 : * call to pull_var_clause.
5667 : */
5668 3704 : non_group_cols = lappend(non_group_cols, expr);
5669 : }
5670 :
5671 5604 : i++;
5672 : }
5673 :
5674 : /*
5675 : * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
5676 : */
5677 2186 : if (havingQual)
5678 824 : non_group_cols = lappend(non_group_cols, havingQual);
5679 :
5680 : /*
5681 : * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
5682 : * non-group cols (plus HAVING), and add them to the partial_target if not
5683 : * already present. (An expression used directly as a GROUP BY item will
5684 : * be present already.) Note this includes Vars used in resjunk items, so
5685 : * we are covering the needs of ORDER BY and window specifications.
5686 : */
5687 2186 : non_group_exprs = pull_var_clause((Node *) non_group_cols,
5688 : PVC_INCLUDE_AGGREGATES |
5689 : PVC_RECURSE_WINDOWFUNCS |
5690 : PVC_INCLUDE_PLACEHOLDERS);
5691 :
5692 2186 : add_new_columns_to_pathtarget(partial_target, non_group_exprs);
5693 :
5694 : /*
5695 : * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
5696 : * are at the top level of the target list, so we can just scan the list
5697 : * rather than recursing through the expression trees.
5698 : */
5699 8362 : foreach(lc, partial_target->exprs)
5700 : {
5701 6176 : Aggref *aggref = (Aggref *) lfirst(lc);
5702 :
5703 6176 : if (IsA(aggref, Aggref))
5704 : {
5705 : Aggref *newaggref;
5706 :
5707 : /*
5708 : * We shouldn't need to copy the substructure of the Aggref node,
5709 : * but flat-copy the node itself to avoid damaging other trees.
5710 : */
5711 4246 : newaggref = makeNode(Aggref);
5712 4246 : memcpy(newaggref, aggref, sizeof(Aggref));
5713 :
5714 : /* For now, assume serialization is required */
5715 4246 : mark_partial_aggref(newaggref, AGGSPLIT_INITIAL_SERIAL);
5716 :
5717 4246 : lfirst(lc) = newaggref;
5718 : }
5719 : }
5720 :
5721 : /* clean up cruft */
5722 2186 : list_free(non_group_exprs);
5723 2186 : list_free(non_group_cols);
5724 :
5725 : /* XXX this causes some redundant cost calculation ... */
5726 2186 : return set_pathtarget_cost_width(root, partial_target);
5727 : }
5728 :
5729 : /*
5730 : * mark_partial_aggref
5731 : * Adjust an Aggref to make it represent a partial-aggregation step.
5732 : *
5733 : * The Aggref node is modified in-place; caller must do any copying required.
5734 : */
5735 : void
5736 7054 : mark_partial_aggref(Aggref *agg, AggSplit aggsplit)
5737 : {
5738 : /* aggtranstype should be computed by this point */
5739 : Assert(OidIsValid(agg->aggtranstype));
5740 : /* ... but aggsplit should still be as the parser left it */
5741 : Assert(agg->aggsplit == AGGSPLIT_SIMPLE);
5742 :
5743 : /* Mark the Aggref with the intended partial-aggregation mode */
5744 7054 : agg->aggsplit = aggsplit;
5745 :
5746 : /*
5747 : * Adjust result type if needed. Normally, a partial aggregate returns
5748 : * the aggregate's transition type; but if that's INTERNAL and we're
5749 : * serializing, it returns BYTEA instead.
5750 : */
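	/*
	 * Editorial example (illustrative): sum(int4) has an int8 transition
	 * type, so a partial sum() emits int8 values for the finalizing stage to
	 * combine, whereas avg(int8) keeps its state in an INTERNAL-typed
	 * structure, so with serialization requested the partial avg() emits
	 * bytea instead.
	 */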
5751 7054 : if (DO_AGGSPLIT_SKIPFINAL(aggsplit))
5752 : {
5753 5650 : if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit))
5754 242 : agg->aggtype = BYTEAOID;
5755 : else
5756 5408 : agg->aggtype = agg->aggtranstype;
5757 : }
5758 7054 : }
5759 :
5760 : /*
5761 : * postprocess_setop_tlist
5762 : * Fix up targetlist returned by plan_set_operations().
5763 : *
5764 : * We need to transpose sort key info from the orig_tlist into new_tlist.
5765 : * NOTE: this would not be good enough if we supported resjunk sort keys
5766 : * for results of set operations --- then, we'd need to project a whole
5767 : * new tlist to evaluate the resjunk columns. For now, just ereport if we
5768 : * find any resjunk columns in orig_tlist.
5769 : */
5770 : static List *
5771 5438 : postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
5772 : {
5773 : ListCell *l;
5774 5438 : ListCell *orig_tlist_item = list_head(orig_tlist);
5775 :
5776 22340 : foreach(l, new_tlist)
5777 : {
5778 16902 : TargetEntry *new_tle = lfirst_node(TargetEntry, l);
5779 : TargetEntry *orig_tle;
5780 :
5781 : /* ignore resjunk columns in setop result */
5782 16902 : if (new_tle->resjunk)
5783 560 : continue;
5784 :
5785 : Assert(orig_tlist_item != NULL);
5786 16342 : orig_tle = lfirst_node(TargetEntry, orig_tlist_item);
5787 16342 : orig_tlist_item = lnext(orig_tlist, orig_tlist_item);
5788 16342 : if (orig_tle->resjunk) /* should not happen */
5789 0 : elog(ERROR, "resjunk output columns are not implemented");
5790 : Assert(new_tle->resno == orig_tle->resno);
5791 16342 : new_tle->ressortgroupref = orig_tle->ressortgroupref;
5792 : }
5793 5438 : if (orig_tlist_item != NULL)
5794 0 : elog(ERROR, "resjunk output columns are not implemented");
5795 5438 : return new_tlist;
5796 : }
5797 :
5798 : /*
5799 : * optimize_window_clauses
5800 : * Call each WindowFunc's prosupport function to see if we're able to
5801 :  *      make any adjustments to any of the WindowClauses so that the executor
5802 : * can execute the window functions in a more optimal way.
5803 : *
5804 : * Currently we only allow adjustments to the WindowClause's frameOptions. We
5805 : * may allow more things to be done here in the future.
5806 : */
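/*
 * Editorial note (illustrative): row_number(), rank() and similar functions
 * do not depend on the window frame at all, so their prosupport functions
 * can nominate a cheaper-to-execute frame (for example a simple ROWS-based
 * frame).  If two WindowClauses become identical after such an adjustment,
 * the code below also merges them so that only one WindowAgg is needed.
 */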
5807 : static void
5808 2330 : optimize_window_clauses(PlannerInfo *root, WindowFuncLists *wflists)
5809 : {
5810 2330 : List *windowClause = root->parse->windowClause;
5811 : ListCell *lc;
5812 :
5813 4870 : foreach(lc, windowClause)
5814 : {
5815 2540 : WindowClause *wc = lfirst_node(WindowClause, lc);
5816 : ListCell *lc2;
5817 2540 : int optimizedFrameOptions = 0;
5818 :
5819 : Assert(wc->winref <= wflists->maxWinRef);
5820 :
5821 : /* skip any WindowClauses that have no WindowFuncs */
5822 2540 : if (wflists->windowFuncs[wc->winref] == NIL)
5823 24 : continue;
5824 :
5825 3050 : foreach(lc2, wflists->windowFuncs[wc->winref])
5826 : {
5827 : SupportRequestOptimizeWindowClause req;
5828 : SupportRequestOptimizeWindowClause *res;
5829 2552 : WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
5830 : Oid prosupport;
5831 :
5832 2552 : prosupport = get_func_support(wfunc->winfnoid);
5833 :
5834 : /* Check if there's a support function for 'wfunc' */
5835 2552 : if (!OidIsValid(prosupport))
5836 2018 : break; /* can't optimize this WindowClause */
5837 :
5838 754 : req.type = T_SupportRequestOptimizeWindowClause;
5839 754 : req.window_clause = wc;
5840 754 : req.window_func = wfunc;
5841 754 : req.frameOptions = wc->frameOptions;
5842 :
5843 : /* call the support function */
5844 : res = (SupportRequestOptimizeWindowClause *)
5845 754 : DatumGetPointer(OidFunctionCall1(prosupport,
5846 : PointerGetDatum(&req)));
5847 :
5848 : /*
5849 : * Skip to next WindowClause if the support function does not
5850 : * support this request type.
5851 : */
5852 754 : if (res == NULL)
5853 220 : break;
5854 :
5855 : /*
5856 : * Save these frameOptions for the first WindowFunc for this
5857 : * WindowClause.
5858 : */
5859 534 : if (foreach_current_index(lc2) == 0)
5860 510 : optimizedFrameOptions = res->frameOptions;
5861 :
5862 : /*
5863 : * On subsequent WindowFuncs, if the frameOptions are not the same
5864 : * then we're unable to optimize the frameOptions for this
5865 : * WindowClause.
5866 : */
5867 24 : else if (optimizedFrameOptions != res->frameOptions)
5868 0 : break; /* skip to the next WindowClause, if any */
5869 : }
5870 :
5871 : /* adjust the frameOptions if all WindowFunc's agree that it's ok */
5872 2516 : if (lc2 == NULL && wc->frameOptions != optimizedFrameOptions)
5873 : {
5874 : ListCell *lc3;
5875 :
5876 : /* apply the new frame options */
5877 498 : wc->frameOptions = optimizedFrameOptions;
5878 :
5879 : /*
5880 : * We now check to see if changing the frameOptions has caused
5881 : * this WindowClause to be a duplicate of some other WindowClause.
5882 : * This can only happen if we have multiple WindowClauses, so
5883 : * don't bother if there's only 1.
5884 : */
5885 498 : if (list_length(windowClause) == 1)
5886 408 : continue;
5887 :
5888 : /*
5889 : * Do the duplicate check and reuse the existing WindowClause if
5890 : * we find a duplicate.
5891 : */
5892 228 : foreach(lc3, windowClause)
5893 : {
5894 174 : WindowClause *existing_wc = lfirst_node(WindowClause, lc3);
5895 :
5896 : /* skip over the WindowClause we're currently editing */
5897 174 : if (existing_wc == wc)
5898 54 : continue;
5899 :
5900 : /*
5901 : * Perform the same duplicate check that is done in
5902 : * transformWindowFuncCall.
5903 : */
5904 240 : if (equal(wc->partitionClause, existing_wc->partitionClause) &&
5905 120 : equal(wc->orderClause, existing_wc->orderClause) &&
5906 120 : wc->frameOptions == existing_wc->frameOptions &&
5907 72 : equal(wc->startOffset, existing_wc->startOffset) &&
5908 36 : equal(wc->endOffset, existing_wc->endOffset))
5909 : {
5910 : ListCell *lc4;
5911 :
5912 : /*
5913 : * Now move each WindowFunc in 'wc' into 'existing_wc'.
5914 :                  * This requires adjusting each WindowFunc's winref and
5915 : * moving the WindowFuncs in 'wc' to the list of
5916 : * WindowFuncs in 'existing_wc'.
5917 : */
5918 78 : foreach(lc4, wflists->windowFuncs[wc->winref])
5919 : {
5920 42 : WindowFunc *wfunc = lfirst_node(WindowFunc, lc4);
5921 :
5922 42 : wfunc->winref = existing_wc->winref;
5923 : }
5924 :
5925 : /* move list items */
5926 72 : wflists->windowFuncs[existing_wc->winref] = list_concat(wflists->windowFuncs[existing_wc->winref],
5927 36 : wflists->windowFuncs[wc->winref]);
5928 36 : wflists->windowFuncs[wc->winref] = NIL;
5929 :
5930 : /*
5931 : * transformWindowFuncCall() should have made sure there
5932 : * are no other duplicates, so we needn't bother looking
5933 : * any further.
5934 : */
5935 36 : break;
5936 : }
5937 : }
5938 : }
5939 : }
5940 2330 : }
5941 :
5942 : /*
5943 : * select_active_windows
5944 : * Create a list of the "active" window clauses (ie, those referenced
5945 : * by non-deleted WindowFuncs) in the order they are to be executed.
5946 : */
5947 : static List *
5948 2330 : select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
5949 : {
5950 2330 : List *windowClause = root->parse->windowClause;
5951 2330 : List *result = NIL;
5952 : ListCell *lc;
5953 2330 : int nActive = 0;
5954 2330 : WindowClauseSortData *actives = palloc(sizeof(WindowClauseSortData)
5955 2330 : * list_length(windowClause));
5956 :
5957 : /* First, construct an array of the active windows */
5958 4870 : foreach(lc, windowClause)
5959 : {
5960 2540 : WindowClause *wc = lfirst_node(WindowClause, lc);
5961 :
5962 : /* It's only active if wflists shows some related WindowFuncs */
5963 : Assert(wc->winref <= wflists->maxWinRef);
5964 2540 : if (wflists->windowFuncs[wc->winref] == NIL)
5965 60 : continue;
5966 :
5967 2480 : actives[nActive].wc = wc; /* original clause */
5968 :
5969 : /*
5970 : * For sorting, we want the list of partition keys followed by the
5971 : * list of sort keys. But pathkeys construction will remove duplicates
5972 : * between the two, so we can as well (even though we can't detect all
5973 : * of the duplicates, since some may come from ECs - that might mean
5974 : * we miss optimization chances here). We must, however, ensure that
5975 : * the order of entries is preserved with respect to the ones we do
5976 : * keep.
5977 : *
5978 : * partitionClause and orderClause had their own duplicates removed in
5979 : * parse analysis, so we're only concerned here with removing
5980 : * orderClause entries that also appear in partitionClause.
5981 : */
5982 4960 : actives[nActive].uniqueOrder =
5983 2480 : list_concat_unique(list_copy(wc->partitionClause),
5984 2480 : wc->orderClause);
5985 2480 : nActive++;
5986 : }
5987 :
5988 : /*
5989 : * Sort active windows by their partitioning/ordering clauses, ignoring
5990 : * any framing clauses, so that the windows that need the same sorting are
5991 : * adjacent in the list. When we come to generate paths, this will avoid
5992 : * inserting additional Sort nodes.
5993 : *
5994 : * This is how we implement a specific requirement from the SQL standard,
5995 : * which says that when two or more windows are order-equivalent (i.e.
5996 : * have matching partition and order clauses, even if their names or
5997 : * framing clauses differ), then all peer rows must be presented in the
5998 : * same order in all of them. If we allowed multiple sort nodes for such
5999 : * cases, we'd risk having the peer rows end up in different orders in
6000 : * equivalent windows due to sort instability. (See General Rule 4 of
6001 : * <window clause> in SQL2008 - SQL2016.)
6002 : *
6003 : * Additionally, if the entire list of clauses of one window is a prefix
6004 : * of another, put first the window with stronger sorting requirements.
6005 :      * This way we will sort for the stronger window first, and won't have
6006 :      * to sort again for the weaker one.
6007 : */
6008 2330 : qsort(actives, nActive, sizeof(WindowClauseSortData), common_prefix_cmp);
6009 :
6010 : /* build ordered list of the original WindowClause nodes */
6011 4810 : for (int i = 0; i < nActive; i++)
6012 2480 : result = lappend(result, actives[i].wc);
6013 :
6014 2330 : pfree(actives);
6015 :
6016 2330 : return result;
6017 : }
6018 :
6019 : /*
6020 : * common_prefix_cmp
6021 : * QSort comparison function for WindowClauseSortData
6022 : *
6023 : * Sort the windows by the required sorting clauses. First, compare the sort
6024 : * clauses themselves. Second, if one window's clauses are a prefix of another
6025 : * one's clauses, put the window with more sort clauses first.
6026 : *
6027 : * We purposefully sort by the highest tleSortGroupRef first. Since
6028 : * tleSortGroupRefs are assigned for the query's DISTINCT and ORDER BY first
6029 : * and because here we sort the lowest tleSortGroupRefs last, if a
6030 : * WindowClause is sharing a tleSortGroupRef with the query's DISTINCT or
6031 : * ORDER BY clause, this makes it more likely that the final WindowAgg will
6032 : * provide presorted input for the query's DISTINCT or ORDER BY clause, thus
6033 : * reducing the total number of sorts required for the query.
6034 : */
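/*
 * Editorial example (hypothetical window definitions): given
 *		w1 AS (PARTITION BY a ORDER BY b)  and  w2 AS (PARTITION BY a)
 * w2's uniqueOrder is a prefix of w1's, so this comparator sorts w1 first;
 * sorting the input by (a, b) for w1 then also satisfies w2, and no second
 * sort is needed.
 */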
6035 : static int
6036 162 : common_prefix_cmp(const void *a, const void *b)
6037 : {
6038 162 : const WindowClauseSortData *wcsa = a;
6039 162 : const WindowClauseSortData *wcsb = b;
6040 : ListCell *item_a;
6041 : ListCell *item_b;
6042 :
6043 276 : forboth(item_a, wcsa->uniqueOrder, item_b, wcsb->uniqueOrder)
6044 : {
6045 216 : SortGroupClause *sca = lfirst_node(SortGroupClause, item_a);
6046 216 : SortGroupClause *scb = lfirst_node(SortGroupClause, item_b);
6047 :
6048 216 : if (sca->tleSortGroupRef > scb->tleSortGroupRef)
6049 102 : return -1;
6050 204 : else if (sca->tleSortGroupRef < scb->tleSortGroupRef)
6051 66 : return 1;
6052 138 : else if (sca->sortop > scb->sortop)
6053 0 : return -1;
6054 138 : else if (sca->sortop < scb->sortop)
6055 24 : return 1;
6056 114 : else if (sca->nulls_first && !scb->nulls_first)
6057 0 : return -1;
6058 114 : else if (!sca->nulls_first && scb->nulls_first)
6059 0 : return 1;
6060 : /* no need to compare eqop, since it is fully determined by sortop */
6061 : }
6062 :
6063 60 : if (list_length(wcsa->uniqueOrder) > list_length(wcsb->uniqueOrder))
6064 6 : return -1;
6065 54 : else if (list_length(wcsa->uniqueOrder) < list_length(wcsb->uniqueOrder))
6066 12 : return 1;
6067 :
6068 42 : return 0;
6069 : }
6070 :
6071 : /*
6072 : * make_window_input_target
6073 : * Generate appropriate PathTarget for initial input to WindowAgg nodes.
6074 : *
6075 : * When the query has window functions, this function computes the desired
6076 : * target to be computed by the node just below the first WindowAgg.
6077 : * This tlist must contain all values needed to evaluate the window functions,
6078 : * compute the final target list, and perform any required final sort step.
6079 : * If multiple WindowAggs are needed, each intermediate one adds its window
6080 : * function results onto this base tlist; only the topmost WindowAgg computes
6081 : * the actual desired target list.
6082 : *
6083 : * This function is much like make_group_input_target, though not quite enough
6084 : * like it to share code. As in that function, we flatten most expressions
6085 : * into their component variables. But we do not want to flatten window
6086 : * PARTITION BY/ORDER BY clauses, since that might result in multiple
6087 : * evaluations of them, which would be bad (possibly even resulting in
6088 : * inconsistent answers, if they contain volatile functions).
6089 : * Also, we must not flatten GROUP BY clauses that were left unflattened by
6090 : * make_group_input_target, because we may no longer have access to the
6091 : * individual Vars in them.
6092 : *
6093 : * Another key difference from make_group_input_target is that we don't
6094 : * flatten Aggref expressions, since those are to be computed below the
6095 : * window functions and just referenced like Vars above that.
6096 : *
6097 : * 'final_target' is the query's final target list (in PathTarget form)
6098 : * 'activeWindows' is the list of active windows previously identified by
6099 : * select_active_windows.
6100 : *
6101 : * The result is the PathTarget to be computed by the plan node immediately
6102 : * below the first WindowAgg node.
6103 : */
6104 : static PathTarget *
6105 2330 : make_window_input_target(PlannerInfo *root,
6106 : PathTarget *final_target,
6107 : List *activeWindows)
6108 : {
6109 : PathTarget *input_target;
6110 : Bitmapset *sgrefs;
6111 : List *flattenable_cols;
6112 : List *flattenable_vars;
6113 : int i;
6114 : ListCell *lc;
6115 :
6116 : Assert(root->parse->hasWindowFuncs);
6117 :
6118 : /*
6119 : * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
6120 : * into a bitmapset for convenient reference below.
6121 : */
6122 2330 : sgrefs = NULL;
6123 4810 : foreach(lc, activeWindows)
6124 : {
6125 2480 : WindowClause *wc = lfirst_node(WindowClause, lc);
6126 : ListCell *lc2;
6127 :
6128 3186 : foreach(lc2, wc->partitionClause)
6129 : {
6130 706 : SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2);
6131 :
6132 706 : sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
6133 : }
6134 4624 : foreach(lc2, wc->orderClause)
6135 : {
6136 2144 : SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2);
6137 :
6138 2144 : sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
6139 : }
6140 : }
6141 :
6142 : /* Add in sortgroupref numbers of GROUP BY clauses, too */
6143 2516 : foreach(lc, root->processed_groupClause)
6144 : {
6145 186 : SortGroupClause *grpcl = lfirst_node(SortGroupClause, lc);
6146 :
6147 186 : sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef);
6148 : }
6149 :
6150 : /*
6151 : * Construct a target containing all the non-flattenable targetlist items,
6152 : * and save aside the others for a moment.
6153 : */
6154 2330 : input_target = create_empty_pathtarget();
6155 2330 : flattenable_cols = NIL;
6156 :
6157 2330 : i = 0;
6158 10090 : foreach(lc, final_target->exprs)
6159 : {
6160 7760 : Expr *expr = (Expr *) lfirst(lc);
6161 7760 : Index sgref = get_pathtarget_sortgroupref(final_target, i);
6162 :
6163 : /*
6164 : * Don't want to deconstruct window clauses or GROUP BY items. (Note
6165 : * that such items can't contain window functions, so it's okay to
6166 : * compute them below the WindowAgg nodes.)
6167 : */
6168 7760 : if (sgref != 0 && bms_is_member(sgref, sgrefs))
6169 : {
6170 : /*
6171 : * Don't want to deconstruct this value, so add it to the input
6172 : * target as-is.
6173 : */
6174 2740 : add_column_to_pathtarget(input_target, expr, sgref);
6175 : }
6176 : else
6177 : {
6178 : /*
6179 : * Column is to be flattened, so just remember the expression for
6180 : * later call to pull_var_clause.
6181 : */
6182 5020 : flattenable_cols = lappend(flattenable_cols, expr);
6183 : }
6184 :
6185 7760 : i++;
6186 : }
6187 :
6188 : /*
6189 : * Pull out all the Vars and Aggrefs mentioned in flattenable columns, and
6190 : * add them to the input target if not already present. (Some might be
6191 : * there already because they're used directly as window/group clauses.)
6192 : *
6193 : * Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that any
6194 : * Aggrefs are placed in the Agg node's tlist and not left to be computed
6195 : * at higher levels. On the other hand, we should recurse into
6196 : * WindowFuncs to make sure their input expressions are available.
6197 : */
6198 2330 : flattenable_vars = pull_var_clause((Node *) flattenable_cols,
6199 : PVC_INCLUDE_AGGREGATES |
6200 : PVC_RECURSE_WINDOWFUNCS |
6201 : PVC_INCLUDE_PLACEHOLDERS);
6202 2330 : add_new_columns_to_pathtarget(input_target, flattenable_vars);
6203 :
6204 : /* clean up cruft */
6205 2330 : list_free(flattenable_vars);
6206 2330 : list_free(flattenable_cols);
6207 :
6208 : /* XXX this causes some redundant cost calculation ... */
6209 2330 : return set_pathtarget_cost_width(root, input_target);
6210 : }
6211 :
6212 : /*
6213 : * make_pathkeys_for_window
6214 : * Create a pathkeys list describing the required input ordering
6215 : * for the given WindowClause.
6216 : *
6217 : * Modifies wc's partitionClause to remove any clauses which are deemed
6218 : * redundant by the pathkey logic.
6219 : *
6220 : * The required ordering is first the PARTITION keys, then the ORDER keys.
6221 : * In the future we might try to implement windowing using hashing, in which
6222 : * case the ordering could be relaxed, but for now we always sort.
6223 : */
6224 : static List *
6225 5008 : make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
6226 : List *tlist)
6227 : {
6228 5008 : List *window_pathkeys = NIL;
6229 :
6230 : /* Throw error if can't sort */
6231 5008 : if (!grouping_is_sortable(wc->partitionClause))
6232 0 : ereport(ERROR,
6233 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
6234 : errmsg("could not implement window PARTITION BY"),
6235 : errdetail("Window partitioning columns must be of sortable datatypes.")));
6236 5008 : if (!grouping_is_sortable(wc->orderClause))
6237 0 : ereport(ERROR,
6238 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
6239 : errmsg("could not implement window ORDER BY"),
6240 : errdetail("Window ordering columns must be of sortable datatypes.")));
6241 :
6242 : /*
6243 : * First fetch the pathkeys for the PARTITION BY clause. We can safely
6244 : * remove any clauses from the wc->partitionClause for redundant pathkeys.
6245 : */
6246 5008 : if (wc->partitionClause != NIL)
6247 : {
6248 : bool sortable;
6249 :
6250 1186 : window_pathkeys = make_pathkeys_for_sortclauses_extended(root,
6251 : &wc->partitionClause,
6252 : tlist,
6253 : true,
6254 : false,
6255 : &sortable,
6256 : false);
6257 :
6258 : Assert(sortable);
6259 : }
6260 :
6261 : /*
6262 : * In principle, we could also consider removing redundant ORDER BY items,
6263 : * as doing so does not alter the result of peer row checks done by
6264 : * the executor. However, we must *not* remove the ordering column for
6265 : * RANGE OFFSET cases, as the executor needs that for in_range tests even
6266 : * if it's known to be equal to some partitioning column.
6267 : */
6268 5008 : if (wc->orderClause != NIL)
6269 : {
6270 : List *orderby_pathkeys;
6271 :
6272 4210 : orderby_pathkeys = make_pathkeys_for_sortclauses(root,
6273 : wc->orderClause,
6274 : tlist);
6275 :
6276 : /* Okay, make the combined pathkeys */
6277 4210 : if (window_pathkeys != NIL)
6278 880 : window_pathkeys = append_pathkeys(window_pathkeys, orderby_pathkeys);
6279 : else
6280 3330 : window_pathkeys = orderby_pathkeys;
6281 : }
6282 :
6283 5008 : return window_pathkeys;
6284 : }
6285 :
6286 : /*
6287 : * make_sort_input_target
6288 : * Generate appropriate PathTarget for initial input to Sort step.
6289 : *
6290 : * If the query has ORDER BY, this function chooses the target to be computed
6291 : * by the node just below the Sort (and DISTINCT, if any, since Unique can't
6292 : * project) steps. This might or might not be identical to the query's final
6293 : * output target.
6294 : *
6295 : * The main argument for keeping the sort-input tlist the same as the final
6296 : * is that we avoid a separate projection node (which will be needed if
6297 : * they're different, because Sort can't project). However, there are also
6298 : * advantages to postponing tlist evaluation till after the Sort: it ensures
6299 : * a consistent order of evaluation for any volatile functions in the tlist,
6300 : * and if there's also a LIMIT, we can stop the query without ever computing
6301 : * tlist functions for later rows, which is beneficial for both volatile and
6302 : * expensive functions.
6303 : *
6304 : * Our current policy is to postpone volatile expressions till after the sort
6305 : * unconditionally (assuming that that's possible, ie they are in plain tlist
6306 : * columns and not ORDER BY/GROUP BY/DISTINCT columns). We also prefer to
6307 : * postpone set-returning expressions, because running them beforehand would
6308 : * bloat the sort dataset, and because it might cause unexpected output order
6309 : * if the sort isn't stable. However there's a constraint on that: all SRFs
6310 : * in the tlist should be evaluated at the same plan step, so that they can
6311 : * run in sync in nodeProjectSet. So if any SRFs are in sort columns, we
6312 : * mustn't postpone any SRFs. (Note that in principle that policy should
6313 : * probably get applied to the group/window input targetlists too, but we
6314 : * have not done that historically.) Lastly, expensive expressions are
6315 : * postponed if there is a LIMIT, or if root->tuple_fraction shows that
6316 : * partial evaluation of the query is possible (if neither is true, we expect
6317 : * to have to evaluate the expressions for every row anyway), or if there are
6318 : * any volatile or set-returning expressions (since once we've put in a
6319 : * projection at all, it won't cost any more to postpone more stuff).
6320 : *
6321 : * Another issue that could potentially be considered here is that
6322 : * evaluating tlist expressions could result in data that's either wider
6323 : * or narrower than the input Vars, thus changing the volume of data that
6324 : * has to go through the Sort. However, we usually have only a very bad
6325 : * idea of the output width of any expression more complex than a Var,
6326 : * so for now it seems too risky to try to optimize on that basis.
6327 : *
6328 : * Note that if we do produce a modified sort-input target, and then the
6329 : * query ends up not using an explicit Sort, no particular harm is done:
6330 : * we'll initially use the modified target for the preceding path nodes,
6331 : * but then change them to the final target with apply_projection_to_path.
6332 : * Moreover, in such a case the guarantees about evaluation order of
6333 : * volatile functions still hold, since the rows are sorted already.
6334 : *
6335 : * This function has some things in common with make_group_input_target and
6336 : * make_window_input_target, though the detailed rules for what to do are
6337 : * different. We never flatten/postpone any grouping or ordering columns;
6338 : * those are needed before the sort. If we do flatten a particular
6339 : * expression, we leave Aggref and WindowFunc nodes alone, since those were
6340 : * computed earlier.
6341 : *
6342 : * 'final_target' is the query's final target list (in PathTarget form)
6343 : * 'have_postponed_srfs' is an output argument, see below
6344 : *
6345 : * The result is the PathTarget to be computed by the plan node immediately
6346 : * below the Sort step (and the Distinct step, if any). This will be
6347 : * exactly final_target if we decide a projection step wouldn't be helpful.
6348 : *
6349 : * In addition, *have_postponed_srfs is set to true if we choose to postpone
6350 : * any set-returning functions to after the Sort.
6351 : */
6352 : static PathTarget *
6353 59228 : make_sort_input_target(PlannerInfo *root,
6354 : PathTarget *final_target,
6355 : bool *have_postponed_srfs)
6356 : {
6357 59228 : Query *parse = root->parse;
6358 : PathTarget *input_target;
6359 : int ncols;
6360 : bool *col_is_srf;
6361 : bool *postpone_col;
6362 : bool have_srf;
6363 : bool have_volatile;
6364 : bool have_expensive;
6365 : bool have_srf_sortcols;
6366 : bool postpone_srfs;
6367 : List *postponable_cols;
6368 : List *postponable_vars;
6369 : int i;
6370 : ListCell *lc;
6371 :
6372 : /* Shouldn't get here unless query has ORDER BY */
6373 : Assert(parse->sortClause);
6374 :
6375 59228 : *have_postponed_srfs = false; /* default result */
6376 :
6377 : /* Inspect tlist and collect per-column information */
6378 59228 : ncols = list_length(final_target->exprs);
6379 59228 : col_is_srf = (bool *) palloc0(ncols * sizeof(bool));
6380 59228 : postpone_col = (bool *) palloc0(ncols * sizeof(bool));
6381 59228 : have_srf = have_volatile = have_expensive = have_srf_sortcols = false;
6382 :
6383 59228 : i = 0;
6384 355478 : foreach(lc, final_target->exprs)
6385 : {
6386 296250 : Expr *expr = (Expr *) lfirst(lc);
6387 :
6388 : /*
6389 : * If the column has a sortgroupref, assume it has to be evaluated
6390 : * before sorting. Generally such columns would be ORDER BY, GROUP
6391 : * BY, etc targets. One exception is columns that were removed from
6392 : * GROUP BY by remove_useless_groupby_columns() ... but those would
6393 : * only be Vars anyway. There don't seem to be any cases where it
6394 : * would be worth the trouble to double-check.
6395 : */
6396 296250 : if (get_pathtarget_sortgroupref(final_target, i) == 0)
6397 : {
6398 : /*
6399 : * Check for SRF or volatile functions. Check the SRF case first
6400 : * because we must know whether we have any postponed SRFs.
6401 : */
6402 211762 : if (parse->hasTargetSRFs &&
6403 216 : expression_returns_set((Node *) expr))
6404 : {
6405 : /* We'll decide below whether these are postponable */
6406 96 : col_is_srf[i] = true;
6407 96 : have_srf = true;
6408 : }
6409 211450 : else if (contain_volatile_functions((Node *) expr))
6410 : {
6411 : /* Unconditionally postpone */
6412 154 : postpone_col[i] = true;
6413 154 : have_volatile = true;
6414 : }
6415 : else
6416 : {
6417 : /*
6418 : * Else check the cost. XXX it's annoying to have to do this
6419 : * when set_pathtarget_cost_width() just did it. Refactor to
6420 : * allow sharing the work?
6421 : */
6422 : QualCost cost;
6423 :
6424 211296 : cost_qual_eval_node(&cost, (Node *) expr, root);
6425 :
6426 : /*
6427 : * We arbitrarily define "expensive" as "more than 10X
6428 : * cpu_operator_cost". Note this will take in any PL function
6429 : * with default cost.
6430 : */
6431 211296 : if (cost.per_tuple > 10 * cpu_operator_cost)
6432 : {
6433 14942 : postpone_col[i] = true;
6434 14942 : have_expensive = true;
6435 : }
6436 : }
6437 : }
6438 : else
6439 : {
6440 : /* For sortgroupref cols, just check if any contain SRFs */
6441 84704 : if (!have_srf_sortcols &&
6442 85014 : parse->hasTargetSRFs &&
6443 310 : expression_returns_set((Node *) expr))
6444 124 : have_srf_sortcols = true;
6445 : }
6446 :
6447 296250 : i++;
6448 : }
6449 :
6450 : /*
6451 : * We can postpone SRFs if we have some but none are in sortgroupref cols.
6452 : */
6453 59228 : postpone_srfs = (have_srf && !have_srf_sortcols);
6454 :
6455 : /*
6456 : * If we don't need a post-sort projection, just return final_target.
6457 : */
6458 59228 : if (!(postpone_srfs || have_volatile ||
6459 59018 : (have_expensive &&
6460 8978 : (parse->limitCount || root->tuple_fraction > 0))))
6461 58982 : return final_target;
6462 :
6463 : /*
6464 : * Report whether the post-sort projection will contain set-returning
6465 : * functions. This is important because it affects whether the Sort can
6466 : * rely on the query's LIMIT (if any) to bound the number of rows it needs
6467 : * to return.
6468 : */
6469 246 : *have_postponed_srfs = postpone_srfs;
6470 :
6471 : /*
6472 : * Construct the sort-input target, taking all non-postponable columns and
6473 : * then adding Vars, PlaceHolderVars, Aggrefs, and WindowFuncs found in
6474 : * the postponable ones.
6475 : */
6476 246 : input_target = create_empty_pathtarget();
6477 246 : postponable_cols = NIL;
6478 :
6479 246 : i = 0;
6480 1996 : foreach(lc, final_target->exprs)
6481 : {
6482 1750 : Expr *expr = (Expr *) lfirst(lc);
6483 :
6484 1750 : if (postpone_col[i] || (postpone_srfs && col_is_srf[i]))
6485 298 : postponable_cols = lappend(postponable_cols, expr);
6486 : else
6487 1452 : add_column_to_pathtarget(input_target, expr,
6488 1452 : get_pathtarget_sortgroupref(final_target, i));
6489 :
6490 1750 : i++;
6491 : }
6492 :
6493 : /*
6494 : * Pull out all the Vars, Aggrefs, and WindowFuncs mentioned in
6495 : * postponable columns, and add them to the sort-input target if not
6496 : * already present. (Some might be there already.) We mustn't
6497 : * deconstruct Aggrefs or WindowFuncs here, since the projection node
6498 : * would be unable to recompute them.
6499 : */
6500 246 : postponable_vars = pull_var_clause((Node *) postponable_cols,
6501 : PVC_INCLUDE_AGGREGATES |
6502 : PVC_INCLUDE_WINDOWFUNCS |
6503 : PVC_INCLUDE_PLACEHOLDERS);
6504 246 : add_new_columns_to_pathtarget(input_target, postponable_vars);
6505 :
6506 : /* clean up cruft */
6507 246 : list_free(postponable_vars);
6508 246 : list_free(postponable_cols);
6509 :
6510 : /* XXX this represents even more redundant cost calculation ... */
6511 246 : return set_pathtarget_cost_width(root, input_target);
6512 : }
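
For a sense of scale on the "expensive" threshold used above: with cpu_operator_cost at its default of 0.0025, the cutoff is a per-tuple cost of 0.025, so a single operator evaluation is never postponed on cost grounds while a PL function at the documented default cost of 100 units is. A minimal sketch (hypothetical names, not planner.c code; the default costs are assumptions taken from PostgreSQL's documented defaults):

    #include <stdio.h>

    int
    main(void)
    {
        double      cpu_operator_cost = 0.0025;     /* PostgreSQL default */
        double      threshold = 10 * cpu_operator_cost;
        double      plain_op = 1 * cpu_operator_cost;       /* one comparison */
        double      pl_func = 100 * cpu_operator_cost;      /* default procost */

        printf("threshold = %.4f per tuple\n", threshold);
        printf("plain operator postponed?    %s\n",
               plain_op > threshold ? "yes" : "no");
        printf("default PL function postponed? %s\n",
               pl_func > threshold ? "yes" : "no");
        return 0;
    }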
6513 :
6514 : /*
6515 : * get_cheapest_fractional_path
6516 : * Find the cheapest path for retrieving a specified fraction of all
6517 : * the tuples expected to be returned by the given relation.
6518 : *
6519 : * We interpret tuple_fraction the same way as grouping_planner.
6520 : *
6521 : * We assume set_cheapest() has been run on the given rel.
6522 : */
6523 : Path *
6524 484988 : get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction)
6525 : {
6526 484988 : Path *best_path = rel->cheapest_total_path;
6527 : ListCell *l;
6528 :
6529 : /* If all tuples will be retrieved, just return the cheapest-total path */
6530 484988 : if (tuple_fraction <= 0.0)
6531 479506 : return best_path;
6532 :
6533 : /* Convert absolute # of tuples to a fraction; no need to clamp to 0..1 */
6534 5482 : if (tuple_fraction >= 1.0 && best_path->rows > 0)
6535 2092 : tuple_fraction /= best_path->rows;
6536 :
6537 13536 : foreach(l, rel->pathlist)
6538 : {
6539 8054 : Path *path = (Path *) lfirst(l);
6540 :
6541 10626 : if (path == rel->cheapest_total_path ||
6542 2572 : compare_fractional_path_costs(best_path, path, tuple_fraction) <= 0)
6543 7828 : continue;
6544 :
6545 226 : best_path = path;
6546 : }
6547 :
6548 5482 : return best_path;
6549 : }
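
As a rough illustration of the trade-off this function weighs, the sketch below (hypothetical names, not planner.c code) interpolates a path's cost for a fraction f of its rows as approximately startup + f * (total - startup); under that assumption a low-startup path wins for small fractions even though it loses on total cost, which is why cheapest-total is not always the right answer here.

    #include <stdio.h>

    typedef struct
    {
        double      startup_cost;
        double      total_cost;
    } DemoPath;

    /* cost to fetch a fraction of the rows, interpolated between the bounds */
    static double
    demo_fractional_cost(const DemoPath *p, double fraction)
    {
        return p->startup_cost + fraction * (p->total_cost - p->startup_cost);
    }

    int
    main(void)
    {
        DemoPath    fast_start = {0.5, 120.0};  /* e.g. an ordered index scan */
        DemoPath    fast_total = {80.0, 90.0};  /* e.g. seqscan plus sort */
        double      fractions[] = {0.01, 0.25, 1.0};

        for (int i = 0; i < 3; i++)
        {
            double      f = fractions[i];

            printf("f=%.2f  fast_start=%.2f  fast_total=%.2f\n",
                   f,
                   demo_fractional_cost(&fast_start, f),
                   demo_fractional_cost(&fast_total, f));
        }
        return 0;
    }

With f = 0.01 the fast-start path costs about 1.7 versus 80.1, while at f = 1.0 the ordering reverses (120 versus 90).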
6550 :
6551 : /*
6552 : * adjust_paths_for_srfs
6553 : * Fix up the Paths of the given upperrel to handle tSRFs properly.
6554 : *
6555 : * The executor can only handle set-returning functions that appear at the
6556 : * top level of the targetlist of a ProjectSet plan node. If we have any SRFs
6557 : * that are not at top level, we need to split up the evaluation into multiple
6558 : * plan levels in which each level satisfies this constraint. This function
6559 : * modifies each Path of an upperrel that (might) compute any SRFs in its
6560 : * output tlist to insert appropriate projection steps.
6561 : *
6562 : * The given targets and targets_contain_srfs lists are from
6563 : * split_pathtarget_at_srfs(). We assume the existing Paths emit the first
6564 : * target in targets.
6565 : */
6566 : static void
6567 9260 : adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
6568 : List *targets, List *targets_contain_srfs)
6569 : {
6570 : ListCell *lc;
6571 :
6572 : Assert(list_length(targets) == list_length(targets_contain_srfs));
6573 : Assert(!linitial_int(targets_contain_srfs));
6574 :
6575 : /* If no SRFs appear at this plan level, nothing to do */
6576 9260 : if (list_length(targets) == 1)
6577 598 : return;
6578 :
6579 : /*
6580 : * Stack SRF-evaluation nodes atop each path for the rel.
6581 : *
6582 : * In principle we should re-run set_cheapest() here to identify the
6583 : * cheapest path, but it seems unlikely that adding the same tlist eval
6584 : * costs to all the paths would change that, so we don't bother. Instead,
6585 : * just assume that the cheapest-startup and cheapest-total paths remain
6586 : * so. (There should be no parameterized paths anymore, so we needn't
6587 : * worry about updating cheapest_parameterized_paths.)
6588 : */
6589 17352 : foreach(lc, rel->pathlist)
6590 : {
6591 8690 : Path *subpath = (Path *) lfirst(lc);
6592 8690 : Path *newpath = subpath;
6593 : ListCell *lc1,
6594 : *lc2;
6595 :
6596 : Assert(subpath->param_info == NULL);
6597 27188 : forboth(lc1, targets, lc2, targets_contain_srfs)
6598 : {
6599 18498 : PathTarget *thistarget = lfirst_node(PathTarget, lc1);
6600 18498 : bool contains_srfs = (bool) lfirst_int(lc2);
6601 :
6602 : /* If this level doesn't contain SRFs, do regular projection */
6603 18498 : if (contains_srfs)
6604 8750 : newpath = (Path *) create_set_projection_path(root,
6605 : rel,
6606 : newpath,
6607 : thistarget);
6608 : else
6609 9748 : newpath = (Path *) apply_projection_to_path(root,
6610 : rel,
6611 : newpath,
6612 : thistarget);
6613 : }
6614 8690 : lfirst(lc) = newpath;
6615 8690 : if (subpath == rel->cheapest_startup_path)
6616 342 : rel->cheapest_startup_path = newpath;
6617 8690 : if (subpath == rel->cheapest_total_path)
6618 342 : rel->cheapest_total_path = newpath;
6619 : }
6620 :
6621 : /* Likewise for partial paths, if any */
6622 8668 : foreach(lc, rel->partial_pathlist)
6623 : {
6624 6 : Path *subpath = (Path *) lfirst(lc);
6625 6 : Path *newpath = subpath;
6626 : ListCell *lc1,
6627 : *lc2;
6628 :
6629 : Assert(subpath->param_info == NULL);
6630 24 : forboth(lc1, targets, lc2, targets_contain_srfs)
6631 : {
6632 18 : PathTarget *thistarget = lfirst_node(PathTarget, lc1);
6633 18 : bool contains_srfs = (bool) lfirst_int(lc2);
6634 :
6635 : /* If this level doesn't contain SRFs, do regular projection */
6636 18 : if (contains_srfs)
6637 6 : newpath = (Path *) create_set_projection_path(root,
6638 : rel,
6639 : newpath,
6640 : thistarget);
6641 : else
6642 : {
6643 : /* avoid apply_projection_to_path, in case of multiple refs */
6644 12 : newpath = (Path *) create_projection_path(root,
6645 : rel,
6646 : newpath,
6647 : thistarget);
6648 : }
6649 : }
6650 6 : lfirst(lc) = newpath;
6651 : }
6652 : }
6653 :
6654 : /*
6655 : * expression_planner
6656 : * Perform planner's transformations on a standalone expression.
6657 : *
6658 : * Various utility commands need to evaluate expressions that are not part
6659 : * of a plannable query. They can do so using the executor's regular
6660 : * expression-execution machinery, but first the expression has to be fed
6661 : * through here to transform it from parser output to something executable.
6662 : *
6663 : * Currently, we disallow sublinks in standalone expressions, so there's no
6664 : * real "planning" involved here. (That might not always be true though.)
6665 : * What we must do is run eval_const_expressions to ensure that any function
6666 : * calls are converted to positional notation and function default arguments
6667 : * get inserted. The fact that constant subexpressions get simplified is a
6668 : * side-effect that is useful when the expression will get evaluated more than
6669 : * once. Also, we must fix operator function IDs.
6670 : *
6671 : * This does not return any information about dependencies of the expression.
6672 : * Hence callers should use the results only for the duration of the current
6673 : * query. Callers that would like to cache the results for longer should use
6674 : * expression_planner_with_deps, probably via the plancache.
6675 : *
6676 : * Note: this must not make any damaging changes to the passed-in expression
6677 : * tree. (It would actually be okay to apply fix_opfuncids to it, but since
6678 : * we first do an expression_tree_mutator-based walk, what is returned will
6679 : * be a new node tree.) The result is constructed in the current memory
6680 : * context; beware that this can leak a lot of additional stuff there, too.
6681 : */
6682 : Expr *
6683 235158 : expression_planner(Expr *expr)
6684 : {
6685 : Node *result;
6686 :
6687 : /*
6688 : * Convert named-argument function calls, insert default arguments and
6689 : * simplify constant subexprs
6690 : */
6691 235158 : result = eval_const_expressions(NULL, (Node *) expr);
6692 :
6693 : /* Fill in opfuncid values if missing */
6694 235140 : fix_opfuncids(result);
6695 :
6696 235140 : return (Expr *) result;
6697 : }
6698 :
6699 : /*
6700 : * expression_planner_with_deps
6701 : * Perform planner's transformations on a standalone expression,
6702 : * returning expression dependency information along with the result.
6703 : *
6704 : * This is identical to expression_planner() except that it also returns
6705 : * information about possible dependencies of the expression, ie identities of
6706 : * objects whose definitions affect the result. As in a PlannedStmt, these
6707 : * are expressed as a list of relation Oids and a list of PlanInvalItems.
6708 : */
6709 : Expr *
6710 342 : expression_planner_with_deps(Expr *expr,
6711 : List **relationOids,
6712 : List **invalItems)
6713 : {
6714 : Node *result;
6715 : PlannerGlobal glob;
6716 : PlannerInfo root;
6717 :
6718 : /* Make up dummy planner state so we can use setrefs machinery */
6719 6498 : MemSet(&glob, 0, sizeof(glob));
6720 342 : glob.type = T_PlannerGlobal;
6721 342 : glob.relationOids = NIL;
6722 342 : glob.invalItems = NIL;
6723 :
6724 30096 : MemSet(&root, 0, sizeof(root));
6725 342 : root.type = T_PlannerInfo;
6726 342 : root.glob = &glob;
6727 :
6728 : /*
6729 : * Convert named-argument function calls, insert default arguments and
6730 : * simplify constant subexprs. Collect identities of inlined functions
6731 : * and elided domains, too.
6732 : */
6733 342 : result = eval_const_expressions(&root, (Node *) expr);
6734 :
6735 : /* Fill in opfuncid values if missing */
6736 342 : fix_opfuncids(result);
6737 :
6738 : /*
6739 : * Now walk the finished expression to find anything else we ought to
6740 : * record as an expression dependency.
6741 : */
6742 342 : (void) extract_query_dependencies_walker(result, &root);
6743 :
6744 342 : *relationOids = glob.relationOids;
6745 342 : *invalItems = glob.invalItems;
6746 :
6747 342 : return (Expr *) result;
6748 : }
6749 :
6750 :
6751 : /*
6752 : * plan_cluster_use_sort
6753 : * Use the planner to decide how CLUSTER should implement sorting
6754 : *
6755 : * tableOid is the OID of a table to be clustered on its index indexOid
6756 : * (which is already known to be a btree index). Decide whether it's
6757 : * cheaper to do an indexscan or a seqscan-plus-sort to execute the CLUSTER.
6758 : * Return true to use sorting, false to use an indexscan.
6759 : *
6760 : * Note: caller had better already hold some type of lock on the table.
6761 : */
6762 : bool
6763 190 : plan_cluster_use_sort(Oid tableOid, Oid indexOid)
6764 : {
6765 : PlannerInfo *root;
6766 : Query *query;
6767 : PlannerGlobal *glob;
6768 : RangeTblEntry *rte;
6769 : RelOptInfo *rel;
6770 : IndexOptInfo *indexInfo;
6771 : QualCost indexExprCost;
6772 : Cost comparisonCost;
6773 : Path *seqScanPath;
6774 : Path seqScanAndSortPath;
6775 : IndexPath *indexScanPath;
6776 : ListCell *lc;
6777 :
6778 : /* We can short-circuit the cost comparison if indexscans are disabled */
6779 190 : if (!enable_indexscan)
6780 30 : return true; /* use sort */
6781 :
6782 : /* Set up mostly-dummy planner state */
6783 160 : query = makeNode(Query);
6784 160 : query->commandType = CMD_SELECT;
6785 :
6786 160 : glob = makeNode(PlannerGlobal);
6787 :
6788 160 : root = makeNode(PlannerInfo);
6789 160 : root->parse = query;
6790 160 : root->glob = glob;
6791 160 : root->query_level = 1;
6792 160 : root->planner_cxt = CurrentMemoryContext;
6793 160 : root->wt_param_id = -1;
6794 160 : root->join_domains = list_make1(makeNode(JoinDomain));
6795 :
6796 : /* Build a minimal RTE for the rel */
6797 160 : rte = makeNode(RangeTblEntry);
6798 160 : rte->rtekind = RTE_RELATION;
6799 160 : rte->relid = tableOid;
6800 160 : rte->relkind = RELKIND_RELATION; /* Don't be too picky. */
6801 160 : rte->rellockmode = AccessShareLock;
6802 160 : rte->lateral = false;
6803 160 : rte->inh = false;
6804 160 : rte->inFromCl = true;
6805 160 : query->rtable = list_make1(rte);
6806 160 : addRTEPermissionInfo(&query->rteperminfos, rte);
6807 :
6808 : /* Set up RTE/RelOptInfo arrays */
6809 160 : setup_simple_rel_arrays(root);
6810 :
6811 : /* Build RelOptInfo */
6812 160 : rel = build_simple_rel(root, 1, NULL);
6813 :
6814 : /* Locate IndexOptInfo for the target index */
6815 160 : indexInfo = NULL;
6816 198 : foreach(lc, rel->indexlist)
6817 : {
6818 198 : indexInfo = lfirst_node(IndexOptInfo, lc);
6819 198 : if (indexInfo->indexoid == indexOid)
6820 160 : break;
6821 : }
6822 :
6823 : /*
6824 : * It's possible that get_relation_info did not generate an IndexOptInfo
6825 : * for the desired index; this could happen if it's not yet reached its
6826 : * indcheckxmin usability horizon, or if it's a system index and we're
6827 : * ignoring system indexes. In such cases we should tell CLUSTER to not
6828 : * trust the index contents but use seqscan-and-sort.
6829 : */
6830 160 : if (lc == NULL) /* not in the list? */
6831 0 : return true; /* use sort */
6832 :
6833 : /*
6834 : * Rather than doing all the pushups that would be needed to use
6835 : * set_baserel_size_estimates, just do a quick hack for rows and width.
6836 : */
6837 160 : rel->rows = rel->tuples;
6838 160 : rel->reltarget->width = get_relation_data_width(tableOid, NULL);
6839 :
6840 160 : root->total_table_pages = rel->pages;
6841 :
6842 : /*
6843 : * Determine eval cost of the index expressions, if any. We need to
6844 : * charge twice that amount for each tuple comparison that happens during
6845 : * the sort, since tuplesort.c will have to re-evaluate the index
6846 : * expressions each time. (XXX that's pretty inefficient...)
6847 : */
6848 160 : cost_qual_eval(&indexExprCost, indexInfo->indexprs, root);
6849 160 : comparisonCost = 2.0 * (indexExprCost.startup + indexExprCost.per_tuple);
6850 :
6851 : /* Estimate the cost of seq scan + sort */
6852 160 : seqScanPath = create_seqscan_path(root, rel, NULL, 0);
6853 160 : cost_sort(&seqScanAndSortPath, root, NIL,
6854 : seqScanPath->disabled_nodes,
6855 160 : seqScanPath->total_cost, rel->tuples, rel->reltarget->width,
6856 : comparisonCost, maintenance_work_mem, -1.0);
6857 :
6858 : /* Estimate the cost of index scan */
6859 160 : indexScanPath = create_index_path(root, indexInfo,
6860 : NIL, NIL, NIL, NIL,
6861 : ForwardScanDirection, false,
6862 : NULL, 1.0, false);
6863 :
6864 160 : return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
6865 : }
6866 :
6867 : /*
6868 : * plan_create_index_workers
6869 : * Use the planner to decide how many parallel worker processes
6870 : * CREATE INDEX should request for use
6871 : *
6872 : * tableOid is the table on which the index is to be built. indexOid is the
6873 : * OID of an index to be created or reindexed (which must be a btree index).
6874 : *
6875 : * Return value is the number of parallel worker processes to request. It
6876 : * may be unsafe to proceed if this is 0. Note that this does not include the
6877 : * leader participating as a worker (value is always a number of parallel
6878 : * worker processes).
6879 : *
6880 : * Note: caller had better already hold some type of lock on the table and
6881 : * index.
6882 : */
6883 : int
6884 33218 : plan_create_index_workers(Oid tableOid, Oid indexOid)
6885 : {
6886 : PlannerInfo *root;
6887 : Query *query;
6888 : PlannerGlobal *glob;
6889 : RangeTblEntry *rte;
6890 : Relation heap;
6891 : Relation index;
6892 : RelOptInfo *rel;
6893 : int parallel_workers;
6894 : BlockNumber heap_blocks;
6895 : double reltuples;
6896 : double allvisfrac;
6897 :
6898 : /*
6899 : * We don't allow performing parallel operation in standalone backend or
6900 : * when parallelism is disabled.
6901 : */
6902 33218 : if (!IsUnderPostmaster || max_parallel_maintenance_workers == 0)
6903 466 : return 0;
6904 :
6905 : /* Set up largely-dummy planner state */
6906 32752 : query = makeNode(Query);
6907 32752 : query->commandType = CMD_SELECT;
6908 :
6909 32752 : glob = makeNode(PlannerGlobal);
6910 :
6911 32752 : root = makeNode(PlannerInfo);
6912 32752 : root->parse = query;
6913 32752 : root->glob = glob;
6914 32752 : root->query_level = 1;
6915 32752 : root->planner_cxt = CurrentMemoryContext;
6916 32752 : root->wt_param_id = -1;
6917 32752 : root->join_domains = list_make1(makeNode(JoinDomain));
6918 :
6919 : /*
6920 : * Build a minimal RTE.
6921 : *
6922 : * Mark the RTE with inh = true. This is a kludge to prevent
6923 : * get_relation_info() from fetching index info, which is necessary
6924 : * because it does not expect that any IndexOptInfo is currently
6925 : * undergoing REINDEX.
6926 : */
6927 32752 : rte = makeNode(RangeTblEntry);
6928 32752 : rte->rtekind = RTE_RELATION;
6929 32752 : rte->relid = tableOid;
6930 32752 : rte->relkind = RELKIND_RELATION; /* Don't be too picky. */
6931 32752 : rte->rellockmode = AccessShareLock;
6932 32752 : rte->lateral = false;
6933 32752 : rte->inh = true;
6934 32752 : rte->inFromCl = true;
6935 32752 : query->rtable = list_make1(rte);
6936 32752 : addRTEPermissionInfo(&query->rteperminfos, rte);
6937 :
6938 : /* Set up RTE/RelOptInfo arrays */
6939 32752 : setup_simple_rel_arrays(root);
6940 :
6941 : /* Build RelOptInfo */
6942 32752 : rel = build_simple_rel(root, 1, NULL);
6943 :
6944 : /* Rels are assumed already locked by the caller */
6945 32752 : heap = table_open(tableOid, NoLock);
6946 32752 : index = index_open(indexOid, NoLock);
6947 :
6948 : /*
6949 : * Determine if it's safe to proceed.
6950 : *
6951 : * Currently, parallel workers can't access the leader's temporary tables.
6952 : * Furthermore, any index predicate or index expressions must be parallel
6953 : * safe.
6954 : */
6955 32752 : if (heap->rd_rel->relpersistence == RELPERSISTENCE_TEMP ||
6956 30824 : !is_parallel_safe(root, (Node *) RelationGetIndexExpressions(index)) ||
6957 30704 : !is_parallel_safe(root, (Node *) RelationGetIndexPredicate(index)))
6958 : {
6959 2048 : parallel_workers = 0;
6960 2048 : goto done;
6961 : }
6962 :
6963 : /*
6964 : * If parallel_workers storage parameter is set for the table, accept that
6965 : * as the number of parallel worker processes to launch (though still cap
6966 : * at max_parallel_maintenance_workers). Note that we deliberately do not
6967 : * consider any other factor when parallel_workers is set. (e.g., memory
6968 : * use by workers.)
6969 : */
6970 30704 : if (rel->rel_parallel_workers != -1)
6971 : {
6972 14 : parallel_workers = Min(rel->rel_parallel_workers,
6973 : max_parallel_maintenance_workers);
6974 14 : goto done;
6975 : }
6976 :
6977 : /*
6978 : * Estimate heap relation size ourselves, since rel->pages cannot be
6979 : * trusted (heap RTE was marked as inheritance parent)
6980 : */
6981 30690 : estimate_rel_size(heap, NULL, &heap_blocks, &reltuples, &allvisfrac);
6982 :
6983 : /*
6984 : * Determine number of workers to scan the heap relation using generic
6985 : * model
6986 : */
6987 30690 : parallel_workers = compute_parallel_worker(rel, heap_blocks, -1,
6988 : max_parallel_maintenance_workers);
6989 :
6990 : /*
6991 : * Cap workers based on available maintenance_work_mem as needed.
6992 : *
6993 : * Note that each tuplesort participant receives an even share of the
6994 : * total maintenance_work_mem budget. Aim to leave participants
6995 : * (including the leader as a participant) with no less than 32MB of
6996 : * memory. This makes 64MB the lowest maintenance_work_mem setting
6997 : * that is still capable of supporting the launch of a single
6998 : * parallel worker to sort.
6999 : */
7000 30846 : while (parallel_workers > 0 &&
7001 314 : maintenance_work_mem / (parallel_workers + 1) < 32768L)
7002 156 : parallel_workers--;
7003 :
7004 30690 : done:
7005 32752 : index_close(index, NoLock);
7006 32752 : table_close(heap, NoLock);
7007 :
7008 32752 : return parallel_workers;
7009 : }
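
The worker cap near the end of this function can be exercised in isolation. The sketch below (hypothetical names, not planner.c code) reproduces the same arithmetic with maintenance_work_mem expressed in kB: workers are shed until every participant, leader included, keeps at least 32MB.

    #include <stdio.h>

    /* shed workers until each participant (workers + leader) keeps >= 32MB */
    static int
    demo_cap_workers(int parallel_workers, long maintenance_work_mem_kb)
    {
        while (parallel_workers > 0 &&
               maintenance_work_mem_kb / (parallel_workers + 1) < 32768L)
            parallel_workers--;
        return parallel_workers;
    }

    int
    main(void)
    {
        printf("64MB:  %d\n", demo_cap_workers(4, 64L * 1024));     /* -> 1 */
        printf("63MB:  %d\n", demo_cap_workers(4, 63L * 1024));     /* -> 0 */
        printf("256MB: %d\n", demo_cap_workers(4, 256L * 1024));    /* -> 4 */
        return 0;
    }

At exactly 64MB the leader and one worker each get 32MB, so one worker survives the cap; anything smaller falls back to a serial sort.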
7010 :
7011 : /*
7012 : * make_ordered_path
7013 : * Return a path ordered by 'pathkeys' based on the given 'path'. May
7014 : * return NULL if it doesn't make sense to generate an ordered path in
7015 : * this case.
7016 : */
7017 : static Path *
7018 47896 : make_ordered_path(PlannerInfo *root, RelOptInfo *rel, Path *path,
7019 : Path *cheapest_path, List *pathkeys)
7020 : {
7021 : bool is_sorted;
7022 : int presorted_keys;
7023 :
7024 47896 : is_sorted = pathkeys_count_contained_in(pathkeys,
7025 : path->pathkeys,
7026 : &presorted_keys);
7027 :
7028 47896 : if (!is_sorted)
7029 : {
7030 : /*
7031 : * Try at least sorting the cheapest path and also try incrementally
7032 : * sorting any path which is partially sorted already (no need to deal
7033 : * with paths which have presorted keys when incremental sort is
7034 : * disabled unless it's the cheapest input path).
7035 : */
7036 8644 : if (path != cheapest_path &&
7037 686 : (presorted_keys == 0 || !enable_incremental_sort))
7038 476 : return NULL;
7039 :
7040 : /*
7041 : * We've no need to consider both a sort and incremental sort. We'll
7042 : * just do a sort if there are no presorted keys and an incremental
7043 : * sort when there are presorted keys.
7044 : */
7045 8168 : if (presorted_keys == 0 || !enable_incremental_sort)
7046 7844 : path = (Path *) create_sort_path(root,
7047 : rel,
7048 : path,
7049 : pathkeys,
7050 : -1.0);
7051 : else
7052 324 : path = (Path *) create_incremental_sort_path(root,
7053 : rel,
7054 : path,
7055 : pathkeys,
7056 : presorted_keys,
7057 : -1.0);
7058 : }
7059 :
7060 47420 : return path;
7061 : }
7062 :
7063 : /*
7064 : * add_paths_to_grouping_rel
7065 : *
7066 : * Add non-partial paths to grouping relation.
7067 : */
7068 : static void
7069 40372 : add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
7070 : RelOptInfo *grouped_rel,
7071 : RelOptInfo *partially_grouped_rel,
7072 : const AggClauseCosts *agg_costs,
7073 : grouping_sets_data *gd, double dNumGroups,
7074 : GroupPathExtraData *extra)
7075 : {
7076 40372 : Query *parse = root->parse;
7077 40372 : Path *cheapest_path = input_rel->cheapest_total_path;
7078 : ListCell *lc;
7079 40372 : bool can_hash = (extra->flags & GROUPING_CAN_USE_HASH) != 0;
7080 40372 : bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0;
7081 40372 : List *havingQual = (List *) extra->havingQual;
7082 40372 : AggClauseCosts *agg_final_costs = &extra->agg_final_costs;
7083 :
7084 40372 : if (can_sort)
7085 : {
7086 : /*
7087 : * Use any available suitably-sorted path as input, and also consider
7088 : * sorting the cheapest-total path and incremental sort on any paths
7089 : * with presorted keys.
7090 : */
7091 83326 : foreach(lc, input_rel->pathlist)
7092 : {
7093 : ListCell *lc2;
7094 42960 : Path *path = (Path *) lfirst(lc);
7095 42960 : Path *path_save = path;
7096 42960 : List *pathkey_orderings = NIL;
7097 :
7098 : /* generate alternative group orderings that might be useful */
7099 42960 : pathkey_orderings = get_useful_group_keys_orderings(root, path);
7100 :
7101 : Assert(list_length(pathkey_orderings) > 0);
7102 :
7103 86058 : foreach(lc2, pathkey_orderings)
7104 : {
7105 43098 : GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
7106 :
7107 : /* restore the path (we replace it in the loop) */
7108 43098 : path = path_save;
7109 :
7110 43098 : path = make_ordered_path(root,
7111 : grouped_rel,
7112 : path,
7113 : cheapest_path,
7114 : info->pathkeys);
7115 43098 : if (path == NULL)
7116 362 : continue;
7117 :
7118 : /* Now decide what to stick atop it */
7119 42736 : if (parse->groupingSets)
7120 : {
7121 914 : consider_groupingsets_paths(root, grouped_rel,
7122 : path, true, can_hash,
7123 : gd, agg_costs, dNumGroups);
7124 : }
7125 41822 : else if (parse->hasAggs)
7126 : {
7127 : /*
7128 : * We have aggregation, possibly with plain GROUP BY. Make
7129 : * an AggPath.
7130 : */
7131 41140 : add_path(grouped_rel, (Path *)
7132 41140 : create_agg_path(root,
7133 : grouped_rel,
7134 : path,
7135 41140 : grouped_rel->reltarget,
7136 41140 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
7137 : AGGSPLIT_SIMPLE,
7138 : info->clauses,
7139 : havingQual,
7140 : agg_costs,
7141 : dNumGroups));
7142 : }
7143 682 : else if (parse->groupClause)
7144 : {
7145 : /*
7146 : * We have GROUP BY without aggregation or grouping sets.
7147 : * Make a GroupPath.
7148 : */
7149 682 : add_path(grouped_rel, (Path *)
7150 682 : create_group_path(root,
7151 : grouped_rel,
7152 : path,
7153 : info->clauses,
7154 : havingQual,
7155 : dNumGroups));
7156 : }
7157 : else
7158 : {
7159 : /* Other cases should have been handled above */
7160 : Assert(false);
7161 : }
7162 : }
7163 : }
7164 :
7165 : /*
7166 : * Instead of operating directly on the input relation, we can
7167 : * consider finalizing a partially aggregated path.
7168 : */
7169 40366 : if (partially_grouped_rel != NULL)
7170 : {
7171 3966 : foreach(lc, partially_grouped_rel->pathlist)
7172 : {
7173 : ListCell *lc2;
7174 2398 : Path *path = (Path *) lfirst(lc);
7175 2398 : Path *path_save = path;
7176 2398 : List *pathkey_orderings = NIL;
7177 :
7178 : /* generate alternative group orderings that might be useful */
7179 2398 : pathkey_orderings = get_useful_group_keys_orderings(root, path);
7180 :
7181 : Assert(list_length(pathkey_orderings) > 0);
7182 :
7183 : /* process all potentially interesting grouping reorderings */
7184 4796 : foreach(lc2, pathkey_orderings)
7185 : {
7186 2398 : GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
7187 :
7188 : /* restore the path (we replace it in the loop) */
7189 2398 : path = path_save;
7190 :
7191 2398 : path = make_ordered_path(root,
7192 : grouped_rel,
7193 : path,
7194 2398 : partially_grouped_rel->cheapest_total_path,
7195 : info->pathkeys);
7196 :
7197 2398 : if (path == NULL)
7198 108 : continue;
7199 :
7200 2290 : if (parse->hasAggs)
7201 2048 : add_path(grouped_rel, (Path *)
7202 2048 : create_agg_path(root,
7203 : grouped_rel,
7204 : path,
7205 2048 : grouped_rel->reltarget,
7206 2048 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
7207 : AGGSPLIT_FINAL_DESERIAL,
7208 : info->clauses,
7209 : havingQual,
7210 : agg_final_costs,
7211 : dNumGroups));
7212 : else
7213 242 : add_path(grouped_rel, (Path *)
7214 242 : create_group_path(root,
7215 : grouped_rel,
7216 : path,
7217 : info->clauses,
7218 : havingQual,
7219 : dNumGroups));
7220 :
7221 : }
7222 : }
7223 : }
7224 : }
7225 :
7226 40372 : if (can_hash)
7227 : {
7228 4874 : if (parse->groupingSets)
7229 : {
7230 : /*
7231 : * Try for a hash-only groupingsets path over unsorted input.
7232 : */
7233 770 : consider_groupingsets_paths(root, grouped_rel,
7234 : cheapest_path, false, true,
7235 : gd, agg_costs, dNumGroups);
7236 : }
7237 : else
7238 : {
7239 : /*
7240 : * Generate a HashAgg Path. We just need an Agg over the
7241 : * cheapest-total input path, since input order won't matter.
7242 : */
7243 4104 : add_path(grouped_rel, (Path *)
7244 4104 : create_agg_path(root, grouped_rel,
7245 : cheapest_path,
7246 4104 : grouped_rel->reltarget,
7247 : AGG_HASHED,
7248 : AGGSPLIT_SIMPLE,
7249 : root->processed_groupClause,
7250 : havingQual,
7251 : agg_costs,
7252 : dNumGroups));
7253 : }
7254 :
7255 : /*
7256 : * Generate a Finalize HashAgg Path atop of the cheapest partially
7257 : * grouped path, assuming there is one
7258 : */
7259 4874 : if (partially_grouped_rel && partially_grouped_rel->pathlist)
7260 : {
7261 778 : Path *path = partially_grouped_rel->cheapest_total_path;
7262 :
7263 778 : add_path(grouped_rel, (Path *)
7264 778 : create_agg_path(root,
7265 : grouped_rel,
7266 : path,
7267 778 : grouped_rel->reltarget,
7268 : AGG_HASHED,
7269 : AGGSPLIT_FINAL_DESERIAL,
7270 : root->processed_groupClause,
7271 : havingQual,
7272 : agg_final_costs,
7273 : dNumGroups));
7274 : }
7275 : }
7276 :
7277 : /*
7278 : * When partitionwise aggregate is used, we might have fully aggregated
7279 : * paths in the partial pathlist, because add_paths_to_append_rel() will
7280 : * consider a path for grouped_rel consisting of a Parallel Append of
7281 : * non-partial paths from each child.
7282 : */
7283 40372 : if (grouped_rel->partial_pathlist != NIL)
7284 162 : gather_grouping_paths(root, grouped_rel);
7285 40372 : }
7286 :
7287 : /*
7288 : * create_partial_grouping_paths
7289 : *
7290 : * Create a new upper relation representing the result of partial aggregation
7291 : * and populate it with appropriate paths. Note that we don't finalize the
7292 : * lists of paths here, so the caller can add additional partial or non-partial
7293 : * paths and must afterward call gather_grouping_paths and set_cheapest on
7294 : * the returned upper relation.
7295 : *
7296 : * All paths for this new upper relation -- both partial and non-partial --
7297 : * have been partially aggregated but require a subsequent FinalizeAggregate
7298 : * step.
7299 : *
7300 : * NB: This function is allowed to return NULL if it determines that there is
7301 : * no real need to create a new RelOptInfo.
7302 : */
7303 : static RelOptInfo *
7304 36836 : create_partial_grouping_paths(PlannerInfo *root,
7305 : RelOptInfo *grouped_rel,
7306 : RelOptInfo *input_rel,
7307 : grouping_sets_data *gd,
7308 : GroupPathExtraData *extra,
7309 : bool force_rel_creation)
7310 : {
7311 36836 : Query *parse = root->parse;
7312 : RelOptInfo *partially_grouped_rel;
7313 36836 : AggClauseCosts *agg_partial_costs = &extra->agg_partial_costs;
7314 36836 : AggClauseCosts *agg_final_costs = &extra->agg_final_costs;
7315 36836 : Path *cheapest_partial_path = NULL;
7316 36836 : Path *cheapest_total_path = NULL;
7317 36836 : double dNumPartialGroups = 0;
7318 36836 : double dNumPartialPartialGroups = 0;
7319 : ListCell *lc;
7320 36836 : bool can_hash = (extra->flags & GROUPING_CAN_USE_HASH) != 0;
7321 36836 : bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0;
7322 :
7323 : /*
7324 : * Consider whether we should generate partially aggregated non-partial
7325 : * paths. We can only do this if we have a non-partial path, and only if
7326 : * the parent of the input rel is performing partial partitionwise
7327 : * aggregation. (Note that extra->patype is the type of partitionwise
7328 : * aggregation being used at the parent level, not this level.)
7329 : */
7330 36836 : if (input_rel->pathlist != NIL &&
7331 36836 : extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL)
7332 618 : cheapest_total_path = input_rel->cheapest_total_path;
7333 :
7334 : /*
7335 : * If parallelism is possible for grouped_rel, then we should consider
7336 : * generating partially-grouped partial paths. However, if the input rel
7337 : * has no partial paths, then we can't.
7338 : */
7339 36836 : if (grouped_rel->consider_parallel && input_rel->partial_pathlist != NIL)
7340 1770 : cheapest_partial_path = linitial(input_rel->partial_pathlist);
7341 :
7342 : /*
7343 : * If we can't partially aggregate partial paths, and we can't partially
7344 : * aggregate non-partial paths, then don't bother creating the new
7345 : * RelOptInfo at all, unless the caller specified force_rel_creation.
7346 : */
7347 36836 : if (cheapest_total_path == NULL &&
7348 34748 : cheapest_partial_path == NULL &&
7349 34748 : !force_rel_creation)
7350 34650 : return NULL;
7351 :
7352 : /*
7353 : * Build a new upper relation to represent the result of partially
7354 : * aggregating the rows from the input relation.
7355 : */
7356 2186 : partially_grouped_rel = fetch_upper_rel(root,
7357 : UPPERREL_PARTIAL_GROUP_AGG,
7358 : grouped_rel->relids);
7359 2186 : partially_grouped_rel->consider_parallel =
7360 2186 : grouped_rel->consider_parallel;
7361 2186 : partially_grouped_rel->reloptkind = grouped_rel->reloptkind;
7362 2186 : partially_grouped_rel->serverid = grouped_rel->serverid;
7363 2186 : partially_grouped_rel->userid = grouped_rel->userid;
7364 2186 : partially_grouped_rel->useridiscurrent = grouped_rel->useridiscurrent;
7365 2186 : partially_grouped_rel->fdwroutine = grouped_rel->fdwroutine;
7366 :
7367 : /*
7368 : * Build target list for partial aggregate paths. These paths cannot just
7369 : * emit the same tlist as regular aggregate paths, because (1) we must
7370 : * include Vars and Aggrefs needed in HAVING, which might not appear in
7371 : * the result tlist, and (2) the Aggrefs must be set in partial mode.
7372 : */
7373 2186 : partially_grouped_rel->reltarget =
7374 2186 : make_partial_grouping_target(root, grouped_rel->reltarget,
7375 : extra->havingQual);
7376 :
7377 2186 : if (!extra->partial_costs_set)
7378 : {
7379 : /*
7380 : * Collect statistics about aggregates for estimating costs of
7381 : * performing aggregation in parallel.
7382 : */
7383 7680 : MemSet(agg_partial_costs, 0, sizeof(AggClauseCosts));
7384 7680 : MemSet(agg_final_costs, 0, sizeof(AggClauseCosts));
7385 1280 : if (parse->hasAggs)
7386 : {
7387 : /* partial phase */
7388 1152 : get_agg_clause_costs(root, AGGSPLIT_INITIAL_SERIAL,
7389 : agg_partial_costs);
7390 :
7391 : /* final phase */
7392 1152 : get_agg_clause_costs(root, AGGSPLIT_FINAL_DESERIAL,
7393 : agg_final_costs);
7394 : }
7395 :
7396 1280 : extra->partial_costs_set = true;
7397 : }
7398 :
7399 : /* Estimate number of partial groups. */
7400 2186 : if (cheapest_total_path != NULL)
7401 : dNumPartialGroups =
7402 618 : get_number_of_groups(root,
7403 : cheapest_total_path->rows,
7404 : gd,
7405 : extra->targetList);
7406 2186 : if (cheapest_partial_path != NULL)
7407 : dNumPartialPartialGroups =
7408 1770 : get_number_of_groups(root,
7409 : cheapest_partial_path->rows,
7410 : gd,
7411 : extra->targetList);
7412 :
7413 2186 : if (can_sort && cheapest_total_path != NULL)
7414 : {
7415 : /* This should have been checked previously */
7416 : Assert(parse->hasAggs || parse->groupClause);
7417 :
7418 : /*
7419 : * Use any available suitably-sorted path as input, and also consider
7420 : * sorting the cheapest partial path.
7421 : */
7422 1236 : foreach(lc, input_rel->pathlist)
7423 : {
7424 : ListCell *lc2;
7425 618 : Path *path = (Path *) lfirst(lc);
7426 618 : Path *path_save = path;
7427 618 : List *pathkey_orderings = NIL;
7428 :
7429 : /* generate alternative group orderings that might be useful */
7430 618 : pathkey_orderings = get_useful_group_keys_orderings(root, path);
7431 :
7432 : Assert(list_length(pathkey_orderings) > 0);
7433 :
7434 : /* process all potentially interesting grouping reorderings */
7435 1236 : foreach(lc2, pathkey_orderings)
7436 : {
7437 618 : GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
7438 :
7439 : /* restore the path (we replace it in the loop) */
7440 618 : path = path_save;
7441 :
7442 618 : path = make_ordered_path(root,
7443 : partially_grouped_rel,
7444 : path,
7445 : cheapest_total_path,
7446 : info->pathkeys);
7447 :
7448 618 : if (path == NULL)
7449 0 : continue;
7450 :
7451 618 : if (parse->hasAggs)
7452 546 : add_path(partially_grouped_rel, (Path *)
7453 546 : create_agg_path(root,
7454 : partially_grouped_rel,
7455 : path,
7456 546 : partially_grouped_rel->reltarget,
7457 546 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
7458 : AGGSPLIT_INITIAL_SERIAL,
7459 : info->clauses,
7460 : NIL,
7461 : agg_partial_costs,
7462 : dNumPartialGroups));
7463 : else
7464 72 : add_path(partially_grouped_rel, (Path *)
7465 72 : create_group_path(root,
7466 : partially_grouped_rel,
7467 : path,
7468 : info->clauses,
7469 : NIL,
7470 : dNumPartialGroups));
7471 : }
7472 : }
7473 : }
7474 :
7475 2186 : if (can_sort && cheapest_partial_path != NULL)
7476 : {
7477 : /* Similar to above logic, but for partial paths. */
7478 3552 : foreach(lc, input_rel->partial_pathlist)
7479 : {
7480 : ListCell *lc2;
7481 1782 : Path *path = (Path *) lfirst(lc);
7482 1782 : Path *path_save = path;
7483 1782 : List *pathkey_orderings = NIL;
7484 :
7485 : /* generate alternative group orderings that might be useful */
7486 1782 : pathkey_orderings = get_useful_group_keys_orderings(root, path);
7487 :
7488 : Assert(list_length(pathkey_orderings) > 0);
7489 :
7490 : /* process all potentially interesting grouping reorderings */
7491 3564 : foreach(lc2, pathkey_orderings)
7492 : {
7493 1782 : GroupByOrdering *info = (GroupByOrdering *) lfirst(lc2);
7494 :
7495 :
7496 : /* restore the path (we replace it in the loop) */
7497 1782 : path = path_save;
7498 :
7499 1782 : path = make_ordered_path(root,
7500 : partially_grouped_rel,
7501 : path,
7502 : cheapest_partial_path,
7503 : info->pathkeys);
7504 :
7505 1782 : if (path == NULL)
7506 6 : continue;
7507 :
7508 1776 : if (parse->hasAggs)
7509 1660 : add_partial_path(partially_grouped_rel, (Path *)
7510 1660 : create_agg_path(root,
7511 : partially_grouped_rel,
7512 : path,
7513 1660 : partially_grouped_rel->reltarget,
7514 1660 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
7515 : AGGSPLIT_INITIAL_SERIAL,
7516 : info->clauses,
7517 : NIL,
7518 : agg_partial_costs,
7519 : dNumPartialPartialGroups));
7520 : else
7521 116 : add_partial_path(partially_grouped_rel, (Path *)
7522 116 : create_group_path(root,
7523 : partially_grouped_rel,
7524 : path,
7525 : info->clauses,
7526 : NIL,
7527 : dNumPartialPartialGroups));
7528 : }
7529 : }
7530 : }
7531 :
7532 : /*
7533 : * Add a partially-grouped HashAgg Path where possible
7534 : */
7535 2186 : if (can_hash && cheapest_total_path != NULL)
7536 : {
7537 : /* Checked above */
7538 : Assert(parse->hasAggs || parse->groupClause);
7539 :
7540 618 : add_path(partially_grouped_rel, (Path *)
7541 618 : create_agg_path(root,
7542 : partially_grouped_rel,
7543 : cheapest_total_path,
7544 618 : partially_grouped_rel->reltarget,
7545 : AGG_HASHED,
7546 : AGGSPLIT_INITIAL_SERIAL,
7547 : root->processed_groupClause,
7548 : NIL,
7549 : agg_partial_costs,
7550 : dNumPartialGroups));
7551 : }
7552 :
7553 : /*
7554 : * Now add a partially-grouped HashAgg partial Path where possible
7555 : */
7556 2186 : if (can_hash && cheapest_partial_path != NULL)
7557 : {
7558 980 : add_partial_path(partially_grouped_rel, (Path *)
7559 980 : create_agg_path(root,
7560 : partially_grouped_rel,
7561 : cheapest_partial_path,
7562 980 : partially_grouped_rel->reltarget,
7563 : AGG_HASHED,
7564 : AGGSPLIT_INITIAL_SERIAL,
7565 : root->processed_groupClause,
7566 : NIL,
7567 : agg_partial_costs,
7568 : dNumPartialPartialGroups));
7569 : }
7570 :
7571 : /*
7572 : * If there is an FDW that's responsible for all baserels of the query,
7573 : * let it consider adding partially grouped ForeignPaths.
7574 : */
7575 2186 : if (partially_grouped_rel->fdwroutine &&
7576 6 : partially_grouped_rel->fdwroutine->GetForeignUpperPaths)
7577 : {
7578 6 : FdwRoutine *fdwroutine = partially_grouped_rel->fdwroutine;
7579 :
7580 6 : fdwroutine->GetForeignUpperPaths(root,
7581 : UPPERREL_PARTIAL_GROUP_AGG,
7582 : input_rel, partially_grouped_rel,
7583 : extra);
7584 : }
7585 :
7586 2186 : return partially_grouped_rel;
7587 : }
7588 :
7589 : /*
7590 : * Generate Gather and Gather Merge paths for a grouping relation or partial
7591 : * grouping relation.
7592 : *
7593 : * generate_useful_gather_paths does most of the work, but we also consider a
7594 : * special case: we could try sorting the data by the group_pathkeys and then
7595 : * applying Gather Merge.
7596 : *
7597 : * NB: This function shouldn't be used for anything other than a grouped or
7598 : * partially grouped relation, not only because it explicitly references
7599 : * group_pathkeys but also because we pass "true" as the third argument to
7600 : * generate_useful_gather_paths().
7601 : */
7602 : static void
7603 1632 : gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
7604 : {
7605 : ListCell *lc;
7606 : Path *cheapest_partial_path;
7607 : List *groupby_pathkeys;
7608 :
7609 : /*
7610 : * This occurs after any partial aggregation has taken place, so trim off
7611 : * any pathkeys added for ORDER BY / DISTINCT aggregates.
7612 : */
7613 1632 : if (list_length(root->group_pathkeys) > root->num_groupby_pathkeys)
7614 18 : groupby_pathkeys = list_copy_head(root->group_pathkeys,
7615 : root->num_groupby_pathkeys);
7616 : else
7617 1614 : groupby_pathkeys = root->group_pathkeys;
7618 :
7619 : /* Try Gather for unordered paths and Gather Merge for ordered ones. */
7620 1632 : generate_useful_gather_paths(root, rel, true);
7621 :
7622 1632 : cheapest_partial_path = linitial(rel->partial_pathlist);
7623 :
7624 : /* XXX Shouldn't this also consider the group-key-reordering? */
7625 3864 : foreach(lc, rel->partial_pathlist)
7626 : {
7627 2232 : Path *path = (Path *) lfirst(lc);
7628 : bool is_sorted;
7629 : int presorted_keys;
7630 : double total_groups;
7631 :
7632 2232 : is_sorted = pathkeys_count_contained_in(groupby_pathkeys,
7633 : path->pathkeys,
7634 : &presorted_keys);
7635 :
7636 2232 : if (is_sorted)
7637 1458 : continue;
7638 :
7639 : /*
7640 : * Try at least sorting the cheapest path and also try incrementally
7641 : * sorting any path which is partially sorted already (no need to deal
7642 : * with paths which have presorted keys when incremental sort is
7643 : * disabled unless it's the cheapest input path).
7644 : */
7645 774 : if (path != cheapest_partial_path &&
7646 0 : (presorted_keys == 0 || !enable_incremental_sort))
7647 0 : continue;
7648 :
7649 : /*
7650 : * We've no need to consider both a sort and incremental sort. We'll
7651 : * just do a sort if there are no presorted keys and an incremental
7652 : * sort when there are presorted keys.
7653 : */
7654 774 : if (presorted_keys == 0 || !enable_incremental_sort)
7655 774 : path = (Path *) create_sort_path(root, rel, path,
7656 : groupby_pathkeys,
7657 : -1.0);
7658 : else
7659 0 : path = (Path *) create_incremental_sort_path(root,
7660 : rel,
7661 : path,
7662 : groupby_pathkeys,
7663 : presorted_keys,
7664 : -1.0);
7665 774 : total_groups = compute_gather_rows(path);
7666 : path = (Path *)
7667 774 : create_gather_merge_path(root,
7668 : rel,
7669 : path,
7670 774 : rel->reltarget,
7671 : groupby_pathkeys,
7672 : NULL,
7673 : &total_groups);
7674 :
7675 774 : add_path(rel, path);
7676 : }
7677 1632 : }
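/*
 * The loop above makes a per-path decision: paths already sorted by the
 * group pathkeys were handled by generate_useful_gather_paths, a path with
 * no presorted keys (or with incremental sort disabled) is only worth a full
 * sort if it is the cheapest partial path, and a partially presorted path
 * gets an incremental sort.  Below is a minimal standalone sketch of that
 * decision; the stub types are invented and are not the planner's Path.
 */
#include <stdbool.h>
#include <stdio.h>

typedef struct
{
	bool		is_sorted;		/* already ordered by the group pathkeys? */
	int			presorted_keys; /* number of leading keys already ordered */
	bool		is_cheapest;	/* is this the cheapest partial path? */
} PathStub;

typedef enum
{
	ALREADY_SORTED,				/* handled earlier; nothing to add here */
	SKIP,						/* not worth sorting this path */
	FULL_SORT,					/* sort from scratch, then Gather Merge */
	INCREMENTAL_SORT			/* exploit the presorted prefix */
} SortChoice;

static SortChoice
choose_sort(const PathStub *p, bool enable_incremental_sort)
{
	if (p->is_sorted)
		return ALREADY_SORTED;

	/* Non-cheapest paths are only interesting for incremental sort. */
	if (!p->is_cheapest &&
		(p->presorted_keys == 0 || !enable_incremental_sort))
		return SKIP;

	if (p->presorted_keys == 0 || !enable_incremental_sort)
		return FULL_SORT;

	return INCREMENTAL_SORT;
}

int
main(void)
{
	PathStub	cheapest = {false, 0, true};	/* unsorted, cheapest */
	PathStub	partial = {false, 2, false};	/* two presorted keys */

	printf("%d %d\n",
		   choose_sort(&cheapest, true),	/* FULL_SORT */
		   choose_sort(&partial, true));	/* INCREMENTAL_SORT */
	return 0;
}
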
7678 :
7679 : /*
7680 : * can_partial_agg
7681 : *
7682 : * Determines whether or not partial grouping and/or aggregation is possible.
7683 : * Returns true when possible, false otherwise.
7684 : */
7685 : static bool
7686 39496 : can_partial_agg(PlannerInfo *root)
7687 : {
7688 39496 : Query *parse = root->parse;
7689 :
7690 39496 : if (!parse->hasAggs && parse->groupClause == NIL)
7691 : {
7692 : /*
7693 : * We don't know how to do parallel aggregation unless we have either
7694 : * some aggregates or a grouping clause.
7695 : */
7696 0 : return false;
7697 : }
7698 39496 : else if (parse->groupingSets)
7699 : {
7700 : /* We don't know how to do grouping sets in parallel. */
7701 848 : return false;
7702 : }
7703 38648 : else if (root->hasNonPartialAggs || root->hasNonSerialAggs)
7704 : {
7705 : /* Insufficient support for partial mode. */
7706 3234 : return false;
7707 : }
7708 :
7709 : /* Everything looks good. */
7710 35414 : return true;
7711 : }
7712 :
7713 : /*
7714 : * apply_scanjoin_target_to_paths
7715 : *
7716 : * Adjust the final scan/join relation, and recursively all of its children,
7717 : * to generate the final scan/join target. It would be more correct to model
7718 : * this as a separate planning step with a new RelOptInfo at the toplevel and
7719 : * for each child relation, but doing it this way is noticeably cheaper.
7720 : * Maybe that problem can be solved at some point, but for now we do this.
7721 : *
7722 : * If tlist_same_exprs is true, then the scan/join target to be applied has
7723 : * the same expressions as the existing reltarget, so we need only insert the
7724 : * appropriate sortgroupref information. By avoiding the creation of
7725 : * projection paths we save effort both immediately and at plan creation time.
7726 : */
7727 : static void
7728 528046 : apply_scanjoin_target_to_paths(PlannerInfo *root,
7729 : RelOptInfo *rel,
7730 : List *scanjoin_targets,
7731 : List *scanjoin_targets_contain_srfs,
7732 : bool scanjoin_target_parallel_safe,
7733 : bool tlist_same_exprs)
7734 : {
7735 528046 : bool rel_is_partitioned = IS_PARTITIONED_REL(rel);
7736 : PathTarget *scanjoin_target;
7737 : ListCell *lc;
7738 :
7739 : /* This recurses, so be paranoid. */
7740 528046 : check_stack_depth();
7741 :
7742 : /*
7743 : * If the rel is partitioned, we want to drop its existing paths and
7744 : * generate new ones. This function would still be correct if we kept the
7745 : * existing paths: we'd modify them to generate the correct target above
7746 : * the partitioning Append, and then they'd compete on cost with paths
7747 : * generating the target below the Append. However, in our current cost
7748 : * model the latter way is always the same or cheaper cost, so modifying
7749 : * the existing paths would just be useless work. Moreover, when the cost
7750 : * is the same, varying roundoff errors might sometimes allow an existing
7751 : * path to be picked, resulting in undesirable cross-platform plan
7752 : * variations. So we drop old paths and thereby force the work to be done
7753 : * below the Append, except in the case of a non-parallel-safe target.
7754 : *
7755 : * Some care is needed, because we have to allow
7756 : * generate_useful_gather_paths to see the old partial paths in the next
7757 : * stanza. Hence, zap the main pathlist here, then allow
7758 : * generate_useful_gather_paths to add path(s) to the main list, and
7759 : * finally zap the partial pathlist.
7760 : */
7761 528046 : if (rel_is_partitioned)
7762 12412 : rel->pathlist = NIL;
7763 :
7764 : /*
7765 : * If the scan/join target is not parallel-safe, partial paths cannot
7766 : * generate it.
7767 : */
7768 528046 : if (!scanjoin_target_parallel_safe)
7769 : {
7770 : /*
7771 : * Since we can't generate the final scan/join target in parallel
7772 : * workers, this is our last opportunity to use any partial paths that
7773 : * exist; so build Gather path(s) that use them and emit whatever the
7774 : * current reltarget is. We don't do this in the case where the
7775 : * target is parallel-safe, since we will be able to generate superior
7776 : * paths by doing it after the final scan/join target has been
7777 : * applied.
7778 : */
7779 80732 : generate_useful_gather_paths(root, rel, false);
7780 :
7781 : /* Can't use parallel query above this level. */
7782 80732 : rel->partial_pathlist = NIL;
7783 80732 : rel->consider_parallel = false;
7784 : }
7785 :
7786 : /* Finish dropping old paths for a partitioned rel, per comment above */
7787 528046 : if (rel_is_partitioned)
7788 12412 : rel->partial_pathlist = NIL;
7789 :
7790 : /* Extract SRF-free scan/join target. */
7791 528046 : scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
7792 :
7793 : /*
7794 : * Apply the SRF-free scan/join target to each existing path.
7795 : *
7796 : * If the tlist exprs are the same, we can just inject the sortgroupref
7797 : * information into the existing pathtargets. Otherwise, replace each
7798 : * path with a projection path that generates the SRF-free scan/join
7799 : * target. This can't change the ordering of paths within rel->pathlist,
7800 : * so we just modify the list in place.
7801 : */
7802 1085630 : foreach(lc, rel->pathlist)
7803 : {
7804 557584 : Path *subpath = (Path *) lfirst(lc);
7805 :
7806 : /* Shouldn't have any parameterized paths anymore */
7807 : Assert(subpath->param_info == NULL);
7808 :
7809 557584 : if (tlist_same_exprs)
7810 187980 : subpath->pathtarget->sortgrouprefs =
7811 187980 : scanjoin_target->sortgrouprefs;
7812 : else
7813 : {
7814 : Path *newpath;
7815 :
7816 369604 : newpath = (Path *) create_projection_path(root, rel, subpath,
7817 : scanjoin_target);
7818 369604 : lfirst(lc) = newpath;
7819 : }
7820 : }
7821 :
7822 : /* Likewise adjust the targets for any partial paths. */
7823 547322 : foreach(lc, rel->partial_pathlist)
7824 : {
7825 19276 : Path *subpath = (Path *) lfirst(lc);
7826 :
7827 : /* Shouldn't have any parameterized paths anymore */
7828 : Assert(subpath->param_info == NULL);
7829 :
7830 19276 : if (tlist_same_exprs)
7831 15744 : subpath->pathtarget->sortgrouprefs =
7832 15744 : scanjoin_target->sortgrouprefs;
7833 : else
7834 : {
7835 : Path *newpath;
7836 :
7837 3532 : newpath = (Path *) create_projection_path(root, rel, subpath,
7838 : scanjoin_target);
7839 3532 : lfirst(lc) = newpath;
7840 : }
7841 : }
7842 :
7843 : /*
7844 : * Now, if final scan/join target contains SRFs, insert ProjectSetPath(s)
7845 : * atop each existing path. (Note that this function doesn't look at the
7846 : * cheapest-path fields, which is a good thing because they're bogus right
7847 : * now.)
7848 : */
7849 528046 : if (root->parse->hasTargetSRFs)
7850 8662 : adjust_paths_for_srfs(root, rel,
7851 : scanjoin_targets,
7852 : scanjoin_targets_contain_srfs);
7853 :
7854 : /*
7855 : * Update the rel's target to be the final (with SRFs) scan/join target.
7856 : * This now matches the actual output of all the paths, and we might get
7857 : * confused in createplan.c if they don't agree. We must do this now so
7858 : * that any append paths made in the next part will use the correct
7859 : * pathtarget (cf. create_append_path).
7860 : *
7861 : * Note that this is also necessary if GetForeignUpperPaths() gets called
7862 : * on the final scan/join relation or on any of its children, since the
7863 : * FDW might look at the rel's target to create ForeignPaths.
7864 : */
7865 528046 : rel->reltarget = llast_node(PathTarget, scanjoin_targets);
7866 :
7867 : /*
7868 : * If the relation is partitioned, recursively apply the scan/join target
7869 : * to all partitions, and generate brand-new Append paths in which the
7870 : * scan/join target is computed below the Append rather than above it.
7871 : * Since Append is not projection-capable, that might save a separate
7872 : * Result node, and it also is important for partitionwise aggregate.
7873 : */
7874 528046 : if (rel_is_partitioned)
7875 : {
7876 12412 : List *live_children = NIL;
7877 : int i;
7878 :
7879 : /* Adjust each partition. */
7880 12412 : i = -1;
7881 34916 : while ((i = bms_next_member(rel->live_parts, i)) >= 0)
7882 : {
7883 22504 : RelOptInfo *child_rel = rel->part_rels[i];
7884 : AppendRelInfo **appinfos;
7885 : int nappinfos;
7886 22504 : List *child_scanjoin_targets = NIL;
7887 :
7888 : Assert(child_rel != NULL);
7889 :
7890 : /* Dummy children can be ignored. */
7891 22504 : if (IS_DUMMY_REL(child_rel))
7892 42 : continue;
7893 :
7894 : /* Translate scan/join targets for this child. */
7895 22462 : appinfos = find_appinfos_by_relids(root, child_rel->relids,
7896 : &nappinfos);
7897 44924 : foreach(lc, scanjoin_targets)
7898 : {
7899 22462 : PathTarget *target = lfirst_node(PathTarget, lc);
7900 :
7901 22462 : target = copy_pathtarget(target);
7902 22462 : target->exprs = (List *)
7903 22462 : adjust_appendrel_attrs(root,
7904 22462 : (Node *) target->exprs,
7905 : nappinfos, appinfos);
7906 22462 : child_scanjoin_targets = lappend(child_scanjoin_targets,
7907 : target);
7908 : }
7909 22462 : pfree(appinfos);
7910 :
7911 : /* Recursion does the real work. */
7912 22462 : apply_scanjoin_target_to_paths(root, child_rel,
7913 : child_scanjoin_targets,
7914 : scanjoin_targets_contain_srfs,
7915 : scanjoin_target_parallel_safe,
7916 : tlist_same_exprs);
7917 :
7918 : /* Save non-dummy children for Append paths. */
7919 22462 : if (!IS_DUMMY_REL(child_rel))
7920 22462 : live_children = lappend(live_children, child_rel);
7921 : }
7922 :
7923 : /* Build new paths for this relation by appending child paths. */
7924 12412 : add_paths_to_append_rel(root, rel, live_children);
7925 : }
7926 :
7927 : /*
7928 : * Consider generating Gather or Gather Merge paths. We must only do this
7929 : * if the relation is parallel safe, and we don't do it for child rels to
7930 : * avoid creating multiple Gather nodes within the same plan. We must do
7931 : * this after all paths have been generated and before set_cheapest, since
7932 : * one of the generated paths may turn out to be the cheapest one.
7933 : */
7934 528046 : if (rel->consider_parallel && !IS_OTHER_REL(rel))
7935 153898 : generate_useful_gather_paths(root, rel, false);
7936 :
7937 : /*
7938 : * Reassess which paths are the cheapest, now that we've potentially added
7939 : * new Gather (or Gather Merge) and/or Append (or MergeAppend) paths to
7940 : * this relation.
7941 : */
7942 528046 : set_cheapest(rel);
7943 528046 : }
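/*
 * The key step in apply_scanjoin_target_to_paths is how the scan/join target
 * is applied to each existing path: when the new target has the same
 * expressions as the path's current target, only the sortgroupref labels are
 * copied over; otherwise the path is wrapped in a projection that computes
 * the new target.  Below is a minimal standalone sketch of that choice;
 * PathStub and apply_target are invented for illustration and are not the
 * planner's API.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct PathStub
{
	const char *desc;			/* node label, e.g. "SeqScan" */
	const unsigned *sortgrouprefs;	/* target's sort/group labels */
	struct PathStub *subpath;	/* set when wrapped in a projection */
} PathStub;

static PathStub *
apply_target(PathStub *path, const unsigned *new_refs, bool tlist_same_exprs)
{
	if (tlist_same_exprs)
	{
		/* Cheap case: same expressions, so just adopt the new labels. */
		path->sortgrouprefs = new_refs;
		return path;
	}
	else
	{
		/* Otherwise interpose a projection computing the new target. */
		PathStub   *proj = calloc(1, sizeof(PathStub));

		proj->desc = "Projection";
		proj->sortgrouprefs = new_refs;
		proj->subpath = path;
		return proj;
	}
}

int
main(void)
{
	const unsigned refs[] = {1, 0};
	PathStub	scan = {"SeqScan", NULL, NULL};
	PathStub   *p = apply_target(&scan, refs, false);

	printf("%s over %s\n", p->desc, p->subpath->desc);	/* Projection over SeqScan */
	free(p);
	return 0;
}
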
7944 :
7945 : /*
7946 : * create_partitionwise_grouping_paths
7947 : *
7948 : * If the partition keys of the input relation are part of the GROUP BY clause,
7949 : * the rows belonging to a given group come from a single partition. This
7950 : * allows aggregation/grouping over a partitioned relation to be broken down
7951 : * into aggregation/grouping on each partition. This should be no worse, and
7952 : * often better, than the normal approach.
7953 : *
7954 : * However, if the GROUP BY clause does not contain all the partition keys,
7955 : * rows from a given group may be spread across multiple partitions. In that
7956 : * case, we perform partial aggregation for each group, append the results,
7957 : * and then finalize aggregation. This is less certain to win than the
7958 : * previous case. It may win if the PartialAggregate stage greatly reduces
7959 : * the number of groups, because fewer rows will pass through the Append node.
7960 : * It may lose if we have lots of small groups.
7961 : */
7962 : static void
7963 562 : create_partitionwise_grouping_paths(PlannerInfo *root,
7964 : RelOptInfo *input_rel,
7965 : RelOptInfo *grouped_rel,
7966 : RelOptInfo *partially_grouped_rel,
7967 : const AggClauseCosts *agg_costs,
7968 : grouping_sets_data *gd,
7969 : PartitionwiseAggregateType patype,
7970 : GroupPathExtraData *extra)
7971 : {
7972 562 : List *grouped_live_children = NIL;
7973 562 : List *partially_grouped_live_children = NIL;
7974 562 : PathTarget *target = grouped_rel->reltarget;
7975 562 : bool partial_grouping_valid = true;
7976 : int i;
7977 :
7978 : Assert(patype != PARTITIONWISE_AGGREGATE_NONE);
7979 : Assert(patype != PARTITIONWISE_AGGREGATE_PARTIAL ||
7980 : partially_grouped_rel != NULL);
7981 :
7982 : /* Add paths for partitionwise aggregation/grouping. */
7983 562 : i = -1;
7984 2056 : while ((i = bms_next_member(input_rel->live_parts, i)) >= 0)
7985 : {
7986 1494 : RelOptInfo *child_input_rel = input_rel->part_rels[i];
7987 : PathTarget *child_target;
7988 : AppendRelInfo **appinfos;
7989 : int nappinfos;
7990 : GroupPathExtraData child_extra;
7991 : RelOptInfo *child_grouped_rel;
7992 : RelOptInfo *child_partially_grouped_rel;
7993 :
7994 : Assert(child_input_rel != NULL);
7995 :
7996 : /* Dummy children can be ignored. */
7997 1494 : if (IS_DUMMY_REL(child_input_rel))
7998 0 : continue;
7999 :
8000 1494 : child_target = copy_pathtarget(target);
8001 :
8002 : /*
8003 : * Copy the given "extra" structure as is and then override the
8004 : * members specific to this child.
8005 : */
8006 1494 : memcpy(&child_extra, extra, sizeof(child_extra));
8007 :
8008 1494 : appinfos = find_appinfos_by_relids(root, child_input_rel->relids,
8009 : &nappinfos);
8010 :
8011 1494 : child_target->exprs = (List *)
8012 1494 : adjust_appendrel_attrs(root,
8013 1494 : (Node *) target->exprs,
8014 : nappinfos, appinfos);
8015 :
8016 : /* Translate havingQual and targetList. */
8017 1494 : child_extra.havingQual = (Node *)
8018 : adjust_appendrel_attrs(root,
8019 : extra->havingQual,
8020 : nappinfos, appinfos);
8021 1494 : child_extra.targetList = (List *)
8022 1494 : adjust_appendrel_attrs(root,
8023 1494 : (Node *) extra->targetList,
8024 : nappinfos, appinfos);
8025 :
8026 : /*
8027 : * extra->patype holds the value computed for our parent rel, while
8028 : * patype is the value computed for this relation. From the child's
8029 : * point of view this relation is its parent, so pass patype down.
8030 : */
8031 1494 : child_extra.patype = patype;
8032 :
8033 : /*
8034 : * Create grouping relation to hold fully aggregated grouping and/or
8035 : * aggregation paths for the child.
8036 : */
8037 1494 : child_grouped_rel = make_grouping_rel(root, child_input_rel,
8038 : child_target,
8039 1494 : extra->target_parallel_safe,
8040 : child_extra.havingQual);
8041 :
8042 : /* Create grouping paths for this child relation. */
8043 1494 : create_ordinary_grouping_paths(root, child_input_rel,
8044 : child_grouped_rel,
8045 : agg_costs, gd, &child_extra,
8046 : &child_partially_grouped_rel);
8047 :
8048 1494 : if (child_partially_grouped_rel)
8049 : {
8050 : partially_grouped_live_children =
8051 906 : lappend(partially_grouped_live_children,
8052 : child_partially_grouped_rel);
8053 : }
8054 : else
8055 588 : partial_grouping_valid = false;
8056 :
8057 1494 : if (patype == PARTITIONWISE_AGGREGATE_FULL)
8058 : {
8059 876 : set_cheapest(child_grouped_rel);
8060 876 : grouped_live_children = lappend(grouped_live_children,
8061 : child_grouped_rel);
8062 : }
8063 :
8064 1494 : pfree(appinfos);
8065 : }
8066 :
8067 : /*
8068 : * Try to create append paths for partially grouped children. For full
8069 : * partitionwise aggregation, we might have paths in the partial_pathlist
8070 : * if parallel aggregation is possible. For partial partitionwise
8071 : * aggregation, we may have paths in both pathlist and partial_pathlist.
8072 : *
8073 : * NB: We must have a partially grouped path for every child in order to
8074 : * generate a partially grouped path for this relation.
8075 : */
8076 562 : if (partially_grouped_rel && partial_grouping_valid)
8077 : {
8078 : Assert(partially_grouped_live_children != NIL);
8079 :
8080 350 : add_paths_to_append_rel(root, partially_grouped_rel,
8081 : partially_grouped_live_children);
8082 :
8083 : /*
8084 : * We need to call set_cheapest, since the finalization step will use the
8085 : * cheapest path from the rel.
8086 : */
8087 350 : if (partially_grouped_rel->pathlist)
8088 350 : set_cheapest(partially_grouped_rel);
8089 : }
8090 :
8091 : /* If possible, create append paths for fully grouped children. */
8092 562 : if (patype == PARTITIONWISE_AGGREGATE_FULL)
8093 : {
8094 : Assert(grouped_live_children != NIL);
8095 :
8096 320 : add_paths_to_append_rel(root, grouped_rel, grouped_live_children);
8097 : }
8098 562 : }
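/*
 * The header comment above distinguishes two cases: when the GROUP BY clause
 * covers all partition keys, every group is confined to one partition and a
 * full per-partition aggregate can be appended; otherwise only a partial
 * aggregate can be pushed below the Append, with a finalize step on top.
 * Below is a minimal standalone sketch of that decision; the enum, helper,
 * and column-name matching are invented for illustration (the real test,
 * group_by_has_partkey below, compares expressions and collations, not
 * column names).
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

typedef enum
{
	PWAGG_NONE,					/* no partitionwise aggregation */
	PWAGG_PARTIAL,				/* per-partition partial agg, then finalize */
	PWAGG_FULL					/* per-partition full agg, then append */
} PartitionwiseAggChoice;

/* Does every partition key column appear among the GROUP BY columns? */
static bool
groupby_covers_partkeys(const char *partkeys[], int npartkeys,
						const char *groupcols[], int ngroupcols)
{
	for (int i = 0; i < npartkeys; i++)
	{
		bool		found = false;

		for (int j = 0; j < ngroupcols; j++)
		{
			if (strcmp(partkeys[i], groupcols[j]) == 0)
			{
				found = true;
				break;
			}
		}
		if (!found)
			return false;
	}
	return true;
}

static PartitionwiseAggChoice
choose_partitionwise_agg(const char *partkeys[], int npartkeys,
						 const char *groupcols[], int ngroupcols,
						 bool partial_agg_possible)
{
	if (groupby_covers_partkeys(partkeys, npartkeys, groupcols, ngroupcols))
		return PWAGG_FULL;		/* groups cannot cross partitions */
	if (partial_agg_possible)
		return PWAGG_PARTIAL;	/* aggregate per partition, append, finalize */
	return PWAGG_NONE;
}

int
main(void)
{
	const char *partkeys[] = {"region"};
	const char *full_gb[] = {"region", "product"};
	const char *partial_gb[] = {"product"};

	printf("%d %d\n",
		   choose_partitionwise_agg(partkeys, 1, full_gb, 2, true),	/* PWAGG_FULL */
		   choose_partitionwise_agg(partkeys, 1, partial_gb, 1, true));	/* PWAGG_PARTIAL */
	return 0;
}
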
8099 :
8100 : /*
8101 : * group_by_has_partkey
8102 : *
8103 : * Returns true if all the partition keys of the given relation appear in the
8104 : * GROUP BY clause with matching collations, false otherwise.
8105 : */
8106 : static bool
8107 556 : group_by_has_partkey(RelOptInfo *input_rel,
8108 : List *targetList,
8109 : List *groupClause)
8110 : {
8111 556 : List *groupexprs = get_sortgrouplist_exprs(groupClause, targetList);
8112 556 : int cnt = 0;
8113 : int partnatts;
8114 :
8115 : /* Input relation should be partitioned. */
8116 : Assert(input_rel->part_scheme);
8117 :
8118 : /* Rule out early if there are no partition keys present. */
8119 556 : if (!input_rel->partexprs)
8120 0 : return false;
8121 :
8122 556 : partnatts = input_rel->part_scheme->partnatts;
8123 :
8124 912 : for (cnt = 0; cnt < partnatts; cnt++)
8125 : {
8126 592 : List *partexprs = input_rel->partexprs[cnt];
8127 : ListCell *lc;
8128 592 : bool found = false;
8129 :
8130 810 : foreach(lc, partexprs)
8131 : {
8132 : ListCell *lg;
8133 586 : Expr *partexpr = lfirst(lc);
8134 586 : Oid partcoll = input_rel->part_scheme->partcollation[cnt];
8135 :
8136 924 : foreach(lg, groupexprs)
8137 : {
8138 706 : Expr *groupexpr = lfirst(lg);
8139 706 : Oid groupcoll = exprCollation((Node *) groupexpr);
8140 :
8141 : /*
8142 : * Note: we can assume there is at most one RelabelType node;
8143 : * eval_const_expressions() will have simplified if more than
8144 : * one.
8145 : */
8146 706 : if (IsA(groupexpr, RelabelType))
8147 24 : groupexpr = ((RelabelType *) groupexpr)->arg;
8148 :
8149 706 : if (equal(groupexpr, partexpr))
8150 : {
8151 : /*
8152 : * Reject a match if the grouping collation does not match
8153 : * the partitioning collation.
8154 : */
8155 368 : if (OidIsValid(partcoll) && OidIsValid(groupcoll) &&
8156 : partcoll != groupcoll)
8157 12 : return false;
8158 :
8159 356 : found = true;
8160 356 : break;
8161 : }
8162 : }
8163 :
8164 574 : if (found)
8165 356 : break;
8166 : }
8167 :
8168 : /*
8169 : * If none of the partition key expressions match any of the
8170 : * GROUP BY expressions, return false.
8171 : */
8172 580 : if (!found)
8173 224 : return false;
8174 : }
8175 :
8176 320 : return true;
8177 : }
8178 :
8179 : /*
8180 : * generate_setop_child_grouplist
8181 : * Build a SortGroupClause list defining the sort/grouping properties
8182 : * of the child of a set operation.
8183 : *
8184 : * This is similar to generate_setop_grouplist() but differs in that the setop
8185 : * child query's targetlist entries may already have a tleSortGroupRef
8186 : * assigned for other purposes, such as GROUP BYs. Here we keep the
8187 : * SortGroupClause list in the same order as 'op' groupClauses and just adjust
8188 : * the tleSortGroupRef to reference the TargetEntry's 'ressortgroupref'.
8189 : */
8190 : static List *
8191 10836 : generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist)
8192 : {
8193 10836 : List *grouplist = copyObject(op->groupClauses);
8194 : ListCell *lg;
8195 : ListCell *lt;
8196 :
8197 10836 : lg = list_head(grouplist);
8198 42552 : foreach(lt, targetlist)
8199 : {
8200 31716 : TargetEntry *tle = (TargetEntry *) lfirst(lt);
8201 : SortGroupClause *sgc;
8202 :
8203 : /* resjunk columns could have sortgrouprefs. Leave these alone */
8204 31716 : if (tle->resjunk)
8205 0 : continue;
8206 :
8207 : /* we expect every non-resjunk target to have a SortGroupClause */
8208 : Assert(lg != NULL);
8209 31716 : sgc = (SortGroupClause *) lfirst(lg);
8210 31716 : lg = lnext(grouplist, lg);
8211 :
8212 : /* assign a tleSortGroupRef, or reuse the existing one */
8213 31716 : sgc->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
8214 : }
8215 : Assert(lg == NULL);
8216 10836 : return grouplist;
8217 : }
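/*
 * generate_setop_child_grouplist pairs each non-resjunk targetlist entry
 * with the corresponding SortGroupClause and points the clause at the
 * entry's (possibly pre-existing) sortgroupref.  Below is a minimal
 * standalone sketch of that lockstep walk; the stub types and assign_refs
 * are invented for illustration and are not the parser's structures.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct
{
	const char *name;
	bool		resjunk;		/* junk column: no group clause for it */
	unsigned	ressortgroupref;	/* 0 if not yet assigned */
} TleStub;

typedef struct
{
	unsigned	tleSortGroupRef;
} SgcStub;

static void
assign_refs(TleStub *tlist, int ntlist, SgcStub *clauses, int nclauses)
{
	unsigned	next_ref = 1;
	int			c = 0;

	/* Start fresh refs above anything already assigned elsewhere. */
	for (int i = 0; i < ntlist; i++)
		if (tlist[i].ressortgroupref >= next_ref)
			next_ref = tlist[i].ressortgroupref + 1;

	for (int i = 0; i < ntlist; i++)
	{
		if (tlist[i].resjunk)
			continue;			/* leave junk columns alone */

		assert(c < nclauses);	/* every real column has a group clause */
		if (tlist[i].ressortgroupref == 0)
			tlist[i].ressortgroupref = next_ref++;
		clauses[c++].tleSortGroupRef = tlist[i].ressortgroupref;
	}
	assert(c == nclauses);
}

int
main(void)
{
	TleStub		tlist[] = {
		{"a", false, 2},		/* already labelled, e.g. for GROUP BY */
		{"junk", true, 0},		/* resjunk: skipped */
		{"b", false, 0},		/* gets a fresh ref */
	};
	SgcStub		clauses[2];

	assign_refs(tlist, 3, clauses, 2);
	printf("%u %u\n", clauses[0].tleSortGroupRef, clauses[1].tleSortGroupRef);	/* 2 3 */
	return 0;
}
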
|