Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * planner.c
4 : * The query optimizer external interface.
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/plan/planner.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include <limits.h>
19 : #include <math.h>
20 :
21 : #include "access/genam.h"
22 : #include "access/parallel.h"
23 : #include "access/sysattr.h"
24 : #include "access/table.h"
25 : #include "catalog/pg_aggregate.h"
26 : #include "catalog/pg_constraint.h"
27 : #include "catalog/pg_inherits.h"
28 : #include "catalog/pg_proc.h"
29 : #include "catalog/pg_type.h"
30 : #include "executor/executor.h"
31 : #include "foreign/fdwapi.h"
32 : #include "jit/jit.h"
33 : #include "lib/bipartite_match.h"
34 : #include "lib/knapsack.h"
35 : #include "miscadmin.h"
36 : #include "nodes/makefuncs.h"
37 : #include "nodes/nodeFuncs.h"
38 : #ifdef OPTIMIZER_DEBUG
39 : #include "nodes/print.h"
40 : #endif
41 : #include "nodes/supportnodes.h"
42 : #include "optimizer/appendinfo.h"
43 : #include "optimizer/clauses.h"
44 : #include "optimizer/cost.h"
45 : #include "optimizer/optimizer.h"
46 : #include "optimizer/paramassign.h"
47 : #include "optimizer/pathnode.h"
48 : #include "optimizer/paths.h"
49 : #include "optimizer/plancat.h"
50 : #include "optimizer/planmain.h"
51 : #include "optimizer/planner.h"
52 : #include "optimizer/prep.h"
53 : #include "optimizer/subselect.h"
54 : #include "optimizer/tlist.h"
55 : #include "parser/analyze.h"
56 : #include "parser/parse_agg.h"
57 : #include "parser/parse_clause.h"
58 : #include "parser/parse_relation.h"
59 : #include "parser/parsetree.h"
60 : #include "partitioning/partdesc.h"
61 : #include "utils/lsyscache.h"
62 : #include "utils/rel.h"
63 : #include "utils/selfuncs.h"
64 :
65 : /* GUC parameters */
66 : double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
67 : int debug_parallel_query = DEBUG_PARALLEL_OFF;
68 : bool parallel_leader_participation = true;
69 :
70 : /* Hook for plugins to get control in planner() */
71 : planner_hook_type planner_hook = NULL;
72 :
73 : /* Hook for plugins to get control when grouping_planner() plans upper rels */
74 : create_upper_paths_hook_type create_upper_paths_hook = NULL;
75 :
76 :
77 : /* Expression kind codes for preprocess_expression */
78 : #define EXPRKIND_QUAL 0
79 : #define EXPRKIND_TARGET 1
80 : #define EXPRKIND_RTFUNC 2
81 : #define EXPRKIND_RTFUNC_LATERAL 3
82 : #define EXPRKIND_VALUES 4
83 : #define EXPRKIND_VALUES_LATERAL 5
84 : #define EXPRKIND_LIMIT 6
85 : #define EXPRKIND_APPINFO 7
86 : #define EXPRKIND_PHV 8
87 : #define EXPRKIND_TABLESAMPLE 9
88 : #define EXPRKIND_ARBITER_ELEM 10
89 : #define EXPRKIND_TABLEFUNC 11
90 : #define EXPRKIND_TABLEFUNC_LATERAL 12
91 :
92 : /*
93 : * Data specific to grouping sets
94 : */
95 : typedef struct
96 : {
97 : List *rollups;
98 : List *hash_sets_idx;
99 : double dNumHashGroups;
100 : bool any_hashable;
101 : Bitmapset *unsortable_refs;
102 : Bitmapset *unhashable_refs;
103 : List *unsortable_sets;
104 : int *tleref_to_colnum_map;
105 : } grouping_sets_data;
106 :
107 : /*
108 : * Temporary structure for use during WindowClause reordering in order to be
109 : * able to sort WindowClauses on partitioning/ordering prefix.
110 : */
111 : typedef struct
112 : {
113 : WindowClause *wc;
114 : List *uniqueOrder; /* A List of unique ordering/partitioning
115 : * clauses per Window */
116 : } WindowClauseSortData;
117 :
118 : /* Passthrough data for standard_qp_callback */
119 : typedef struct
120 : {
121 : List *activeWindows; /* active windows, if any */
122 : grouping_sets_data *gset_data; /* grouping sets data, if any */
123 : SetOperationStmt *setop; /* parent set operation or NULL if not a
124 : * subquery belonging to a set operation */
125 : } standard_qp_extra;
126 :
127 : /* Local functions */
128 : static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
129 : static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
130 : static void grouping_planner(PlannerInfo *root, double tuple_fraction,
131 : SetOperationStmt *setops);
132 : static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
133 : static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
134 : int *tleref_to_colnum_map);
135 : static void preprocess_rowmarks(PlannerInfo *root);
136 : static double preprocess_limit(PlannerInfo *root,
137 : double tuple_fraction,
138 : int64 *offset_est, int64 *count_est);
139 : static void remove_useless_groupby_columns(PlannerInfo *root);
140 : static List *groupclause_apply_groupingset(PlannerInfo *root, List *force);
141 : static List *extract_rollup_sets(List *groupingSets);
142 : static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
143 : static void standard_qp_callback(PlannerInfo *root, void *extra);
144 : static double get_number_of_groups(PlannerInfo *root,
145 : double path_rows,
146 : grouping_sets_data *gd,
147 : List *target_list);
148 : static RelOptInfo *create_grouping_paths(PlannerInfo *root,
149 : RelOptInfo *input_rel,
150 : PathTarget *target,
151 : bool target_parallel_safe,
152 : grouping_sets_data *gd);
153 : static bool is_degenerate_grouping(PlannerInfo *root);
154 : static void create_degenerate_grouping_paths(PlannerInfo *root,
155 : RelOptInfo *input_rel,
156 : RelOptInfo *grouped_rel);
157 : static RelOptInfo *make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
158 : PathTarget *target, bool target_parallel_safe,
159 : Node *havingQual);
160 : static void create_ordinary_grouping_paths(PlannerInfo *root,
161 : RelOptInfo *input_rel,
162 : RelOptInfo *grouped_rel,
163 : const AggClauseCosts *agg_costs,
164 : grouping_sets_data *gd,
165 : GroupPathExtraData *extra,
166 : RelOptInfo **partially_grouped_rel_p);
167 : static void consider_groupingsets_paths(PlannerInfo *root,
168 : RelOptInfo *grouped_rel,
169 : Path *path,
170 : bool is_sorted,
171 : bool can_hash,
172 : grouping_sets_data *gd,
173 : const AggClauseCosts *agg_costs,
174 : double dNumGroups);
175 : static RelOptInfo *create_window_paths(PlannerInfo *root,
176 : RelOptInfo *input_rel,
177 : PathTarget *input_target,
178 : PathTarget *output_target,
179 : bool output_target_parallel_safe,
180 : WindowFuncLists *wflists,
181 : List *activeWindows);
182 : static void create_one_window_path(PlannerInfo *root,
183 : RelOptInfo *window_rel,
184 : Path *path,
185 : PathTarget *input_target,
186 : PathTarget *output_target,
187 : WindowFuncLists *wflists,
188 : List *activeWindows);
189 : static RelOptInfo *create_distinct_paths(PlannerInfo *root,
190 : RelOptInfo *input_rel,
191 : PathTarget *target);
192 : static void create_partial_distinct_paths(PlannerInfo *root,
193 : RelOptInfo *input_rel,
194 : RelOptInfo *final_distinct_rel,
195 : PathTarget *target);
196 : static RelOptInfo *create_final_distinct_paths(PlannerInfo *root,
197 : RelOptInfo *input_rel,
198 : RelOptInfo *distinct_rel);
199 : static RelOptInfo *create_ordered_paths(PlannerInfo *root,
200 : RelOptInfo *input_rel,
201 : PathTarget *target,
202 : bool target_parallel_safe,
203 : double limit_tuples);
204 : static PathTarget *make_group_input_target(PlannerInfo *root,
205 : PathTarget *final_target);
206 : static PathTarget *make_partial_grouping_target(PlannerInfo *root,
207 : PathTarget *grouping_target,
208 : Node *havingQual);
209 : static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
210 : static void optimize_window_clauses(PlannerInfo *root,
211 : WindowFuncLists *wflists);
212 : static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
213 : static PathTarget *make_window_input_target(PlannerInfo *root,
214 : PathTarget *final_target,
215 : List *activeWindows);
216 : static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
217 : List *tlist);
218 : static PathTarget *make_sort_input_target(PlannerInfo *root,
219 : PathTarget *final_target,
220 : bool *have_postponed_srfs);
221 : static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
222 : List *targets, List *targets_contain_srfs);
223 : static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
224 : RelOptInfo *grouped_rel,
225 : RelOptInfo *partially_grouped_rel,
226 : const AggClauseCosts *agg_costs,
227 : grouping_sets_data *gd,
228 : double dNumGroups,
229 : GroupPathExtraData *extra);
230 : static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root,
231 : RelOptInfo *grouped_rel,
232 : RelOptInfo *input_rel,
233 : grouping_sets_data *gd,
234 : GroupPathExtraData *extra,
235 : bool force_rel_creation);
236 : static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel);
237 : static bool can_partial_agg(PlannerInfo *root);
238 : static void apply_scanjoin_target_to_paths(PlannerInfo *root,
239 : RelOptInfo *rel,
240 : List *scanjoin_targets,
241 : List *scanjoin_targets_contain_srfs,
242 : bool scanjoin_target_parallel_safe,
243 : bool tlist_same_exprs);
244 : static void create_partitionwise_grouping_paths(PlannerInfo *root,
245 : RelOptInfo *input_rel,
246 : RelOptInfo *grouped_rel,
247 : RelOptInfo *partially_grouped_rel,
248 : const AggClauseCosts *agg_costs,
249 : grouping_sets_data *gd,
250 : PartitionwiseAggregateType patype,
251 : GroupPathExtraData *extra);
252 : static bool group_by_has_partkey(RelOptInfo *input_rel,
253 : List *targetList,
254 : List *groupClause);
255 : static int common_prefix_cmp(const void *a, const void *b);
256 : static List *generate_setop_child_grouplist(SetOperationStmt *op,
257 : List *targetlist);
258 :
259 :
260 : /*****************************************************************************
261 : *
262 : * Query optimizer entry point
263 : *
264 : * To support loadable plugins that monitor or modify planner behavior,
265 : * we provide a hook variable that lets a plugin get control before and
266 : * after the standard planning process. The plugin would normally call
267 : * standard_planner().
268 : *
269 : * Note to plugin authors: standard_planner() scribbles on its Query input,
270 : * so you'd better copy that data structure if you want to plan more than once.
271 : *
272 : *****************************************************************************/
273 : PlannedStmt *
274 433300 : planner(Query *parse, const char *query_string, int cursorOptions,
275 : ParamListInfo boundParams)
276 : {
277 : PlannedStmt *result;
278 :
279 433300 : if (planner_hook)
280 81520 : result = (*planner_hook) (parse, query_string, cursorOptions, boundParams);
281 : else
282 351780 : result = standard_planner(parse, query_string, cursorOptions, boundParams);
283 429498 : return result;
284 : }
285 :
286 : PlannedStmt *
287 433300 : standard_planner(Query *parse, const char *query_string, int cursorOptions,
288 : ParamListInfo boundParams)
289 : {
290 : PlannedStmt *result;
291 : PlannerGlobal *glob;
292 : double tuple_fraction;
293 : PlannerInfo *root;
294 : RelOptInfo *final_rel;
295 : Path *best_path;
296 : Plan *top_plan;
297 : ListCell *lp,
298 : *lr;
299 :
300 : /*
301 : * Set up global state for this planner invocation. This data is needed
302 : * across all levels of sub-Query that might exist in the given command,
303 : * so we keep it in a separate struct that's linked to by each per-Query
304 : * PlannerInfo.
305 : */
306 433300 : glob = makeNode(PlannerGlobal);
307 :
308 433300 : glob->boundParams = boundParams;
309 433300 : glob->subplans = NIL;
310 433300 : glob->subpaths = NIL;
311 433300 : glob->subroots = NIL;
312 433300 : glob->rewindPlanIDs = NULL;
313 433300 : glob->finalrtable = NIL;
314 433300 : glob->finalrteperminfos = NIL;
315 433300 : glob->finalrowmarks = NIL;
316 433300 : glob->resultRelations = NIL;
317 433300 : glob->appendRelations = NIL;
318 433300 : glob->relationOids = NIL;
319 433300 : glob->invalItems = NIL;
320 433300 : glob->paramExecTypes = NIL;
321 433300 : glob->lastPHId = 0;
322 433300 : glob->lastRowMarkId = 0;
323 433300 : glob->lastPlanNodeId = 0;
324 433300 : glob->transientPlan = false;
325 433300 : glob->dependsOnRole = false;
326 :
327 : /*
328 : * Assess whether it's feasible to use parallel mode for this query. We
329 : * can't do this in a standalone backend, or if the command will try to
330 : * modify any data, or if this is a cursor operation, or if GUCs are set
331 : * to values that don't permit parallelism, or if parallel-unsafe
332 : * functions are present in the query tree.
333 : *
334 : * (Note that we do allow CREATE TABLE AS, SELECT INTO, and CREATE
335 : * MATERIALIZED VIEW to use parallel plans, but this is safe only because
336 : * the command is writing into a completely new table which workers won't
337 : * be able to see. If the workers could see the table, the fact that
338 : * group locking would cause them to ignore the leader's heavyweight GIN
339 : * page locks would make this unsafe. We'll have to fix that somehow if
340 : * we want to allow parallel inserts in general; updates and deletes have
341 : * additional problems especially around combo CIDs.)
342 : *
343 : * For now, we don't try to use parallel mode if we're running inside a
344 : * parallel worker. We might eventually be able to relax this
345 : * restriction, but for now it seems best not to have parallel workers
346 : * trying to create their own parallel workers.
347 : */
348 433300 : if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
349 408618 : IsUnderPostmaster &&
350 408618 : parse->commandType == CMD_SELECT &&
351 323952 : !parse->hasModifyingCTE &&
352 323812 : max_parallel_workers_per_gather > 0 &&
353 323350 : !IsParallelWorker())
354 : {
355 : /* all the cheap tests pass, so scan the query tree */
356 323298 : glob->maxParallelHazard = max_parallel_hazard(parse);
357 323298 : glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE);
358 : }
359 : else
360 : {
361 : /* skip the query tree scan, just assume it's unsafe */
362 110002 : glob->maxParallelHazard = PROPARALLEL_UNSAFE;
363 110002 : glob->parallelModeOK = false;
364 : }
365 :
366 : /*
367 : * glob->parallelModeNeeded is normally set to false here and changed to
368 : * true during plan creation if a Gather or Gather Merge plan is actually
369 : * created (cf. create_gather_plan, create_gather_merge_plan).
370 : *
371 : * However, if debug_parallel_query = on or debug_parallel_query =
372 : * regress, then we impose parallel mode whenever it's safe to do so, even
373 : * if the final plan doesn't use parallelism. It's not safe to do so if
374 : * the query contains anything parallel-unsafe; parallelModeOK will be
375 : * false in that case. Note that parallelModeOK can't change after this
376 : * point. Otherwise, everything in the query is either parallel-safe or
377 : * parallel-restricted, and in either case it should be OK to impose
378 : * parallel-mode restrictions. If that ends up breaking something, then
379 : * either some function the user included in the query is incorrectly
380 : * labeled as parallel-safe or parallel-restricted when in reality it's
381 : * parallel-unsafe, or else the query planner itself has a bug.
382 : */
383 702256 : glob->parallelModeNeeded = glob->parallelModeOK &&
384 268956 : (debug_parallel_query != DEBUG_PARALLEL_OFF);
385 :
386 : /* Determine what fraction of the plan is likely to be scanned */
387 433300 : if (cursorOptions & CURSOR_OPT_FAST_PLAN)
388 : {
389 : /*
390 : * We have no real idea how many tuples the user will ultimately FETCH
391 : * from a cursor, but it is often the case that he doesn't want 'em
392 : * all, or would prefer a fast-start plan anyway so that he can
393 : * process some of the tuples sooner. Use a GUC parameter to decide
394 : * what fraction to optimize for.
395 : */
396 2826 : tuple_fraction = cursor_tuple_fraction;
397 :
398 : /*
399 : * We document cursor_tuple_fraction as simply being a fraction, which
400 : * means the edge cases 0 and 1 have to be treated specially here. We
401 : * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
402 : */
403 2826 : if (tuple_fraction >= 1.0)
404 0 : tuple_fraction = 0.0;
405 2826 : else if (tuple_fraction <= 0.0)
406 0 : tuple_fraction = 1e-10;
407 : }
408 : else
409 : {
410 : /* Default assumption is we need all the tuples */
411 430474 : tuple_fraction = 0.0;
412 : }
413 :
414 : /* primary planning entry point (may recurse for subqueries) */
415 433300 : root = subquery_planner(glob, parse, NULL, false, tuple_fraction, NULL);
416 :
417 : /* Select best Path and turn it into a Plan */
418 429676 : final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
419 429676 : best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
420 :
421 429676 : top_plan = create_plan(root, best_path);
422 :
423 : /*
424 : * If creating a plan for a scrollable cursor, make sure it can run
425 : * backwards on demand. Add a Material node at the top at need.
426 : */
427 429498 : if (cursorOptions & CURSOR_OPT_SCROLL)
428 : {
429 264 : if (!ExecSupportsBackwardScan(top_plan))
430 30 : top_plan = materialize_finished_plan(top_plan);
431 : }
432 :
433 : /*
434 : * Optionally add a Gather node for testing purposes, provided this is
435 : * actually a safe thing to do.
436 : *
437 : * We can add Gather even when top_plan has parallel-safe initPlans, but
438 : * then we have to move the initPlans to the Gather node because of
439 : * SS_finalize_plan's limitations. That would cause cosmetic breakage of
440 : * regression tests when debug_parallel_query = regress, because initPlans
441 : * that would normally appear on the top_plan move to the Gather, causing
442 : * them to disappear from EXPLAIN output. That doesn't seem worth kluging
443 : * EXPLAIN to hide, so skip it when debug_parallel_query = regress.
444 : */
445 429498 : if (debug_parallel_query != DEBUG_PARALLEL_OFF &&
446 116 : top_plan->parallel_safe &&
447 36 : (top_plan->initPlan == NIL ||
448 0 : debug_parallel_query != DEBUG_PARALLEL_REGRESS))
449 : {
450 36 : Gather *gather = makeNode(Gather);
451 : Cost initplan_cost;
452 : bool unsafe_initplans;
453 :
454 36 : gather->plan.targetlist = top_plan->targetlist;
455 36 : gather->plan.qual = NIL;
456 36 : gather->plan.lefttree = top_plan;
457 36 : gather->plan.righttree = NULL;
458 36 : gather->num_workers = 1;
459 36 : gather->single_copy = true;
460 36 : gather->invisible = (debug_parallel_query == DEBUG_PARALLEL_REGRESS);
461 :
462 : /* Transfer any initPlans to the new top node */
463 36 : gather->plan.initPlan = top_plan->initPlan;
464 36 : top_plan->initPlan = NIL;
465 :
466 : /*
467 : * Since this Gather has no parallel-aware descendants to signal to,
468 : * we don't need a rescan Param.
469 : */
470 36 : gather->rescan_param = -1;
471 :
472 : /*
473 : * Ideally we'd use cost_gather here, but setting up dummy path data
474 : * to satisfy it doesn't seem much cleaner than knowing what it does.
475 : */
476 36 : gather->plan.startup_cost = top_plan->startup_cost +
477 : parallel_setup_cost;
478 36 : gather->plan.total_cost = top_plan->total_cost +
479 36 : parallel_setup_cost + parallel_tuple_cost * top_plan->plan_rows;
480 36 : gather->plan.plan_rows = top_plan->plan_rows;
481 36 : gather->plan.plan_width = top_plan->plan_width;
482 36 : gather->plan.parallel_aware = false;
483 36 : gather->plan.parallel_safe = false;
484 :
485 : /*
486 : * Delete the initplans' cost from top_plan. We needn't add it to the
487 : * Gather node, since the above coding already included it there.
488 : */
489 36 : SS_compute_initplan_cost(gather->plan.initPlan,
490 : &initplan_cost, &unsafe_initplans);
491 36 : top_plan->startup_cost -= initplan_cost;
492 36 : top_plan->total_cost -= initplan_cost;
493 :
494 : /* use parallel mode for parallel plans. */
495 36 : root->glob->parallelModeNeeded = true;
496 :
497 36 : top_plan = &gather->plan;
498 : }
499 :
500 : /*
501 : * If any Params were generated, run through the plan tree and compute
502 : * each plan node's extParam/allParam sets. Ideally we'd merge this into
503 : * set_plan_references' tree traversal, but for now it has to be separate
504 : * because we need to visit subplans before not after main plan.
505 : */
506 429498 : if (glob->paramExecTypes != NIL)
507 : {
508 : Assert(list_length(glob->subplans) == list_length(glob->subroots));
509 182442 : forboth(lp, glob->subplans, lr, glob->subroots)
510 : {
511 36138 : Plan *subplan = (Plan *) lfirst(lp);
512 36138 : PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
513 :
514 36138 : SS_finalize_plan(subroot, subplan);
515 : }
516 146304 : SS_finalize_plan(root, top_plan);
517 : }
518 :
519 : /* final cleanup of the plan */
520 : Assert(glob->finalrtable == NIL);
521 : Assert(glob->finalrteperminfos == NIL);
522 : Assert(glob->finalrowmarks == NIL);
523 : Assert(glob->resultRelations == NIL);
524 : Assert(glob->appendRelations == NIL);
525 429498 : top_plan = set_plan_references(root, top_plan);
526 : /* ... and the subplans (both regular subplans and initplans) */
527 : Assert(list_length(glob->subplans) == list_length(glob->subroots));
528 465636 : forboth(lp, glob->subplans, lr, glob->subroots)
529 : {
530 36138 : Plan *subplan = (Plan *) lfirst(lp);
531 36138 : PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
532 :
533 36138 : lfirst(lp) = set_plan_references(subroot, subplan);
534 : }
535 :
536 : /* build the PlannedStmt result */
537 429498 : result = makeNode(PlannedStmt);
538 :
539 429498 : result->commandType = parse->commandType;
540 429498 : result->queryId = parse->queryId;
541 429498 : result->hasReturning = (parse->returningList != NIL);
542 429498 : result->hasModifyingCTE = parse->hasModifyingCTE;
543 429498 : result->canSetTag = parse->canSetTag;
544 429498 : result->transientPlan = glob->transientPlan;
545 429498 : result->dependsOnRole = glob->dependsOnRole;
546 429498 : result->parallelModeNeeded = glob->parallelModeNeeded;
547 429498 : result->planTree = top_plan;
548 429498 : result->rtable = glob->finalrtable;
549 429498 : result->permInfos = glob->finalrteperminfos;
550 429498 : result->resultRelations = glob->resultRelations;
551 429498 : result->appendRelations = glob->appendRelations;
552 429498 : result->subplans = glob->subplans;
553 429498 : result->rewindPlanIDs = glob->rewindPlanIDs;
554 429498 : result->rowMarks = glob->finalrowmarks;
555 429498 : result->relationOids = glob->relationOids;
556 429498 : result->invalItems = glob->invalItems;
557 429498 : result->paramExecTypes = glob->paramExecTypes;
558 : /* utilityStmt should be null, but we might as well copy it */
559 429498 : result->utilityStmt = parse->utilityStmt;
560 429498 : result->stmt_location = parse->stmt_location;
561 429498 : result->stmt_len = parse->stmt_len;
562 :
563 429498 : result->jitFlags = PGJIT_NONE;
564 429498 : if (jit_enabled && jit_above_cost >= 0 &&
565 429082 : top_plan->total_cost > jit_above_cost)
566 : {
567 1344 : result->jitFlags |= PGJIT_PERFORM;
568 :
569 : /*
570 : * Decide how much effort should be put into generating better code.
571 : */
572 1344 : if (jit_optimize_above_cost >= 0 &&
573 1344 : top_plan->total_cost > jit_optimize_above_cost)
574 964 : result->jitFlags |= PGJIT_OPT3;
575 1344 : if (jit_inline_above_cost >= 0 &&
576 1344 : top_plan->total_cost > jit_inline_above_cost)
577 964 : result->jitFlags |= PGJIT_INLINE;
578 :
579 : /*
580 : * Decide which operations should be JITed.
581 : */
582 1344 : if (jit_expressions)
583 1344 : result->jitFlags |= PGJIT_EXPR;
584 1344 : if (jit_tuple_deforming)
585 1344 : result->jitFlags |= PGJIT_DEFORM;
586 : }
587 :
588 429498 : if (glob->partition_directory != NULL)
589 11154 : DestroyPartitionDirectory(glob->partition_directory);
590 :
591 429498 : return result;
592 : }
593 :
594 :
595 : /*--------------------
596 : * subquery_planner
597 : * Invokes the planner on a subquery. We recurse to here for each
598 : * sub-SELECT found in the query tree.
599 : *
600 : * glob is the global state for the current planner run.
601 : * parse is the querytree produced by the parser & rewriter.
602 : * parent_root is the immediate parent Query's info (NULL at the top level).
603 : * hasRecursion is true if this is a recursive WITH query.
604 : * tuple_fraction is the fraction of tuples we expect will be retrieved.
605 : * tuple_fraction is interpreted as explained for grouping_planner, below.
606 : * setops is used for set operation subqueries to provide the subquery with
607 : * the context in which it's being used so that Paths correctly sorted for the
608 : * set operation can be generated. NULL when not planning a set operation
609 : * child.
610 : *
611 : * Basically, this routine does the stuff that should only be done once
612 : * per Query object. It then calls grouping_planner. At one time,
613 : * grouping_planner could be invoked recursively on the same Query object;
614 : * that's not currently true, but we keep the separation between the two
615 : * routines anyway, in case we need it again someday.
616 : *
617 : * subquery_planner will be called recursively to handle sub-Query nodes
618 : * found within the query's expressions and rangetable.
619 : *
620 : * Returns the PlannerInfo struct ("root") that contains all data generated
621 : * while planning the subquery. In particular, the Path(s) attached to
622 : * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
623 : * cheapest way(s) to implement the query. The top level will select the
624 : * best Path and pass it through createplan.c to produce a finished Plan.
625 : *--------------------
626 : */
627 : PlannerInfo *
628 490612 : subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
629 : bool hasRecursion, double tuple_fraction,
630 : SetOperationStmt *setops)
631 : {
632 : PlannerInfo *root;
633 : List *newWithCheckOptions;
634 : List *newHaving;
635 : bool hasOuterJoins;
636 : bool hasResultRTEs;
637 : RelOptInfo *final_rel;
638 : ListCell *l;
639 :
640 : /* Create a PlannerInfo data structure for this subquery */
641 490612 : root = makeNode(PlannerInfo);
642 490612 : root->parse = parse;
643 490612 : root->glob = glob;
644 490612 : root->query_level = parent_root ? parent_root->query_level + 1 : 1;
645 490612 : root->parent_root = parent_root;
646 490612 : root->plan_params = NIL;
647 490612 : root->outer_params = NULL;
648 490612 : root->planner_cxt = CurrentMemoryContext;
649 490612 : root->init_plans = NIL;
650 490612 : root->cte_plan_ids = NIL;
651 490612 : root->multiexpr_params = NIL;
652 490612 : root->join_domains = NIL;
653 490612 : root->eq_classes = NIL;
654 490612 : root->ec_merging_done = false;
655 490612 : root->last_rinfo_serial = 0;
656 490612 : root->all_result_relids =
657 490612 : parse->resultRelation ? bms_make_singleton(parse->resultRelation) : NULL;
658 490612 : root->leaf_result_relids = NULL; /* we'll find out leaf-ness later */
659 490612 : root->append_rel_list = NIL;
660 490612 : root->row_identity_vars = NIL;
661 490612 : root->rowMarks = NIL;
662 490612 : memset(root->upper_rels, 0, sizeof(root->upper_rels));
663 490612 : memset(root->upper_targets, 0, sizeof(root->upper_targets));
664 490612 : root->processed_groupClause = NIL;
665 490612 : root->processed_distinctClause = NIL;
666 490612 : root->processed_tlist = NIL;
667 490612 : root->update_colnos = NIL;
668 490612 : root->grouping_map = NULL;
669 490612 : root->minmax_aggs = NIL;
670 490612 : root->qual_security_level = 0;
671 490612 : root->hasPseudoConstantQuals = false;
672 490612 : root->hasAlternativeSubPlans = false;
673 490612 : root->placeholdersFrozen = false;
674 490612 : root->hasRecursion = hasRecursion;
675 490612 : if (hasRecursion)
676 806 : root->wt_param_id = assign_special_exec_param(root);
677 : else
678 489806 : root->wt_param_id = -1;
679 490612 : root->non_recursive_path = NULL;
680 490612 : root->partColsUpdated = false;
681 :
682 : /*
683 : * Create the top-level join domain. This won't have valid contents until
684 : * deconstruct_jointree fills it in, but the node needs to exist before
685 : * that so we can build EquivalenceClasses referencing it.
686 : */
687 490612 : root->join_domains = list_make1(makeNode(JoinDomain));
688 :
689 : /*
690 : * If there is a WITH list, process each WITH query and either convert it
691 : * to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it.
692 : */
693 490612 : if (parse->cteList)
694 2388 : SS_process_ctes(root);
695 :
696 : /*
697 : * If it's a MERGE command, transform the joinlist as appropriate.
698 : */
699 490606 : transform_MERGE_to_join(parse);
700 :
701 : /*
702 : * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so
703 : * that we don't need so many special cases to deal with that situation.
704 : */
705 490606 : replace_empty_jointree(parse);
706 :
707 : /*
708 : * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
709 : * to transform them into joins. Note that this step does not descend
710 : * into subqueries; if we pull up any subqueries below, their SubLinks are
711 : * processed just before pulling them up.
712 : */
713 490606 : if (parse->hasSubLinks)
714 28074 : pull_up_sublinks(root);
715 :
716 : /*
717 : * Scan the rangetable for function RTEs, do const-simplification on them,
718 : * and then inline them if possible (producing subqueries that might get
719 : * pulled up next). Recursion issues here are handled in the same way as
720 : * for SubLinks.
721 : */
722 490606 : preprocess_function_rtes(root);
723 :
724 : /*
725 : * Check to see if any subqueries in the jointree can be merged into this
726 : * query.
727 : */
728 490600 : pull_up_subqueries(root);
729 :
730 : /*
731 : * If this is a simple UNION ALL query, flatten it into an appendrel. We
732 : * do this now because it requires applying pull_up_subqueries to the leaf
733 : * queries of the UNION ALL, which weren't touched above because they
734 : * weren't referenced by the jointree (they will be after we do this).
735 : */
736 490594 : if (parse->setOperations)
737 5568 : flatten_simple_union_all(root);
738 :
739 : /*
740 : * Survey the rangetable to see what kinds of entries are present. We can
741 : * skip some later processing if relevant SQL features are not used; for
742 : * example if there are no JOIN RTEs we can avoid the expense of doing
743 : * flatten_join_alias_vars(). This must be done after we have finished
744 : * adding rangetable entries, of course. (Note: actually, processing of
745 : * inherited or partitioned rels can cause RTEs for their child tables to
746 : * get added later; but those must all be RTE_RELATION entries, so they
747 : * don't invalidate the conclusions drawn here.)
748 : */
749 490594 : root->hasJoinRTEs = false;
750 490594 : root->hasLateralRTEs = false;
751 490594 : hasOuterJoins = false;
752 490594 : hasResultRTEs = false;
753 1301652 : foreach(l, parse->rtable)
754 : {
755 811058 : RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
756 :
757 811058 : switch (rte->rtekind)
758 : {
759 418282 : case RTE_RELATION:
760 418282 : if (rte->inh)
761 : {
762 : /*
763 : * Check to see if the relation actually has any children;
764 : * if not, clear the inh flag so we can treat it as a
765 : * plain base relation.
766 : *
767 : * Note: this could give a false-positive result, if the
768 : * rel once had children but no longer does. We used to
769 : * be able to clear rte->inh later on when we discovered
770 : * that, but no more; we have to handle such cases as
771 : * full-fledged inheritance.
772 : */
773 334054 : rte->inh = has_subclass(rte->relid);
774 : }
775 418282 : break;
776 75758 : case RTE_JOIN:
777 75758 : root->hasJoinRTEs = true;
778 75758 : if (IS_OUTER_JOIN(rte->jointype))
779 44678 : hasOuterJoins = true;
780 75758 : break;
781 212396 : case RTE_RESULT:
782 212396 : hasResultRTEs = true;
783 212396 : break;
784 104622 : default:
785 : /* No work here for other RTE types */
786 104622 : break;
787 : }
788 :
789 811058 : if (rte->lateral)
790 9514 : root->hasLateralRTEs = true;
791 :
792 : /*
793 : * We can also determine the maximum security level required for any
794 : * securityQuals now. Addition of inheritance-child RTEs won't affect
795 : * this, because child tables don't have their own securityQuals; see
796 : * expand_single_inheritance_child().
797 : */
798 811058 : if (rte->securityQuals)
799 2394 : root->qual_security_level = Max(root->qual_security_level,
800 : list_length(rte->securityQuals));
801 : }
802 :
803 : /*
804 : * If we have now verified that the query target relation is
805 : * non-inheriting, mark it as a leaf target.
806 : */
807 490594 : if (parse->resultRelation)
808 : {
809 90346 : RangeTblEntry *rte = rt_fetch(parse->resultRelation, parse->rtable);
810 :
811 90346 : if (!rte->inh)
812 87796 : root->leaf_result_relids =
813 87796 : bms_make_singleton(parse->resultRelation);
814 : }
815 :
816 : /*
817 : * Preprocess RowMark information. We need to do this after subquery
818 : * pullup, so that all base relations are present.
819 : */
820 490594 : preprocess_rowmarks(root);
821 :
822 : /*
823 : * Set hasHavingQual to remember if HAVING clause is present. Needed
824 : * because preprocess_expression will reduce a constant-true condition to
825 : * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
826 : */
827 490594 : root->hasHavingQual = (parse->havingQual != NULL);
828 :
829 : /*
830 : * Do expression preprocessing on targetlist and quals, as well as other
831 : * random expressions in the querytree. Note that we do not need to
832 : * handle sort/group expressions explicitly, because they are actually
833 : * part of the targetlist.
834 : */
835 487046 : parse->targetList = (List *)
836 490594 : preprocess_expression(root, (Node *) parse->targetList,
837 : EXPRKIND_TARGET);
838 :
839 : /* Constant-folding might have removed all set-returning functions */
840 487046 : if (parse->hasTargetSRFs)
841 8410 : parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
842 :
843 487046 : newWithCheckOptions = NIL;
844 489378 : foreach(l, parse->withCheckOptions)
845 : {
846 2332 : WithCheckOption *wco = lfirst_node(WithCheckOption, l);
847 :
848 2332 : wco->qual = preprocess_expression(root, wco->qual,
849 : EXPRKIND_QUAL);
850 2332 : if (wco->qual != NULL)
851 1932 : newWithCheckOptions = lappend(newWithCheckOptions, wco);
852 : }
853 487046 : parse->withCheckOptions = newWithCheckOptions;
854 :
855 487046 : parse->returningList = (List *)
856 487046 : preprocess_expression(root, (Node *) parse->returningList,
857 : EXPRKIND_TARGET);
858 :
859 487046 : preprocess_qual_conditions(root, (Node *) parse->jointree);
860 :
861 487046 : parse->havingQual = preprocess_expression(root, parse->havingQual,
862 : EXPRKIND_QUAL);
863 :
864 489568 : foreach(l, parse->windowClause)
865 : {
866 2522 : WindowClause *wc = lfirst_node(WindowClause, l);
867 :
868 : /* partitionClause/orderClause are sort/group expressions */
869 2522 : wc->startOffset = preprocess_expression(root, wc->startOffset,
870 : EXPRKIND_LIMIT);
871 2522 : wc->endOffset = preprocess_expression(root, wc->endOffset,
872 : EXPRKIND_LIMIT);
873 : }
874 :
875 487046 : parse->limitOffset = preprocess_expression(root, parse->limitOffset,
876 : EXPRKIND_LIMIT);
877 487046 : parse->limitCount = preprocess_expression(root, parse->limitCount,
878 : EXPRKIND_LIMIT);
879 :
880 487046 : if (parse->onConflict)
881 : {
882 2860 : parse->onConflict->arbiterElems = (List *)
883 1430 : preprocess_expression(root,
884 1430 : (Node *) parse->onConflict->arbiterElems,
885 : EXPRKIND_ARBITER_ELEM);
886 2860 : parse->onConflict->arbiterWhere =
887 1430 : preprocess_expression(root,
888 1430 : parse->onConflict->arbiterWhere,
889 : EXPRKIND_QUAL);
890 2860 : parse->onConflict->onConflictSet = (List *)
891 1430 : preprocess_expression(root,
892 1430 : (Node *) parse->onConflict->onConflictSet,
893 : EXPRKIND_TARGET);
894 1430 : parse->onConflict->onConflictWhere =
895 1430 : preprocess_expression(root,
896 1430 : parse->onConflict->onConflictWhere,
897 : EXPRKIND_QUAL);
898 : /* exclRelTlist contains only Vars, so no preprocessing needed */
899 : }
900 :
901 489652 : foreach(l, parse->mergeActionList)
902 : {
903 2606 : MergeAction *action = (MergeAction *) lfirst(l);
904 :
905 2606 : action->targetList = (List *)
906 2606 : preprocess_expression(root,
907 2606 : (Node *) action->targetList,
908 : EXPRKIND_TARGET);
909 2606 : action->qual =
910 2606 : preprocess_expression(root,
911 : (Node *) action->qual,
912 : EXPRKIND_QUAL);
913 : }
914 :
915 487046 : parse->mergeJoinCondition =
916 487046 : preprocess_expression(root, parse->mergeJoinCondition, EXPRKIND_QUAL);
917 :
918 487046 : root->append_rel_list = (List *)
919 487046 : preprocess_expression(root, (Node *) root->append_rel_list,
920 : EXPRKIND_APPINFO);
921 :
922 : /* Also need to preprocess expressions within RTEs */
923 1294290 : foreach(l, parse->rtable)
924 : {
925 807244 : RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
926 : int kind;
927 : ListCell *lcsq;
928 :
929 807244 : if (rte->rtekind == RTE_RELATION)
930 : {
931 418028 : if (rte->tablesample)
932 222 : rte->tablesample = (TableSampleClause *)
933 222 : preprocess_expression(root,
934 222 : (Node *) rte->tablesample,
935 : EXPRKIND_TABLESAMPLE);
936 : }
937 389216 : else if (rte->rtekind == RTE_SUBQUERY)
938 : {
939 : /*
940 : * We don't want to do all preprocessing yet on the subquery's
941 : * expressions, since that will happen when we plan it. But if it
942 : * contains any join aliases of our level, those have to get
943 : * expanded now, because planning of the subquery won't do it.
944 : * That's only possible if the subquery is LATERAL.
945 : */
946 52158 : if (rte->lateral && root->hasJoinRTEs)
947 848 : rte->subquery = (Query *)
948 848 : flatten_join_alias_vars(root, root->parse,
949 848 : (Node *) rte->subquery);
950 : }
951 337058 : else if (rte->rtekind == RTE_FUNCTION)
952 : {
953 : /* Preprocess the function expression(s) fully */
954 39694 : kind = rte->lateral ? EXPRKIND_RTFUNC_LATERAL : EXPRKIND_RTFUNC;
955 39694 : rte->functions = (List *)
956 39694 : preprocess_expression(root, (Node *) rte->functions, kind);
957 : }
958 297364 : else if (rte->rtekind == RTE_TABLEFUNC)
959 : {
960 : /* Preprocess the function expression(s) fully */
961 548 : kind = rte->lateral ? EXPRKIND_TABLEFUNC_LATERAL : EXPRKIND_TABLEFUNC;
962 548 : rte->tablefunc = (TableFunc *)
963 548 : preprocess_expression(root, (Node *) rte->tablefunc, kind);
964 : }
965 296816 : else if (rte->rtekind == RTE_VALUES)
966 : {
967 : /* Preprocess the values lists fully */
968 7746 : kind = rte->lateral ? EXPRKIND_VALUES_LATERAL : EXPRKIND_VALUES;
969 7746 : rte->values_lists = (List *)
970 7746 : preprocess_expression(root, (Node *) rte->values_lists, kind);
971 : }
972 :
973 : /*
974 : * Process each element of the securityQuals list as if it were a
975 : * separate qual expression (as indeed it is). We need to do it this
976 : * way to get proper canonicalization of AND/OR structure. Note that
977 : * this converts each element into an implicit-AND sublist.
978 : */
979 809984 : foreach(lcsq, rte->securityQuals)
980 : {
981 2740 : lfirst(lcsq) = preprocess_expression(root,
982 2740 : (Node *) lfirst(lcsq),
983 : EXPRKIND_QUAL);
984 : }
985 : }
986 :
987 : /*
988 : * Now that we are done preprocessing expressions, and in particular done
989 : * flattening join alias variables, get rid of the joinaliasvars lists.
990 : * They no longer match what expressions in the rest of the tree look
991 : * like, because we have not preprocessed expressions in those lists (and
992 : * do not want to; for example, expanding a SubLink there would result in
993 : * a useless unreferenced subplan). Leaving them in place simply creates
994 : * a hazard for later scans of the tree. We could try to prevent that by
995 : * using QTW_IGNORE_JOINALIASES in every tree scan done after this point,
996 : * but that doesn't sound very reliable.
997 : */
998 487046 : if (root->hasJoinRTEs)
999 : {
1000 259766 : foreach(l, parse->rtable)
1001 : {
1002 214310 : RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
1003 :
1004 214310 : rte->joinaliasvars = NIL;
1005 : }
1006 : }
1007 :
1008 : /*
1009 : * In some cases we may want to transfer a HAVING clause into WHERE. We
1010 : * cannot do so if the HAVING clause contains aggregates (obviously) or
1011 : * volatile functions (since a HAVING clause is supposed to be executed
1012 : * only once per group). We also can't do this if there are any nonempty
1013 : * grouping sets; moving such a clause into WHERE would potentially change
1014 : * the results, if any referenced column isn't present in all the grouping
1015 : * sets. (If there are only empty grouping sets, then the HAVING clause
1016 : * must be degenerate as discussed below.)
1017 : *
1018 : * Also, it may be that the clause is so expensive to execute that we're
1019 : * better off doing it only once per group, despite the loss of
1020 : * selectivity. This is hard to estimate short of doing the entire
1021 : * planning process twice, so we use a heuristic: clauses containing
1022 : * subplans are left in HAVING. Otherwise, we move or copy the HAVING
1023 : * clause into WHERE, in hopes of eliminating tuples before aggregation
1024 : * instead of after.
1025 : *
1026 : * If the query has explicit grouping then we can simply move such a
1027 : * clause into WHERE; any group that fails the clause will not be in the
1028 : * output because none of its tuples will reach the grouping or
1029 : * aggregation stage. Otherwise we must have a degenerate (variable-free)
1030 : * HAVING clause, which we put in WHERE so that query_planner() can use it
1031 : * in a gating Result node, but also keep in HAVING to ensure that we
1032 : * don't emit a bogus aggregated row. (This could be done better, but it
1033 : * seems not worth optimizing.)
1034 : *
1035 : * Note that both havingQual and parse->jointree->quals are in
1036 : * implicitly-ANDed-list form at this point, even though they are declared
1037 : * as Node *.
1038 : */
1039 487046 : newHaving = NIL;
1040 488180 : foreach(l, (List *) parse->havingQual)
1041 : {
1042 1134 : Node *havingclause = (Node *) lfirst(l);
1043 :
1044 2226 : if ((parse->groupClause && parse->groupingSets) ||
1045 1234 : contain_agg_clause(havingclause) ||
1046 284 : contain_volatile_functions(havingclause) ||
1047 142 : contain_subplans(havingclause))
1048 : {
1049 : /* keep it in HAVING */
1050 992 : newHaving = lappend(newHaving, havingclause);
1051 : }
1052 142 : else if (parse->groupClause && !parse->groupingSets)
1053 : {
1054 : /* move it to WHERE */
1055 124 : parse->jointree->quals = (Node *)
1056 124 : lappend((List *) parse->jointree->quals, havingclause);
1057 : }
1058 : else
1059 : {
1060 : /* put a copy in WHERE, keep it in HAVING */
1061 36 : parse->jointree->quals = (Node *)
1062 18 : lappend((List *) parse->jointree->quals,
1063 : copyObject(havingclause));
1064 18 : newHaving = lappend(newHaving, havingclause);
1065 : }
1066 : }
1067 487046 : parse->havingQual = (Node *) newHaving;
1068 :
1069 : /*
1070 : * If we have any outer joins, try to reduce them to plain inner joins.
1071 : * This step is most easily done after we've done expression
1072 : * preprocessing.
1073 : */
1074 487046 : if (hasOuterJoins)
1075 29526 : reduce_outer_joins(root);
1076 :
1077 : /*
1078 : * If we have any RTE_RESULT relations, see if they can be deleted from
1079 : * the jointree. We also rely on this processing to flatten single-child
1080 : * FromExprs underneath outer joins. This step is most effectively done
1081 : * after we've done expression preprocessing and outer join reduction.
1082 : */
1083 487046 : if (hasResultRTEs || hasOuterJoins)
1084 237362 : remove_useless_result_rtes(root);
1085 :
1086 : /*
1087 : * Do the main planning.
1088 : */
1089 487046 : grouping_planner(root, tuple_fraction, setops);
1090 :
1091 : /*
1092 : * Capture the set of outer-level param IDs we have access to, for use in
1093 : * extParam/allParam calculations later.
1094 : */
1095 486982 : SS_identify_outer_params(root);
1096 :
1097 : /*
1098 : * If any initPlans were created in this query level, adjust the surviving
1099 : * Paths' costs and parallel-safety flags to account for them. The
1100 : * initPlans won't actually get attached to the plan tree till
1101 : * create_plan() runs, but we must include their effects now.
1102 : */
1103 486982 : final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1104 486982 : SS_charge_for_initplans(root, final_rel);
1105 :
1106 : /*
1107 : * Make sure we've identified the cheapest Path for the final rel. (By
1108 : * doing this here not in grouping_planner, we include initPlan costs in
1109 : * the decision, though it's unlikely that will change anything.)
1110 : */
1111 486982 : set_cheapest(final_rel);
1112 :
1113 486982 : return root;
1114 : }
1115 :
1116 : /*
1117 : * preprocess_expression
1118 : * Do subquery_planner's preprocessing work for an expression,
1119 : * which can be a targetlist, a WHERE clause (including JOIN/ON
1120 : * conditions), a HAVING clause, or a few other things.
1121 : */
1122 : static Node *
1123 4060218 : preprocess_expression(PlannerInfo *root, Node *expr, int kind)
1124 : {
1125 : /*
1126 : * Fall out quickly if expression is empty. This occurs often enough to
1127 : * be worth checking. Note that null->null is the correct conversion for
1128 : * implicit-AND result format, too.
1129 : */
1130 4060218 : if (expr == NULL)
1131 3246022 : return NULL;
1132 :
1133 : /*
1134 : * If the query has any join RTEs, replace join alias variables with
1135 : * base-relation variables. We must do this first, since any expressions
1136 : * we may extract from the joinaliasvars lists have not been preprocessed.
1137 : * For example, if we did this after sublink processing, sublinks expanded
1138 : * out from join aliases would not get processed. But we can skip this in
1139 : * non-lateral RTE functions, VALUES lists, and TABLESAMPLE clauses, since
1140 : * they can't contain any Vars of the current query level.
1141 : */
1142 814196 : if (root->hasJoinRTEs &&
1143 328580 : !(kind == EXPRKIND_RTFUNC ||
1144 164092 : kind == EXPRKIND_VALUES ||
1145 : kind == EXPRKIND_TABLESAMPLE ||
1146 : kind == EXPRKIND_TABLEFUNC))
1147 164080 : expr = flatten_join_alias_vars(root, root->parse, expr);
1148 :
1149 : /*
1150 : * Simplify constant expressions. For function RTEs, this was already
1151 : * done by preprocess_function_rtes. (But note we must do it again for
1152 : * EXPRKIND_RTFUNC_LATERAL, because those might by now contain
1153 : * un-simplified subexpressions inserted by flattening of subqueries or
1154 : * join alias variables.)
1155 : *
1156 : * Note: an essential effect of this is to convert named-argument function
1157 : * calls to positional notation and insert the current actual values of
1158 : * any default arguments for functions. To ensure that happens, we *must*
1159 : * process all expressions here. Previous PG versions sometimes skipped
1160 : * const-simplification if it didn't seem worth the trouble, but we can't
1161 : * do that anymore.
1162 : *
1163 : * Note: this also flattens nested AND and OR expressions into N-argument
1164 : * form. All processing of a qual expression after this point must be
1165 : * careful to maintain AND/OR flatness --- that is, do not generate a tree
1166 : * with AND directly under AND, nor OR directly under OR.
1167 : */
1168 814196 : if (kind != EXPRKIND_RTFUNC)
1169 782304 : expr = eval_const_expressions(root, expr);
1170 :
1171 : /*
1172 : * If it's a qual or havingQual, canonicalize it.
1173 : */
1174 810648 : if (kind == EXPRKIND_QUAL)
1175 : {
1176 278322 : expr = (Node *) canonicalize_qual((Expr *) expr, false);
1177 :
1178 : #ifdef OPTIMIZER_DEBUG
1179 : printf("After canonicalize_qual()\n");
1180 : pprint(expr);
1181 : #endif
1182 : }
1183 :
1184 : /*
1185 : * Check for ANY ScalarArrayOpExpr with Const arrays and set the
1186 : * hashfuncid of any that might execute more quickly by using hash lookups
1187 : * instead of a linear search.
1188 : */
1189 810648 : if (kind == EXPRKIND_QUAL || kind == EXPRKIND_TARGET)
1190 : {
1191 752690 : convert_saop_to_hashed_saop(expr);
1192 : }
1193 :
1194 : /* Expand SubLinks to SubPlans */
1195 810648 : if (root->parse->hasSubLinks)
1196 77722 : expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
1197 :
1198 : /*
1199 : * XXX do not insert anything here unless you have grokked the comments in
1200 : * SS_replace_correlation_vars ...
1201 : */
1202 :
1203 : /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
1204 810648 : if (root->query_level > 1)
1205 126968 : expr = SS_replace_correlation_vars(root, expr);
1206 :
1207 : /*
1208 : * If it's a qual or havingQual, convert it to implicit-AND format. (We
1209 : * don't want to do this before eval_const_expressions, since the latter
1210 : * would be unable to simplify a top-level AND correctly. Also,
1211 : * SS_process_sublinks expects explicit-AND format.)
1212 : */
1213 810648 : if (kind == EXPRKIND_QUAL)
1214 278322 : expr = (Node *) make_ands_implicit((Expr *) expr);
1215 :
1216 810648 : return expr;
1217 : }
1218 :
1219 : /*
1220 : * preprocess_qual_conditions
1221 : * Recursively scan the query's jointree and do subquery_planner's
1222 : * preprocessing work on each qual condition found therein.
1223 : */
1224 : static void
1225 1183634 : preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
1226 : {
1227 1183634 : if (jtnode == NULL)
1228 0 : return;
1229 1183634 : if (IsA(jtnode, RangeTblRef))
1230 : {
1231 : /* nothing to do here */
1232 : }
1233 578018 : else if (IsA(jtnode, FromExpr))
1234 : {
1235 497878 : FromExpr *f = (FromExpr *) jtnode;
1236 : ListCell *l;
1237 :
1238 1034186 : foreach(l, f->fromlist)
1239 536308 : preprocess_qual_conditions(root, lfirst(l));
1240 :
1241 497878 : f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
1242 : }
1243 80140 : else if (IsA(jtnode, JoinExpr))
1244 : {
1245 80140 : JoinExpr *j = (JoinExpr *) jtnode;
1246 :
1247 80140 : preprocess_qual_conditions(root, j->larg);
1248 80140 : preprocess_qual_conditions(root, j->rarg);
1249 :
1250 80140 : j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
1251 : }
1252 : else
1253 0 : elog(ERROR, "unrecognized node type: %d",
1254 : (int) nodeTag(jtnode));
1255 : }
1256 :
1257 : /*
1258 : * preprocess_phv_expression
1259 : * Do preprocessing on a PlaceHolderVar expression that's been pulled up.
1260 : *
1261 : * If a LATERAL subquery references an output of another subquery, and that
1262 : * output must be wrapped in a PlaceHolderVar because of an intermediate outer
1263 : * join, then we'll push the PlaceHolderVar expression down into the subquery
1264 : * and later pull it back up during find_lateral_references, which runs after
1265 : * subquery_planner has preprocessed all the expressions that were in the
1266 : * current query level to start with. So we need to preprocess it then.
1267 : */
1268 : Expr *
1269 72 : preprocess_phv_expression(PlannerInfo *root, Expr *expr)
1270 : {
1271 72 : return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
1272 : }
1273 :
1274 : /*--------------------
1275 : * grouping_planner
1276 : * Perform planning steps related to grouping, aggregation, etc.
1277 : *
1278 : * This function adds all required top-level processing to the scan/join
1279 : * Path(s) produced by query_planner.
1280 : *
1281 : * tuple_fraction is the fraction of tuples we expect will be retrieved.
1282 : * tuple_fraction is interpreted as follows:
1283 : * 0: expect all tuples to be retrieved (normal case)
1284 : * 0 < tuple_fraction < 1: expect the given fraction of tuples available
1285 : * from the plan to be retrieved
1286 : * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1287 : * expected to be retrieved (ie, a LIMIT specification).
1288 : * setops is used for set operation subqueries to provide the subquery with
1289 : * the context in which it's being used so that Paths correctly sorted for the
1290 : * set operation can be generated. NULL when not planning a set operation
1291 : * child.
1292 : *
1293 : * Returns nothing; the useful output is in the Paths we attach to the
1294 : * (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
1295 : * root->processed_tlist contains the final processed targetlist.
1296 : *
1297 : * Note that we have not done set_cheapest() on the final rel; it's convenient
1298 : * to leave this to the caller.
1299 : *--------------------
1300 : */
1301 : static void
1302 487046 : grouping_planner(PlannerInfo *root, double tuple_fraction,
1303 : SetOperationStmt *setops)
1304 : {
1305 487046 : Query *parse = root->parse;
1306 487046 : int64 offset_est = 0;
1307 487046 : int64 count_est = 0;
1308 487046 : double limit_tuples = -1.0;
1309 487046 : bool have_postponed_srfs = false;
1310 : PathTarget *final_target;
1311 : List *final_targets;
1312 : List *final_targets_contain_srfs;
1313 : bool final_target_parallel_safe;
1314 : RelOptInfo *current_rel;
1315 : RelOptInfo *final_rel;
1316 : FinalPathExtraData extra;
1317 : ListCell *lc;
1318 :
1319 : /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1320 487046 : if (parse->limitCount || parse->limitOffset)
1321 : {
1322 4632 : tuple_fraction = preprocess_limit(root, tuple_fraction,
1323 : &offset_est, &count_est);
1324 :
1325 : /*
1326 : * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1327 : * estimate the effects of using a bounded sort.
1328 : */
1329 4632 : if (count_est > 0 && offset_est >= 0)
1330 4168 : limit_tuples = (double) count_est + (double) offset_est;
1331 : }
1332 :
1333 : /* Make tuple_fraction accessible to lower-level routines */
1334 487046 : root->tuple_fraction = tuple_fraction;
1335 :
1336 487046 : if (parse->setOperations)
1337 : {
1338 : /*
1339 : * Construct Paths for set operations. The results will not need any
1340 : * work except perhaps a top-level sort and/or LIMIT. Note that any
1341 : * special work for recursive unions is the responsibility of
1342 : * plan_set_operations.
1343 : */
1344 5244 : current_rel = plan_set_operations(root);
1345 :
1346 : /*
1347 : * We should not need to call preprocess_targetlist, since we must be
1348 : * in a SELECT query node. Instead, use the processed_tlist returned
1349 : * by plan_set_operations (since this tells whether it returned any
1350 : * resjunk columns!), and transfer any sort key information from the
1351 : * original tlist.
1352 : */
1353 : Assert(parse->commandType == CMD_SELECT);
1354 :
1355 : /* for safety, copy processed_tlist instead of modifying in-place */
1356 5238 : root->processed_tlist =
1357 5238 : postprocess_setop_tlist(copyObject(root->processed_tlist),
1358 : parse->targetList);
1359 :
1360 : /* Also extract the PathTarget form of the setop result tlist */
1361 5238 : final_target = current_rel->cheapest_total_path->pathtarget;
1362 :
1363 : /* And check whether it's parallel safe */
1364 : final_target_parallel_safe =
1365 5238 : is_parallel_safe(root, (Node *) final_target->exprs);
1366 :
1367 : /* The setop result tlist couldn't contain any SRFs */
1368 : Assert(!parse->hasTargetSRFs);
1369 5238 : final_targets = final_targets_contain_srfs = NIL;
1370 :
1371 : /*
1372 : * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1373 : * checked already, but let's make sure).
1374 : */
1375 5238 : if (parse->rowMarks)
1376 0 : ereport(ERROR,
1377 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1378 : /*------
1379 : translator: %s is a SQL row locking clause such as FOR UPDATE */
1380 : errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1381 : LCS_asString(linitial_node(RowMarkClause,
1382 : parse->rowMarks)->strength))));
1383 :
1384 : /*
1385 : * Calculate pathkeys that represent result ordering requirements
1386 : */
1387 : Assert(parse->distinctClause == NIL);
1388 5238 : root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
1389 : parse->sortClause,
1390 : root->processed_tlist);
1391 : }
1392 : else
1393 : {
1394 : /* No set operations, do regular planning */
1395 : PathTarget *sort_input_target;
1396 : List *sort_input_targets;
1397 : List *sort_input_targets_contain_srfs;
1398 : bool sort_input_target_parallel_safe;
1399 : PathTarget *grouping_target;
1400 : List *grouping_targets;
1401 : List *grouping_targets_contain_srfs;
1402 : bool grouping_target_parallel_safe;
1403 : PathTarget *scanjoin_target;
1404 : List *scanjoin_targets;
1405 : List *scanjoin_targets_contain_srfs;
1406 : bool scanjoin_target_parallel_safe;
1407 : bool scanjoin_target_same_exprs;
1408 : bool have_grouping;
1409 481802 : WindowFuncLists *wflists = NULL;
1410 481802 : List *activeWindows = NIL;
1411 481802 : grouping_sets_data *gset_data = NULL;
1412 : standard_qp_extra qp_extra;
1413 :
1414 : /* A recursive query should always have setOperations */
1415 : Assert(!root->hasRecursion);
1416 :
1417 : /* Preprocess grouping sets and GROUP BY clause, if any */
1418 481802 : if (parse->groupingSets)
1419 : {
1420 734 : gset_data = preprocess_grouping_sets(root);
1421 : }
1422 481068 : else if (parse->groupClause)
1423 : {
1424 : /* Preprocess regular GROUP BY clause, if any */
1425 3360 : root->processed_groupClause = list_copy(parse->groupClause);
1426 : /* Remove any redundant GROUP BY columns */
1427 3360 : remove_useless_groupby_columns(root);
1428 : }
1429 :
1430 : /*
1431 : * Preprocess targetlist. Note that much of the remaining planning
1432 : * work will be done with the PathTarget representation of tlists, but
1433 : * we must also maintain the full representation of the final tlist so
1434 : * that we can transfer its decoration (resnames etc) to the topmost
1435 : * tlist of the finished Plan. This is kept in processed_tlist.
1436 : */
1437 481796 : preprocess_targetlist(root);
1438 :
1439 : /*
1440 : * Mark all the aggregates with resolved aggtranstypes, and detect
1441 : * aggregates that are duplicates or can share transition state. We
1442 : * must do this before slicing and dicing the tlist into various
1443 : * pathtargets, else some copies of the Aggref nodes might escape
1444 : * being marked.
1445 : */
1446 481796 : if (parse->hasAggs)
1447 : {
1448 33672 : preprocess_aggrefs(root, (Node *) root->processed_tlist);
1449 33672 : preprocess_aggrefs(root, (Node *) parse->havingQual);
1450 : }
1451 :
1452 : /*
1453 : * Locate any window functions in the tlist. (We don't need to look
1454 : * anywhere else, since expressions used in ORDER BY will be in there
1455 : * too.) Note that they could all have been eliminated by constant
1456 : * folding, in which case we don't need to do any more work.
1457 : */
1458 481796 : if (parse->hasWindowFuncs)
1459 : {
1460 2306 : wflists = find_window_functions((Node *) root->processed_tlist,
1461 2306 : list_length(parse->windowClause));
1462 2306 : if (wflists->numWindowFuncs > 0)
1463 : {
1464 : /*
1465 : * See if any modifications can be made to each WindowClause
1466 : * to allow the executor to execute the WindowFuncs more
1467 : * quickly.
1468 : */
1469 2300 : optimize_window_clauses(root, wflists);
1470 :
1471 2300 : activeWindows = select_active_windows(root, wflists);
1472 : }
1473 : else
1474 6 : parse->hasWindowFuncs = false;
1475 : }
1476 :
1477 : /*
1478 : * Preprocess MIN/MAX aggregates, if any. Note: be careful about
1479 : * adding logic between here and the query_planner() call. Anything
1480 : * that is needed in MIN/MAX-optimizable cases will have to be
1481 : * duplicated in planagg.c.
1482 : */
1483 481796 : if (parse->hasAggs)
1484 33672 : preprocess_minmax_aggregates(root);
1485 :
1486 : /*
1487 : * Figure out whether there's a hard limit on the number of rows that
1488 : * query_planner's result subplan needs to return. Even if we know a
1489 : * hard limit overall, it doesn't apply if the query has any
1490 : * grouping/aggregation operations, or SRFs in the tlist.
1491 : */
1492 481796 : if (parse->groupClause ||
1493 477750 : parse->groupingSets ||
1494 477708 : parse->distinctClause ||
1495 475522 : parse->hasAggs ||
1496 445500 : parse->hasWindowFuncs ||
1497 443326 : parse->hasTargetSRFs ||
1498 435340 : root->hasHavingQual)
1499 46474 : root->limit_tuples = -1.0;
1500 : else
1501 435322 : root->limit_tuples = limit_tuples;
1502 :
1503 : /* Set up data needed by standard_qp_callback */
1504 481796 : qp_extra.activeWindows = activeWindows;
1505 481796 : qp_extra.gset_data = gset_data;
1506 :
1507 : /*
1508 : * If we're a subquery for a set operation, store the SetOperationStmt
1509 : * in qp_extra.
1510 : */
1511 481796 : qp_extra.setop = setops;
1512 :
1513 : /*
1514 : * Generate the best unsorted and presorted paths for the scan/join
1515 : * portion of this Query, ie the processing represented by the
1516 : * FROM/WHERE clauses. (Note there may not be any presorted paths.)
1517 : * We also generate (in standard_qp_callback) pathkey representations
1518 : * of the query's sort clause, distinct clause, etc.
1519 : */
1520 481796 : current_rel = query_planner(root, standard_qp_callback, &qp_extra);
1521 :
1522 : /*
1523 : * Convert the query's result tlist into PathTarget format.
1524 : *
1525 : * Note: this cannot be done before query_planner() has performed
1526 : * appendrel expansion, because that might add resjunk entries to
1527 : * root->processed_tlist. Waiting till afterwards is also helpful
1528 : * because the target width estimates can use per-Var width numbers
1529 : * that were obtained within query_planner().
1530 : */
1531 481752 : final_target = create_pathtarget(root, root->processed_tlist);
1532 : final_target_parallel_safe =
1533 481752 : is_parallel_safe(root, (Node *) final_target->exprs);
1534 :
1535 : /*
1536 : * If ORDER BY was given, consider whether we should use a post-sort
1537 : * projection, and compute the adjusted target for preceding steps if
1538 : * so.
1539 : */
1540 481752 : if (parse->sortClause)
1541 : {
1542 54616 : sort_input_target = make_sort_input_target(root,
1543 : final_target,
1544 : &have_postponed_srfs);
1545 : sort_input_target_parallel_safe =
1546 54616 : is_parallel_safe(root, (Node *) sort_input_target->exprs);
1547 : }
1548 : else
1549 : {
1550 427136 : sort_input_target = final_target;
1551 427136 : sort_input_target_parallel_safe = final_target_parallel_safe;
1552 : }
1553 :
1554 : /*
1555 : * If we have window functions to deal with, the output from any
1556 : * grouping step needs to be what the window functions want;
1557 : * otherwise, it should be sort_input_target.
1558 : */
1559 481752 : if (activeWindows)
1560 : {
1561 2300 : grouping_target = make_window_input_target(root,
1562 : final_target,
1563 : activeWindows);
1564 : grouping_target_parallel_safe =
1565 2300 : is_parallel_safe(root, (Node *) grouping_target->exprs);
1566 : }
1567 : else
1568 : {
1569 479452 : grouping_target = sort_input_target;
1570 479452 : grouping_target_parallel_safe = sort_input_target_parallel_safe;
1571 : }
1572 :
1573 : /*
1574 : * If we have grouping or aggregation to do, the topmost scan/join
1575 : * plan node must emit what the grouping step wants; otherwise, it
1576 : * should emit grouping_target.
1577 : */
1578 477706 : have_grouping = (parse->groupClause || parse->groupingSets ||
1579 959458 : parse->hasAggs || root->hasHavingQual);
1580 481752 : if (have_grouping)
1581 : {
1582 34154 : scanjoin_target = make_group_input_target(root, final_target);
1583 : scanjoin_target_parallel_safe =
1584 34154 : is_parallel_safe(root, (Node *) scanjoin_target->exprs);
1585 : }
1586 : else
1587 : {
1588 447598 : scanjoin_target = grouping_target;
1589 447598 : scanjoin_target_parallel_safe = grouping_target_parallel_safe;
1590 : }
1591 :
1592 : /*
1593 : * If there are any SRFs in the targetlist, we must separate each of
1594 : * these PathTargets into SRF-computing and SRF-free targets. Replace
1595 : * each of the named targets with a SRF-free version, and remember the
1596 : * list of additional projection steps we need to add afterwards.
1597 : */
1598 481752 : if (parse->hasTargetSRFs)
1599 : {
1600 : /* final_target doesn't recompute any SRFs in sort_input_target */
1601 8410 : split_pathtarget_at_srfs(root, final_target, sort_input_target,
1602 : &final_targets,
1603 : &final_targets_contain_srfs);
1604 8410 : final_target = linitial_node(PathTarget, final_targets);
1605 : Assert(!linitial_int(final_targets_contain_srfs));
1606 : /* likewise for sort_input_target vs. grouping_target */
1607 8410 : split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
1608 : &sort_input_targets,
1609 : &sort_input_targets_contain_srfs);
1610 8410 : sort_input_target = linitial_node(PathTarget, sort_input_targets);
1611 : Assert(!linitial_int(sort_input_targets_contain_srfs));
1612 : /* likewise for grouping_target vs. scanjoin_target */
1613 8410 : split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
1614 : &grouping_targets,
1615 : &grouping_targets_contain_srfs);
1616 8410 : grouping_target = linitial_node(PathTarget, grouping_targets);
1617 : Assert(!linitial_int(grouping_targets_contain_srfs));
1618 : /* scanjoin_target will not have any SRFs precomputed for it */
1619 8410 : split_pathtarget_at_srfs(root, scanjoin_target, NULL,
1620 : &scanjoin_targets,
1621 : &scanjoin_targets_contain_srfs);
1622 8410 : scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
1623 : Assert(!linitial_int(scanjoin_targets_contain_srfs));
1624 : }
1625 : else
1626 : {
1627 : /* initialize lists; for most of these, dummy values are OK */
1628 473342 : final_targets = final_targets_contain_srfs = NIL;
1629 473342 : sort_input_targets = sort_input_targets_contain_srfs = NIL;
1630 473342 : grouping_targets = grouping_targets_contain_srfs = NIL;
1631 473342 : scanjoin_targets = list_make1(scanjoin_target);
1632 473342 : scanjoin_targets_contain_srfs = NIL;
1633 : }
1634 :
1635 : /* Apply scan/join target. */
1636 481752 : scanjoin_target_same_exprs = list_length(scanjoin_targets) == 1
1637 481752 : && equal(scanjoin_target->exprs, current_rel->reltarget->exprs);
1638 481752 : apply_scanjoin_target_to_paths(root, current_rel, scanjoin_targets,
1639 : scanjoin_targets_contain_srfs,
1640 : scanjoin_target_parallel_safe,
1641 : scanjoin_target_same_exprs);
1642 :
1643 : /*
1644 : * Save the various upper-rel PathTargets we just computed into
1645 : * root->upper_targets[]. The core code doesn't use this, but it
1646 : * provides a convenient place for extensions to get at the info. For
1647 : * consistency, we save all the intermediate targets, even though some
1648 : * of the corresponding upperrels might not be needed for this query.
1649 : */
1650 481752 : root->upper_targets[UPPERREL_FINAL] = final_target;
1651 481752 : root->upper_targets[UPPERREL_ORDERED] = final_target;
1652 481752 : root->upper_targets[UPPERREL_DISTINCT] = sort_input_target;
1653 481752 : root->upper_targets[UPPERREL_PARTIAL_DISTINCT] = sort_input_target;
1654 481752 : root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
1655 481752 : root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
1656 :
1657 : /*
1658 : * If we have grouping and/or aggregation, consider ways to implement
1659 : * that. We build a new upperrel representing the output of this
1660 : * phase.
1661 : */
1662 481752 : if (have_grouping)
1663 : {
1664 34154 : current_rel = create_grouping_paths(root,
1665 : current_rel,
1666 : grouping_target,
1667 : grouping_target_parallel_safe,
1668 : gset_data);
1669 : /* Fix things up if grouping_target contains SRFs */
1670 34146 : if (parse->hasTargetSRFs)
1671 382 : adjust_paths_for_srfs(root, current_rel,
1672 : grouping_targets,
1673 : grouping_targets_contain_srfs);
1674 : }
1675 :
1676 : /*
1677 : * If we have window functions, consider ways to implement those. We
1678 : * build a new upperrel representing the output of this phase.
1679 : */
1680 481744 : if (activeWindows)
1681 : {
1682 2300 : current_rel = create_window_paths(root,
1683 : current_rel,
1684 : grouping_target,
1685 : sort_input_target,
1686 : sort_input_target_parallel_safe,
1687 : wflists,
1688 : activeWindows);
1689 : /* Fix things up if sort_input_target contains SRFs */
1690 2300 : if (parse->hasTargetSRFs)
1691 12 : adjust_paths_for_srfs(root, current_rel,
1692 : sort_input_targets,
1693 : sort_input_targets_contain_srfs);
1694 : }
1695 :
1696 : /*
1697 : * If there is a DISTINCT clause, consider ways to implement that. We
1698 : * build a new upperrel representing the output of this phase.
1699 : */
1700 481744 : if (parse->distinctClause)
1701 : {
1702 2196 : current_rel = create_distinct_paths(root,
1703 : current_rel,
1704 : sort_input_target);
1705 : }
1706 : } /* end of if (setOperations) */
1707 :
1708 : /*
1709 : * If ORDER BY was given, consider ways to implement that, and generate a
1710 : * new upperrel containing only paths that emit the correct ordering and
1711 : * project the correct final_target. We can apply the original
1712 : * limit_tuples limit in sort costing here, but only if there are no
1713 : * postponed SRFs.
1714 : */
1715 486982 : if (parse->sortClause)
1716 : {
1717 58044 : current_rel = create_ordered_paths(root,
1718 : current_rel,
1719 : final_target,
1720 : final_target_parallel_safe,
1721 : have_postponed_srfs ? -1.0 :
1722 : limit_tuples);
1723 : /* Fix things up if final_target contains SRFs */
1724 58044 : if (parse->hasTargetSRFs)
1725 196 : adjust_paths_for_srfs(root, current_rel,
1726 : final_targets,
1727 : final_targets_contain_srfs);
1728 : }
1729 :
1730 : /*
1731 : * Now we are prepared to build the final-output upperrel.
1732 : */
1733 486982 : final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1734 :
1735 : /*
1736 : * If the input rel is marked consider_parallel and there's nothing that's
1737 : * not parallel-safe in the LIMIT clause, then the final_rel can be marked
1738 : * consider_parallel as well. Note that if the query has rowMarks or is
1739 : * not a SELECT, consider_parallel will be false for every relation in the
1740 : * query.
1741 : */
1742 632466 : if (current_rel->consider_parallel &&
1743 290944 : is_parallel_safe(root, parse->limitOffset) &&
1744 145460 : is_parallel_safe(root, parse->limitCount))
1745 145454 : final_rel->consider_parallel = true;
1746 :
1747 : /*
1748 : * If the current_rel belongs to a single FDW, so does the final_rel.
1749 : */
1750 486982 : final_rel->serverid = current_rel->serverid;
1751 486982 : final_rel->userid = current_rel->userid;
1752 486982 : final_rel->useridiscurrent = current_rel->useridiscurrent;
1753 486982 : final_rel->fdwroutine = current_rel->fdwroutine;
1754 :
1755 : /*
1756 : * Generate paths for the final_rel. Insert all surviving paths, with
1757 : * LockRows, Limit, and/or ModifyTable steps added if needed.
1758 : */
1759 990928 : foreach(lc, current_rel->pathlist)
1760 : {
1761 503946 : Path *path = (Path *) lfirst(lc);
1762 :
1763 : /*
1764 : * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
1765 : * (Note: we intentionally test parse->rowMarks not root->rowMarks
1766 : * here. If there are only non-locking rowmarks, they should be
1767 : * handled by the ModifyTable node instead. However, root->rowMarks
1768 : * is what goes into the LockRows node.)
1769 : */
1770 503946 : if (parse->rowMarks)
1771 : {
1772 8036 : path = (Path *) create_lockrows_path(root, final_rel, path,
1773 : root->rowMarks,
1774 : assign_special_exec_param(root));
1775 : }
1776 :
1777 : /*
1778 : * If there is a LIMIT/OFFSET clause, add the LIMIT node.
1779 : */
1780 503946 : if (limit_needed(parse))
1781 : {
1782 5536 : path = (Path *) create_limit_path(root, final_rel, path,
1783 : parse->limitOffset,
1784 : parse->limitCount,
1785 : parse->limitOption,
1786 : offset_est, count_est);
1787 : }
1788 :
1789 : /*
1790 : * If this is an INSERT/UPDATE/DELETE/MERGE, add the ModifyTable node.
1791 : */
1792 503946 : if (parse->commandType != CMD_SELECT)
1793 : {
1794 : Index rootRelation;
1795 90108 : List *resultRelations = NIL;
1796 90108 : List *updateColnosLists = NIL;
1797 90108 : List *withCheckOptionLists = NIL;
1798 90108 : List *returningLists = NIL;
1799 90108 : List *mergeActionLists = NIL;
1800 90108 : List *mergeJoinConditions = NIL;
1801 : List *rowMarks;
1802 :
1803 90108 : if (bms_membership(root->all_result_relids) == BMS_MULTIPLE)
1804 : {
1805 : /* Inherited UPDATE/DELETE/MERGE */
1806 2520 : RelOptInfo *top_result_rel = find_base_rel(root,
1807 : parse->resultRelation);
1808 2520 : int resultRelation = -1;
1809 :
1810 : /* Pass the root result rel forward to the executor. */
1811 2520 : rootRelation = parse->resultRelation;
1812 :
1813 : /* Add only leaf children to ModifyTable. */
1814 7422 : while ((resultRelation = bms_next_member(root->leaf_result_relids,
1815 : resultRelation)) >= 0)
1816 : {
1817 4902 : RelOptInfo *this_result_rel = find_base_rel(root,
1818 : resultRelation);
1819 :
1820 : /*
1821 : * Also exclude any leaf rels that have turned dummy since
1822 : * being added to the list, for example, by being excluded
1823 : * by constraint exclusion.
1824 : */
1825 4902 : if (IS_DUMMY_REL(this_result_rel))
1826 84 : continue;
1827 :
1828 : /* Build per-target-rel lists needed by ModifyTable */
1829 4818 : resultRelations = lappend_int(resultRelations,
1830 : resultRelation);
1831 4818 : if (parse->commandType == CMD_UPDATE)
1832 : {
1833 3436 : List *update_colnos = root->update_colnos;
1834 :
1835 3436 : if (this_result_rel != top_result_rel)
1836 : update_colnos =
1837 3436 : adjust_inherited_attnums_multilevel(root,
1838 : update_colnos,
1839 : this_result_rel->relid,
1840 : top_result_rel->relid);
1841 3436 : updateColnosLists = lappend(updateColnosLists,
1842 : update_colnos);
1843 : }
1844 4818 : if (parse->withCheckOptions)
1845 : {
1846 426 : List *withCheckOptions = parse->withCheckOptions;
1847 :
1848 426 : if (this_result_rel != top_result_rel)
1849 : withCheckOptions = (List *)
1850 426 : adjust_appendrel_attrs_multilevel(root,
1851 : (Node *) withCheckOptions,
1852 : this_result_rel,
1853 : top_result_rel);
1854 426 : withCheckOptionLists = lappend(withCheckOptionLists,
1855 : withCheckOptions);
1856 : }
1857 4818 : if (parse->returningList)
1858 : {
1859 552 : List *returningList = parse->returningList;
1860 :
1861 552 : if (this_result_rel != top_result_rel)
1862 : returningList = (List *)
1863 552 : adjust_appendrel_attrs_multilevel(root,
1864 : (Node *) returningList,
1865 : this_result_rel,
1866 : top_result_rel);
1867 552 : returningLists = lappend(returningLists,
1868 : returningList);
1869 : }
1870 4818 : if (parse->mergeActionList)
1871 : {
1872 : ListCell *l;
1873 390 : List *mergeActionList = NIL;
1874 :
1875 : /*
1876 : * Copy MergeActions and translate stuff that
1877 : * references attribute numbers.
1878 : */
1879 1344 : foreach(l, parse->mergeActionList)
1880 : {
1881 954 : MergeAction *action = lfirst(l),
1882 954 : *leaf_action = copyObject(action);
1883 :
1884 954 : leaf_action->qual =
1885 954 : adjust_appendrel_attrs_multilevel(root,
1886 : (Node *) action->qual,
1887 : this_result_rel,
1888 : top_result_rel);
1889 954 : leaf_action->targetList = (List *)
1890 954 : adjust_appendrel_attrs_multilevel(root,
1891 954 : (Node *) action->targetList,
1892 : this_result_rel,
1893 : top_result_rel);
1894 954 : if (leaf_action->commandType == CMD_UPDATE)
1895 592 : leaf_action->updateColnos =
1896 592 : adjust_inherited_attnums_multilevel(root,
1897 : action->updateColnos,
1898 : this_result_rel->relid,
1899 : top_result_rel->relid);
1900 954 : mergeActionList = lappend(mergeActionList,
1901 : leaf_action);
1902 : }
1903 :
1904 390 : mergeActionLists = lappend(mergeActionLists,
1905 : mergeActionList);
1906 : }
1907 4818 : if (parse->commandType == CMD_MERGE)
1908 : {
1909 390 : Node *mergeJoinCondition = parse->mergeJoinCondition;
1910 :
1911 390 : if (this_result_rel != top_result_rel)
1912 : mergeJoinCondition =
1913 390 : adjust_appendrel_attrs_multilevel(root,
1914 : mergeJoinCondition,
1915 : this_result_rel,
1916 : top_result_rel);
1917 390 : mergeJoinConditions = lappend(mergeJoinConditions,
1918 : mergeJoinCondition);
1919 : }
1920 : }
1921 :
1922 2520 : if (resultRelations == NIL)
1923 : {
1924 : /*
1925 : * We managed to exclude every child rel, so generate a
1926 : * dummy one-relation plan using info for the top target
1927 : * rel (even though that may not be a leaf target).
1928 : * Although it's clear that no data will be updated or
1929 : * deleted, we still need to have a ModifyTable node so
1930 : * that any statement triggers will be executed. (This
1931 : * could be cleaner if we fixed nodeModifyTable.c to allow
1932 : * zero target relations, but that probably wouldn't be a
1933 : * net win.)
1934 : */
1935 30 : resultRelations = list_make1_int(parse->resultRelation);
1936 30 : if (parse->commandType == CMD_UPDATE)
1937 30 : updateColnosLists = list_make1(root->update_colnos);
1938 30 : if (parse->withCheckOptions)
1939 0 : withCheckOptionLists = list_make1(parse->withCheckOptions);
1940 30 : if (parse->returningList)
1941 18 : returningLists = list_make1(parse->returningList);
1942 30 : if (parse->mergeActionList)
1943 0 : mergeActionLists = list_make1(parse->mergeActionList);
1944 30 : if (parse->commandType == CMD_MERGE)
1945 0 : mergeJoinConditions = list_make1(parse->mergeJoinCondition);
1946 : }
1947 : }
1948 : else
1949 : {
1950 : /* Single-relation INSERT/UPDATE/DELETE/MERGE. */
1951 87588 : rootRelation = 0; /* there's no separate root rel */
1952 87588 : resultRelations = list_make1_int(parse->resultRelation);
1953 87588 : if (parse->commandType == CMD_UPDATE)
1954 11512 : updateColnosLists = list_make1(root->update_colnos);
1955 87588 : if (parse->withCheckOptions)
1956 908 : withCheckOptionLists = list_make1(parse->withCheckOptions);
1957 87588 : if (parse->returningList)
1958 2162 : returningLists = list_make1(parse->returningList);
1959 87588 : if (parse->mergeActionList)
1960 1548 : mergeActionLists = list_make1(parse->mergeActionList);
1961 87588 : if (parse->commandType == CMD_MERGE)
1962 1548 : mergeJoinConditions = list_make1(parse->mergeJoinCondition);
1963 : }
1964 :
1965 : /*
1966 : * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
1967 : * will have dealt with fetching non-locked marked rows, else we
1968 : * need to have ModifyTable do that.
1969 : */
1970 90108 : if (parse->rowMarks)
1971 0 : rowMarks = NIL;
1972 : else
1973 90108 : rowMarks = root->rowMarks;
1974 :
1975 : path = (Path *)
1976 90108 : create_modifytable_path(root, final_rel,
1977 : path,
1978 : parse->commandType,
1979 90108 : parse->canSetTag,
1980 90108 : parse->resultRelation,
1981 : rootRelation,
1982 90108 : root->partColsUpdated,
1983 : resultRelations,
1984 : updateColnosLists,
1985 : withCheckOptionLists,
1986 : returningLists,
1987 : rowMarks,
1988 : parse->onConflict,
1989 : mergeActionLists,
1990 : mergeJoinConditions,
1991 : assign_special_exec_param(root));
1992 : }
1993 :
1994 : /* And shove it into final_rel */
1995 503946 : add_path(final_rel, path);
1996 : }
1997 :
1998 : /*
1999 : * Generate partial paths for final_rel, too, if outer query levels might
2000 : * be able to make use of them.
2001 : */
2002 486982 : if (final_rel->consider_parallel && root->query_level > 1 &&
2003 18606 : !limit_needed(parse))
2004 : {
2005 : Assert(!parse->rowMarks && parse->commandType == CMD_SELECT);
2006 18552 : foreach(lc, current_rel->partial_pathlist)
2007 : {
2008 102 : Path *partial_path = (Path *) lfirst(lc);
2009 :
2010 102 : add_partial_path(final_rel, partial_path);
2011 : }
2012 : }
2013 :
2014 486982 : extra.limit_needed = limit_needed(parse);
2015 486982 : extra.limit_tuples = limit_tuples;
2016 486982 : extra.count_est = count_est;
2017 486982 : extra.offset_est = offset_est;
2018 :
2019 : /*
2020 : * If there is an FDW that's responsible for all baserels of the query,
2021 : * let it consider adding ForeignPaths.
2022 : */
2023 486982 : if (final_rel->fdwroutine &&
2024 1214 : final_rel->fdwroutine->GetForeignUpperPaths)
2025 1154 : final_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_FINAL,
2026 : current_rel, final_rel,
2027 : &extra);
2028 :
2029 : /* Let extensions possibly add some more paths */
2030 486982 : if (create_upper_paths_hook)
2031 0 : (*create_upper_paths_hook) (root, UPPERREL_FINAL,
2032 : current_rel, final_rel, &extra);
2033 :
2034 : /* Note: currently, we leave it to callers to do set_cheapest() */
2035 486982 : }
2036 :
2037 : /*
2038 : * Do preprocessing for groupingSets clause and related data. This handles the
2039 : * preliminary steps of expanding the grouping sets, organizing them into lists
2040 : * of rollups, and preparing annotations which will later be filled in with
2041 : * size estimates.
2042 : */
2043 : static grouping_sets_data *
2044 734 : preprocess_grouping_sets(PlannerInfo *root)
2045 : {
2046 734 : Query *parse = root->parse;
2047 : List *sets;
2048 734 : int maxref = 0;
2049 : ListCell *lc_set;
2050 734 : grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
2051 :
2052 734 : parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1);
2053 :
2054 734 : gd->any_hashable = false;
2055 734 : gd->unhashable_refs = NULL;
2056 734 : gd->unsortable_refs = NULL;
2057 734 : gd->unsortable_sets = NIL;
2058 :
2059 : /*
2060 : * We don't currently make any attempt to optimize the groupClause when
2061 : * there are grouping sets, so just duplicate it in processed_groupClause.
2062 : */
2063 734 : root->processed_groupClause = parse->groupClause;
2064 :
2065 734 : if (parse->groupClause)
2066 : {
2067 : ListCell *lc;
2068 :
2069 2252 : foreach(lc, parse->groupClause)
2070 : {
2071 1560 : SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2072 1560 : Index ref = gc->tleSortGroupRef;
2073 :
2074 1560 : if (ref > maxref)
2075 1524 : maxref = ref;
2076 :
2077 1560 : if (!gc->hashable)
2078 30 : gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
2079 :
2080 1560 : if (!OidIsValid(gc->sortop))
2081 42 : gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
2082 : }
2083 : }
2084 :
2085 : /* Allocate workspace array for remapping */
2086 734 : gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
2087 :
2088 : /*
2089 : * If we have any unsortable sets, we must extract them before trying to
2090 : * prepare rollups. Unsortable sets don't go through
2091 : * reorder_grouping_sets, so we must apply the GroupingSetData annotation
2092 : * here.
2093 : */
2094 734 : if (!bms_is_empty(gd->unsortable_refs))
2095 : {
2096 42 : List *sortable_sets = NIL;
2097 : ListCell *lc;
2098 :
2099 126 : foreach(lc, parse->groupingSets)
2100 : {
2101 90 : List *gset = (List *) lfirst(lc);
2102 :
2103 90 : if (bms_overlap_list(gd->unsortable_refs, gset))
2104 : {
2105 48 : GroupingSetData *gs = makeNode(GroupingSetData);
2106 :
2107 48 : gs->set = gset;
2108 48 : gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
2109 :
2110 : /*
2111 : * We must enforce here that an unsortable set is hashable;
2112 : * later code assumes this. Parse analysis only checks that
2113 : * every individual column is either hashable or sortable.
2114 : *
2115 : * Note that passing this test doesn't guarantee we can
2116 : * generate a plan; there might be other showstoppers.
2117 : */
2118 48 : if (bms_overlap_list(gd->unhashable_refs, gset))
2119 6 : ereport(ERROR,
2120 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2121 : errmsg("could not implement GROUP BY"),
2122 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2123 : }
2124 : else
2125 42 : sortable_sets = lappend(sortable_sets, gset);
2126 : }
2127 :
2128 36 : if (sortable_sets)
2129 30 : sets = extract_rollup_sets(sortable_sets);
2130 : else
2131 6 : sets = NIL;
2132 : }
2133 : else
2134 692 : sets = extract_rollup_sets(parse->groupingSets);
2135 :
2136 1938 : foreach(lc_set, sets)
2137 : {
2138 1210 : List *current_sets = (List *) lfirst(lc_set);
2139 1210 : RollupData *rollup = makeNode(RollupData);
2140 : GroupingSetData *gs;
2141 :
2142 : /*
2143 : * Reorder the current list of grouping sets into correct prefix
2144 : * order. If only one aggregation pass is needed, try to make the
2145 : * list match the ORDER BY clause; if more than one pass is needed, we
2146 : * don't bother with that.
2147 : *
2148 : * Note that this reorders the sets from smallest-member-first to
2149 : * largest-member-first, and applies the GroupingSetData annotations,
2150 : * though the data will be filled in later.
2151 : */
2152 1210 : current_sets = reorder_grouping_sets(current_sets,
2153 1210 : (list_length(sets) == 1
2154 : ? parse->sortClause
2155 : : NIL));
2156 :
2157 : /*
2158 : * Get the initial (and therefore largest) grouping set.
2159 : */
2160 1210 : gs = linitial_node(GroupingSetData, current_sets);
2161 :
2162 : /*
2163 : * Order the groupClause appropriately. If the first grouping set is
2164 : * empty, then the groupClause must also be empty; otherwise we have
2165 : * to force the groupClause to match that grouping set's order.
2166 : *
2167 : * (The first grouping set can be empty even though parse->groupClause
2168 : * is not empty only if all non-empty grouping sets are unsortable.
2169 : * The groupClauses for hashed grouping sets are built later on.)
2170 : */
2171 1210 : if (gs->set)
2172 1168 : rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
2173 : else
2174 42 : rollup->groupClause = NIL;
2175 :
2176 : /*
2177 : * Is it hashable? We pretend empty sets are hashable even though we
2178 : * actually force them not to be hashed later. But don't bother if
2179 : * there's nothing but empty sets (since in that case we can't hash
2180 : * anything).
2181 : */
2182 1210 : if (gs->set &&
2183 1168 : !bms_overlap_list(gd->unhashable_refs, gs->set))
2184 : {
2185 1144 : rollup->hashable = true;
2186 1144 : gd->any_hashable = true;
2187 : }
2188 :
2189 : /*
2190 : * Now that we've pinned down an order for the groupClause for this
2191 : * list of grouping sets, we need to remap the entries in the grouping
2192 : * sets from sortgrouprefs to plain indices (0-based) into the
2193 : * groupClause for this collection of grouping sets. We keep the
2194 : * original form for later use, though.
2195 : */
2196 1210 : rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
2197 : current_sets,
2198 : gd->tleref_to_colnum_map);
2199 1210 : rollup->gsets_data = current_sets;
2200 :
2201 1210 : gd->rollups = lappend(gd->rollups, rollup);
2202 : }
2203 :
2204 728 : if (gd->unsortable_sets)
2205 : {
2206 : /*
2207 : * We have not yet pinned down a groupclause for this, but we will
2208 : * need index-based lists for estimation purposes. Construct
2209 : * hash_sets_idx based on the entire original groupclause for now.
2210 : */
2211 36 : gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
2212 : gd->unsortable_sets,
2213 : gd->tleref_to_colnum_map);
2214 36 : gd->any_hashable = true;
2215 : }
2216 :
2217 728 : return gd;
2218 : }
2219 :
2220 : /*
2221 : * Given a groupclause and a list of GroupingSetData, return equivalent sets
2222 : * (without annotation) mapped to indexes into the given groupclause.
2223 : */
2224 : static List *
2225 3588 : remap_to_groupclause_idx(List *groupClause,
2226 : List *gsets,
2227 : int *tleref_to_colnum_map)
2228 : {
2229 3588 : int ref = 0;
2230 3588 : List *result = NIL;
2231 : ListCell *lc;
2232 :
2233 8920 : foreach(lc, groupClause)
2234 : {
2235 5332 : SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2236 :
2237 5332 : tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
2238 : }
2239 :
2240 8382 : foreach(lc, gsets)
2241 : {
2242 4794 : List *set = NIL;
2243 : ListCell *lc2;
2244 4794 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
2245 :
2246 10904 : foreach(lc2, gs->set)
2247 : {
2248 6110 : set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
2249 : }
2250 :
2251 4794 : result = lappend(result, set);
2252 : }
2253 :
2254 3588 : return result;
2255 : }
2256 :
2257 :
2258 : /*
2259 : * preprocess_rowmarks - set up PlanRowMarks if needed
2260 : */
2261 : static void
2262 490594 : preprocess_rowmarks(PlannerInfo *root)
2263 : {
2264 490594 : Query *parse = root->parse;
2265 : Bitmapset *rels;
2266 : List *prowmarks;
2267 : ListCell *l;
2268 : int i;
2269 :
2270 490594 : if (parse->rowMarks)
2271 : {
2272 : /*
2273 : * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2274 : * grouping, since grouping renders a reference to individual tuple
2275 : * CTIDs invalid. This is also checked at parse time, but that's
2276 : * insufficient because of rule substitution, query pullup, etc.
2277 : */
2278 7574 : CheckSelectLocking(parse, linitial_node(RowMarkClause,
2279 : parse->rowMarks)->strength);
2280 : }
2281 : else
2282 : {
2283 : /*
2284 : * We only need rowmarks for UPDATE, DELETE, MERGE, or FOR [KEY]
2285 : * UPDATE/SHARE.
2286 : */
2287 483020 : if (parse->commandType != CMD_UPDATE &&
2288 469672 : parse->commandType != CMD_DELETE &&
2289 465442 : parse->commandType != CMD_MERGE)
2290 463738 : return;
2291 : }
2292 :
2293 : /*
2294 : * We need to have rowmarks for all base relations except the target. We
2295 : * make a bitmapset of all base rels and then remove the items we don't
2296 : * need or have FOR [KEY] UPDATE/SHARE marks for.
2297 : */
2298 26856 : rels = get_relids_in_jointree((Node *) parse->jointree, false, false);
2299 26856 : if (parse->resultRelation)
2300 19282 : rels = bms_del_member(rels, parse->resultRelation);
2301 :
2302 : /*
2303 : * Convert RowMarkClauses to PlanRowMark representation.
2304 : */
2305 26856 : prowmarks = NIL;
2306 34686 : foreach(l, parse->rowMarks)
2307 : {
2308 7830 : RowMarkClause *rc = lfirst_node(RowMarkClause, l);
2309 7830 : RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2310 : PlanRowMark *newrc;
2311 :
2312 : /*
2313 : * Currently, it is syntactically impossible to have FOR UPDATE et al
2314 : * applied to an update/delete target rel. If that ever becomes
2315 : * possible, we should drop the target from the PlanRowMark list.
2316 : */
2317 : Assert(rc->rti != parse->resultRelation);
2318 :
2319 : /*
2320 : * Ignore RowMarkClauses for subqueries; they aren't real tables and
2321 : * can't support true locking. Subqueries that got flattened into the
2322 : * main query should be ignored completely. Any that didn't will get
2323 : * ROW_MARK_COPY items in the next loop.
2324 : */
2325 7830 : if (rte->rtekind != RTE_RELATION)
2326 108 : continue;
2327 :
2328 7722 : rels = bms_del_member(rels, rc->rti);
2329 :
2330 7722 : newrc = makeNode(PlanRowMark);
2331 7722 : newrc->rti = newrc->prti = rc->rti;
2332 7722 : newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2333 7722 : newrc->markType = select_rowmark_type(rte, rc->strength);
2334 7722 : newrc->allMarkTypes = (1 << newrc->markType);
2335 7722 : newrc->strength = rc->strength;
2336 7722 : newrc->waitPolicy = rc->waitPolicy;
2337 7722 : newrc->isParent = false;
2338 :
2339 7722 : prowmarks = lappend(prowmarks, newrc);
2340 : }
2341 :
2342 : /*
2343 : * Now, add rowmarks for any non-target, non-locked base relations.
2344 : */
2345 26856 : i = 0;
2346 63904 : foreach(l, parse->rtable)
2347 : {
2348 37048 : RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
2349 : PlanRowMark *newrc;
2350 :
2351 37048 : i++;
2352 37048 : if (!bms_is_member(i, rels))
2353 33766 : continue;
2354 :
2355 3282 : newrc = makeNode(PlanRowMark);
2356 3282 : newrc->rti = newrc->prti = i;
2357 3282 : newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2358 3282 : newrc->markType = select_rowmark_type(rte, LCS_NONE);
2359 3282 : newrc->allMarkTypes = (1 << newrc->markType);
2360 3282 : newrc->strength = LCS_NONE;
2361 3282 : newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
2362 3282 : newrc->isParent = false;
2363 :
2364 3282 : prowmarks = lappend(prowmarks, newrc);
2365 : }
2366 :
2367 26856 : root->rowMarks = prowmarks;
2368 : }
2369 :
2370 : /*
2371 : * Select RowMarkType to use for a given table
2372 : */
2373 : RowMarkType
2374 13128 : select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
2375 : {
2376 13128 : if (rte->rtekind != RTE_RELATION)
2377 : {
2378 : /* If it's not a table at all, use ROW_MARK_COPY */
2379 1270 : return ROW_MARK_COPY;
2380 : }
2381 11858 : else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2382 : {
2383 : /* Let the FDW select the rowmark type, if it wants to */
2384 192 : FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2385 :
2386 192 : if (fdwroutine->GetForeignRowMarkType != NULL)
2387 0 : return fdwroutine->GetForeignRowMarkType(rte, strength);
2388 : /* Otherwise, use ROW_MARK_COPY by default */
2389 192 : return ROW_MARK_COPY;
2390 : }
2391 : else
2392 : {
2393 : /* Regular table, apply the appropriate lock type */
2394 11666 : switch (strength)
2395 : {
2396 2208 : case LCS_NONE:
2397 :
2398 : /*
2399 : * We don't need a tuple lock, only the ability to re-fetch
2400 : * the row.
2401 : */
2402 2208 : return ROW_MARK_REFERENCE;
2403 : break;
2404 7654 : case LCS_FORKEYSHARE:
2405 7654 : return ROW_MARK_KEYSHARE;
2406 : break;
2407 300 : case LCS_FORSHARE:
2408 300 : return ROW_MARK_SHARE;
2409 : break;
2410 62 : case LCS_FORNOKEYUPDATE:
2411 62 : return ROW_MARK_NOKEYEXCLUSIVE;
2412 : break;
2413 1442 : case LCS_FORUPDATE:
2414 1442 : return ROW_MARK_EXCLUSIVE;
2415 : break;
2416 : }
2417 0 : elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2418 : return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */
2419 : }
2420 : }
2421 :
2422 : /*
2423 : * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2424 : *
2425 : * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2426 : * results back in *count_est and *offset_est. These variables are set to
2427 : * 0 if the corresponding clause is not present, and -1 if it's present
2428 : * but we couldn't estimate the value for it. (The "0" convention is OK
2429 : * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2430 : * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
2431 : * usual practice of never estimating less than one row.) These values will
2432 : * be passed to create_limit_path, which see if you change this code.
2433 : *
2434 : * The return value is the suitably adjusted tuple_fraction to use for
2435 : * planning the query. This adjustment is not overridable, since it reflects
2436 : * plan actions that grouping_planner() will certainly take, not assumptions
2437 : * about context.
2438 : */
2439 : static double
2440 4632 : preprocess_limit(PlannerInfo *root, double tuple_fraction,
2441 : int64 *offset_est, int64 *count_est)
2442 : {
2443 4632 : Query *parse = root->parse;
2444 : Node *est;
2445 : double limit_fraction;
2446 :
2447 : /* Should not be called unless LIMIT or OFFSET */
2448 : Assert(parse->limitCount || parse->limitOffset);
2449 :
2450 : /*
2451 : * Try to obtain the clause values. We use estimate_expression_value
2452 : * primarily because it can sometimes do something useful with Params.
2453 : */
2454 4632 : if (parse->limitCount)
2455 : {
2456 4192 : est = estimate_expression_value(root, parse->limitCount);
2457 4192 : if (est && IsA(est, Const))
2458 : {
2459 4186 : if (((Const *) est)->constisnull)
2460 : {
2461 : /* NULL indicates LIMIT ALL, ie, no limit */
2462 0 : *count_est = 0; /* treat as not present */
2463 : }
2464 : else
2465 : {
2466 4186 : *count_est = DatumGetInt64(((Const *) est)->constvalue);
2467 4186 : if (*count_est <= 0)
2468 150 : *count_est = 1; /* force to at least 1 */
2469 : }
2470 : }
2471 : else
2472 6 : *count_est = -1; /* can't estimate */
2473 : }
2474 : else
2475 440 : *count_est = 0; /* not present */
2476 :
2477 4632 : if (parse->limitOffset)
2478 : {
2479 800 : est = estimate_expression_value(root, parse->limitOffset);
2480 800 : if (est && IsA(est, Const))
2481 : {
2482 776 : if (((Const *) est)->constisnull)
2483 : {
2484 : /* Treat NULL as no offset; the executor will too */
2485 0 : *offset_est = 0; /* treat as not present */
2486 : }
2487 : else
2488 : {
2489 776 : *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2490 776 : if (*offset_est < 0)
2491 0 : *offset_est = 0; /* treat as not present */
2492 : }
2493 : }
2494 : else
2495 24 : *offset_est = -1; /* can't estimate */
2496 : }
2497 : else
2498 3832 : *offset_est = 0; /* not present */
2499 :
2500 4632 : if (*count_est != 0)
2501 : {
2502 : /*
2503 : * A LIMIT clause limits the absolute number of tuples returned.
2504 : * However, if it's not a constant LIMIT then we have to guess; for
2505 : * lack of a better idea, assume 10% of the plan's result is wanted.
2506 : */
2507 4192 : if (*count_est < 0 || *offset_est < 0)
2508 : {
2509 : /* LIMIT or OFFSET is an expression ... punt ... */
2510 24 : limit_fraction = 0.10;
2511 : }
2512 : else
2513 : {
2514 : /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2515 4168 : limit_fraction = (double) *count_est + (double) *offset_est;
2516 : }
2517 :
2518 : /*
2519 : * If we have absolute limits from both caller and LIMIT, use the
2520 : * smaller value; likewise if they are both fractional. If one is
2521 : * fractional and the other absolute, we can't easily determine which
2522 : * is smaller, but we use the heuristic that the absolute will usually
2523 : * be smaller.
2524 : */
2525 4192 : if (tuple_fraction >= 1.0)
2526 : {
2527 6 : if (limit_fraction >= 1.0)
2528 : {
2529 : /* both absolute */
2530 6 : tuple_fraction = Min(tuple_fraction, limit_fraction);
2531 : }
2532 : else
2533 : {
2534 : /* caller absolute, limit fractional; use caller's value */
2535 : }
2536 : }
2537 4186 : else if (tuple_fraction > 0.0)
2538 : {
2539 144 : if (limit_fraction >= 1.0)
2540 : {
2541 : /* caller fractional, limit absolute; use limit */
2542 144 : tuple_fraction = limit_fraction;
2543 : }
2544 : else
2545 : {
2546 : /* both fractional */
2547 0 : tuple_fraction = Min(tuple_fraction, limit_fraction);
2548 : }
2549 : }
2550 : else
2551 : {
2552 : /* no info from caller, just use limit */
2553 4042 : tuple_fraction = limit_fraction;
2554 : }
2555 : }
2556 440 : else if (*offset_est != 0 && tuple_fraction > 0.0)
2557 : {
2558 : /*
2559 : * We have an OFFSET but no LIMIT. This acts entirely differently
2560 : * from the LIMIT case: here, we need to increase rather than decrease
2561 : * the caller's tuple_fraction, because the OFFSET acts to cause more
2562 : * tuples to be fetched instead of fewer. This only matters if we got
2563 : * a tuple_fraction > 0, however.
2564 : *
2565 : * As above, use 10% if OFFSET is present but unestimatable.
2566 : */
2567 12 : if (*offset_est < 0)
2568 0 : limit_fraction = 0.10;
2569 : else
2570 12 : limit_fraction = (double) *offset_est;
2571 :
2572 : /*
2573 : * If we have absolute counts from both caller and OFFSET, add them
2574 : * together; likewise if they are both fractional. If one is
2575 : * fractional and the other absolute, we want to take the larger, and
2576 : * we heuristically assume that's the fractional one.
2577 : */
2578 12 : if (tuple_fraction >= 1.0)
2579 : {
2580 0 : if (limit_fraction >= 1.0)
2581 : {
2582 : /* both absolute, so add them together */
2583 0 : tuple_fraction += limit_fraction;
2584 : }
2585 : else
2586 : {
2587 : /* caller absolute, limit fractional; use limit */
2588 0 : tuple_fraction = limit_fraction;
2589 : }
2590 : }
2591 : else
2592 : {
2593 12 : if (limit_fraction >= 1.0)
2594 : {
2595 : /* caller fractional, limit absolute; use caller's value */
2596 : }
2597 : else
2598 : {
2599 : /* both fractional, so add them together */
2600 0 : tuple_fraction += limit_fraction;
2601 0 : if (tuple_fraction >= 1.0)
2602 0 : tuple_fraction = 0.0; /* assume fetch all */
2603 : }
2604 : }
2605 : }
2606 :
2607 4632 : return tuple_fraction;
2608 : }
2609 :
2610 : /*
2611 : * limit_needed - do we actually need a Limit plan node?
2612 : *
2613 : * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2614 : * a Limit node. This is worth checking for because "OFFSET 0" is a common
2615 : * locution for an optimization fence. (Because other places in the planner
2616 : * merely check whether parse->limitOffset isn't NULL, it will still work as
2617 : * an optimization fence --- we're just suppressing unnecessary run-time
2618 : * overhead.)
2619 : *
2620 : * This might look like it could be merged into preprocess_limit, but there's
2621 : * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
2622 : * in preprocess_limit it's good enough to consider estimated values.
2623 : */
2624 : bool
2625 1015818 : limit_needed(Query *parse)
2626 : {
2627 : Node *node;
2628 :
2629 1015818 : node = parse->limitCount;
2630 1015818 : if (node)
2631 : {
2632 10004 : if (IsA(node, Const))
2633 : {
2634 : /* NULL indicates LIMIT ALL, ie, no limit */
2635 9812 : if (!((Const *) node)->constisnull)
2636 9812 : return true; /* LIMIT with a constant value */
2637 : }
2638 : else
2639 192 : return true; /* non-constant LIMIT */
2640 : }
2641 :
2642 1005814 : node = parse->limitOffset;
2643 1005814 : if (node)
2644 : {
2645 1298 : if (IsA(node, Const))
2646 : {
2647 : /* Treat NULL as no offset; the executor would too */
2648 1032 : if (!((Const *) node)->constisnull)
2649 : {
2650 1032 : int64 offset = DatumGetInt64(((Const *) node)->constvalue);
2651 :
2652 1032 : if (offset != 0)
2653 84 : return true; /* OFFSET with a nonzero value */
2654 : }
2655 : }
2656 : else
2657 266 : return true; /* non-constant OFFSET */
2658 : }
2659 :
2660 1005464 : return false; /* don't need a Limit plan node */
2661 : }
2662 :
2663 :
2664 : /*
2665 : * remove_useless_groupby_columns
2666 : * Remove any columns in the GROUP BY clause that are redundant due to
2667 : * being functionally dependent on other GROUP BY columns.
2668 : *
2669 : * Since some other DBMSes do not allow references to ungrouped columns, it's
2670 : * not unusual to find all columns listed in GROUP BY even though listing the
2671 : * primary-key columns would be sufficient. Deleting such excess columns
2672 : * avoids redundant sorting work, so it's worth doing.
2673 : *
2674 : * Relcache invalidations will ensure that cached plans become invalidated
2675 : * when the underlying index of the pkey constraint is dropped.
2676 : *
2677 : * Currently, we only make use of pkey constraints for this, however, we may
2678 : * wish to take this further in the future and also use unique constraints
2679 : * which have NOT NULL columns. In that case, plan invalidation will still
2680 : * work since relations will receive a relcache invalidation when a NOT NULL
2681 : * constraint is dropped.
2682 : */
2683 : static void
2684 3360 : remove_useless_groupby_columns(PlannerInfo *root)
2685 : {
2686 3360 : Query *parse = root->parse;
2687 : Bitmapset **groupbyattnos;
2688 : Bitmapset **surplusvars;
2689 : ListCell *lc;
2690 : int relid;
2691 :
2692 : /* No chance to do anything if there are less than two GROUP BY items */
2693 3360 : if (list_length(root->processed_groupClause) < 2)
2694 2084 : return;
2695 :
2696 : /* Don't fiddle with the GROUP BY clause if the query has grouping sets */
2697 1276 : if (parse->groupingSets)
2698 0 : return;
2699 :
2700 : /*
2701 : * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
2702 : * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
2703 : * that are GROUP BY items.
2704 : */
2705 1276 : groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2706 1276 : (list_length(parse->rtable) + 1));
2707 4562 : foreach(lc, root->processed_groupClause)
2708 : {
2709 3286 : SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
2710 3286 : TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2711 3286 : Var *var = (Var *) tle->expr;
2712 :
2713 : /*
2714 : * Ignore non-Vars and Vars from other query levels.
2715 : *
2716 : * XXX in principle, stable expressions containing Vars could also be
2717 : * removed, if all the Vars are functionally dependent on other GROUP
2718 : * BY items. But it's not clear that such cases occur often enough to
2719 : * be worth troubling over.
2720 : */
2721 3286 : if (!IsA(var, Var) ||
2722 2414 : var->varlevelsup > 0)
2723 872 : continue;
2724 :
2725 : /* OK, remember we have this Var */
2726 2414 : relid = var->varno;
2727 : Assert(relid <= list_length(parse->rtable));
2728 2414 : groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
2729 2414 : var->varattno - FirstLowInvalidHeapAttributeNumber);
2730 : }
2731 :
2732 : /*
2733 : * Consider each relation and see if it is possible to remove some of its
2734 : * Vars from GROUP BY. For simplicity and speed, we do the actual removal
2735 : * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
2736 : * of the column attnos of RTE k that are removable GROUP BY items.
2737 : */
2738 1276 : surplusvars = NULL; /* don't allocate array unless required */
2739 1276 : relid = 0;
2740 3502 : foreach(lc, parse->rtable)
2741 : {
2742 2226 : RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
2743 : Bitmapset *relattnos;
2744 : Bitmapset *pkattnos;
2745 : Oid constraintOid;
2746 :
2747 2226 : relid++;
2748 :
2749 : /* Only plain relations could have primary-key constraints */
2750 2226 : if (rte->rtekind != RTE_RELATION)
2751 2044 : continue;
2752 :
2753 : /*
2754 : * We must skip inheritance parent tables as some of the child rels
2755 : * may cause duplicate rows. This cannot happen with partitioned
2756 : * tables, however.
2757 : */
2758 1652 : if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
2759 18 : continue;
2760 :
2761 : /* Nothing to do unless this rel has multiple Vars in GROUP BY */
2762 1634 : relattnos = groupbyattnos[relid];
2763 1634 : if (bms_membership(relattnos) != BMS_MULTIPLE)
2764 874 : continue;
2765 :
2766 : /*
2767 : * Can't remove any columns for this rel if there is no suitable
2768 : * (i.e., nondeferrable) primary key constraint.
2769 : */
2770 760 : pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
2771 760 : if (pkattnos == NULL)
2772 578 : continue;
2773 :
2774 : /*
2775 : * If the primary key is a proper subset of relattnos then we have
2776 : * some items in the GROUP BY that can be removed.
2777 : */
2778 182 : if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
2779 : {
2780 : /*
2781 : * To easily remember whether we've found anything to do, we don't
2782 : * allocate the surplusvars[] array until we find something.
2783 : */
2784 164 : if (surplusvars == NULL)
2785 158 : surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2786 158 : (list_length(parse->rtable) + 1));
2787 :
2788 : /* Remember the attnos of the removable columns */
2789 164 : surplusvars[relid] = bms_difference(relattnos, pkattnos);
2790 : }
2791 : }
2792 :
2793 : /*
2794 : * If we found any surplus Vars, build a new GROUP BY clause without them.
2795 : * (Note: this may leave some TLEs with unreferenced ressortgroupref
2796 : * markings, but that's harmless.)
2797 : */
2798 1276 : if (surplusvars != NULL)
2799 : {
2800 158 : List *new_groupby = NIL;
2801 :
2802 678 : foreach(lc, root->processed_groupClause)
2803 : {
2804 520 : SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
2805 520 : TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2806 520 : Var *var = (Var *) tle->expr;
2807 :
2808 : /*
2809 : * New list must include non-Vars, outer Vars, and anything not
2810 : * marked as surplus.
2811 : */
2812 520 : if (!IsA(var, Var) ||
2813 520 : var->varlevelsup > 0 ||
2814 520 : !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
2815 520 : surplusvars[var->varno]))
2816 326 : new_groupby = lappend(new_groupby, sgc);
2817 : }
2818 :
2819 158 : root->processed_groupClause = new_groupby;
2820 : }
2821 : }
2822 :
2823 : /*
2824 : * groupclause_apply_groupingset
2825 : * Apply the order of GROUP BY clauses defined by grouping sets. Items
2826 : * not in the grouping set are skipped.
2827 : */
2828 : static List *
2829 3510 : groupclause_apply_groupingset(PlannerInfo *root, List *gset)
2830 : {
2831 3510 : Query *parse = root->parse;
2832 3510 : List *new_groupclause = NIL;
2833 : ListCell *sl;
2834 :
2835 8764 : foreach(sl, gset)
2836 : {
2837 5254 : Index ref = lfirst_int(sl);
2838 5254 : SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
2839 :
2840 5254 : new_groupclause = lappend(new_groupclause, cl);
2841 : }
2842 3510 : return new_groupclause;
2843 : }
2844 :
2845 : /*
2846 : * Extract lists of grouping sets that can be implemented using a single
2847 : * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
2848 : *
2849 : * Input must be sorted with smallest sets first. Result has each sublist
2850 : * sorted with smallest sets first.
2851 : *
2852 : * We want to produce the absolute minimum possible number of lists here to
2853 : * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
2854 : * of finding the minimal partition of a partially-ordered set into chains
2855 : * (which is what we need, taking the list of grouping sets as a poset ordered
2856 : * by set inclusion) can be mapped to the problem of finding the maximum
2857 : * cardinality matching on a bipartite graph, which is solvable in polynomial
2858 : * time with a worst case of no worse than O(n^2.5) and usually much
2859 : * better. Since our N is at most 4096, we don't need to consider fallbacks to
2860 : * heuristic or approximate methods. (Planning time for a 12-d cube is under
2861 : * half a second on my modest system even with optimization off and assertions
2862 : * on.)
2863 : */
2864 : static List *
2865 722 : extract_rollup_sets(List *groupingSets)
2866 : {
2867 722 : int num_sets_raw = list_length(groupingSets);
2868 722 : int num_empty = 0;
2869 722 : int num_sets = 0; /* distinct sets */
2870 722 : int num_chains = 0;
2871 722 : List *result = NIL;
2872 : List **results;
2873 : List **orig_sets;
2874 : Bitmapset **set_masks;
2875 : int *chains;
2876 : short **adjacency;
2877 : short *adjacency_buf;
2878 : BipartiteMatchState *state;
2879 : int i;
2880 : int j;
2881 : int j_size;
2882 722 : ListCell *lc1 = list_head(groupingSets);
2883 : ListCell *lc;
2884 :
2885 : /*
2886 : * Start by stripping out empty sets. The algorithm doesn't require this,
2887 : * but the planner currently needs all empty sets to be returned in the
2888 : * first list, so we strip them here and add them back after.
2889 : */
2890 1296 : while (lc1 && lfirst(lc1) == NIL)
2891 : {
2892 574 : ++num_empty;
2893 574 : lc1 = lnext(groupingSets, lc1);
2894 : }
2895 :
2896 : /* bail out now if it turns out that all we had were empty sets. */
2897 722 : if (!lc1)
2898 42 : return list_make1(groupingSets);
2899 :
2900 : /*----------
2901 : * We don't strictly need to remove duplicate sets here, but if we don't,
2902 : * they tend to become scattered through the result, which is a bit
2903 : * confusing (and irritating if we ever decide to optimize them out).
2904 : * So we remove them here and add them back after.
2905 : *
2906 : * For each non-duplicate set, we fill in the following:
2907 : *
2908 : * orig_sets[i] = list of the original set lists
2909 : * set_masks[i] = bitmapset for testing inclusion
2910 : * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
2911 : *
2912 : * chains[i] will be the result group this set is assigned to.
2913 : *
2914 : * We index all of these from 1 rather than 0 because it is convenient
2915 : * to leave 0 free for the NIL node in the graph algorithm.
2916 : *----------
2917 : */
2918 680 : orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
2919 680 : set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
2920 680 : adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
2921 680 : adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
2922 :
2923 680 : j_size = 0;
2924 680 : j = 0;
2925 680 : i = 1;
2926 :
2927 2516 : for_each_cell(lc, groupingSets, lc1)
2928 : {
2929 1836 : List *candidate = (List *) lfirst(lc);
2930 1836 : Bitmapset *candidate_set = NULL;
2931 : ListCell *lc2;
2932 1836 : int dup_of = 0;
2933 :
2934 4506 : foreach(lc2, candidate)
2935 : {
2936 2670 : candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
2937 : }
2938 :
2939 : /* we can only be a dup if we're the same length as a previous set */
2940 1836 : if (j_size == list_length(candidate))
2941 : {
2942 : int k;
2943 :
2944 1664 : for (k = j; k < i; ++k)
2945 : {
2946 1080 : if (bms_equal(set_masks[k], candidate_set))
2947 : {
2948 158 : dup_of = k;
2949 158 : break;
2950 : }
2951 : }
2952 : }
2953 1094 : else if (j_size < list_length(candidate))
2954 : {
2955 1094 : j_size = list_length(candidate);
2956 1094 : j = i;
2957 : }
2958 :
2959 1836 : if (dup_of > 0)
2960 : {
2961 158 : orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
2962 158 : bms_free(candidate_set);
2963 : }
2964 : else
2965 : {
2966 : int k;
2967 1678 : int n_adj = 0;
2968 :
2969 1678 : orig_sets[i] = list_make1(candidate);
2970 1678 : set_masks[i] = candidate_set;
2971 :
2972 : /* fill in adjacency list; no need to compare equal-size sets */
2973 :
2974 2890 : for (k = j - 1; k > 0; --k)
2975 : {
2976 1212 : if (bms_is_subset(set_masks[k], candidate_set))
2977 1050 : adjacency_buf[++n_adj] = k;
2978 : }
2979 :
2980 1678 : if (n_adj > 0)
2981 : {
2982 538 : adjacency_buf[0] = n_adj;
2983 538 : adjacency[i] = palloc((n_adj + 1) * sizeof(short));
2984 538 : memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
2985 : }
2986 : else
2987 1140 : adjacency[i] = NULL;
2988 :
2989 1678 : ++i;
2990 : }
2991 : }
2992 :
2993 680 : num_sets = i - 1;
2994 :
2995 : /*
2996 : * Apply the graph matching algorithm to do the work.
2997 : */
2998 680 : state = BipartiteMatch(num_sets, num_sets, adjacency);
2999 :
3000 : /*
3001 : * Now, the state->pair* fields have the info we need to assign sets to
3002 : * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
3003 : * pair_vu[v] = u (both will be true, but we check both so that we can do
3004 : * it in one pass)
3005 : */
3006 680 : chains = palloc0((num_sets + 1) * sizeof(int));
3007 :
3008 2358 : for (i = 1; i <= num_sets; ++i)
3009 : {
3010 1678 : int u = state->pair_vu[i];
3011 1678 : int v = state->pair_uv[i];
3012 :
3013 1678 : if (u > 0 && u < i)
3014 0 : chains[i] = chains[u];
3015 1678 : else if (v > 0 && v < i)
3016 510 : chains[i] = chains[v];
3017 : else
3018 1168 : chains[i] = ++num_chains;
3019 : }
3020 :
3021 : /* build result lists. */
3022 680 : results = palloc0((num_chains + 1) * sizeof(List *));
3023 :
3024 2358 : for (i = 1; i <= num_sets; ++i)
3025 : {
3026 1678 : int c = chains[i];
3027 :
3028 : Assert(c > 0);
3029 :
3030 1678 : results[c] = list_concat(results[c], orig_sets[i]);
3031 : }
3032 :
3033 : /* push any empty sets back on the first list. */
3034 1164 : while (num_empty-- > 0)
3035 484 : results[1] = lcons(NIL, results[1]);
3036 :
3037 : /* make result list */
3038 1848 : for (i = 1; i <= num_chains; ++i)
3039 1168 : result = lappend(result, results[i]);
3040 :
3041 : /*
3042 : * Free all the things.
3043 : *
3044 : * (This is over-fussy for small sets but for large sets we could have
3045 : * tied up a nontrivial amount of memory.)
3046 : */
3047 680 : BipartiteMatchFree(state);
3048 680 : pfree(results);
3049 680 : pfree(chains);
3050 2358 : for (i = 1; i <= num_sets; ++i)
3051 1678 : if (adjacency[i])
3052 538 : pfree(adjacency[i]);
3053 680 : pfree(adjacency);
3054 680 : pfree(adjacency_buf);
3055 680 : pfree(orig_sets);
3056 2358 : for (i = 1; i <= num_sets; ++i)
3057 1678 : bms_free(set_masks[i]);
3058 680 : pfree(set_masks);
3059 :
3060 680 : return result;
3061 : }
3062 :
3063 : /*
3064 : * Reorder the elements of a list of grouping sets such that they have correct
3065 : * prefix relationships. Also inserts the GroupingSetData annotations.
3066 : *
3067 : * The input must be ordered with smallest sets first; the result is returned
3068 : * with largest sets first. Note that the result shares no list substructure
3069 : * with the input, so it's safe for the caller to modify it later.
3070 : *
3071 : * If we're passed in a sortclause, we follow its order of columns to the
3072 : * extent possible, to minimize the chance that we add unnecessary sorts.
3073 : * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3074 : * gets implemented in one pass.)
3075 : */
3076 : static List *
3077 1210 : reorder_grouping_sets(List *groupingSets, List *sortclause)
3078 : {
3079 : ListCell *lc;
3080 1210 : List *previous = NIL;
3081 1210 : List *result = NIL;
3082 :
3083 3620 : foreach(lc, groupingSets)
3084 : {
3085 2410 : List *candidate = (List *) lfirst(lc);
3086 2410 : List *new_elems = list_difference_int(candidate, previous);
3087 2410 : GroupingSetData *gs = makeNode(GroupingSetData);
3088 :
3089 2490 : while (list_length(sortclause) > list_length(previous) &&
3090 : new_elems != NIL)
3091 : {
3092 188 : SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3093 188 : int ref = sc->tleSortGroupRef;
3094 :
3095 188 : if (list_member_int(new_elems, ref))
3096 : {
3097 80 : previous = lappend_int(previous, ref);
3098 80 : new_elems = list_delete_int(new_elems, ref);
3099 : }
3100 : else
3101 : {
3102 : /* diverged from the sortclause; give up on it */
3103 108 : sortclause = NIL;
3104 108 : break;
3105 : }
3106 : }
3107 :
3108 2410 : previous = list_concat(previous, new_elems);
3109 :
3110 2410 : gs->set = list_copy(previous);
3111 2410 : result = lcons(gs, result);
3112 : }
3113 :
3114 1210 : list_free(previous);
3115 :
3116 1210 : return result;
3117 : }
3118 :
3119 : /*
3120 : * has_volatile_pathkey
3121 : * Returns true if any PathKey in 'keys' has an EquivalenceClass
3122 : * containing a volatile function. Otherwise returns false.
3123 : */
3124 : static bool
3125 2216 : has_volatile_pathkey(List *keys)
3126 : {
3127 : ListCell *lc;
3128 :
3129 4588 : foreach(lc, keys)
3130 : {
3131 2390 : PathKey *pathkey = lfirst_node(PathKey, lc);
3132 :
3133 2390 : if (pathkey->pk_eclass->ec_has_volatile)
3134 18 : return true;
3135 : }
3136 :
3137 2198 : return false;
3138 : }
3139 :
3140 : /*
3141 : * adjust_group_pathkeys_for_groupagg
3142 : * Add pathkeys to root->group_pathkeys to reflect the best set of
3143 : * pre-ordered input for ordered aggregates.
3144 : *
3145 : * We define "best" as the pathkeys that suit the largest number of
3146 : * aggregate functions. We find these by looking at the first ORDER BY /
3147 : * DISTINCT aggregate and take the pathkeys for that before searching for
3148 : * other aggregates that require the same or a more strict variation of the
3149 : * same pathkeys. We then repeat that process for any remaining aggregates
3150 : * with different pathkeys and if we find another set of pathkeys that suits a
3151 : * larger number of aggregates then we select those pathkeys instead.
3152 : *
3153 : * When the best pathkeys are found we also mark each Aggref that can use
3154 : * those pathkeys as aggpresorted = true.
3155 : *
3156 : * Note: When an aggregate function's ORDER BY / DISTINCT clause contains any
3157 : * volatile functions, we never make use of these pathkeys. We want to ensure
3158 : * that sorts using volatile functions are done independently in each Aggref
3159 : * rather than once at the query level. If we were to allow this then Aggrefs
3160 : * with compatible sort orders would all transition their rows in the same
3161 : * order if those pathkeys were deemed to be the best pathkeys to sort on.
3162 : * Whereas, if some other set of Aggref's pathkeys happened to be deemed
3163 : * better pathkeys to sort on, then the volatile function Aggrefs would be
3164 : * left to perform their sorts individually. To avoid this inconsistent
3165 : * behavior which could make Aggref results depend on what other Aggrefs the
3166 : * query contains, we always force Aggrefs with volatile functions to perform
3167 : * their own sorts.
3168 : */
3169 : static void
3170 1796 : adjust_group_pathkeys_for_groupagg(PlannerInfo *root)
3171 : {
3172 1796 : List *grouppathkeys = root->group_pathkeys;
3173 : List *bestpathkeys;
3174 : Bitmapset *bestaggs;
3175 : Bitmapset *unprocessed_aggs;
3176 : ListCell *lc;
3177 : int i;
3178 :
3179 : /* Shouldn't be here if there are grouping sets */
3180 : Assert(root->parse->groupingSets == NIL);
3181 : /* Shouldn't be here unless there are some ordered aggregates */
3182 : Assert(root->numOrderedAggs > 0);
3183 :
3184 : /* Do nothing if disabled */
3185 1796 : if (!enable_presorted_aggregate)
3186 6 : return;
3187 :
3188 : /*
3189 : * Make a first pass over all AggInfos to collect a Bitmapset containing
3190 : * the indexes of all AggInfos to be processed below.
3191 : */
3192 1790 : unprocessed_aggs = NULL;
3193 4264 : foreach(lc, root->agginfos)
3194 : {
3195 2474 : AggInfo *agginfo = lfirst_node(AggInfo, lc);
3196 2474 : Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3197 :
3198 2474 : if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
3199 264 : continue;
3200 :
3201 : /* only add aggregates with a DISTINCT or ORDER BY */
3202 2210 : if (aggref->aggdistinct != NIL || aggref->aggorder != NIL)
3203 1910 : unprocessed_aggs = bms_add_member(unprocessed_aggs,
3204 : foreach_current_index(lc));
3205 : }
3206 :
3207 : /*
3208 : * Now process all the unprocessed_aggs to find the best set of pathkeys
3209 : * for the given set of aggregates.
3210 : *
3211 : * On the first outer loop here 'bestaggs' will be empty. We'll populate
3212 : * this during the first loop using the pathkeys for the very first
3213 : * AggInfo then taking any stronger pathkeys from any other AggInfos with
3214 : * a more strict set of compatible pathkeys. Once the outer loop is
3215 : * complete, we mark off all the aggregates with compatible pathkeys then
3216 : * remove those from the unprocessed_aggs and repeat the process to try to
3217 : * find another set of pathkeys that are suitable for a larger number of
3218 : * aggregates. The outer loop will stop when there are not enough
3219 : * unprocessed aggregates for it to be possible to find a set of pathkeys
3220 : * to suit a larger number of aggregates.
3221 : */
3222 1790 : bestpathkeys = NIL;
3223 1790 : bestaggs = NULL;
3224 3532 : while (bms_num_members(unprocessed_aggs) > bms_num_members(bestaggs))
3225 : {
3226 1742 : Bitmapset *aggindexes = NULL;
3227 1742 : List *currpathkeys = NIL;
3228 :
3229 1742 : i = -1;
3230 5700 : while ((i = bms_next_member(unprocessed_aggs, i)) >= 0)
3231 : {
3232 2216 : AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3233 2216 : Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3234 : List *sortlist;
3235 : List *pathkeys;
3236 :
3237 2216 : if (aggref->aggdistinct != NIL)
3238 706 : sortlist = aggref->aggdistinct;
3239 : else
3240 1510 : sortlist = aggref->aggorder;
3241 :
3242 2216 : pathkeys = make_pathkeys_for_sortclauses(root, sortlist,
3243 : aggref->args);
3244 :
3245 : /*
3246 : * Ignore Aggrefs which have volatile functions in their ORDER BY
3247 : * or DISTINCT clause.
3248 : */
3249 2216 : if (has_volatile_pathkey(pathkeys))
3250 : {
3251 18 : unprocessed_aggs = bms_del_member(unprocessed_aggs, i);
3252 18 : continue;
3253 : }
3254 :
3255 : /*
3256 : * When not set yet, take the pathkeys from the first unprocessed
3257 : * aggregate.
3258 : */
3259 2198 : if (currpathkeys == NIL)
3260 : {
3261 1736 : currpathkeys = pathkeys;
3262 :
3263 : /* include the GROUP BY pathkeys, if they exist */
3264 1736 : if (grouppathkeys != NIL)
3265 270 : currpathkeys = append_pathkeys(list_copy(grouppathkeys),
3266 : currpathkeys);
3267 :
3268 : /* record that we found pathkeys for this aggregate */
3269 1736 : aggindexes = bms_add_member(aggindexes, i);
3270 : }
3271 : else
3272 : {
3273 : /* now look for a stronger set of matching pathkeys */
3274 :
3275 : /* include the GROUP BY pathkeys, if they exist */
3276 462 : if (grouppathkeys != NIL)
3277 288 : pathkeys = append_pathkeys(list_copy(grouppathkeys),
3278 : pathkeys);
3279 :
3280 : /* are 'pathkeys' compatible or better than 'currpathkeys'? */
3281 462 : switch (compare_pathkeys(currpathkeys, pathkeys))
3282 : {
3283 12 : case PATHKEYS_BETTER2:
3284 : /* 'pathkeys' are stronger, use these ones instead */
3285 12 : currpathkeys = pathkeys;
3286 : /* FALLTHROUGH */
3287 :
3288 72 : case PATHKEYS_BETTER1:
3289 : /* 'pathkeys' are less strict */
3290 : /* FALLTHROUGH */
3291 :
3292 : case PATHKEYS_EQUAL:
3293 : /* mark this aggregate as covered by 'currpathkeys' */
3294 72 : aggindexes = bms_add_member(aggindexes, i);
3295 72 : break;
3296 :
3297 390 : case PATHKEYS_DIFFERENT:
3298 390 : break;
3299 : }
3300 3958 : }
3301 : }
3302 :
3303 : /* remove the aggregates that we've just processed */
3304 1742 : unprocessed_aggs = bms_del_members(unprocessed_aggs, aggindexes);
3305 :
3306 : /*
3307 : * If this pass included more aggregates than the previous best then
3308 : * use these ones as the best set.
3309 : */
3310 1742 : if (bms_num_members(aggindexes) > bms_num_members(bestaggs))
3311 : {
3312 1634 : bestaggs = aggindexes;
3313 1634 : bestpathkeys = currpathkeys;
3314 : }
3315 : }
3316 :
3317 : /*
3318 : * If we found any ordered aggregates, update root->group_pathkeys to add
3319 : * the best set of aggregate pathkeys. Note that bestpathkeys includes
3320 : * the original GROUP BY pathkeys already.
3321 : */
3322 1790 : if (bestpathkeys != NIL)
3323 1586 : root->group_pathkeys = bestpathkeys;
3324 :
3325 : /*
3326 : * Now that we've found the best set of aggregates we can set the
3327 : * presorted flag to indicate to the executor that it needn't bother
3328 : * performing a sort for these Aggrefs. We're able to do this now as
3329 : * there's no chance of a Hash Aggregate plan as create_grouping_paths
3330 : * will not mark the GROUP BY as GROUPING_CAN_USE_HASH due to the presence
3331 : * of ordered aggregates.
3332 : */
3333 1790 : i = -1;
3334 3466 : while ((i = bms_next_member(bestaggs, i)) >= 0)
3335 : {
3336 1676 : AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3337 :
3338 3370 : foreach(lc, agginfo->aggrefs)
3339 : {
3340 1694 : Aggref *aggref = lfirst_node(Aggref, lc);
3341 :
3342 1694 : aggref->aggpresorted = true;
3343 : }
3344 : }
3345 : }
3346 :
3347 : /*
3348 : * Compute query_pathkeys and other pathkeys during plan generation
3349 : */
3350 : static void
3351 481782 : standard_qp_callback(PlannerInfo *root, void *extra)
3352 : {
3353 481782 : Query *parse = root->parse;
3354 481782 : standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3355 481782 : List *tlist = root->processed_tlist;
3356 481782 : List *activeWindows = qp_extra->activeWindows;
3357 :
3358 : /*
3359 : * Calculate pathkeys that represent grouping/ordering and/or ordered
3360 : * aggregate requirements.
3361 : */
3362 481782 : if (qp_extra->gset_data)
3363 : {
3364 : /*
3365 : * With grouping sets, just use the first RollupData's groupClause. We
3366 : * don't make any effort to optimize grouping clauses when there are
3367 : * grouping sets, nor can we combine aggregate ordering keys with
3368 : * grouping.
3369 : */
3370 728 : List *rollups = qp_extra->gset_data->rollups;
3371 728 : List *groupClause = (rollups ? linitial_node(RollupData, rollups)->groupClause : NIL);
3372 :
3373 728 : if (grouping_is_sortable(groupClause))
3374 : {
3375 728 : root->group_pathkeys = make_pathkeys_for_sortclauses(root,
3376 : groupClause,
3377 : tlist);
3378 728 : root->num_groupby_pathkeys = list_length(root->group_pathkeys);
3379 : }
3380 : else
3381 : {
3382 0 : root->group_pathkeys = NIL;
3383 0 : root->num_groupby_pathkeys = 0;
3384 : }
3385 : }
3386 481054 : else if (parse->groupClause || root->numOrderedAggs > 0)
3387 4918 : {
3388 : /*
3389 : * With a plain GROUP BY list, we can remove any grouping items that
3390 : * are proven redundant by EquivalenceClass processing. For example,
3391 : * we can remove y given "WHERE x = y GROUP BY x, y". These aren't
3392 : * especially common cases, but they're nearly free to detect. Note
3393 : * that we remove redundant items from processed_groupClause but not
3394 : * the original parse->groupClause.
3395 : */
3396 : bool sortable;
3397 :
3398 4918 : root->group_pathkeys =
3399 4918 : make_pathkeys_for_sortclauses_extended(root,
3400 : &root->processed_groupClause,
3401 : tlist,
3402 : true,
3403 : &sortable);
3404 4918 : if (!sortable)
3405 : {
3406 : /* Can't sort; no point in considering aggregate ordering either */
3407 0 : root->group_pathkeys = NIL;
3408 0 : root->num_groupby_pathkeys = 0;
3409 : }
3410 : else
3411 : {
3412 4918 : root->num_groupby_pathkeys = list_length(root->group_pathkeys);
3413 : /* If we have ordered aggs, consider adding onto group_pathkeys */
3414 4918 : if (root->numOrderedAggs > 0)
3415 1796 : adjust_group_pathkeys_for_groupagg(root);
3416 : }
3417 : }
3418 : else
3419 : {
3420 476136 : root->group_pathkeys = NIL;
3421 476136 : root->num_groupby_pathkeys = 0;
3422 : }
3423 :
3424 : /* We consider only the first (bottom) window in pathkeys logic */
3425 481782 : if (activeWindows != NIL)
3426 : {
3427 2300 : WindowClause *wc = linitial_node(WindowClause, activeWindows);
3428 :
3429 2300 : root->window_pathkeys = make_pathkeys_for_window(root,
3430 : wc,
3431 : tlist);
3432 : }
3433 : else
3434 479482 : root->window_pathkeys = NIL;
3435 :
3436 : /*
3437 : * As with GROUP BY, we can discard any DISTINCT items that are proven
3438 : * redundant by EquivalenceClass processing. The non-redundant list is
3439 : * kept in root->processed_distinctClause, leaving the original
3440 : * parse->distinctClause alone.
3441 : */
3442 481782 : if (parse->distinctClause)
3443 : {
3444 : bool sortable;
3445 :
3446 : /* Make a copy since pathkey processing can modify the list */
3447 2196 : root->processed_distinctClause = list_copy(parse->distinctClause);
3448 2196 : root->distinct_pathkeys =
3449 2196 : make_pathkeys_for_sortclauses_extended(root,
3450 : &root->processed_distinctClause,
3451 : tlist,
3452 : true,
3453 : &sortable);
3454 2196 : if (!sortable)
3455 6 : root->distinct_pathkeys = NIL;
3456 : }
3457 : else
3458 479586 : root->distinct_pathkeys = NIL;
3459 :
3460 481782 : root->sort_pathkeys =
3461 481782 : make_pathkeys_for_sortclauses(root,
3462 : parse->sortClause,
3463 : tlist);
3464 :
3465 : /* setting setop_pathkeys might be useful to the union planner */
3466 495686 : if (qp_extra->setop != NULL &&
3467 13904 : set_operation_ordered_results_useful(qp_extra->setop))
3468 10474 : {
3469 : List *groupClauses;
3470 : bool sortable;
3471 :
3472 10474 : groupClauses = generate_setop_child_grouplist(qp_extra->setop, tlist);
3473 :
3474 10474 : root->setop_pathkeys =
3475 10474 : make_pathkeys_for_sortclauses_extended(root,
3476 : &groupClauses,
3477 : tlist,
3478 : false,
3479 : &sortable);
3480 10474 : if (!sortable)
3481 152 : root->setop_pathkeys = NIL;
3482 : }
3483 : else
3484 471308 : root->setop_pathkeys = NIL;
3485 :
3486 : /*
3487 : * Figure out whether we want a sorted result from query_planner.
3488 : *
3489 : * If we have a sortable GROUP BY clause, then we want a result sorted
3490 : * properly for grouping. Otherwise, if we have window functions to
3491 : * evaluate, we try to sort for the first window. Otherwise, if there's a
3492 : * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3493 : * we try to produce output that's sufficiently well sorted for the
3494 : * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3495 : * by the ORDER BY clause. Otherwise, if we're a subquery being planned
3496 : * for a set operation which can benefit from presorted results and have a
3497 : * sortable targetlist, we want to sort by the target list.
3498 : *
3499 : * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3500 : * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3501 : * that might just leave us failing to exploit an available sort order at
3502 : * all. Needs more thought. The choice for DISTINCT versus ORDER BY is
3503 : * much easier, since we know that the parser ensured that one is a
3504 : * superset of the other.
3505 : */
3506 481782 : if (root->group_pathkeys)
3507 5308 : root->query_pathkeys = root->group_pathkeys;
3508 476474 : else if (root->window_pathkeys)
3509 1982 : root->query_pathkeys = root->window_pathkeys;
3510 948984 : else if (list_length(root->distinct_pathkeys) >
3511 474492 : list_length(root->sort_pathkeys))
3512 1792 : root->query_pathkeys = root->distinct_pathkeys;
3513 472700 : else if (root->sort_pathkeys)
3514 52450 : root->query_pathkeys = root->sort_pathkeys;
3515 420250 : else if (root->setop_pathkeys != NIL)
3516 9752 : root->query_pathkeys = root->setop_pathkeys;
3517 : else
3518 410498 : root->query_pathkeys = NIL;
3519 481782 : }
3520 :
3521 : /*
3522 : * Estimate number of groups produced by grouping clauses (1 if not grouping)
3523 : *
3524 : * path_rows: number of output rows from scan/join step
3525 : * gd: grouping sets data including list of grouping sets and their clauses
3526 : * target_list: target list containing group clause references
3527 : *
3528 : * If doing grouping sets, we also annotate the gsets data with the estimates
3529 : * for each set and each individual rollup list, with a view to later
3530 : * determining whether some combination of them could be hashed instead.
3531 : */
3532 : static double
3533 37354 : get_number_of_groups(PlannerInfo *root,
3534 : double path_rows,
3535 : grouping_sets_data *gd,
3536 : List *target_list)
3537 : {
3538 37354 : Query *parse = root->parse;
3539 : double dNumGroups;
3540 :
3541 37354 : if (parse->groupClause)
3542 : {
3543 : List *groupExprs;
3544 :
3545 6460 : if (parse->groupingSets)
3546 : {
3547 : /* Add up the estimates for each grouping set */
3548 : ListCell *lc;
3549 :
3550 : Assert(gd); /* keep Coverity happy */
3551 :
3552 686 : dNumGroups = 0;
3553 :
3554 1854 : foreach(lc, gd->rollups)
3555 : {
3556 1168 : RollupData *rollup = lfirst_node(RollupData, lc);
3557 : ListCell *lc2;
3558 : ListCell *lc3;
3559 :
3560 1168 : groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
3561 : target_list);
3562 :
3563 1168 : rollup->numGroups = 0.0;
3564 :
3565 3488 : forboth(lc2, rollup->gsets, lc3, rollup->gsets_data)
3566 : {
3567 2320 : List *gset = (List *) lfirst(lc2);
3568 2320 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc3);
3569 2320 : double numGroups = estimate_num_groups(root,
3570 : groupExprs,
3571 : path_rows,
3572 : &gset,
3573 : NULL);
3574 :
3575 2320 : gs->numGroups = numGroups;
3576 2320 : rollup->numGroups += numGroups;
3577 : }
3578 :
3579 1168 : dNumGroups += rollup->numGroups;
3580 : }
3581 :
3582 686 : if (gd->hash_sets_idx)
3583 : {
3584 : ListCell *lc2;
3585 :
3586 36 : gd->dNumHashGroups = 0;
3587 :
3588 36 : groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3589 : target_list);
3590 :
3591 78 : forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
3592 : {
3593 42 : List *gset = (List *) lfirst(lc);
3594 42 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc2);
3595 42 : double numGroups = estimate_num_groups(root,
3596 : groupExprs,
3597 : path_rows,
3598 : &gset,
3599 : NULL);
3600 :
3601 42 : gs->numGroups = numGroups;
3602 42 : gd->dNumHashGroups += numGroups;
3603 : }
3604 :
3605 36 : dNumGroups += gd->dNumHashGroups;
3606 : }
3607 : }
3608 : else
3609 : {
3610 : /* Plain GROUP BY -- estimate based on optimized groupClause */
3611 5774 : groupExprs = get_sortgrouplist_exprs(root->processed_groupClause,
3612 : target_list);
3613 :
3614 5774 : dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3615 : NULL, NULL);
3616 : }
3617 : }
3618 30894 : else if (parse->groupingSets)
3619 : {
3620 : /* Empty grouping sets ... one result row for each one */
3621 42 : dNumGroups = list_length(parse->groupingSets);
3622 : }
3623 30852 : else if (parse->hasAggs || root->hasHavingQual)
3624 : {
3625 : /* Plain aggregation, one result row */
3626 30852 : dNumGroups = 1;
3627 : }
3628 : else
3629 : {
3630 : /* Not grouping */
3631 0 : dNumGroups = 1;
3632 : }
3633 :
3634 37354 : return dNumGroups;
3635 : }
3636 :
3637 : /*
3638 : * create_grouping_paths
3639 : *
3640 : * Build a new upperrel containing Paths for grouping and/or aggregation.
3641 : * Along the way, we also build an upperrel for Paths which are partially
3642 : * grouped and/or aggregated. A partially grouped and/or aggregated path
3643 : * needs a FinalizeAggregate node to complete the aggregation. Currently,
3644 : * the only partially grouped paths we build are also partial paths; that
3645 : * is, they need a Gather and then a FinalizeAggregate.
3646 : *
3647 : * input_rel: contains the source-data Paths
3648 : * target: the pathtarget for the result Paths to compute
3649 : * gd: grouping sets data including list of grouping sets and their clauses
3650 : *
3651 : * Note: all Paths in input_rel are expected to return the target computed
3652 : * by make_group_input_target.
3653 : */
3654 : static RelOptInfo *
3655 34154 : create_grouping_paths(PlannerInfo *root,
3656 : RelOptInfo *input_rel,
3657 : PathTarget *target,
3658 : bool target_parallel_safe,
3659 : grouping_sets_data *gd)
3660 : {
3661 34154 : Query *parse = root->parse;
3662 : RelOptInfo *grouped_rel;
3663 : RelOptInfo *partially_grouped_rel;
3664 : AggClauseCosts agg_costs;
3665 :
3666 204924 : MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
3667 34154 : get_agg_clause_costs(root, AGGSPLIT_SIMPLE, &agg_costs);
3668 :
3669 : /*
3670 : * Create grouping relation to hold fully aggregated grouping and/or
3671 : * aggregation paths.
3672 : */
3673 34154 : grouped_rel = make_grouping_rel(root, input_rel, target,
3674 : target_parallel_safe, parse->havingQual);
3675 :
3676 : /*
3677 : * Create either paths for a degenerate grouping or paths for ordinary
3678 : * grouping, as appropriate.
3679 : */
3680 34154 : if (is_degenerate_grouping(root))
3681 18 : create_degenerate_grouping_paths(root, input_rel, grouped_rel);
3682 : else
3683 : {
3684 34136 : int flags = 0;
3685 : GroupPathExtraData extra;
3686 :
3687 : /*
3688 : * Determine whether it's possible to perform sort-based
3689 : * implementations of grouping. (Note that if processed_groupClause
3690 : * is empty, grouping_is_sortable() is trivially true, and all the
3691 : * pathkeys_contained_in() tests will succeed too, so that we'll
3692 : * consider every surviving input path.)
3693 : *
3694 : * If we have grouping sets, we might be able to sort some but not all
3695 : * of them; in this case, we need can_sort to be true as long as we
3696 : * must consider any sorted-input plan.
3697 : */
3698 34136 : if ((gd && gd->rollups != NIL)
3699 33414 : || grouping_is_sortable(root->processed_groupClause))
3700 34130 : flags |= GROUPING_CAN_USE_SORT;
3701 :
3702 : /*
3703 : * Determine whether we should consider hash-based implementations of
3704 : * grouping.
3705 : *
3706 : * Hashed aggregation only applies if we're grouping. If we have
3707 : * grouping sets, some groups might be hashable but others not; in
3708 : * this case we set can_hash true as long as there is nothing globally
3709 : * preventing us from hashing (and we should therefore consider plans
3710 : * with hashes).
3711 : *
3712 : * Executor doesn't support hashed aggregation with DISTINCT or ORDER
3713 : * BY aggregates. (Doing so would imply storing *all* the input
3714 : * values in the hash table, and/or running many sorts in parallel,
3715 : * either of which seems like a certain loser.) We similarly don't
3716 : * support ordered-set aggregates in hashed aggregation, but that case
3717 : * is also included in the numOrderedAggs count.
3718 : *
3719 : * Note: grouping_is_hashable() is much more expensive to check than
3720 : * the other gating conditions, so we want to do it last.
3721 : */
3722 34136 : if ((parse->groupClause != NIL &&
3723 7818 : root->numOrderedAggs == 0 &&
3724 3772 : (gd ? gd->any_hashable : grouping_is_hashable(root->processed_groupClause))))
3725 3768 : flags |= GROUPING_CAN_USE_HASH;
3726 :
3727 : /*
3728 : * Determine whether partial aggregation is possible.
3729 : */
3730 34136 : if (can_partial_agg(root))
3731 30424 : flags |= GROUPING_CAN_PARTIAL_AGG;
3732 :
3733 34136 : extra.flags = flags;
3734 34136 : extra.target_parallel_safe = target_parallel_safe;
3735 34136 : extra.havingQual = parse->havingQual;
3736 34136 : extra.targetList = parse->targetList;
3737 34136 : extra.partial_costs_set = false;
3738 :
3739 : /*
3740 : * Determine whether partitionwise aggregation is in theory possible.
3741 : * It can be disabled by the user, and for now, we don't try to
3742 : * support grouping sets. create_ordinary_grouping_paths() will check
3743 : * additional conditions, such as whether input_rel is partitioned.
3744 : */
3745 34136 : if (enable_partitionwise_aggregate && !parse->groupingSets)
3746 448 : extra.patype = PARTITIONWISE_AGGREGATE_FULL;
3747 : else
3748 33688 : extra.patype = PARTITIONWISE_AGGREGATE_NONE;
3749 :
3750 34136 : create_ordinary_grouping_paths(root, input_rel, grouped_rel,
3751 : &agg_costs, gd, &extra,
3752 : &partially_grouped_rel);
3753 : }
3754 :
3755 34146 : set_cheapest(grouped_rel);
3756 34146 : return grouped_rel;
3757 : }
3758 :
3759 : /*
3760 : * make_grouping_rel
3761 : *
3762 : * Create a new grouping rel and set basic properties.
3763 : *
3764 : * input_rel represents the underlying scan/join relation.
3765 : * target is the output expected from the grouping relation.
3766 : */
3767 : static RelOptInfo *
3768 35552 : make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
3769 : PathTarget *target, bool target_parallel_safe,
3770 : Node *havingQual)
3771 : {
3772 : RelOptInfo *grouped_rel;
3773 :
3774 35552 : if (IS_OTHER_REL(input_rel))
3775 : {
3776 1398 : grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG,
3777 : input_rel->relids);
3778 1398 : grouped_rel->reloptkind = RELOPT_OTHER_UPPER_REL;
3779 : }
3780 : else
3781 : {
3782 : /*
3783 : * By tradition, the relids set for the main grouping relation is
3784 : * NULL. (This could be changed, but might require adjustments
3785 : * elsewhere.)
3786 : */
3787 34154 : grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3788 : }
3789 :
3790 : /* Set target. */
3791 35552 : grouped_rel->reltarget = target;
3792 :
3793 : /*
3794 : * If the input relation is not parallel-safe, then the grouped relation
3795 : * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
3796 : * target list and HAVING quals are parallel-safe.
3797 : */
3798 60910 : if (input_rel->consider_parallel && target_parallel_safe &&
3799 25358 : is_parallel_safe(root, (Node *) havingQual))
3800 25340 : grouped_rel->consider_parallel = true;
3801 :
3802 : /*
3803 : * If the input rel belongs to a single FDW, so does the grouped rel.
3804 : */
3805 35552 : grouped_rel->serverid = input_rel->serverid;
3806 35552 : grouped_rel->userid = input_rel->userid;
3807 35552 : grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3808 35552 : grouped_rel->fdwroutine = input_rel->fdwroutine;
3809 :
3810 35552 : return grouped_rel;
3811 : }
3812 :
3813 : /*
3814 : * is_degenerate_grouping
3815 : *
3816 : * A degenerate grouping is one in which the query has a HAVING qual and/or
3817 : * grouping sets, but no aggregates and no GROUP BY (which implies that the
3818 : * grouping sets are all empty).
3819 : */
3820 : static bool
3821 34154 : is_degenerate_grouping(PlannerInfo *root)
3822 : {
3823 34154 : Query *parse = root->parse;
3824 :
3825 33074 : return (root->hasHavingQual || parse->groupingSets) &&
3826 67228 : !parse->hasAggs && parse->groupClause == NIL;
3827 : }
3828 :
3829 : /*
3830 : * create_degenerate_grouping_paths
3831 : *
3832 : * When the grouping is degenerate (see is_degenerate_grouping), we are
3833 : * supposed to emit either zero or one row for each grouping set depending on
3834 : * whether HAVING succeeds. Furthermore, there cannot be any variables in
3835 : * either HAVING or the targetlist, so we actually do not need the FROM table
3836 : * at all! We can just throw away the plan-so-far and generate a Result node.
3837 : * This is a sufficiently unusual corner case that it's not worth contorting
3838 : * the structure of this module to avoid having to generate the earlier paths
3839 : * in the first place.
3840 : */
3841 : static void
3842 18 : create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
3843 : RelOptInfo *grouped_rel)
3844 : {
3845 18 : Query *parse = root->parse;
3846 : int nrows;
3847 : Path *path;
3848 :
3849 18 : nrows = list_length(parse->groupingSets);
3850 18 : if (nrows > 1)
3851 : {
3852 : /*
3853 : * Doesn't seem worthwhile writing code to cons up a generate_series
3854 : * or a values scan to emit multiple rows. Instead just make N clones
3855 : * and append them. (With a volatile HAVING clause, this means you
3856 : * might get between 0 and N output rows. Offhand I think that's
3857 : * desired.)
3858 : */
3859 0 : List *paths = NIL;
3860 :
3861 0 : while (--nrows >= 0)
3862 : {
3863 : path = (Path *)
3864 0 : create_group_result_path(root, grouped_rel,
3865 0 : grouped_rel->reltarget,
3866 0 : (List *) parse->havingQual);
3867 0 : paths = lappend(paths, path);
3868 : }
3869 : path = (Path *)
3870 0 : create_append_path(root,
3871 : grouped_rel,
3872 : paths,
3873 : NIL,
3874 : NIL,
3875 : NULL,
3876 : 0,
3877 : false,
3878 : -1);
3879 : }
3880 : else
3881 : {
3882 : /* No grouping sets, or just one, so one output row */
3883 : path = (Path *)
3884 18 : create_group_result_path(root, grouped_rel,
3885 18 : grouped_rel->reltarget,
3886 18 : (List *) parse->havingQual);
3887 : }
3888 :
3889 18 : add_path(grouped_rel, path);
3890 18 : }
3891 :
3892 : /*
3893 : * create_ordinary_grouping_paths
3894 : *
3895 : * Create grouping paths for the ordinary (that is, non-degenerate) case.
3896 : *
3897 : * We need to consider sorted and hashed aggregation in the same function,
3898 : * because otherwise (1) it would be harder to throw an appropriate error
3899 : * message if neither way works, and (2) we should not allow hashtable size
3900 : * considerations to dissuade us from using hashing if sorting is not possible.
3901 : *
3902 : * *partially_grouped_rel_p will be set to the partially grouped rel which this
3903 : * function creates, or to NULL if it doesn't create one.
3904 : */
3905 : static void
3906 35534 : create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
3907 : RelOptInfo *grouped_rel,
3908 : const AggClauseCosts *agg_costs,
3909 : grouping_sets_data *gd,
3910 : GroupPathExtraData *extra,
3911 : RelOptInfo **partially_grouped_rel_p)
3912 : {
3913 35534 : Path *cheapest_path = input_rel->cheapest_total_path;
3914 35534 : RelOptInfo *partially_grouped_rel = NULL;
3915 : double dNumGroups;
3916 35534 : PartitionwiseAggregateType patype = PARTITIONWISE_AGGREGATE_NONE;
3917 :
3918 : /*
3919 : * If this is the topmost grouping relation or if the parent relation is
3920 : * doing some form of partitionwise aggregation, then we may be able to do
3921 : * it at this level also. However, if the input relation is not
3922 : * partitioned, partitionwise aggregate is impossible.
3923 : */
3924 35534 : if (extra->patype != PARTITIONWISE_AGGREGATE_NONE &&
3925 1846 : IS_PARTITIONED_REL(input_rel))
3926 : {
3927 : /*
3928 : * If this is the topmost relation or if the parent relation is doing
3929 : * full partitionwise aggregation, then we can do full partitionwise
3930 : * aggregation provided that the GROUP BY clause contains all of the
3931 : * partitioning columns at this level. Otherwise, we can do at most
3932 : * partial partitionwise aggregation. But if partial aggregation is
3933 : * not supported in general then we can't use it for partitionwise
3934 : * aggregation either.
3935 : *
3936 : * Check parse->groupClause not processed_groupClause, because it's
3937 : * okay if some of the partitioning columns were proved redundant.
3938 : */
3939 1064 : if (extra->patype == PARTITIONWISE_AGGREGATE_FULL &&
3940 508 : group_by_has_partkey(input_rel, extra->targetList,
3941 508 : root->parse->groupClause))
3942 284 : patype = PARTITIONWISE_AGGREGATE_FULL;
3943 272 : else if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
3944 230 : patype = PARTITIONWISE_AGGREGATE_PARTIAL;
3945 : else
3946 42 : patype = PARTITIONWISE_AGGREGATE_NONE;
3947 : }
3948 :
3949 : /*
3950 : * Before generating paths for grouped_rel, we first generate any possible
3951 : * partially grouped paths; that way, later code can easily consider both
3952 : * parallel and non-parallel approaches to grouping.
3953 : */
3954 35534 : if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
3955 : {
3956 : bool force_rel_creation;
3957 :
3958 : /*
3959 : * If we're doing partitionwise aggregation at this level, force
3960 : * creation of a partially_grouped_rel so we can add partitionwise
3961 : * paths to it.
3962 : */
3963 31750 : force_rel_creation = (patype == PARTITIONWISE_AGGREGATE_PARTIAL);
3964 :
3965 : partially_grouped_rel =
3966 31750 : create_partial_grouping_paths(root,
3967 : grouped_rel,
3968 : input_rel,
3969 : gd,
3970 : extra,
3971 : force_rel_creation);
3972 : }
3973 :
3974 : /* Set out parameter. */
3975 35534 : *partially_grouped_rel_p = partially_grouped_rel;
3976 :
3977 : /* Apply partitionwise aggregation technique, if possible. */
3978 35534 : if (patype != PARTITIONWISE_AGGREGATE_NONE)
3979 514 : create_partitionwise_grouping_paths(root, input_rel, grouped_rel,
3980 : partially_grouped_rel, agg_costs,
3981 : gd, patype, extra);
3982 :
3983 : /* If we are doing partial aggregation only, return. */
3984 35534 : if (extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL)
3985 : {
3986 : Assert(partially_grouped_rel);
3987 :
3988 594 : if (partially_grouped_rel->pathlist)
3989 594 : set_cheapest(partially_grouped_rel);
3990 :
3991 594 : return;
3992 : }
3993 :
3994 : /* Gather any partially grouped partial paths. */
3995 34940 : if (partially_grouped_rel && partially_grouped_rel->partial_pathlist)
3996 : {
3997 1484 : gather_grouping_paths(root, partially_grouped_rel);
3998 1484 : set_cheapest(partially_grouped_rel);
3999 : }
4000 :
4001 : /*
4002 : * Estimate number of groups.
4003 : */
4004 34940 : dNumGroups = get_number_of_groups(root,
4005 : cheapest_path->rows,
4006 : gd,
4007 : extra->targetList);
4008 :
4009 : /* Build final grouping paths */
4010 34940 : add_paths_to_grouping_rel(root, input_rel, grouped_rel,
4011 : partially_grouped_rel, agg_costs, gd,
4012 : dNumGroups, extra);
4013 :
4014 : /* Give a helpful error if we failed to find any implementation */
4015 34940 : if (grouped_rel->pathlist == NIL)
4016 6 : ereport(ERROR,
4017 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4018 : errmsg("could not implement GROUP BY"),
4019 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4020 :
4021 : /*
4022 : * If there is an FDW that's responsible for all baserels of the query,
4023 : * let it consider adding ForeignPaths.
4024 : */
4025 34934 : if (grouped_rel->fdwroutine &&
4026 326 : grouped_rel->fdwroutine->GetForeignUpperPaths)
4027 326 : grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
4028 : input_rel, grouped_rel,
4029 : extra);
4030 :
4031 : /* Let extensions possibly add some more paths */
4032 34932 : if (create_upper_paths_hook)
4033 0 : (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
4034 : input_rel, grouped_rel,
4035 : extra);
4036 : }
4037 :
4038 : /*
4039 : * For a given input path, consider the possible ways of doing grouping sets on
4040 : * it, by combinations of hashing and sorting. This can be called multiple
4041 : * times, so it's important that it not scribble on input. No result is
4042 : * returned, but any generated paths are added to grouped_rel.
4043 : */
4044 : static void
4045 1444 : consider_groupingsets_paths(PlannerInfo *root,
4046 : RelOptInfo *grouped_rel,
4047 : Path *path,
4048 : bool is_sorted,
4049 : bool can_hash,
4050 : grouping_sets_data *gd,
4051 : const AggClauseCosts *agg_costs,
4052 : double dNumGroups)
4053 : {
4054 1444 : Query *parse = root->parse;
4055 1444 : Size hash_mem_limit = get_hash_memory_limit();
4056 :
4057 : /*
4058 : * If we're not being offered sorted input, then only consider plans that
4059 : * can be done entirely by hashing.
4060 : *
4061 : * We can hash everything if it looks like it'll fit in hash_mem. But if
4062 : * the input is actually sorted despite not being advertised as such, we
4063 : * prefer to make use of that in order to use less memory.
4064 : *
4065 : * If none of the grouping sets are sortable, then ignore the hash_mem
4066 : * limit and generate a path anyway, since otherwise we'll just fail.
4067 : */
4068 1444 : if (!is_sorted)
4069 : {
4070 650 : List *new_rollups = NIL;
4071 650 : RollupData *unhashed_rollup = NULL;
4072 : List *sets_data;
4073 650 : List *empty_sets_data = NIL;
4074 650 : List *empty_sets = NIL;
4075 : ListCell *lc;
4076 650 : ListCell *l_start = list_head(gd->rollups);
4077 650 : AggStrategy strat = AGG_HASHED;
4078 : double hashsize;
4079 650 : double exclude_groups = 0.0;
4080 :
4081 : Assert(can_hash);
4082 :
4083 : /*
4084 : * If the input is coincidentally sorted usefully (which can happen
4085 : * even if is_sorted is false, since that only means that our caller
4086 : * has set up the sorting for us), then save some hashtable space by
4087 : * making use of that. But we need to watch out for degenerate cases:
4088 : *
4089 : * 1) If there are any empty grouping sets, then group_pathkeys might
4090 : * be NIL if all non-empty grouping sets are unsortable. In this case,
4091 : * there will be a rollup containing only empty groups, and the
4092 : * pathkeys_contained_in test is vacuously true; this is ok.
4093 : *
4094 : * XXX: the above relies on the fact that group_pathkeys is generated
4095 : * from the first rollup. If we add the ability to consider multiple
4096 : * sort orders for grouping input, this assumption might fail.
4097 : *
4098 : * 2) If there are no empty sets and only unsortable sets, then the
4099 : * rollups list will be empty (and thus l_start == NULL), and
4100 : * group_pathkeys will be NIL; we must ensure that the vacuously-true
4101 : * pathkeys_contained_in test doesn't cause us to crash.
4102 : */
4103 1294 : if (l_start != NULL &&
4104 644 : pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
4105 : {
4106 24 : unhashed_rollup = lfirst_node(RollupData, l_start);
4107 24 : exclude_groups = unhashed_rollup->numGroups;
4108 24 : l_start = lnext(gd->rollups, l_start);
4109 : }
4110 :
4111 650 : hashsize = estimate_hashagg_tablesize(root,
4112 : path,
4113 : agg_costs,
4114 : dNumGroups - exclude_groups);
4115 :
4116 : /*
4117 : * gd->rollups is empty if we have only unsortable columns to work
4118 : * with. Override hash_mem in that case; otherwise, we'll rely on the
4119 : * sorted-input case to generate usable mixed paths.
4120 : */
4121 650 : if (hashsize > hash_mem_limit && gd->rollups)
4122 18 : return; /* nope, won't fit */
4123 :
4124 : /*
4125 : * We need to burst the existing rollups list into individual grouping
4126 : * sets and recompute a groupClause for each set.
4127 : */
4128 632 : sets_data = list_copy(gd->unsortable_sets);
4129 :
4130 1596 : for_each_cell(lc, gd->rollups, l_start)
4131 : {
4132 988 : RollupData *rollup = lfirst_node(RollupData, lc);
4133 :
4134 : /*
4135 : * If we find an unhashable rollup that's not been skipped by the
4136 : * "actually sorted" check above, we can't cope; we'd need sorted
4137 : * input (with a different sort order) but we can't get that here.
4138 : * So bail out; we'll get a valid path from the is_sorted case
4139 : * instead.
4140 : *
4141 : * The mere presence of empty grouping sets doesn't make a rollup
4142 : * unhashable (see preprocess_grouping_sets), we handle those
4143 : * specially below.
4144 : */
4145 988 : if (!rollup->hashable)
4146 24 : return;
4147 :
4148 964 : sets_data = list_concat(sets_data, rollup->gsets_data);
4149 : }
4150 2682 : foreach(lc, sets_data)
4151 : {
4152 2074 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
4153 2074 : List *gset = gs->set;
4154 : RollupData *rollup;
4155 :
4156 2074 : if (gset == NIL)
4157 : {
4158 : /* Empty grouping sets can't be hashed. */
4159 448 : empty_sets_data = lappend(empty_sets_data, gs);
4160 448 : empty_sets = lappend(empty_sets, NIL);
4161 : }
4162 : else
4163 : {
4164 1626 : rollup = makeNode(RollupData);
4165 :
4166 1626 : rollup->groupClause = groupclause_apply_groupingset(root, gset);
4167 1626 : rollup->gsets_data = list_make1(gs);
4168 1626 : rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4169 : rollup->gsets_data,
4170 : gd->tleref_to_colnum_map);
4171 1626 : rollup->numGroups = gs->numGroups;
4172 1626 : rollup->hashable = true;
4173 1626 : rollup->is_hashed = true;
4174 1626 : new_rollups = lappend(new_rollups, rollup);
4175 : }
4176 : }
4177 :
4178 : /*
4179 : * If we didn't find anything nonempty to hash, then bail. We'll
4180 : * generate a path from the is_sorted case.
4181 : */
4182 608 : if (new_rollups == NIL)
4183 0 : return;
4184 :
4185 : /*
4186 : * If there were empty grouping sets they should have been in the
4187 : * first rollup.
4188 : */
4189 : Assert(!unhashed_rollup || !empty_sets);
4190 :
4191 608 : if (unhashed_rollup)
4192 : {
4193 24 : new_rollups = lappend(new_rollups, unhashed_rollup);
4194 24 : strat = AGG_MIXED;
4195 : }
4196 584 : else if (empty_sets)
4197 : {
4198 400 : RollupData *rollup = makeNode(RollupData);
4199 :
4200 400 : rollup->groupClause = NIL;
4201 400 : rollup->gsets_data = empty_sets_data;
4202 400 : rollup->gsets = empty_sets;
4203 400 : rollup->numGroups = list_length(empty_sets);
4204 400 : rollup->hashable = false;
4205 400 : rollup->is_hashed = false;
4206 400 : new_rollups = lappend(new_rollups, rollup);
4207 400 : strat = AGG_MIXED;
4208 : }
4209 :
4210 608 : add_path(grouped_rel, (Path *)
4211 608 : create_groupingsets_path(root,
4212 : grouped_rel,
4213 : path,
4214 608 : (List *) parse->havingQual,
4215 : strat,
4216 : new_rollups,
4217 : agg_costs));
4218 608 : return;
4219 : }
4220 :
4221 : /*
4222 : * If we have sorted input but nothing we can do with it, bail.
4223 : */
4224 794 : if (gd->rollups == NIL)
4225 0 : return;
4226 :
4227 : /*
4228 : * Given sorted input, we try and make two paths: one sorted and one mixed
4229 : * sort/hash. (We need to try both because hashagg might be disabled, or
4230 : * some columns might not be sortable.)
4231 : *
4232 : * can_hash is passed in as false if some obstacle elsewhere (such as
4233 : * ordered aggs) means that we shouldn't consider hashing at all.
4234 : */
4235 794 : if (can_hash && gd->any_hashable)
4236 : {
4237 716 : List *rollups = NIL;
4238 716 : List *hash_sets = list_copy(gd->unsortable_sets);
4239 716 : double availspace = hash_mem_limit;
4240 : ListCell *lc;
4241 :
4242 : /*
4243 : * Account first for space needed for groups we can't sort at all.
4244 : */
4245 716 : availspace -= estimate_hashagg_tablesize(root,
4246 : path,
4247 : agg_costs,
4248 : gd->dNumHashGroups);
4249 :
4250 716 : if (availspace > 0 && list_length(gd->rollups) > 1)
4251 : {
4252 : double scale;
4253 396 : int num_rollups = list_length(gd->rollups);
4254 : int k_capacity;
4255 396 : int *k_weights = palloc(num_rollups * sizeof(int));
4256 396 : Bitmapset *hash_items = NULL;
4257 : int i;
4258 :
4259 : /*
4260 : * We treat this as a knapsack problem: the knapsack capacity
4261 : * represents hash_mem, the item weights are the estimated memory
4262 : * usage of the hashtables needed to implement a single rollup,
4263 : * and we really ought to use the cost saving as the item value;
4264 : * however, currently the costs assigned to sort nodes don't
4265 : * reflect the comparison costs well, and so we treat all items as
4266 : * of equal value (each rollup we hash instead saves us one sort).
4267 : *
4268 : * To use the discrete knapsack, we need to scale the values to a
4269 : * reasonably small bounded range. We choose to allow a 5% error
4270 : * margin; we have no more than 4096 rollups in the worst possible
4271 : * case, which with a 5% error margin will require a bit over 42MB
4272 : * of workspace. (Anyone wanting to plan queries that complex had
4273 : * better have the memory for it. In more reasonable cases, with
4274 : * no more than a couple of dozen rollups, the memory usage will
4275 : * be negligible.)
4276 : *
4277 : * k_capacity is naturally bounded, but we clamp the values for
4278 : * scale and weight (below) to avoid overflows or underflows (or
4279 : * uselessly trying to use a scale factor less than 1 byte).
4280 : */
4281 396 : scale = Max(availspace / (20.0 * num_rollups), 1.0);
4282 396 : k_capacity = (int) floor(availspace / scale);
4283 :
4284 : /*
4285 : * We leave the first rollup out of consideration since it's the
4286 : * one that matches the input sort order. We assign indexes "i"
4287 : * to only those entries considered for hashing; the second loop,
4288 : * below, must use the same condition.
4289 : */
4290 396 : i = 0;
4291 1008 : for_each_from(lc, gd->rollups, 1)
4292 : {
4293 612 : RollupData *rollup = lfirst_node(RollupData, lc);
4294 :
4295 612 : if (rollup->hashable)
4296 : {
4297 612 : double sz = estimate_hashagg_tablesize(root,
4298 : path,
4299 : agg_costs,
4300 : rollup->numGroups);
4301 :
4302 : /*
4303 : * If sz is enormous, but hash_mem (and hence scale) is
4304 : * small, avoid integer overflow here.
4305 : */
4306 612 : k_weights[i] = (int) Min(floor(sz / scale),
4307 : k_capacity + 1.0);
4308 612 : ++i;
4309 : }
4310 : }
4311 :
4312 : /*
4313 : * Apply knapsack algorithm; compute the set of items which
4314 : * maximizes the value stored (in this case the number of sorts
4315 : * saved) while keeping the total size (approximately) within
4316 : * capacity.
4317 : */
4318 396 : if (i > 0)
4319 396 : hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
4320 :
4321 396 : if (!bms_is_empty(hash_items))
4322 : {
4323 396 : rollups = list_make1(linitial(gd->rollups));
4324 :
4325 396 : i = 0;
4326 1008 : for_each_from(lc, gd->rollups, 1)
4327 : {
4328 612 : RollupData *rollup = lfirst_node(RollupData, lc);
4329 :
4330 612 : if (rollup->hashable)
4331 : {
4332 612 : if (bms_is_member(i, hash_items))
4333 576 : hash_sets = list_concat(hash_sets,
4334 576 : rollup->gsets_data);
4335 : else
4336 36 : rollups = lappend(rollups, rollup);
4337 612 : ++i;
4338 : }
4339 : else
4340 0 : rollups = lappend(rollups, rollup);
4341 : }
4342 : }
4343 : }
4344 :
4345 716 : if (!rollups && hash_sets)
4346 24 : rollups = list_copy(gd->rollups);
4347 :
4348 1432 : foreach(lc, hash_sets)
4349 : {
4350 716 : GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
4351 716 : RollupData *rollup = makeNode(RollupData);
4352 :
4353 : Assert(gs->set != NIL);
4354 :
4355 716 : rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
4356 716 : rollup->gsets_data = list_make1(gs);
4357 716 : rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4358 : rollup->gsets_data,
4359 : gd->tleref_to_colnum_map);
4360 716 : rollup->numGroups = gs->numGroups;
4361 716 : rollup->hashable = true;
4362 716 : rollup->is_hashed = true;
4363 716 : rollups = lcons(rollup, rollups);
4364 : }
4365 :
4366 716 : if (rollups)
4367 : {
4368 420 : add_path(grouped_rel, (Path *)
4369 420 : create_groupingsets_path(root,
4370 : grouped_rel,
4371 : path,
4372 420 : (List *) parse->havingQual,
4373 : AGG_MIXED,
4374 : rollups,
4375 : agg_costs));
4376 : }
4377 : }
4378 :
4379 : /*
4380 : * Now try the simple sorted case.
4381 : */
4382 794 : if (!gd->unsortable_sets)
4383 764 : add_path(grouped_rel, (Path *)
4384 764 : create_groupingsets_path(root,
4385 : grouped_rel,
4386 : path,
4387 764 : (List *) parse->havingQual,
4388 : AGG_SORTED,
4389 : gd->rollups,
4390 : agg_costs));
4391 : }
4392 :
4393 : /*
4394 : * create_window_paths
4395 : *
4396 : * Build a new upperrel containing Paths for window-function evaluation.
4397 : *
4398 : * input_rel: contains the source-data Paths
4399 : * input_target: result of make_window_input_target
4400 : * output_target: what the topmost WindowAggPath should return
4401 : * wflists: result of find_window_functions
4402 : * activeWindows: result of select_active_windows
4403 : *
4404 : * Note: all Paths in input_rel are expected to return input_target.
4405 : */
4406 : static RelOptInfo *
4407 2300 : create_window_paths(PlannerInfo *root,
4408 : RelOptInfo *input_rel,
4409 : PathTarget *input_target,
4410 : PathTarget *output_target,
4411 : bool output_target_parallel_safe,
4412 : WindowFuncLists *wflists,
4413 : List *activeWindows)
4414 : {
4415 : RelOptInfo *window_rel;
4416 : ListCell *lc;
4417 :
4418 : /* For now, do all work in the (WINDOW, NULL) upperrel */
4419 2300 : window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
4420 :
4421 : /*
4422 : * If the input relation is not parallel-safe, then the window relation
4423 : * can't be parallel-safe, either. Otherwise, we need to examine the
4424 : * target list and active windows for non-parallel-safe constructs.
4425 : */
4426 2300 : if (input_rel->consider_parallel && output_target_parallel_safe &&
4427 0 : is_parallel_safe(root, (Node *) activeWindows))
4428 0 : window_rel->consider_parallel = true;
4429 :
4430 : /*
4431 : * If the input rel belongs to a single FDW, so does the window rel.
4432 : */
4433 2300 : window_rel->serverid = input_rel->serverid;
4434 2300 : window_rel->userid = input_rel->userid;
4435 2300 : window_rel->useridiscurrent = input_rel->useridiscurrent;
4436 2300 : window_rel->fdwroutine = input_rel->fdwroutine;
4437 :
4438 : /*
4439 : * Consider computing window functions starting from the existing
4440 : * cheapest-total path (which will likely require a sort) as well as any
4441 : * existing paths that satisfy or partially satisfy root->window_pathkeys.
4442 : */
4443 4900 : foreach(lc, input_rel->pathlist)
4444 : {
4445 2600 : Path *path = (Path *) lfirst(lc);
4446 : int presorted_keys;
4447 :
4448 2900 : if (path == input_rel->cheapest_total_path ||
4449 300 : pathkeys_count_contained_in(root->window_pathkeys, path->pathkeys,
4450 132 : &presorted_keys) ||
4451 132 : presorted_keys > 0)
4452 2486 : create_one_window_path(root,
4453 : window_rel,
4454 : path,
4455 : input_target,
4456 : output_target,
4457 : wflists,
4458 : activeWindows);
4459 : }
4460 :
4461 : /*
4462 : * If there is an FDW that's responsible for all baserels of the query,
4463 : * let it consider adding ForeignPaths.
4464 : */
4465 2300 : if (window_rel->fdwroutine &&
4466 12 : window_rel->fdwroutine->GetForeignUpperPaths)
4467 12 : window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
4468 : input_rel, window_rel,
4469 : NULL);
4470 :
4471 : /* Let extensions possibly add some more paths */
4472 2300 : if (create_upper_paths_hook)
4473 0 : (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
4474 : input_rel, window_rel, NULL);
4475 :
4476 : /* Now choose the best path(s) */
4477 2300 : set_cheapest(window_rel);
4478 :
4479 2300 : return window_rel;
4480 : }
4481 :
4482 : /*
4483 : * Stack window-function implementation steps atop the given Path, and
4484 : * add the result to window_rel.
4485 : *
4486 : * window_rel: upperrel to contain result
4487 : * path: input Path to use (must return input_target)
4488 : * input_target: result of make_window_input_target
4489 : * output_target: what the topmost WindowAggPath should return
4490 : * wflists: result of find_window_functions
4491 : * activeWindows: result of select_active_windows
4492 : */
4493 : static void
4494 2486 : create_one_window_path(PlannerInfo *root,
4495 : RelOptInfo *window_rel,
4496 : Path *path,
4497 : PathTarget *input_target,
4498 : PathTarget *output_target,
4499 : WindowFuncLists *wflists,
4500 : List *activeWindows)
4501 : {
4502 : PathTarget *window_target;
4503 : ListCell *l;
4504 2486 : List *topqual = NIL;
4505 :
4506 : /*
4507 : * Since each window clause could require a different sort order, we stack
4508 : * up a WindowAgg node for each clause, with sort steps between them as
4509 : * needed. (We assume that select_active_windows chose a good order for
4510 : * executing the clauses in.)
4511 : *
4512 : * input_target should contain all Vars and Aggs needed for the result.
4513 : * (In some cases we wouldn't need to propagate all of these all the way
4514 : * to the top, since they might only be needed as inputs to WindowFuncs.
4515 : * It's probably not worth trying to optimize that though.) It must also
4516 : * contain all window partitioning and sorting expressions, to ensure
4517 : * they're computed only once at the bottom of the stack (that's critical
4518 : * for volatile functions). As we climb up the stack, we'll add outputs
4519 : * for the WindowFuncs computed at each level.
4520 : */
4521 2486 : window_target = input_target;
4522 :
4523 5122 : foreach(l, activeWindows)
4524 : {
4525 2636 : WindowClause *wc = lfirst_node(WindowClause, l);
4526 : List *window_pathkeys;
4527 2636 : List *runcondition = NIL;
4528 : int presorted_keys;
4529 : bool is_sorted;
4530 : bool topwindow;
4531 : ListCell *lc2;
4532 :
4533 2636 : window_pathkeys = make_pathkeys_for_window(root,
4534 : wc,
4535 : root->processed_tlist);
4536 :
4537 2636 : is_sorted = pathkeys_count_contained_in(window_pathkeys,
4538 : path->pathkeys,
4539 : &presorted_keys);
4540 :
4541 : /* Sort if necessary */
4542 2636 : if (!is_sorted)
4543 : {
4544 : /*
4545 : * No presorted keys or incremental sort disabled, just perform a
4546 : * complete sort.
4547 : */
4548 2030 : if (presorted_keys == 0 || !enable_incremental_sort)
4549 1976 : path = (Path *) create_sort_path(root, window_rel,
4550 : path,
4551 : window_pathkeys,
4552 : -1.0);
4553 : else
4554 : {
4555 : /*
4556 : * Since we have presorted keys and incremental sort is
4557 : * enabled, just use incremental sort.
4558 : */
4559 54 : path = (Path *) create_incremental_sort_path(root,
4560 : window_rel,
4561 : path,
4562 : window_pathkeys,
4563 : presorted_keys,
4564 : -1.0);
4565 : }
4566 : }
4567 :
4568 2636 : if (lnext(activeWindows, l))
4569 : {
4570 : /*
4571 : * Add the current WindowFuncs to the output target for this
4572 : * intermediate WindowAggPath. We must copy window_target to
4573 : * avoid changing the previous path's target.
4574 : *
4575 : * Note: a WindowFunc adds nothing to the target's eval costs; but
4576 : * we do need to account for the increase in tlist width.
4577 : */
4578 150 : int64 tuple_width = window_target->width;
4579 :
4580 150 : window_target = copy_pathtarget(window_target);
4581 342 : foreach(lc2, wflists->windowFuncs[wc->winref])
4582 : {
4583 192 : WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4584 :
4585 192 : add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4586 192 : tuple_width += get_typavgwidth(wfunc->wintype, -1);
4587 : }
4588 150 : window_target->width = clamp_width_est(tuple_width);
4589 : }
4590 : else
4591 : {
4592 : /* Install the goal target in the topmost WindowAgg */
4593 2486 : window_target = output_target;
4594 : }
4595 :
4596 : /* mark the final item in the list as the top-level window */
4597 2636 : topwindow = foreach_current_index(l) == list_length(activeWindows) - 1;
4598 :
4599 : /*
4600 : * Collect the WindowFuncRunConditions from each WindowFunc and
4601 : * convert them into OpExprs
4602 : */
4603 5974 : foreach(lc2, wflists->windowFuncs[wc->winref])
4604 : {
4605 : ListCell *lc3;
4606 3338 : WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4607 :
4608 3512 : foreach(lc3, wfunc->runCondition)
4609 : {
4610 174 : WindowFuncRunCondition *wfuncrc =
4611 : lfirst_node(WindowFuncRunCondition, lc3);
4612 : Expr *opexpr;
4613 : Expr *leftop;
4614 : Expr *rightop;
4615 :
4616 174 : if (wfuncrc->wfunc_left)
4617 : {
4618 156 : leftop = (Expr *) copyObject(wfunc);
4619 156 : rightop = copyObject(wfuncrc->arg);
4620 : }
4621 : else
4622 : {
4623 18 : leftop = copyObject(wfuncrc->arg);
4624 18 : rightop = (Expr *) copyObject(wfunc);
4625 : }
4626 :
4627 174 : opexpr = make_opclause(wfuncrc->opno,
4628 : BOOLOID,
4629 : false,
4630 : leftop,
4631 : rightop,
4632 : InvalidOid,
4633 : wfuncrc->inputcollid);
4634 :
4635 174 : runcondition = lappend(runcondition, opexpr);
4636 :
4637 174 : if (!topwindow)
4638 24 : topqual = lappend(topqual, opexpr);
4639 : }
4640 : }
4641 :
4642 : path = (Path *)
4643 2636 : create_windowagg_path(root, window_rel, path, window_target,
4644 2636 : wflists->windowFuncs[wc->winref],
4645 : runcondition, wc,
4646 : topwindow ? topqual : NIL, topwindow);
4647 : }
4648 :
4649 2486 : add_path(window_rel, path);
4650 2486 : }
4651 :
4652 : /*
4653 : * create_distinct_paths
4654 : *
4655 : * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4656 : *
4657 : * input_rel: contains the source-data Paths
4658 : * target: the pathtarget for the result Paths to compute
4659 : *
4660 : * Note: input paths should already compute the desired pathtarget, since
4661 : * Sort/Unique won't project anything.
4662 : */
4663 : static RelOptInfo *
4664 2196 : create_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4665 : PathTarget *target)
4666 : {
4667 : RelOptInfo *distinct_rel;
4668 :
4669 : /* For now, do all work in the (DISTINCT, NULL) upperrel */
4670 2196 : distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4671 :
4672 : /*
4673 : * We don't compute anything at this level, so distinct_rel will be
4674 : * parallel-safe if the input rel is parallel-safe. In particular, if
4675 : * there is a DISTINCT ON (...) clause, any path for the input_rel will
4676 : * output those expressions, and will not be parallel-safe unless those
4677 : * expressions are parallel-safe.
4678 : */
4679 2196 : distinct_rel->consider_parallel = input_rel->consider_parallel;
4680 :
4681 : /*
4682 : * If the input rel belongs to a single FDW, so does the distinct_rel.
4683 : */
4684 2196 : distinct_rel->serverid = input_rel->serverid;
4685 2196 : distinct_rel->userid = input_rel->userid;
4686 2196 : distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4687 2196 : distinct_rel->fdwroutine = input_rel->fdwroutine;
4688 :
4689 : /* build distinct paths based on input_rel's pathlist */
4690 2196 : create_final_distinct_paths(root, input_rel, distinct_rel);
4691 :
4692 : /* now build distinct paths based on input_rel's partial_pathlist */
4693 2196 : create_partial_distinct_paths(root, input_rel, distinct_rel, target);
4694 :
4695 : /* Give a helpful error if we failed to create any paths */
4696 2196 : if (distinct_rel->pathlist == NIL)
4697 0 : ereport(ERROR,
4698 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4699 : errmsg("could not implement DISTINCT"),
4700 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4701 :
4702 : /*
4703 : * If there is an FDW that's responsible for all baserels of the query,
4704 : * let it consider adding ForeignPaths.
4705 : */
4706 2196 : if (distinct_rel->fdwroutine &&
4707 16 : distinct_rel->fdwroutine->GetForeignUpperPaths)
4708 16 : distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4709 : UPPERREL_DISTINCT,
4710 : input_rel,
4711 : distinct_rel,
4712 : NULL);
4713 :
4714 : /* Let extensions possibly add some more paths */
4715 2196 : if (create_upper_paths_hook)
4716 0 : (*create_upper_paths_hook) (root, UPPERREL_DISTINCT, input_rel,
4717 : distinct_rel, NULL);
4718 :
4719 : /* Now choose the best path(s) */
4720 2196 : set_cheapest(distinct_rel);
4721 :
4722 2196 : return distinct_rel;
4723 : }
4724 :
4725 : /*
4726 : * create_partial_distinct_paths
4727 : *
4728 : * Process 'input_rel' partial paths and add unique/aggregate paths to the
4729 : * UPPERREL_PARTIAL_DISTINCT rel. For paths created, add Gather/GatherMerge
4730 : * paths on top and add a final unique/aggregate path to remove any duplicate
4731 : * produced from combining rows from parallel workers.
4732 : */
4733 : static void
4734 2196 : create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4735 : RelOptInfo *final_distinct_rel,
4736 : PathTarget *target)
4737 : {
4738 : RelOptInfo *partial_distinct_rel;
4739 : Query *parse;
4740 : List *distinctExprs;
4741 : double numDistinctRows;
4742 : Path *cheapest_partial_path;
4743 : ListCell *lc;
4744 :
4745 : /* nothing to do when there are no partial paths in the input rel */
4746 2196 : if (!input_rel->consider_parallel || input_rel->partial_pathlist == NIL)
4747 2100 : return;
4748 :
4749 96 : parse = root->parse;
4750 :
4751 : /* can't do parallel DISTINCT ON */
4752 96 : if (parse->hasDistinctOn)
4753 0 : return;
4754 :
4755 96 : partial_distinct_rel = fetch_upper_rel(root, UPPERREL_PARTIAL_DISTINCT,
4756 : NULL);
4757 96 : partial_distinct_rel->reltarget = target;
4758 96 : partial_distinct_rel->consider_parallel = input_rel->consider_parallel;
4759 :
4760 : /*
4761 : * If input_rel belongs to a single FDW, so does the partial_distinct_rel.
4762 : */
4763 96 : partial_distinct_rel->serverid = input_rel->serverid;
4764 96 : partial_distinct_rel->userid = input_rel->userid;
4765 96 : partial_distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4766 96 : partial_distinct_rel->fdwroutine = input_rel->fdwroutine;
4767 :
4768 96 : cheapest_partial_path = linitial(input_rel->partial_pathlist);
4769 :
4770 96 : distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
4771 : parse->targetList);
4772 :
4773 : /* estimate how many distinct rows we'll get from each worker */
4774 96 : numDistinctRows = estimate_num_groups(root, distinctExprs,
4775 : cheapest_partial_path->rows,
4776 : NULL, NULL);
4777 :
4778 : /*
4779 : * Try sorting the cheapest path and incrementally sorting any paths with
4780 : * presorted keys and put a unique paths atop of those.
4781 : */
4782 96 : if (grouping_is_sortable(root->processed_distinctClause))
4783 : {
4784 198 : foreach(lc, input_rel->partial_pathlist)
4785 : {
4786 102 : Path *input_path = (Path *) lfirst(lc);
4787 : Path *sorted_path;
4788 : bool is_sorted;
4789 : int presorted_keys;
4790 :
4791 102 : is_sorted = pathkeys_count_contained_in(root->distinct_pathkeys,
4792 : input_path->pathkeys,
4793 : &presorted_keys);
4794 :
4795 102 : if (is_sorted)
4796 6 : sorted_path = input_path;
4797 : else
4798 : {
4799 : /*
4800 : * Try at least sorting the cheapest path and also try
4801 : * incrementally sorting any path which is partially sorted
4802 : * already (no need to deal with paths which have presorted
4803 : * keys when incremental sort is disabled unless it's the
4804 : * cheapest partial path).
4805 : */
4806 96 : if (input_path != cheapest_partial_path &&
4807 6 : (presorted_keys == 0 || !enable_incremental_sort))
4808 0 : continue;
4809 :
4810 : /*
4811 : * We've no need to consider both a sort and incremental sort.
4812 : * We'll just do a sort if there are no presorted keys and an
4813 : * incremental sort when there are presorted keys.
4814 : */
4815 96 : if (presorted_keys == 0 || !enable_incremental_sort)
4816 90 : sorted_path = (Path *) create_sort_path(root,
4817 : partial_distinct_rel,
4818 : input_path,
4819 : root->distinct_pathkeys,
4820 : -1.0);
4821 : else
4822 6 : sorted_path = (Path *) create_incremental_sort_path(root,
4823 : partial_distinct_rel,
4824 : input_path,
4825 : root->distinct_pathkeys,
4826 : presorted_keys,
4827 : -1.0);
4828 : }
4829 :
4830 : /*
4831 : * An empty distinct_pathkeys means all tuples have the same value
4832 : * for the DISTINCT clause. See create_final_distinct_paths()
4833 : */
4834 102 : if (root->distinct_pathkeys == NIL)
4835 : {
4836 : Node *limitCount;
4837 :
4838 6 : limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
4839 : sizeof(int64),
4840 : Int64GetDatum(1), false,
4841 : FLOAT8PASSBYVAL);
4842 :
4843 : /*
4844 : * Apply a LimitPath onto the partial path to restrict the
4845 : * tuples from each worker to 1. create_final_distinct_paths
4846 : * will need to apply an additional LimitPath to restrict this
4847 : * to a single row after the Gather node. If the query
4848 : * already has a LIMIT clause, then we could end up with three
4849 : * Limit nodes in the final plan. Consolidating the top two
4850 : * of these could be done, but does not seem worth troubling
4851 : * over.
4852 : */
4853 6 : add_partial_path(partial_distinct_rel, (Path *)
4854 6 : create_limit_path(root, partial_distinct_rel,
4855 : sorted_path,
4856 : NULL,
4857 : limitCount,
4858 : LIMIT_OPTION_COUNT,
4859 : 0, 1));
4860 : }
4861 : else
4862 : {
4863 96 : add_partial_path(partial_distinct_rel, (Path *)
4864 96 : create_upper_unique_path(root, partial_distinct_rel,
4865 : sorted_path,
4866 96 : list_length(root->distinct_pathkeys),
4867 : numDistinctRows));
4868 : }
4869 : }
4870 : }
4871 :
4872 : /*
4873 : * Now try hash aggregate paths, if enabled and hashing is possible. Since
4874 : * we're not on the hook to ensure we do our best to create at least one
4875 : * path here, we treat enable_hashagg as a hard off-switch rather than the
4876 : * slightly softer variant in create_final_distinct_paths.
4877 : */
4878 96 : if (enable_hashagg && grouping_is_hashable(root->processed_distinctClause))
4879 : {
4880 78 : add_partial_path(partial_distinct_rel, (Path *)
4881 78 : create_agg_path(root,
4882 : partial_distinct_rel,
4883 : cheapest_partial_path,
4884 : cheapest_partial_path->pathtarget,
4885 : AGG_HASHED,
4886 : AGGSPLIT_SIMPLE,
4887 : root->processed_distinctClause,
4888 : NIL,
4889 : NULL,
4890 : numDistinctRows));
4891 : }
4892 :
4893 : /*
4894 : * If there is an FDW that's responsible for all baserels of the query,
4895 : * let it consider adding ForeignPaths.
4896 : */
4897 96 : if (partial_distinct_rel->fdwroutine &&
4898 0 : partial_distinct_rel->fdwroutine->GetForeignUpperPaths)
4899 0 : partial_distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4900 : UPPERREL_PARTIAL_DISTINCT,
4901 : input_rel,
4902 : partial_distinct_rel,
4903 : NULL);
4904 :
4905 : /* Let extensions possibly add some more partial paths */
4906 96 : if (create_upper_paths_hook)
4907 0 : (*create_upper_paths_hook) (root, UPPERREL_PARTIAL_DISTINCT,
4908 : input_rel, partial_distinct_rel, NULL);
4909 :
4910 96 : if (partial_distinct_rel->partial_pathlist != NIL)
4911 : {
4912 96 : generate_useful_gather_paths(root, partial_distinct_rel, true);
4913 96 : set_cheapest(partial_distinct_rel);
4914 :
4915 : /*
4916 : * Finally, create paths to distinctify the final result. This step
4917 : * is needed to remove any duplicates due to combining rows from
4918 : * parallel workers.
4919 : */
4920 96 : create_final_distinct_paths(root, partial_distinct_rel,
4921 : final_distinct_rel);
4922 : }
4923 : }
4924 :
4925 : /*
4926 : * create_final_distinct_paths
4927 : * Create distinct paths in 'distinct_rel' based on 'input_rel' pathlist
4928 : *
4929 : * input_rel: contains the source-data paths
4930 : * distinct_rel: destination relation for storing created paths
4931 : */
4932 : static RelOptInfo *
4933 2292 : create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4934 : RelOptInfo *distinct_rel)
4935 : {
4936 2292 : Query *parse = root->parse;
4937 2292 : Path *cheapest_input_path = input_rel->cheapest_total_path;
4938 : double numDistinctRows;
4939 : bool allow_hash;
4940 :
4941 : /* Estimate number of distinct rows there will be */
4942 2292 : if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
4943 2254 : root->hasHavingQual)
4944 : {
4945 : /*
4946 : * If there was grouping or aggregation, use the number of input rows
4947 : * as the estimated number of DISTINCT rows (ie, assume the input is
4948 : * already mostly unique).
4949 : */
4950 38 : numDistinctRows = cheapest_input_path->rows;
4951 : }
4952 : else
4953 : {
4954 : /*
4955 : * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
4956 : */
4957 : List *distinctExprs;
4958 :
4959 2254 : distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
4960 : parse->targetList);
4961 2254 : numDistinctRows = estimate_num_groups(root, distinctExprs,
4962 : cheapest_input_path->rows,
4963 : NULL, NULL);
4964 : }
4965 :
4966 : /*
4967 : * Consider sort-based implementations of DISTINCT, if possible.
4968 : */
4969 2292 : if (grouping_is_sortable(root->processed_distinctClause))
4970 : {
4971 : /*
4972 : * Firstly, if we have any adequately-presorted paths, just stick a
4973 : * Unique node on those. We also, consider doing an explicit sort of
4974 : * the cheapest input path and Unique'ing that. If any paths have
4975 : * presorted keys then we'll create an incremental sort atop of those
4976 : * before adding a unique node on the top.
4977 : *
4978 : * When we have DISTINCT ON, we must sort by the more rigorous of
4979 : * DISTINCT and ORDER BY, else it won't have the desired behavior.
4980 : * Also, if we do have to do an explicit sort, we might as well use
4981 : * the more rigorous ordering to avoid a second sort later. (Note
4982 : * that the parser will have ensured that one clause is a prefix of
4983 : * the other.)
4984 : */
4985 : List *needed_pathkeys;
4986 : ListCell *lc;
4987 2286 : double limittuples = root->distinct_pathkeys == NIL ? 1.0 : -1.0;
4988 :
4989 2452 : if (parse->hasDistinctOn &&
4990 166 : list_length(root->distinct_pathkeys) <
4991 166 : list_length(root->sort_pathkeys))
4992 42 : needed_pathkeys = root->sort_pathkeys;
4993 : else
4994 2244 : needed_pathkeys = root->distinct_pathkeys;
4995 :
4996 5458 : foreach(lc, input_rel->pathlist)
4997 : {
4998 3172 : Path *input_path = (Path *) lfirst(lc);
4999 : Path *sorted_path;
5000 : bool is_sorted;
5001 : int presorted_keys;
5002 :
5003 3172 : is_sorted = pathkeys_count_contained_in(needed_pathkeys,
5004 : input_path->pathkeys,
5005 : &presorted_keys);
5006 :
5007 3172 : if (is_sorted)
5008 664 : sorted_path = input_path;
5009 : else
5010 : {
5011 : /*
5012 : * Try at least sorting the cheapest path and also try
5013 : * incrementally sorting any path which is partially sorted
5014 : * already (no need to deal with paths which have presorted
5015 : * keys when incremental sort is disabled unless it's the
5016 : * cheapest input path).
5017 : */
5018 2508 : if (input_path != cheapest_input_path &&
5019 410 : (presorted_keys == 0 || !enable_incremental_sort))
5020 82 : continue;
5021 :
5022 : /*
5023 : * We've no need to consider both a sort and incremental sort.
5024 : * We'll just do a sort if there are no presorted keys and an
5025 : * incremental sort when there are presorted keys.
5026 : */
5027 2426 : if (presorted_keys == 0 || !enable_incremental_sort)
5028 2080 : sorted_path = (Path *) create_sort_path(root,
5029 : distinct_rel,
5030 : input_path,
5031 : needed_pathkeys,
5032 : limittuples);
5033 : else
5034 346 : sorted_path = (Path *) create_incremental_sort_path(root,
5035 : distinct_rel,
5036 : input_path,
5037 : needed_pathkeys,
5038 : presorted_keys,
5039 : limittuples);
5040 : }
5041 :
5042 : /*
5043 : * distinct_pathkeys may have become empty if all of the pathkeys
5044 : * were determined to be redundant. If all of the pathkeys are
5045 : * redundant then each DISTINCT target must only allow a single
5046 : * value, therefore all resulting tuples must be identical (or at
5047 : * least indistinguishable by an equality check). We can uniquify
5048 : * these tuples simply by just taking the first tuple. All we do
5049 : * here is add a path to do "LIMIT 1" atop of 'sorted_path'. When
5050 : * doing a DISTINCT ON we may still have a non-NIL sort_pathkeys
5051 : * list, so we must still only do this with paths which are
5052 : * correctly sorted by sort_pathkeys.
5053 : */
5054 3090 : if (root->distinct_pathkeys == NIL)
5055 : {
5056 : Node *limitCount;
5057 :
5058 98 : limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
5059 : sizeof(int64),
5060 : Int64GetDatum(1), false,
5061 : FLOAT8PASSBYVAL);
5062 :
5063 : /*
5064 : * If the query already has a LIMIT clause, then we could end
5065 : * up with a duplicate LimitPath in the final plan. That does
5066 : * not seem worth troubling over too much.
5067 : */
5068 98 : add_path(distinct_rel, (Path *)
5069 98 : create_limit_path(root, distinct_rel, sorted_path,
5070 : NULL, limitCount,
5071 : LIMIT_OPTION_COUNT, 0, 1));
5072 : }
5073 : else
5074 : {
5075 2992 : add_path(distinct_rel, (Path *)
5076 2992 : create_upper_unique_path(root, distinct_rel,
5077 : sorted_path,
5078 2992 : list_length(root->distinct_pathkeys),
5079 : numDistinctRows));
5080 : }
5081 : }
5082 : }
5083 :
5084 : /*
5085 : * Consider hash-based implementations of DISTINCT, if possible.
5086 : *
5087 : * If we were not able to make any other types of path, we *must* hash or
5088 : * die trying. If we do have other choices, there are two things that
5089 : * should prevent selection of hashing: if the query uses DISTINCT ON
5090 : * (because it won't really have the expected behavior if we hash), or if
5091 : * enable_hashagg is off.
5092 : *
5093 : * Note: grouping_is_hashable() is much more expensive to check than the
5094 : * other gating conditions, so we want to do it last.
5095 : */
5096 2292 : if (distinct_rel->pathlist == NIL)
5097 6 : allow_hash = true; /* we have no alternatives */
5098 2286 : else if (parse->hasDistinctOn || !enable_hashagg)
5099 256 : allow_hash = false; /* policy-based decision not to hash */
5100 : else
5101 2030 : allow_hash = true; /* default */
5102 :
5103 2292 : if (allow_hash && grouping_is_hashable(root->processed_distinctClause))
5104 : {
5105 : /* Generate hashed aggregate path --- no sort needed */
5106 2036 : add_path(distinct_rel, (Path *)
5107 2036 : create_agg_path(root,
5108 : distinct_rel,
5109 : cheapest_input_path,
5110 : cheapest_input_path->pathtarget,
5111 : AGG_HASHED,
5112 : AGGSPLIT_SIMPLE,
5113 : root->processed_distinctClause,
5114 : NIL,
5115 : NULL,
5116 : numDistinctRows));
5117 : }
5118 :
5119 2292 : return distinct_rel;
5120 : }
5121 :
5122 : /*
5123 : * create_ordered_paths
5124 : *
5125 : * Build a new upperrel containing Paths for ORDER BY evaluation.
5126 : *
5127 : * All paths in the result must satisfy the ORDER BY ordering.
5128 : * The only new paths we need consider are an explicit full sort
5129 : * and incremental sort on the cheapest-total existing path.
5130 : *
5131 : * input_rel: contains the source-data Paths
5132 : * target: the output tlist the result Paths must emit
5133 : * limit_tuples: estimated bound on the number of output tuples,
5134 : * or -1 if no LIMIT or couldn't estimate
5135 : *
5136 : * XXX This only looks at sort_pathkeys. I wonder if it needs to look at the
5137 : * other pathkeys (grouping, ...) like generate_useful_gather_paths.
5138 : */
5139 : static RelOptInfo *
5140 58044 : create_ordered_paths(PlannerInfo *root,
5141 : RelOptInfo *input_rel,
5142 : PathTarget *target,
5143 : bool target_parallel_safe,
5144 : double limit_tuples)
5145 : {
5146 58044 : Path *cheapest_input_path = input_rel->cheapest_total_path;
5147 : RelOptInfo *ordered_rel;
5148 : ListCell *lc;
5149 :
5150 : /* For now, do all work in the (ORDERED, NULL) upperrel */
5151 58044 : ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
5152 :
5153 : /*
5154 : * If the input relation is not parallel-safe, then the ordered relation
5155 : * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
5156 : * target list is parallel-safe.
5157 : */
5158 58044 : if (input_rel->consider_parallel && target_parallel_safe)
5159 38444 : ordered_rel->consider_parallel = true;
5160 :
5161 : /*
5162 : * If the input rel belongs to a single FDW, so does the ordered_rel.
5163 : */
5164 58044 : ordered_rel->serverid = input_rel->serverid;
5165 58044 : ordered_rel->userid = input_rel->userid;
5166 58044 : ordered_rel->useridiscurrent = input_rel->useridiscurrent;
5167 58044 : ordered_rel->fdwroutine = input_rel->fdwroutine;
5168 :
5169 145704 : foreach(lc, input_rel->pathlist)
5170 : {
5171 87660 : Path *input_path = (Path *) lfirst(lc);
5172 : Path *sorted_path;
5173 : bool is_sorted;
5174 : int presorted_keys;
5175 :
5176 87660 : is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
5177 : input_path->pathkeys, &presorted_keys);
5178 :
5179 87660 : if (is_sorted)
5180 32380 : sorted_path = input_path;
5181 : else
5182 : {
5183 : /*
5184 : * Try at least sorting the cheapest path and also try
5185 : * incrementally sorting any path which is partially sorted
5186 : * already (no need to deal with paths which have presorted keys
5187 : * when incremental sort is disabled unless it's the cheapest
5188 : * input path).
5189 : */
5190 55280 : if (input_path != cheapest_input_path &&
5191 5234 : (presorted_keys == 0 || !enable_incremental_sort))
5192 1682 : continue;
5193 :
5194 : /*
5195 : * We've no need to consider both a sort and incremental sort.
5196 : * We'll just do a sort if there are no presorted keys and an
5197 : * incremental sort when there are presorted keys.
5198 : */
5199 53598 : if (presorted_keys == 0 || !enable_incremental_sort)
5200 49656 : sorted_path = (Path *) create_sort_path(root,
5201 : ordered_rel,
5202 : input_path,
5203 : root->sort_pathkeys,
5204 : limit_tuples);
5205 : else
5206 3942 : sorted_path = (Path *) create_incremental_sort_path(root,
5207 : ordered_rel,
5208 : input_path,
5209 : root->sort_pathkeys,
5210 : presorted_keys,
5211 : limit_tuples);
5212 : }
5213 :
5214 : /* Add projection step if needed */
5215 85978 : if (sorted_path->pathtarget != target)
5216 17718 : sorted_path = apply_projection_to_path(root, ordered_rel,
5217 : sorted_path, target);
5218 :
5219 85978 : add_path(ordered_rel, sorted_path);
5220 : }
5221 :
5222 : /*
5223 : * generate_gather_paths() will have already generated a simple Gather
5224 : * path for the best parallel path, if any, and the loop above will have
5225 : * considered sorting it. Similarly, generate_gather_paths() will also
5226 : * have generated order-preserving Gather Merge plans which can be used
5227 : * without sorting if they happen to match the sort_pathkeys, and the loop
5228 : * above will have handled those as well. However, there's one more
5229 : * possibility: it may make sense to sort the cheapest partial path or
5230 : * incrementally sort any partial path that is partially sorted according
5231 : * to the required output order and then use Gather Merge.
5232 : */
5233 58044 : if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
5234 38306 : input_rel->partial_pathlist != NIL)
5235 : {
5236 : Path *cheapest_partial_path;
5237 :
5238 2114 : cheapest_partial_path = linitial(input_rel->partial_pathlist);
5239 :
5240 4452 : foreach(lc, input_rel->partial_pathlist)
5241 : {
5242 2338 : Path *input_path = (Path *) lfirst(lc);
5243 : Path *sorted_path;
5244 : bool is_sorted;
5245 : int presorted_keys;
5246 : double total_groups;
5247 :
5248 2338 : is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
5249 : input_path->pathkeys,
5250 : &presorted_keys);
5251 :
5252 2338 : if (is_sorted)
5253 194 : continue;
5254 :
5255 : /*
5256 : * Try at least sorting the cheapest path and also try
5257 : * incrementally sorting any path which is partially sorted
5258 : * already (no need to deal with paths which have presorted keys
5259 : * when incremental sort is disabled unless it's the cheapest
5260 : * partial path).
5261 : */
5262 2144 : if (input_path != cheapest_partial_path &&
5263 42 : (presorted_keys == 0 || !enable_incremental_sort))
5264 0 : continue;
5265 :
5266 : /*
5267 : * We've no need to consider both a sort and incremental sort.
5268 : * We'll just do a sort if there are no presorted keys and an
5269 : * incremental sort when there are presorted keys.
5270 : */
5271 2144 : if (presorted_keys == 0 || !enable_incremental_sort)
5272 2084 : sorted_path = (Path *) create_sort_path(root,
5273 : ordered_rel,
5274 : input_path,
5275 : root->sort_pathkeys,
5276 : limit_tuples);
5277 : else
5278 60 : sorted_path = (Path *) create_incremental_sort_path(root,
5279 : ordered_rel,
5280 : input_path,
5281 : root->sort_pathkeys,
5282 : presorted_keys,
5283 : limit_tuples);
5284 2144 : total_groups = input_path->rows *
5285 2144 : input_path->parallel_workers;
5286 : sorted_path = (Path *)
5287 2144 : create_gather_merge_path(root, ordered_rel,
5288 : sorted_path,
5289 : sorted_path->pathtarget,
5290 : root->sort_pathkeys, NULL,
5291 : &total_groups);
5292 :
5293 : /* Add projection step if needed */
5294 2144 : if (sorted_path->pathtarget != target)
5295 442 : sorted_path = apply_projection_to_path(root, ordered_rel,
5296 : sorted_path, target);
5297 :
5298 2144 : add_path(ordered_rel, sorted_path);
5299 : }
5300 : }
5301 :
5302 : /*
5303 : * If there is an FDW that's responsible for all baserels of the query,
5304 : * let it consider adding ForeignPaths.
5305 : */
5306 58044 : if (ordered_rel->fdwroutine &&
5307 376 : ordered_rel->fdwroutine->GetForeignUpperPaths)
5308 362 : ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
5309 : input_rel, ordered_rel,
5310 : NULL);
5311 :
5312 : /* Let extensions possibly add some more paths */
5313 58044 : if (create_upper_paths_hook)
5314 0 : (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
5315 : input_rel, ordered_rel, NULL);
5316 :
5317 : /*
5318 : * No need to bother with set_cheapest here; grouping_planner does not
5319 : * need us to do it.
5320 : */
5321 : Assert(ordered_rel->pathlist != NIL);
5322 :
5323 58044 : return ordered_rel;
5324 : }
5325 :
5326 :
5327 : /*
5328 : * make_group_input_target
5329 : * Generate appropriate PathTarget for initial input to grouping nodes.
5330 : *
5331 : * If there is grouping or aggregation, the scan/join subplan cannot emit
5332 : * the query's final targetlist; for example, it certainly can't emit any
5333 : * aggregate function calls. This routine generates the correct target
5334 : * for the scan/join subplan.
5335 : *
5336 : * The query target list passed from the parser already contains entries
5337 : * for all ORDER BY and GROUP BY expressions, but it will not have entries
5338 : * for variables used only in HAVING clauses; so we need to add those
5339 : * variables to the subplan target list. Also, we flatten all expressions
5340 : * except GROUP BY items into their component variables; other expressions
5341 : * will be computed by the upper plan nodes rather than by the subplan.
5342 : * For example, given a query like
5343 : * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
5344 : * we want to pass this targetlist to the subplan:
5345 : * a+b,c,d
5346 : * where the a+b target will be used by the Sort/Group steps, and the
5347 : * other targets will be used for computing the final results.
5348 : *
5349 : * 'final_target' is the query's final target list (in PathTarget form)
5350 : *
5351 : * The result is the PathTarget to be computed by the Paths returned from
5352 : * query_planner().
5353 : */
5354 : static PathTarget *
5355 34154 : make_group_input_target(PlannerInfo *root, PathTarget *final_target)
5356 : {
5357 34154 : Query *parse = root->parse;
5358 : PathTarget *input_target;
5359 : List *non_group_cols;
5360 : List *non_group_vars;
5361 : int i;
5362 : ListCell *lc;
5363 :
5364 : /*
5365 : * We must build a target containing all grouping columns, plus any other
5366 : * Vars mentioned in the query's targetlist and HAVING qual.
5367 : */
5368 34154 : input_target = create_empty_pathtarget();
5369 34154 : non_group_cols = NIL;
5370 :
5371 34154 : i = 0;
5372 83238 : foreach(lc, final_target->exprs)
5373 : {
5374 49084 : Expr *expr = (Expr *) lfirst(lc);
5375 49084 : Index sgref = get_pathtarget_sortgroupref(final_target, i);
5376 :
5377 57050 : if (sgref && root->processed_groupClause &&
5378 7966 : get_sortgroupref_clause_noerr(sgref,
5379 : root->processed_groupClause) != NULL)
5380 : {
5381 : /*
5382 : * It's a grouping column, so add it to the input target as-is.
5383 : */
5384 6410 : add_column_to_pathtarget(input_target, expr, sgref);
5385 : }
5386 : else
5387 : {
5388 : /*
5389 : * Non-grouping column, so just remember the expression for later
5390 : * call to pull_var_clause.
5391 : */
5392 42674 : non_group_cols = lappend(non_group_cols, expr);
5393 : }
5394 :
5395 49084 : i++;
5396 : }
5397 :
5398 : /*
5399 : * If there's a HAVING clause, we'll need the Vars it uses, too.
5400 : */
5401 34154 : if (parse->havingQual)
5402 980 : non_group_cols = lappend(non_group_cols, parse->havingQual);
5403 :
5404 : /*
5405 : * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
5406 : * add them to the input target if not already present. (A Var used
5407 : * directly as a GROUP BY item will be present already.) Note this
5408 : * includes Vars used in resjunk items, so we are covering the needs of
5409 : * ORDER BY and window specifications. Vars used within Aggrefs and
5410 : * WindowFuncs will be pulled out here, too.
5411 : */
5412 34154 : non_group_vars = pull_var_clause((Node *) non_group_cols,
5413 : PVC_RECURSE_AGGREGATES |
5414 : PVC_RECURSE_WINDOWFUNCS |
5415 : PVC_INCLUDE_PLACEHOLDERS);
5416 34154 : add_new_columns_to_pathtarget(input_target, non_group_vars);
5417 :
5418 : /* clean up cruft */
5419 34154 : list_free(non_group_vars);
5420 34154 : list_free(non_group_cols);
5421 :
5422 : /* XXX this causes some redundant cost calculation ... */
5423 34154 : return set_pathtarget_cost_width(root, input_target);
5424 : }
5425 :
5426 : /*
5427 : * make_partial_grouping_target
5428 : * Generate appropriate PathTarget for output of partial aggregate
5429 : * (or partial grouping, if there are no aggregates) nodes.
5430 : *
5431 : * A partial aggregation node needs to emit all the same aggregates that
5432 : * a regular aggregation node would, plus any aggregates used in HAVING;
5433 : * except that the Aggref nodes should be marked as partial aggregates.
5434 : *
5435 : * In addition, we'd better emit any Vars and PlaceHolderVars that are
5436 : * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
5437 : * these would be Vars that are grouped by or used in grouping expressions.)
5438 : *
5439 : * grouping_target is the tlist to be emitted by the topmost aggregation step.
5440 : * havingQual represents the HAVING clause.
5441 : */
5442 : static PathTarget *
5443 2164 : make_partial_grouping_target(PlannerInfo *root,
5444 : PathTarget *grouping_target,
5445 : Node *havingQual)
5446 : {
5447 : PathTarget *partial_target;
5448 : List *non_group_cols;
5449 : List *non_group_exprs;
5450 : int i;
5451 : ListCell *lc;
5452 :
5453 2164 : partial_target = create_empty_pathtarget();
5454 2164 : non_group_cols = NIL;
5455 :
5456 2164 : i = 0;
5457 7674 : foreach(lc, grouping_target->exprs)
5458 : {
5459 5510 : Expr *expr = (Expr *) lfirst(lc);
5460 5510 : Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
5461 :
5462 9252 : if (sgref && root->processed_groupClause &&
5463 3742 : get_sortgroupref_clause_noerr(sgref,
5464 : root->processed_groupClause) != NULL)
5465 : {
5466 : /*
5467 : * It's a grouping column, so add it to the partial_target as-is.
5468 : * (This allows the upper agg step to repeat the grouping calcs.)
5469 : */
5470 1864 : add_column_to_pathtarget(partial_target, expr, sgref);
5471 : }
5472 : else
5473 : {
5474 : /*
5475 : * Non-grouping column, so just remember the expression for later
5476 : * call to pull_var_clause.
5477 : */
5478 3646 : non_group_cols = lappend(non_group_cols, expr);
5479 : }
5480 :
5481 5510 : i++;
5482 : }
5483 :
5484 : /*
5485 : * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
5486 : */
5487 2164 : if (havingQual)
5488 824 : non_group_cols = lappend(non_group_cols, havingQual);
5489 :
5490 : /*
5491 : * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
5492 : * non-group cols (plus HAVING), and add them to the partial_target if not
5493 : * already present. (An expression used directly as a GROUP BY item will
5494 : * be present already.) Note this includes Vars used in resjunk items, so
5495 : * we are covering the needs of ORDER BY and window specifications.
5496 : */
5497 2164 : non_group_exprs = pull_var_clause((Node *) non_group_cols,
5498 : PVC_INCLUDE_AGGREGATES |
5499 : PVC_RECURSE_WINDOWFUNCS |
5500 : PVC_INCLUDE_PLACEHOLDERS);
5501 :
5502 2164 : add_new_columns_to_pathtarget(partial_target, non_group_exprs);
5503 :
5504 : /*
5505 : * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
5506 : * are at the top level of the target list, so we can just scan the list
5507 : * rather than recursing through the expression trees.
5508 : */
5509 8282 : foreach(lc, partial_target->exprs)
5510 : {
5511 6118 : Aggref *aggref = (Aggref *) lfirst(lc);
5512 :
5513 6118 : if (IsA(aggref, Aggref))
5514 : {
5515 : Aggref *newaggref;
5516 :
5517 : /*
5518 : * We shouldn't need to copy the substructure of the Aggref node,
5519 : * but flat-copy the node itself to avoid damaging other trees.
5520 : */
5521 4224 : newaggref = makeNode(Aggref);
5522 4224 : memcpy(newaggref, aggref, sizeof(Aggref));
5523 :
5524 : /* For now, assume serialization is required */
5525 4224 : mark_partial_aggref(newaggref, AGGSPLIT_INITIAL_SERIAL);
5526 :
5527 4224 : lfirst(lc) = newaggref;
5528 : }
5529 : }
5530 :
5531 : /* clean up cruft */
5532 2164 : list_free(non_group_exprs);
5533 2164 : list_free(non_group_cols);
5534 :
5535 : /* XXX this causes some redundant cost calculation ... */
5536 2164 : return set_pathtarget_cost_width(root, partial_target);
5537 : }
5538 :
5539 : /*
5540 : * mark_partial_aggref
5541 : * Adjust an Aggref to make it represent a partial-aggregation step.
5542 : *
5543 : * The Aggref node is modified in-place; caller must do any copying required.
5544 : */
5545 : void
5546 7048 : mark_partial_aggref(Aggref *agg, AggSplit aggsplit)
5547 : {
5548 : /* aggtranstype should be computed by this point */
5549 : Assert(OidIsValid(agg->aggtranstype));
5550 : /* ... but aggsplit should still be as the parser left it */
5551 : Assert(agg->aggsplit == AGGSPLIT_SIMPLE);
5552 :
5553 : /* Mark the Aggref with the intended partial-aggregation mode */
5554 7048 : agg->aggsplit = aggsplit;
5555 :
5556 : /*
5557 : * Adjust result type if needed. Normally, a partial aggregate returns
5558 : * the aggregate's transition type; but if that's INTERNAL and we're
5559 : * serializing, it returns BYTEA instead.
5560 : */
5561 7048 : if (DO_AGGSPLIT_SKIPFINAL(aggsplit))
5562 : {
5563 5636 : if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit))
5564 242 : agg->aggtype = BYTEAOID;
5565 : else
5566 5394 : agg->aggtype = agg->aggtranstype;
5567 : }
5568 7048 : }
5569 :
5570 : /*
5571 : * postprocess_setop_tlist
5572 : * Fix up targetlist returned by plan_set_operations().
5573 : *
5574 : * We need to transpose sort key info from the orig_tlist into new_tlist.
5575 : * NOTE: this would not be good enough if we supported resjunk sort keys
5576 : * for results of set operations --- then, we'd need to project a whole
5577 : * new tlist to evaluate the resjunk columns. For now, just ereport if we
5578 : * find any resjunk columns in orig_tlist.
5579 : */
5580 : static List *
5581 5238 : postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
5582 : {
5583 : ListCell *l;
5584 5238 : ListCell *orig_tlist_item = list_head(orig_tlist);
5585 :
5586 20204 : foreach(l, new_tlist)
5587 : {
5588 14966 : TargetEntry *new_tle = lfirst_node(TargetEntry, l);
5589 : TargetEntry *orig_tle;
5590 :
5591 : /* ignore resjunk columns in setop result */
5592 14966 : if (new_tle->resjunk)
5593 512 : continue;
5594 :
5595 : Assert(orig_tlist_item != NULL);
5596 14454 : orig_tle = lfirst_node(TargetEntry, orig_tlist_item);
5597 14454 : orig_tlist_item = lnext(orig_tlist, orig_tlist_item);
5598 14454 : if (orig_tle->resjunk) /* should not happen */
5599 0 : elog(ERROR, "resjunk output columns are not implemented");
5600 : Assert(new_tle->resno == orig_tle->resno);
5601 14454 : new_tle->ressortgroupref = orig_tle->ressortgroupref;
5602 : }
5603 5238 : if (orig_tlist_item != NULL)
5604 0 : elog(ERROR, "resjunk output columns are not implemented");
5605 5238 : return new_tlist;
5606 : }
5607 :
5608 : /*
5609 : * optimize_window_clauses
5610 : * Call each WindowFunc's prosupport function to see if we're able to
5611 : * make any adjustments to any of the WindowClause's so that the executor
5612 : * can execute the window functions in a more optimal way.
5613 : *
5614 : * Currently we only allow adjustments to the WindowClause's frameOptions. We
5615 : * may allow more things to be done here in the future.
5616 : */
5617 : static void
5618 2300 : optimize_window_clauses(PlannerInfo *root, WindowFuncLists *wflists)
5619 : {
5620 2300 : List *windowClause = root->parse->windowClause;
5621 : ListCell *lc;
5622 :
5623 4810 : foreach(lc, windowClause)
5624 : {
5625 2510 : WindowClause *wc = lfirst_node(WindowClause, lc);
5626 : ListCell *lc2;
5627 2510 : int optimizedFrameOptions = 0;
5628 :
5629 : Assert(wc->winref <= wflists->maxWinRef);
5630 :
5631 : /* skip any WindowClauses that have no WindowFuncs */
5632 2510 : if (wflists->windowFuncs[wc->winref] == NIL)
5633 24 : continue;
5634 :
5635 3008 : foreach(lc2, wflists->windowFuncs[wc->winref])
5636 : {
5637 : SupportRequestOptimizeWindowClause req;
5638 : SupportRequestOptimizeWindowClause *res;
5639 2522 : WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
5640 : Oid prosupport;
5641 :
5642 2522 : prosupport = get_func_support(wfunc->winfnoid);
5643 :
5644 : /* Check if there's a support function for 'wfunc' */
5645 2522 : if (!OidIsValid(prosupport))
5646 2000 : break; /* can't optimize this WindowClause */
5647 :
5648 742 : req.type = T_SupportRequestOptimizeWindowClause;
5649 742 : req.window_clause = wc;
5650 742 : req.window_func = wfunc;
5651 742 : req.frameOptions = wc->frameOptions;
5652 :
5653 : /* call the support function */
5654 : res = (SupportRequestOptimizeWindowClause *)
5655 742 : DatumGetPointer(OidFunctionCall1(prosupport,
5656 : PointerGetDatum(&req)));
5657 :
5658 : /*
5659 : * Skip to next WindowClause if the support function does not
5660 : * support this request type.
5661 : */
5662 742 : if (res == NULL)
5663 220 : break;
5664 :
5665 : /*
5666 : * Save these frameOptions for the first WindowFunc for this
5667 : * WindowClause.
5668 : */
5669 522 : if (foreach_current_index(lc2) == 0)
5670 498 : optimizedFrameOptions = res->frameOptions;
5671 :
5672 : /*
5673 : * On subsequent WindowFuncs, if the frameOptions are not the same
5674 : * then we're unable to optimize the frameOptions for this
5675 : * WindowClause.
5676 : */
5677 24 : else if (optimizedFrameOptions != res->frameOptions)
5678 0 : break; /* skip to the next WindowClause, if any */
5679 : }
5680 :
5681 : /* adjust the frameOptions if all WindowFunc's agree that it's ok */
5682 2486 : if (lc2 == NULL && wc->frameOptions != optimizedFrameOptions)
5683 : {
5684 : ListCell *lc3;
5685 :
5686 : /* apply the new frame options */
5687 486 : wc->frameOptions = optimizedFrameOptions;
5688 :
5689 : /*
5690 : * We now check to see if changing the frameOptions has caused
5691 : * this WindowClause to be a duplicate of some other WindowClause.
5692 : * This can only happen if we have multiple WindowClauses, so
5693 : * don't bother if there's only 1.
5694 : */
5695 486 : if (list_length(windowClause) == 1)
5696 396 : continue;
5697 :
5698 : /*
5699 : * Do the duplicate check and reuse the existing WindowClause if
5700 : * we find a duplicate.
5701 : */
5702 228 : foreach(lc3, windowClause)
5703 : {
5704 174 : WindowClause *existing_wc = lfirst_node(WindowClause, lc3);
5705 :
5706 : /* skip over the WindowClause we're currently editing */
5707 174 : if (existing_wc == wc)
5708 54 : continue;
5709 :
5710 : /*
5711 : * Perform the same duplicate check that is done in
5712 : * transformWindowFuncCall.
5713 : */
5714 240 : if (equal(wc->partitionClause, existing_wc->partitionClause) &&
5715 120 : equal(wc->orderClause, existing_wc->orderClause) &&
5716 120 : wc->frameOptions == existing_wc->frameOptions &&
5717 72 : equal(wc->startOffset, existing_wc->startOffset) &&
5718 36 : equal(wc->endOffset, existing_wc->endOffset))
5719 : {
5720 : ListCell *lc4;
5721 :
5722 : /*
5723 : * Now move each WindowFunc in 'wc' into 'existing_wc'.
5724 : * This required adjusting each WindowFunc's winref and
5725 : * moving the WindowFuncs in 'wc' to the list of
5726 : * WindowFuncs in 'existing_wc'.
5727 : */
5728 78 : foreach(lc4, wflists->windowFuncs[wc->winref])
5729 : {
5730 42 : WindowFunc *wfunc = lfirst_node(WindowFunc, lc4);
5731 :
5732 42 : wfunc->winref = existing_wc->winref;
5733 : }
5734 :
5735 : /* move list items */
5736 72 : wflists->windowFuncs[existing_wc->winref] = list_concat(wflists->windowFuncs[existing_wc->winref],
5737 36 : wflists->windowFuncs[wc->winref]);
5738 36 : wflists->windowFuncs[wc->winref] = NIL;
5739 :
5740 : /*
5741 : * transformWindowFuncCall() should have made sure there
5742 : * are no other duplicates, so we needn't bother looking
5743 : * any further.
5744 : */
5745 36 : break;
5746 : }
5747 : }
5748 : }
5749 : }
5750 2300 : }
5751 :
5752 : /*
5753 : * select_active_windows
5754 : * Create a list of the "active" window clauses (ie, those referenced
5755 : * by non-deleted WindowFuncs) in the order they are to be executed.
5756 : */
5757 : static List *
5758 2300 : select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
5759 : {
5760 2300 : List *windowClause = root->parse->windowClause;
5761 2300 : List *result = NIL;
5762 : ListCell *lc;
5763 2300 : int nActive = 0;
5764 2300 : WindowClauseSortData *actives = palloc(sizeof(WindowClauseSortData)
5765 2300 : * list_length(windowClause));
5766 :
5767 : /* First, construct an array of the active windows */
5768 4810 : foreach(lc, windowClause)
5769 : {
5770 2510 : WindowClause *wc = lfirst_node(WindowClause, lc);
5771 :
5772 : /* It's only active if wflists shows some related WindowFuncs */
5773 : Assert(wc->winref <= wflists->maxWinRef);
5774 2510 : if (wflists->windowFuncs[wc->winref] == NIL)
5775 60 : continue;
5776 :
5777 2450 : actives[nActive].wc = wc; /* original clause */
5778 :
5779 : /*
5780 : * For sorting, we want the list of partition keys followed by the
5781 : * list of sort keys. But pathkeys construction will remove duplicates
5782 : * between the two, so we can as well (even though we can't detect all
5783 : * of the duplicates, since some may come from ECs - that might mean
5784 : * we miss optimization chances here). We must, however, ensure that
5785 : * the order of entries is preserved with respect to the ones we do
5786 : * keep.
5787 : *
5788 : * partitionClause and orderClause had their own duplicates removed in
5789 : * parse analysis, so we're only concerned here with removing
5790 : * orderClause entries that also appear in partitionClause.
5791 : */
5792 4900 : actives[nActive].uniqueOrder =
5793 2450 : list_concat_unique(list_copy(wc->partitionClause),
5794 2450 : wc->orderClause);
5795 2450 : nActive++;
5796 : }
5797 :
5798 : /*
5799 : * Sort active windows by their partitioning/ordering clauses, ignoring
5800 : * any framing clauses, so that the windows that need the same sorting are
5801 : * adjacent in the list. When we come to generate paths, this will avoid
5802 : * inserting additional Sort nodes.
5803 : *
5804 : * This is how we implement a specific requirement from the SQL standard,
5805 : * which says that when two or more windows are order-equivalent (i.e.
5806 : * have matching partition and order clauses, even if their names or
5807 : * framing clauses differ), then all peer rows must be presented in the
5808 : * same order in all of them. If we allowed multiple sort nodes for such
5809 : * cases, we'd risk having the peer rows end up in different orders in
5810 : * equivalent windows due to sort instability. (See General Rule 4 of
5811 : * <window clause> in SQL2008 - SQL2016.)
5812 : *
5813 : * Additionally, if the entire list of clauses of one window is a prefix
5814 : * of another, put first the window with stronger sorting requirements.
5815 : * This way we will first sort for stronger window, and won't have to sort
5816 : * again for the weaker one.
5817 : */
5818 2300 : qsort(actives, nActive, sizeof(WindowClauseSortData), common_prefix_cmp);
5819 :
5820 : /* build ordered list of the original WindowClause nodes */
5821 4750 : for (int i = 0; i < nActive; i++)
5822 2450 : result = lappend(result, actives[i].wc);
5823 :
5824 2300 : pfree(actives);
5825 :
5826 2300 : return result;
5827 : }
5828 :
5829 : /*
5830 : * common_prefix_cmp
5831 : * QSort comparison function for WindowClauseSortData
5832 : *
5833 : * Sort the windows by the required sorting clauses. First, compare the sort
5834 : * clauses themselves. Second, if one window's clauses are a prefix of another
5835 : * one's clauses, put the window with more sort clauses first.
5836 : *
5837 : * We purposefully sort by the highest tleSortGroupRef first. Since
5838 : * tleSortGroupRefs are assigned for the query's DISTINCT and ORDER BY first
5839 : * and because here we sort the lowest tleSortGroupRefs last, if a
5840 : * WindowClause is sharing a tleSortGroupRef with the query's DISTINCT or
5841 : * ORDER BY clause, this makes it more likely that the final WindowAgg will
5842 : * provide presorted input for the query's DISTINCT or ORDER BY clause, thus
5843 : * reducing the total number of sorts required for the query.
5844 : */
5845 : static int
5846 162 : common_prefix_cmp(const void *a, const void *b)
5847 : {
5848 162 : const WindowClauseSortData *wcsa = a;
5849 162 : const WindowClauseSortData *wcsb = b;
5850 : ListCell *item_a;
5851 : ListCell *item_b;
5852 :
5853 276 : forboth(item_a, wcsa->uniqueOrder, item_b, wcsb->uniqueOrder)
5854 : {
5855 216 : SortGroupClause *sca = lfirst_node(SortGroupClause, item_a);
5856 216 : SortGroupClause *scb = lfirst_node(SortGroupClause, item_b);
5857 :
5858 216 : if (sca->tleSortGroupRef > scb->tleSortGroupRef)
5859 102 : return -1;
5860 204 : else if (sca->tleSortGroupRef < scb->tleSortGroupRef)
5861 66 : return 1;
5862 138 : else if (sca->sortop > scb->sortop)
5863 0 : return -1;
5864 138 : else if (sca->sortop < scb->sortop)
5865 24 : return 1;
5866 114 : else if (sca->nulls_first && !scb->nulls_first)
5867 0 : return -1;
5868 114 : else if (!sca->nulls_first && scb->nulls_first)
5869 0 : return 1;
5870 : /* no need to compare eqop, since it is fully determined by sortop */
5871 : }
5872 :
5873 60 : if (list_length(wcsa->uniqueOrder) > list_length(wcsb->uniqueOrder))
5874 6 : return -1;
5875 54 : else if (list_length(wcsa->uniqueOrder) < list_length(wcsb->uniqueOrder))
5876 12 : return 1;
5877 :
5878 42 : return 0;
5879 : }
5880 :
5881 : /*
5882 : * make_window_input_target
5883 : * Generate appropriate PathTarget for initial input to WindowAgg nodes.
5884 : *
5885 : * When the query has window functions, this function computes the desired
5886 : * target to be computed by the node just below the first WindowAgg.
5887 : * This tlist must contain all values needed to evaluate the window functions,
5888 : * compute the final target list, and perform any required final sort step.
5889 : * If multiple WindowAggs are needed, each intermediate one adds its window
5890 : * function results onto this base tlist; only the topmost WindowAgg computes
5891 : * the actual desired target list.
5892 : *
5893 : * This function is much like make_group_input_target, though not quite enough
5894 : * like it to share code. As in that function, we flatten most expressions
5895 : * into their component variables. But we do not want to flatten window
5896 : * PARTITION BY/ORDER BY clauses, since that might result in multiple
5897 : * evaluations of them, which would be bad (possibly even resulting in
5898 : * inconsistent answers, if they contain volatile functions).
5899 : * Also, we must not flatten GROUP BY clauses that were left unflattened by
5900 : * make_group_input_target, because we may no longer have access to the
5901 : * individual Vars in them.
5902 : *
5903 : * Another key difference from make_group_input_target is that we don't
5904 : * flatten Aggref expressions, since those are to be computed below the
5905 : * window functions and just referenced like Vars above that.
5906 : *
5907 : * 'final_target' is the query's final target list (in PathTarget form)
5908 : * 'activeWindows' is the list of active windows previously identified by
5909 : * select_active_windows.
5910 : *
5911 : * The result is the PathTarget to be computed by the plan node immediately
5912 : * below the first WindowAgg node.
5913 : */
5914 : static PathTarget *
5915 2300 : make_window_input_target(PlannerInfo *root,
5916 : PathTarget *final_target,
5917 : List *activeWindows)
5918 : {
5919 : PathTarget *input_target;
5920 : Bitmapset *sgrefs;
5921 : List *flattenable_cols;
5922 : List *flattenable_vars;
5923 : int i;
5924 : ListCell *lc;
5925 :
5926 : Assert(root->parse->hasWindowFuncs);
5927 :
5928 : /*
5929 : * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
5930 : * into a bitmapset for convenient reference below.
5931 : */
5932 2300 : sgrefs = NULL;
5933 4750 : foreach(lc, activeWindows)
5934 : {
5935 2450 : WindowClause *wc = lfirst_node(WindowClause, lc);
5936 : ListCell *lc2;
5937 :
5938 3150 : foreach(lc2, wc->partitionClause)
5939 : {
5940 700 : SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2);
5941 :
5942 700 : sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5943 : }
5944 4570 : foreach(lc2, wc->orderClause)
5945 : {
5946 2120 : SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2);
5947 :
5948 2120 : sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5949 : }
5950 : }
5951 :
5952 : /* Add in sortgroupref numbers of GROUP BY clauses, too */
5953 2462 : foreach(lc, root->processed_groupClause)
5954 : {
5955 162 : SortGroupClause *grpcl = lfirst_node(SortGroupClause, lc);
5956 :
5957 162 : sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef);
5958 : }
5959 :
5960 : /*
5961 : * Construct a target containing all the non-flattenable targetlist items,
5962 : * and save aside the others for a moment.
5963 : */
5964 2300 : input_target = create_empty_pathtarget();
5965 2300 : flattenable_cols = NIL;
5966 :
5967 2300 : i = 0;
5968 10000 : foreach(lc, final_target->exprs)
5969 : {
5970 7700 : Expr *expr = (Expr *) lfirst(lc);
5971 7700 : Index sgref = get_pathtarget_sortgroupref(final_target, i);
5972 :
5973 : /*
5974 : * Don't want to deconstruct window clauses or GROUP BY items. (Note
5975 : * that such items can't contain window functions, so it's okay to
5976 : * compute them below the WindowAgg nodes.)
5977 : */
5978 7700 : if (sgref != 0 && bms_is_member(sgref, sgrefs))
5979 : {
5980 : /*
5981 : * Don't want to deconstruct this value, so add it to the input
5982 : * target as-is.
5983 : */
5984 2710 : add_column_to_pathtarget(input_target, expr, sgref);
5985 : }
5986 : else
5987 : {
5988 : /*
5989 : * Column is to be flattened, so just remember the expression for
5990 : * later call to pull_var_clause.
5991 : */
5992 4990 : flattenable_cols = lappend(flattenable_cols, expr);
5993 : }
5994 :
5995 7700 : i++;
5996 : }
5997 :
5998 : /*
5999 : * Pull out all the Vars and Aggrefs mentioned in flattenable columns, and
6000 : * add them to the input target if not already present. (Some might be
6001 : * there already because they're used directly as window/group clauses.)
6002 : *
6003 : * Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that any
6004 : * Aggrefs are placed in the Agg node's tlist and not left to be computed
6005 : * at higher levels. On the other hand, we should recurse into
6006 : * WindowFuncs to make sure their input expressions are available.
6007 : */
6008 2300 : flattenable_vars = pull_var_clause((Node *) flattenable_cols,
6009 : PVC_INCLUDE_AGGREGATES |
6010 : PVC_RECURSE_WINDOWFUNCS |
6011 : PVC_INCLUDE_PLACEHOLDERS);
6012 2300 : add_new_columns_to_pathtarget(input_target, flattenable_vars);
6013 :
6014 : /* clean up cruft */
6015 2300 : list_free(flattenable_vars);
6016 2300 : list_free(flattenable_cols);
6017 :
6018 : /* XXX this causes some redundant cost calculation ... */
6019 2300 : return set_pathtarget_cost_width(root, input_target);
6020 : }
6021 :
6022 : /*
6023 : * make_pathkeys_for_window
6024 : * Create a pathkeys list describing the required input ordering
6025 : * for the given WindowClause.
6026 : *
6027 : * Modifies wc's partitionClause to remove any clauses which are deemed
6028 : * redundant by the pathkey logic.
6029 : *
6030 : * The required ordering is first the PARTITION keys, then the ORDER keys.
6031 : * In the future we might try to implement windowing using hashing, in which
6032 : * case the ordering could be relaxed, but for now we always sort.
6033 : */
6034 : static List *
6035 4936 : make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
6036 : List *tlist)
6037 : {
6038 4936 : List *window_pathkeys = NIL;
6039 :
6040 : /* Throw error if can't sort */
6041 4936 : if (!grouping_is_sortable(wc->partitionClause))
6042 0 : ereport(ERROR,
6043 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
6044 : errmsg("could not implement window PARTITION BY"),
6045 : errdetail("Window partitioning columns must be of sortable datatypes.")));
6046 4936 : if (!grouping_is_sortable(wc->orderClause))
6047 0 : ereport(ERROR,
6048 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
6049 : errmsg("could not implement window ORDER BY"),
6050 : errdetail("Window ordering columns must be of sortable datatypes.")));
6051 :
6052 : /*
6053 : * First fetch the pathkeys for the PARTITION BY clause. We can safely
6054 : * remove any clauses from the wc->partitionClause for redundant pathkeys.
6055 : */
6056 4936 : if (wc->partitionClause != NIL)
6057 : {
6058 : bool sortable;
6059 :
6060 1174 : window_pathkeys = make_pathkeys_for_sortclauses_extended(root,
6061 : &wc->partitionClause,
6062 : tlist,
6063 : true,
6064 : &sortable);
6065 :
6066 : Assert(sortable);
6067 : }
6068 :
6069 : /*
6070 : * In principle, we could also consider removing redundant ORDER BY items
6071 : * too as doing so does not alter the result of peer row checks done by
6072 : * the executor. However, we must *not* remove the ordering column for
6073 : * RANGE OFFSET cases, as the executor needs that for in_range tests even
6074 : * if it's known to be equal to some partitioning column.
6075 : */
6076 4936 : if (wc->orderClause != NIL)
6077 : {
6078 : List *orderby_pathkeys;
6079 :
6080 4174 : orderby_pathkeys = make_pathkeys_for_sortclauses(root,
6081 : wc->orderClause,
6082 : tlist);
6083 :
6084 : /* Okay, make the combined pathkeys */
6085 4174 : if (window_pathkeys != NIL)
6086 880 : window_pathkeys = append_pathkeys(window_pathkeys, orderby_pathkeys);
6087 : else
6088 3294 : window_pathkeys = orderby_pathkeys;
6089 : }
6090 :
6091 4936 : return window_pathkeys;
6092 : }
6093 :
6094 : /*
6095 : * make_sort_input_target
6096 : * Generate appropriate PathTarget for initial input to Sort step.
6097 : *
6098 : * If the query has ORDER BY, this function chooses the target to be computed
6099 : * by the node just below the Sort (and DISTINCT, if any, since Unique can't
6100 : * project) steps. This might or might not be identical to the query's final
6101 : * output target.
6102 : *
6103 : * The main argument for keeping the sort-input tlist the same as the final
6104 : * is that we avoid a separate projection node (which will be needed if
6105 : * they're different, because Sort can't project). However, there are also
6106 : * advantages to postponing tlist evaluation till after the Sort: it ensures
6107 : * a consistent order of evaluation for any volatile functions in the tlist,
6108 : * and if there's also a LIMIT, we can stop the query without ever computing
6109 : * tlist functions for later rows, which is beneficial for both volatile and
6110 : * expensive functions.
6111 : *
6112 : * Our current policy is to postpone volatile expressions till after the sort
6113 : * unconditionally (assuming that that's possible, ie they are in plain tlist
6114 : * columns and not ORDER BY/GROUP BY/DISTINCT columns). We also prefer to
6115 : * postpone set-returning expressions, because running them beforehand would
6116 : * bloat the sort dataset, and because it might cause unexpected output order
6117 : * if the sort isn't stable. However there's a constraint on that: all SRFs
6118 : * in the tlist should be evaluated at the same plan step, so that they can
6119 : * run in sync in nodeProjectSet. So if any SRFs are in sort columns, we
6120 : * mustn't postpone any SRFs. (Note that in principle that policy should
6121 : * probably get applied to the group/window input targetlists too, but we
6122 : * have not done that historically.) Lastly, expensive expressions are
6123 : * postponed if there is a LIMIT, or if root->tuple_fraction shows that
6124 : * partial evaluation of the query is possible (if neither is true, we expect
6125 : * to have to evaluate the expressions for every row anyway), or if there are
6126 : * any volatile or set-returning expressions (since once we've put in a
6127 : * projection at all, it won't cost any more to postpone more stuff).
6128 : *
6129 : * Another issue that could potentially be considered here is that
6130 : * evaluating tlist expressions could result in data that's either wider
6131 : * or narrower than the input Vars, thus changing the volume of data that
6132 : * has to go through the Sort. However, we usually have only a very bad
6133 : * idea of the output width of any expression more complex than a Var,
6134 : * so for now it seems too risky to try to optimize on that basis.
6135 : *
6136 : * Note that if we do produce a modified sort-input target, and then the
6137 : * query ends up not using an explicit Sort, no particular harm is done:
6138 : * we'll initially use the modified target for the preceding path nodes,
6139 : * but then change them to the final target with apply_projection_to_path.
6140 : * Moreover, in such a case the guarantees about evaluation order of
6141 : * volatile functions still hold, since the rows are sorted already.
6142 : *
6143 : * This function has some things in common with make_group_input_target and
6144 : * make_window_input_target, though the detailed rules for what to do are
6145 : * different. We never flatten/postpone any grouping or ordering columns;
6146 : * those are needed before the sort. If we do flatten a particular
6147 : * expression, we leave Aggref and WindowFunc nodes alone, since those were
6148 : * computed earlier.
6149 : *
6150 : * 'final_target' is the query's final target list (in PathTarget form)
6151 : * 'have_postponed_srfs' is an output argument, see below
6152 : *
6153 : * The result is the PathTarget to be computed by the plan node immediately
6154 : * below the Sort step (and the Distinct step, if any). This will be
6155 : * exactly final_target if we decide a projection step wouldn't be helpful.
6156 : *
6157 : * In addition, *have_postponed_srfs is set to true if we choose to postpone
6158 : * any set-returning functions to after the Sort.
6159 : */
6160 : static PathTarget *
6161 54616 : make_sort_input_target(PlannerInfo *root,
6162 : PathTarget *final_target,
6163 : bool *have_postponed_srfs)
6164 : {
6165 54616 : Query *parse = root->parse;
6166 : PathTarget *input_target;
6167 : int ncols;
6168 : bool *col_is_srf;
6169 : bool *postpone_col;
6170 : bool have_srf;
6171 : bool have_volatile;
6172 : bool have_expensive;
6173 : bool have_srf_sortcols;
6174 : bool postpone_srfs;
6175 : List *postponable_cols;
6176 : List *postponable_vars;
6177 : int i;
6178 : ListCell *lc;
6179 :
6180 : /* Shouldn't get here unless query has ORDER BY */
6181 : Assert(parse->sortClause);
6182 :
6183 54616 : *have_postponed_srfs = false; /* default result */
6184 :
6185 : /* Inspect tlist and collect per-column information */
6186 54616 : ncols = list_length(final_target->exprs);
6187 54616 : col_is_srf = (bool *) palloc0(ncols * sizeof(bool));
6188 54616 : postpone_col = (bool *) palloc0(ncols * sizeof(bool));
6189 54616 : have_srf = have_volatile = have_expensive = have_srf_sortcols = false;
6190 :
6191 54616 : i = 0;
6192 328582 : foreach(lc, final_target->exprs)
6193 : {
6194 273966 : Expr *expr = (Expr *) lfirst(lc);
6195 :
6196 : /*
6197 : * If the column has a sortgroupref, assume it has to be evaluated
6198 : * before sorting. Generally such columns would be ORDER BY, GROUP
6199 : * BY, etc targets. One exception is columns that were removed from
6200 : * GROUP BY by remove_useless_groupby_columns() ... but those would
6201 : * only be Vars anyway. There don't seem to be any cases where it
6202 : * would be worth the trouble to double-check.
6203 : */
6204 273966 : if (get_pathtarget_sortgroupref(final_target, i) == 0)
6205 : {
6206 : /*
6207 : * Check for SRF or volatile functions. Check the SRF case first
6208 : * because we must know whether we have any postponed SRFs.
6209 : */
6210 195116 : if (parse->hasTargetSRFs &&
6211 216 : expression_returns_set((Node *) expr))
6212 : {
6213 : /* We'll decide below whether these are postponable */
6214 96 : col_is_srf[i] = true;
6215 96 : have_srf = true;
6216 : }
6217 194804 : else if (contain_volatile_functions((Node *) expr))
6218 : {
6219 : /* Unconditionally postpone */
6220 142 : postpone_col[i] = true;
6221 142 : have_volatile = true;
6222 : }
6223 : else
6224 : {
6225 : /*
6226 : * Else check the cost. XXX it's annoying to have to do this
6227 : * when set_pathtarget_cost_width() just did it. Refactor to
6228 : * allow sharing the work?
6229 : */
6230 : QualCost cost;
6231 :
6232 194662 : cost_qual_eval_node(&cost, (Node *) expr, root);
6233 :
6234 : /*
6235 : * We arbitrarily define "expensive" as "more than 10X
6236 : * cpu_operator_cost". Note this will take in any PL function
6237 : * with default cost.
6238 : */
6239 194662 : if (cost.per_tuple > 10 * cpu_operator_cost)
6240 : {
6241 14360 : postpone_col[i] = true;
6242 14360 : have_expensive = true;
6243 : }
6244 : }
6245 : }
6246 : else
6247 : {
6248 : /* For sortgroupref cols, just check if any contain SRFs */
6249 79066 : if (!have_srf_sortcols &&
6250 79376 : parse->hasTargetSRFs &&
6251 310 : expression_returns_set((Node *) expr))
6252 124 : have_srf_sortcols = true;
6253 : }
6254 :
6255 273966 : i++;
6256 : }
6257 :
6258 : /*
6259 : * We can postpone SRFs if we have some but none are in sortgroupref cols.
6260 : */
6261 54616 : postpone_srfs = (have_srf && !have_srf_sortcols);
6262 :
6263 : /*
6264 : * If we don't need a post-sort projection, just return final_target.
6265 : */
6266 54616 : if (!(postpone_srfs || have_volatile ||
6267 54418 : (have_expensive &&
6268 8612 : (parse->limitCount || root->tuple_fraction > 0))))
6269 54382 : return final_target;
6270 :
6271 : /*
6272 : * Report whether the post-sort projection will contain set-returning
6273 : * functions. This is important because it affects whether the Sort can
6274 : * rely on the query's LIMIT (if any) to bound the number of rows it needs
6275 : * to return.
6276 : */
6277 234 : *have_postponed_srfs = postpone_srfs;
6278 :
6279 : /*
6280 : * Construct the sort-input target, taking all non-postponable columns and
6281 : * then adding Vars, PlaceHolderVars, Aggrefs, and WindowFuncs found in
6282 : * the postponable ones.
6283 : */
6284 234 : input_target = create_empty_pathtarget();
6285 234 : postponable_cols = NIL;
6286 :
6287 234 : i = 0;
6288 1942 : foreach(lc, final_target->exprs)
6289 : {
6290 1708 : Expr *expr = (Expr *) lfirst(lc);
6291 :
6292 1708 : if (postpone_col[i] || (postpone_srfs && col_is_srf[i]))
6293 286 : postponable_cols = lappend(postponable_cols, expr);
6294 : else
6295 1422 : add_column_to_pathtarget(input_target, expr,
6296 1422 : get_pathtarget_sortgroupref(final_target, i));
6297 :
6298 1708 : i++;
6299 : }
6300 :
6301 : /*
6302 : * Pull out all the Vars, Aggrefs, and WindowFuncs mentioned in
6303 : * postponable columns, and add them to the sort-input target if not
6304 : * already present. (Some might be there already.) We mustn't
6305 : * deconstruct Aggrefs or WindowFuncs here, since the projection node
6306 : * would be unable to recompute them.
6307 : */
6308 234 : postponable_vars = pull_var_clause((Node *) postponable_cols,
6309 : PVC_INCLUDE_AGGREGATES |
6310 : PVC_INCLUDE_WINDOWFUNCS |
6311 : PVC_INCLUDE_PLACEHOLDERS);
6312 234 : add_new_columns_to_pathtarget(input_target, postponable_vars);
6313 :
6314 : /* clean up cruft */
6315 234 : list_free(postponable_vars);
6316 234 : list_free(postponable_cols);
6317 :
6318 : /* XXX this represents even more redundant cost calculation ... */
6319 234 : return set_pathtarget_cost_width(root, input_target);
6320 : }
6321 :
6322 : /*
6323 : * get_cheapest_fractional_path
6324 : * Find the cheapest path for retrieving a specified fraction of all
6325 : * the tuples expected to be returned by the given relation.
6326 : *
6327 : * We interpret tuple_fraction the same way as grouping_planner.
6328 : *
6329 : * We assume set_cheapest() has been run on the given rel.
6330 : */
6331 : Path *
6332 461914 : get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction)
6333 : {
6334 461914 : Path *best_path = rel->cheapest_total_path;
6335 : ListCell *l;
6336 :
6337 : /* If all tuples will be retrieved, just return the cheapest-total path */
6338 461914 : if (tuple_fraction <= 0.0)
6339 456566 : return best_path;
6340 :
6341 : /* Convert absolute # of tuples to a fraction; no need to clamp to 0..1 */
6342 5348 : if (tuple_fraction >= 1.0 && best_path->rows > 0)
6343 2030 : tuple_fraction /= best_path->rows;
6344 :
6345 13260 : foreach(l, rel->pathlist)
6346 : {
6347 7912 : Path *path = (Path *) lfirst(l);
6348 :
6349 10476 : if (path == rel->cheapest_total_path ||
6350 2564 : compare_fractional_path_costs(best_path, path, tuple_fraction) <= 0)
6351 7686 : continue;
6352 :
6353 226 : best_path = path;
6354 : }
6355 :
6356 5348 : return best_path;
6357 : }
6358 :
6359 : /*
6360 : * adjust_paths_for_srfs
6361 : * Fix up the Paths of the given upperrel to handle tSRFs properly.
6362 : *
6363 : * The executor can only handle set-returning functions that appear at the
6364 : * top level of the targetlist of a ProjectSet plan node. If we have any SRFs
6365 : * that are not at top level, we need to split up the evaluation into multiple
6366 : * plan levels in which each level satisfies this constraint. This function
6367 : * modifies each Path of an upperrel that (might) compute any SRFs in its
6368 : * output tlist to insert appropriate projection steps.
6369 : *
6370 : * The given targets and targets_contain_srfs lists are from
6371 : * split_pathtarget_at_srfs(). We assume the existing Paths emit the first
6372 : * target in targets.
6373 : */
6374 : static void
6375 9000 : adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
6376 : List *targets, List *targets_contain_srfs)
6377 : {
6378 : ListCell *lc;
6379 :
6380 : Assert(list_length(targets) == list_length(targets_contain_srfs));
6381 : Assert(!linitial_int(targets_contain_srfs));
6382 :
6383 : /* If no SRFs appear at this plan level, nothing to do */
6384 9000 : if (list_length(targets) == 1)
6385 590 : return;
6386 :
6387 : /*
6388 : * Stack SRF-evaluation nodes atop each path for the rel.
6389 : *
6390 : * In principle we should re-run set_cheapest() here to identify the
6391 : * cheapest path, but it seems unlikely that adding the same tlist eval
6392 : * costs to all the paths would change that, so we don't bother. Instead,
6393 : * just assume that the cheapest-startup and cheapest-total paths remain
6394 : * so. (There should be no parameterized paths anymore, so we needn't
6395 : * worry about updating cheapest_parameterized_paths.)
6396 : */
6397 16846 : foreach(lc, rel->pathlist)
6398 : {
6399 8436 : Path *subpath = (Path *) lfirst(lc);
6400 8436 : Path *newpath = subpath;
6401 : ListCell *lc1,
6402 : *lc2;
6403 :
6404 : Assert(subpath->param_info == NULL);
6405 26416 : forboth(lc1, targets, lc2, targets_contain_srfs)
6406 : {
6407 17980 : PathTarget *thistarget = lfirst_node(PathTarget, lc1);
6408 17980 : bool contains_srfs = (bool) lfirst_int(lc2);
6409 :
6410 : /* If this level doesn't contain SRFs, do regular projection */
6411 17980 : if (contains_srfs)
6412 8496 : newpath = (Path *) create_set_projection_path(root,
6413 : rel,
6414 : newpath,
6415 : thistarget);
6416 : else
6417 9484 : newpath = (Path *) apply_projection_to_path(root,
6418 : rel,
6419 : newpath,
6420 : thistarget);
6421 : }
6422 8436 : lfirst(lc) = newpath;
6423 8436 : if (subpath == rel->cheapest_startup_path)
6424 334 : rel->cheapest_startup_path = newpath;
6425 8436 : if (subpath == rel->cheapest_total_path)
6426 334 : rel->cheapest_total_path = newpath;
6427 : }
6428 :
6429 : /* Likewise for partial paths, if any */
6430 8416 : foreach(lc, rel->partial_pathlist)
6431 : {
6432 6 : Path *subpath = (Path *) lfirst(lc);
6433 6 : Path *newpath = subpath;
6434 : ListCell *lc1,
6435 : *lc2;
6436 :
6437 : Assert(subpath->param_info == NULL);
6438 24 : forboth(lc1, targets, lc2, targets_contain_srfs)
6439 : {
6440 18 : PathTarget *thistarget = lfirst_node(PathTarget, lc1);
6441 18 : bool contains_srfs = (bool) lfirst_int(lc2);
6442 :
6443 : /* If this level doesn't contain SRFs, do regular projection */
6444 18 : if (contains_srfs)
6445 6 : newpath = (Path *) create_set_projection_path(root,
6446 : rel,
6447 : newpath,
6448 : thistarget);
6449 : else
6450 : {
6451 : /* avoid apply_projection_to_path, in case of multiple refs */
6452 12 : newpath = (Path *) create_projection_path(root,
6453 : rel,
6454 : newpath,
6455 : thistarget);
6456 : }
6457 : }
6458 6 : lfirst(lc) = newpath;
6459 : }
6460 : }
6461 :
6462 : /*
6463 : * expression_planner
6464 : * Perform planner's transformations on a standalone expression.
6465 : *
6466 : * Various utility commands need to evaluate expressions that are not part
6467 : * of a plannable query. They can do so using the executor's regular
6468 : * expression-execution machinery, but first the expression has to be fed
6469 : * through here to transform it from parser output to something executable.
6470 : *
6471 : * Currently, we disallow sublinks in standalone expressions, so there's no
6472 : * real "planning" involved here. (That might not always be true though.)
6473 : * What we must do is run eval_const_expressions to ensure that any function
6474 : * calls are converted to positional notation and function default arguments
6475 : * get inserted. The fact that constant subexpressions get simplified is a
6476 : * side-effect that is useful when the expression will get evaluated more than
6477 : * once. Also, we must fix operator function IDs.
6478 : *
6479 : * This does not return any information about dependencies of the expression.
6480 : * Hence callers should use the results only for the duration of the current
6481 : * query. Callers that would like to cache the results for longer should use
6482 : * expression_planner_with_deps, probably via the plancache.
6483 : *
6484 : * Note: this must not make any damaging changes to the passed-in expression
6485 : * tree. (It would actually be okay to apply fix_opfuncids to it, but since
6486 : * we first do an expression_tree_mutator-based walk, what is returned will
6487 : * be a new node tree.) The result is constructed in the current memory
6488 : * context; beware that this can leak a lot of additional stuff there, too.
6489 : */
6490 : Expr *
6491 230786 : expression_planner(Expr *expr)
6492 : {
6493 : Node *result;
6494 :
6495 : /*
6496 : * Convert named-argument function calls, insert default arguments and
6497 : * simplify constant subexprs
6498 : */
6499 230786 : result = eval_const_expressions(NULL, (Node *) expr);
6500 :
6501 : /* Fill in opfuncid values if missing */
6502 230768 : fix_opfuncids(result);
6503 :
6504 230768 : return (Expr *) result;
6505 : }
6506 :
6507 : /*
6508 : * expression_planner_with_deps
6509 : * Perform planner's transformations on a standalone expression,
6510 : * returning expression dependency information along with the result.
6511 : *
6512 : * This is identical to expression_planner() except that it also returns
6513 : * information about possible dependencies of the expression, ie identities of
6514 : * objects whose definitions affect the result. As in a PlannedStmt, these
6515 : * are expressed as a list of relation Oids and a list of PlanInvalItems.
6516 : */
6517 : Expr *
6518 358 : expression_planner_with_deps(Expr *expr,
6519 : List **relationOids,
6520 : List **invalItems)
6521 : {
6522 : Node *result;
6523 : PlannerGlobal glob;
6524 : PlannerInfo root;
6525 :
6526 : /* Make up dummy planner state so we can use setrefs machinery */
6527 6802 : MemSet(&glob, 0, sizeof(glob));
6528 358 : glob.type = T_PlannerGlobal;
6529 358 : glob.relationOids = NIL;
6530 358 : glob.invalItems = NIL;
6531 :
6532 31504 : MemSet(&root, 0, sizeof(root));
6533 358 : root.type = T_PlannerInfo;
6534 358 : root.glob = &glob;
6535 :
6536 : /*
6537 : * Convert named-argument function calls, insert default arguments and
6538 : * simplify constant subexprs. Collect identities of inlined functions
6539 : * and elided domains, too.
6540 : */
6541 358 : result = eval_const_expressions(&root, (Node *) expr);
6542 :
6543 : /* Fill in opfuncid values if missing */
6544 358 : fix_opfuncids(result);
6545 :
6546 : /*
6547 : * Now walk the finished expression to find anything else we ought to
6548 : * record as an expression dependency.
6549 : */
6550 358 : (void) extract_query_dependencies_walker(result, &root);
6551 :
6552 358 : *relationOids = glob.relationOids;
6553 358 : *invalItems = glob.invalItems;
6554 :
6555 358 : return (Expr *) result;
6556 : }
6557 :
6558 :
6559 : /*
6560 : * plan_cluster_use_sort
6561 : * Use the planner to decide how CLUSTER should implement sorting
6562 : *
6563 : * tableOid is the OID of a table to be clustered on its index indexOid
6564 : * (which is already known to be a btree index). Decide whether it's
6565 : * cheaper to do an indexscan or a seqscan-plus-sort to execute the CLUSTER.
6566 : * Return true to use sorting, false to use an indexscan.
6567 : *
6568 : * Note: caller had better already hold some type of lock on the table.
6569 : */
6570 : bool
6571 188 : plan_cluster_use_sort(Oid tableOid, Oid indexOid)
6572 : {
6573 : PlannerInfo *root;
6574 : Query *query;
6575 : PlannerGlobal *glob;
6576 : RangeTblEntry *rte;
6577 : RelOptInfo *rel;
6578 : IndexOptInfo *indexInfo;
6579 : QualCost indexExprCost;
6580 : Cost comparisonCost;
6581 : Path *seqScanPath;
6582 : Path seqScanAndSortPath;
6583 : IndexPath *indexScanPath;
6584 : ListCell *lc;
6585 :
6586 : /* We can short-circuit the cost comparison if indexscans are disabled */
6587 188 : if (!enable_indexscan)
6588 30 : return true; /* use sort */
6589 :
6590 : /* Set up mostly-dummy planner state */
6591 158 : query = makeNode(Query);
6592 158 : query->commandType = CMD_SELECT;
6593 :
6594 158 : glob = makeNode(PlannerGlobal);
6595 :
6596 158 : root = makeNode(PlannerInfo);
6597 158 : root->parse = query;
6598 158 : root->glob = glob;
6599 158 : root->query_level = 1;
6600 158 : root->planner_cxt = CurrentMemoryContext;
6601 158 : root->wt_param_id = -1;
6602 158 : root->join_domains = list_make1(makeNode(JoinDomain));
6603 :
6604 : /* Build a minimal RTE for the rel */
6605 158 : rte = makeNode(RangeTblEntry);
6606 158 : rte->rtekind = RTE_RELATION;
6607 158 : rte->relid = tableOid;
6608 158 : rte->relkind = RELKIND_RELATION; /* Don't be too picky. */
6609 158 : rte->rellockmode = AccessShareLock;
6610 158 : rte->lateral = false;
6611 158 : rte->inh = false;
6612 158 : rte->inFromCl = true;
6613 158 : query->rtable = list_make1(rte);
6614 158 : addRTEPermissionInfo(&query->rteperminfos, rte);
6615 :
6616 : /* Set up RTE/RelOptInfo arrays */
6617 158 : setup_simple_rel_arrays(root);
6618 :
6619 : /* Build RelOptInfo */
6620 158 : rel = build_simple_rel(root, 1, NULL);
6621 :
6622 : /* Locate IndexOptInfo for the target index */
6623 158 : indexInfo = NULL;
6624 196 : foreach(lc, rel->indexlist)
6625 : {
6626 196 : indexInfo = lfirst_node(IndexOptInfo, lc);
6627 196 : if (indexInfo->indexoid == indexOid)
6628 158 : break;
6629 : }
6630 :
6631 : /*
6632 : * It's possible that get_relation_info did not generate an IndexOptInfo
6633 : * for the desired index; this could happen if it's not yet reached its
6634 : * indcheckxmin usability horizon, or if it's a system index and we're
6635 : * ignoring system indexes. In such cases we should tell CLUSTER to not
6636 : * trust the index contents but use seqscan-and-sort.
6637 : */
6638 158 : if (lc == NULL) /* not in the list? */
6639 0 : return true; /* use sort */
6640 :
6641 : /*
6642 : * Rather than doing all the pushups that would be needed to use
6643 : * set_baserel_size_estimates, just do a quick hack for rows and width.
6644 : */
6645 158 : rel->rows = rel->tuples;
6646 158 : rel->reltarget->width = get_relation_data_width(tableOid, NULL);
6647 :
6648 158 : root->total_table_pages = rel->pages;
6649 :
6650 : /*
6651 : * Determine eval cost of the index expressions, if any. We need to
6652 : * charge twice that amount for each tuple comparison that happens during
6653 : * the sort, since tuplesort.c will have to re-evaluate the index
6654 : * expressions each time. (XXX that's pretty inefficient...)
6655 : */
6656 158 : cost_qual_eval(&indexExprCost, indexInfo->indexprs, root);
6657 158 : comparisonCost = 2.0 * (indexExprCost.startup + indexExprCost.per_tuple);
6658 :
6659 : /* Estimate the cost of seq scan + sort */
6660 158 : seqScanPath = create_seqscan_path(root, rel, NULL, 0);
6661 158 : cost_sort(&seqScanAndSortPath, root, NIL,
6662 158 : seqScanPath->total_cost, rel->tuples, rel->reltarget->width,
6663 : comparisonCost, maintenance_work_mem, -1.0);
6664 :
6665 : /* Estimate the cost of index scan */
6666 158 : indexScanPath = create_index_path(root, indexInfo,
6667 : NIL, NIL, NIL, NIL,
6668 : ForwardScanDirection, false,
6669 : NULL, 1.0, false);
6670 :
6671 158 : return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
6672 : }
6673 :
6674 : /*
6675 : * plan_create_index_workers
6676 : * Use the planner to decide how many parallel worker processes
6677 : * CREATE INDEX should request for use
6678 : *
6679 : * tableOid is the table on which the index is to be built. indexOid is the
6680 : * OID of an index to be created or reindexed (which must be a btree index).
6681 : *
6682 : * Return value is the number of parallel worker processes to request. It
6683 : * may be unsafe to proceed if this is 0. Note that this does not include the
6684 : * leader participating as a worker (value is always a number of parallel
6685 : * worker processes).
6686 : *
6687 : * Note: caller had better already hold some type of lock on the table and
6688 : * index.
6689 : */
6690 : int
6691 33374 : plan_create_index_workers(Oid tableOid, Oid indexOid)
6692 : {
6693 : PlannerInfo *root;
6694 : Query *query;
6695 : PlannerGlobal *glob;
6696 : RangeTblEntry *rte;
6697 : Relation heap;
6698 : Relation index;
6699 : RelOptInfo *rel;
6700 : int parallel_workers;
6701 : BlockNumber heap_blocks;
6702 : double reltuples;
6703 : double allvisfrac;
6704 :
6705 : /*
6706 : * We don't allow performing parallel operation in standalone backend or
6707 : * when parallelism is disabled.
6708 : */
6709 33374 : if (!IsUnderPostmaster || max_parallel_maintenance_workers == 0)
6710 426 : return 0;
6711 :
6712 : /* Set up largely-dummy planner state */
6713 32948 : query = makeNode(Query);
6714 32948 : query->commandType = CMD_SELECT;
6715 :
6716 32948 : glob = makeNode(PlannerGlobal);
6717 :
6718 32948 : root = makeNode(PlannerInfo);
6719 32948 : root->parse = query;
6720 32948 : root->glob = glob;
6721 32948 : root->query_level = 1;
6722 32948 : root->planner_cxt = CurrentMemoryContext;
6723 32948 : root->wt_param_id = -1;
6724 32948 : root->join_domains = list_make1(makeNode(JoinDomain));
6725 :
6726 : /*
6727 : * Build a minimal RTE.
6728 : *
6729 : * Mark the RTE with inh = true. This is a kludge to prevent
6730 : * get_relation_info() from fetching index info, which is necessary
6731 : * because it does not expect that any IndexOptInfo is currently
6732 : * undergoing REINDEX.
6733 : */
6734 32948 : rte = makeNode(RangeTblEntry);
6735 32948 : rte->rtekind = RTE_RELATION;
6736 32948 : rte->relid = tableOid;
6737 32948 : rte->relkind = RELKIND_RELATION; /* Don't be too picky. */
6738 32948 : rte->rellockmode = AccessShareLock;
6739 32948 : rte->lateral = false;
6740 32948 : rte->inh = true;
6741 32948 : rte->inFromCl = true;
6742 32948 : query->rtable = list_make1(rte);
6743 32948 : addRTEPermissionInfo(&query->rteperminfos, rte);
6744 :
6745 : /* Set up RTE/RelOptInfo arrays */
6746 32948 : setup_simple_rel_arrays(root);
6747 :
6748 : /* Build RelOptInfo */
6749 32948 : rel = build_simple_rel(root, 1, NULL);
6750 :
6751 : /* Rels are assumed already locked by the caller */
6752 32948 : heap = table_open(tableOid, NoLock);
6753 32948 : index = index_open(indexOid, NoLock);
6754 :
6755 : /*
6756 : * Determine if it's safe to proceed.
6757 : *
6758 : * Currently, parallel workers can't access the leader's temporary tables.
6759 : * Furthermore, any index predicate or index expressions must be parallel
6760 : * safe.
6761 : */
6762 32948 : if (heap->rd_rel->relpersistence == RELPERSISTENCE_TEMP ||
6763 31110 : !is_parallel_safe(root, (Node *) RelationGetIndexExpressions(index)) ||
6764 30990 : !is_parallel_safe(root, (Node *) RelationGetIndexPredicate(index)))
6765 : {
6766 1958 : parallel_workers = 0;
6767 1958 : goto done;
6768 : }
6769 :
6770 : /*
6771 : * If parallel_workers storage parameter is set for the table, accept that
6772 : * as the number of parallel worker processes to launch (though still cap
6773 : * at max_parallel_maintenance_workers). Note that we deliberately do not
6774 : * consider any other factor when parallel_workers is set. (e.g., memory
6775 : * use by workers.)
6776 : */
6777 30990 : if (rel->rel_parallel_workers != -1)
6778 : {
6779 14 : parallel_workers = Min(rel->rel_parallel_workers,
6780 : max_parallel_maintenance_workers);
6781 14 : goto done;
6782 : }
6783 :
6784 : /*
6785 : * Estimate heap relation size ourselves, since rel->pages cannot be
6786 : * trusted (heap RTE was marked as inheritance parent)
6787 : */
6788 30976 : estimate_rel_size(heap, NULL, &heap_blocks, &reltuples, &allvisfrac);
6789 :
6790 : /*
6791 : * Determine number of workers to scan the heap relation using generic
6792 : * model
6793 : */
6794 30976 : parallel_workers = compute_parallel_worker(rel, heap_blocks, -1,
6795 : max_parallel_maintenance_workers);
6796 :
6797 : /*
6798 : * Cap workers based on available maintenance_work_mem as needed.
6799 : *
6800 : * Note that each tuplesort participant receives an even share of the
6801 : * total maintenance_work_mem budget. Aim to leave participants
6802 : * (including the leader as a participant) with no less than 32MB of
6803 : * memory. This leaves cases where maintenance_work_mem is set to 64MB
6804 : * immediately past the threshold of being capable of launching a single
6805 : * parallel worker to sort.
6806 : */
6807 31108 : while (parallel_workers > 0 &&
6808 278 : maintenance_work_mem / (parallel_workers + 1) < 32768L)
6809 132 : parallel_workers--;
6810 :
6811 30976 : done:
6812 32948 : index_close(index, NoLock);
6813 32948 : table_close(heap, NoLock);
6814 :
6815 32948 : return parallel_workers;
6816 : }
6817 :
6818 : /*
6819 : * make_ordered_path
6820 : * Return a path ordered by 'pathkeys' based on the given 'path'. May
6821 : * return NULL if it doesn't make sense to generate an ordered path in
6822 : * this case.
6823 : */
6824 : static Path *
6825 42272 : make_ordered_path(PlannerInfo *root, RelOptInfo *rel, Path *path,
6826 : Path *cheapest_path, List *pathkeys)
6827 : {
6828 : bool is_sorted;
6829 : int presorted_keys;
6830 :
6831 42272 : is_sorted = pathkeys_count_contained_in(pathkeys,
6832 : path->pathkeys,
6833 : &presorted_keys);
6834 :
6835 42272 : if (!is_sorted)
6836 : {
6837 : /*
6838 : * Try at least sorting the cheapest path and also try incrementally
6839 : * sorting any path which is partially sorted already (no need to deal
6840 : * with paths which have presorted keys when incremental sort is
6841 : * disabled unless it's the cheapest input path).
6842 : */
6843 8192 : if (path != cheapest_path &&
6844 632 : (presorted_keys == 0 || !enable_incremental_sort))
6845 428 : return NULL;
6846 :
6847 : /*
6848 : * We've no need to consider both a sort and incremental sort. We'll
6849 : * just do a sort if there are no presorted keys and an incremental
6850 : * sort when there are presorted keys.
6851 : */
6852 7764 : if (presorted_keys == 0 || !enable_incremental_sort)
6853 7488 : path = (Path *) create_sort_path(root,
6854 : rel,
6855 : path,
6856 : pathkeys,
6857 : -1.0);
6858 : else
6859 276 : path = (Path *) create_incremental_sort_path(root,
6860 : rel,
6861 : path,
6862 : pathkeys,
6863 : presorted_keys,
6864 : -1.0);
6865 : }
6866 :
6867 41844 : return path;
6868 : }
6869 :
6870 : /*
6871 : * add_paths_to_grouping_rel
6872 : *
6873 : * Add non-partial paths to grouping relation.
6874 : */
6875 : static void
6876 34940 : add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
6877 : RelOptInfo *grouped_rel,
6878 : RelOptInfo *partially_grouped_rel,
6879 : const AggClauseCosts *agg_costs,
6880 : grouping_sets_data *gd, double dNumGroups,
6881 : GroupPathExtraData *extra)
6882 : {
6883 34940 : Query *parse = root->parse;
6884 34940 : Path *cheapest_path = input_rel->cheapest_total_path;
6885 : ListCell *lc;
6886 34940 : bool can_hash = (extra->flags & GROUPING_CAN_USE_HASH) != 0;
6887 34940 : bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0;
6888 34940 : List *havingQual = (List *) extra->havingQual;
6889 34940 : AggClauseCosts *agg_final_costs = &extra->agg_final_costs;
6890 :
6891 34940 : if (can_sort)
6892 : {
6893 : /*
6894 : * Use any available suitably-sorted path as input, and also consider
6895 : * sorting the cheapest-total path and incremental sort on any paths
6896 : * with presorted keys.
6897 : */
6898 72326 : foreach(lc, input_rel->pathlist)
6899 : {
6900 : ListCell *lc2;
6901 37392 : Path *path = (Path *) lfirst(lc);
6902 37392 : Path *path_save = path;
6903 37392 : List *pathkey_orderings = NIL;
6904 :
6905 : /* generate alternative group orderings that might be useful */
6906 37392 : pathkey_orderings = get_useful_group_keys_orderings(root, path);
6907 :
6908 : Assert(list_length(pathkey_orderings) > 0);
6909 :
6910 74910 : foreach(lc2, pathkey_orderings)
6911 : {
6912 37518 : PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
6913 :
6914 : /* restore the path (we replace it in the loop) */
6915 37518 : path = path_save;
6916 :
6917 37518 : path = make_ordered_path(root,
6918 : grouped_rel,
6919 : path,
6920 : cheapest_path,
6921 : info->pathkeys);
6922 37518 : if (path == NULL)
6923 326 : continue;
6924 :
6925 : /* Now decide what to stick atop it */
6926 37192 : if (parse->groupingSets)
6927 : {
6928 794 : consider_groupingsets_paths(root, grouped_rel,
6929 : path, true, can_hash,
6930 : gd, agg_costs, dNumGroups);
6931 : }
6932 36398 : else if (parse->hasAggs)
6933 : {
6934 : /*
6935 : * We have aggregation, possibly with plain GROUP BY. Make
6936 : * an AggPath.
6937 : */
6938 35752 : add_path(grouped_rel, (Path *)
6939 35752 : create_agg_path(root,
6940 : grouped_rel,
6941 : path,
6942 35752 : grouped_rel->reltarget,
6943 35752 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
6944 : AGGSPLIT_SIMPLE,
6945 : info->clauses,
6946 : havingQual,
6947 : agg_costs,
6948 : dNumGroups));
6949 : }
6950 646 : else if (parse->groupClause)
6951 : {
6952 : /*
6953 : * We have GROUP BY without aggregation or grouping sets.
6954 : * Make a GroupPath.
6955 : */
6956 646 : add_path(grouped_rel, (Path *)
6957 646 : create_group_path(root,
6958 : grouped_rel,
6959 : path,
6960 : info->clauses,
6961 : havingQual,
6962 : dNumGroups));
6963 : }
6964 : else
6965 : {
6966 : /* Other cases should have been handled above */
6967 : Assert(false);
6968 : }
6969 : }
6970 : }
6971 :
6972 : /*
6973 : * Instead of operating directly on the input relation, we can
6974 : * consider finalizing a partially aggregated path.
6975 : */
6976 34934 : if (partially_grouped_rel != NULL)
6977 : {
6978 3898 : foreach(lc, partially_grouped_rel->pathlist)
6979 : {
6980 : ListCell *lc2;
6981 2328 : Path *path = (Path *) lfirst(lc);
6982 2328 : Path *path_save = path;
6983 2328 : List *pathkey_orderings = NIL;
6984 :
6985 : /* generate alternative group orderings that might be useful */
6986 2328 : pathkey_orderings = get_useful_group_keys_orderings(root, path);
6987 :
6988 : Assert(list_length(pathkey_orderings) > 0);
6989 :
6990 : /* process all potentially interesting grouping reorderings */
6991 4656 : foreach(lc2, pathkey_orderings)
6992 : {
6993 2328 : PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
6994 :
6995 : /* restore the path (we replace it in the loop) */
6996 2328 : path = path_save;
6997 :
6998 2328 : path = make_ordered_path(root,
6999 : grouped_rel,
7000 : path,
7001 2328 : partially_grouped_rel->cheapest_total_path,
7002 : info->pathkeys);
7003 :
7004 2328 : if (path == NULL)
7005 96 : continue;
7006 :
7007 2232 : if (parse->hasAggs)
7008 1984 : add_path(grouped_rel, (Path *)
7009 1984 : create_agg_path(root,
7010 : grouped_rel,
7011 : path,
7012 1984 : grouped_rel->reltarget,
7013 1984 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
7014 : AGGSPLIT_FINAL_DESERIAL,
7015 : info->clauses,
7016 : havingQual,
7017 : agg_final_costs,
7018 : dNumGroups));
7019 : else
7020 248 : add_path(grouped_rel, (Path *)
7021 248 : create_group_path(root,
7022 : grouped_rel,
7023 : path,
7024 : info->clauses,
7025 : havingQual,
7026 : dNumGroups));
7027 :
7028 : }
7029 : }
7030 : }
7031 : }
7032 :
7033 34940 : if (can_hash)
7034 : {
7035 4500 : if (parse->groupingSets)
7036 : {
7037 : /*
7038 : * Try for a hash-only groupingsets path over unsorted input.
7039 : */
7040 650 : consider_groupingsets_paths(root, grouped_rel,
7041 : cheapest_path, false, true,
7042 : gd, agg_costs, dNumGroups);
7043 : }
7044 : else
7045 : {
7046 : /*
7047 : * Generate a HashAgg Path. We just need an Agg over the
7048 : * cheapest-total input path, since input order won't matter.
7049 : */
7050 3850 : add_path(grouped_rel, (Path *)
7051 3850 : create_agg_path(root, grouped_rel,
7052 : cheapest_path,
7053 3850 : grouped_rel->reltarget,
7054 : AGG_HASHED,
7055 : AGGSPLIT_SIMPLE,
7056 : root->processed_groupClause,
7057 : havingQual,
7058 : agg_costs,
7059 : dNumGroups));
7060 : }
7061 :
7062 : /*
7063 : * Generate a Finalize HashAgg Path atop of the cheapest partially
7064 : * grouped path, assuming there is one
7065 : */
7066 4500 : if (partially_grouped_rel && partially_grouped_rel->pathlist)
7067 : {
7068 766 : Path *path = partially_grouped_rel->cheapest_total_path;
7069 :
7070 766 : add_path(grouped_rel, (Path *)
7071 766 : create_agg_path(root,
7072 : grouped_rel,
7073 : path,
7074 766 : grouped_rel->reltarget,
7075 : AGG_HASHED,
7076 : AGGSPLIT_FINAL_DESERIAL,
7077 : root->processed_groupClause,
7078 : havingQual,
7079 : agg_final_costs,
7080 : dNumGroups));
7081 : }
7082 : }
7083 :
7084 : /*
7085 : * When partitionwise aggregate is used, we might have fully aggregated
7086 : * paths in the partial pathlist, because add_paths_to_append_rel() will
7087 : * consider a path for grouped_rel consisting of a Parallel Append of
7088 : * non-partial paths from each child.
7089 : */
7090 34940 : if (grouped_rel->partial_pathlist != NIL)
7091 162 : gather_grouping_paths(root, grouped_rel);
7092 34940 : }
7093 :
7094 : /*
7095 : * create_partial_grouping_paths
7096 : *
7097 : * Create a new upper relation representing the result of partial aggregation
7098 : * and populate it with appropriate paths. Note that we don't finalize the
7099 : * lists of paths here, so the caller can add additional partial or non-partial
7100 : * paths and must afterward call gather_grouping_paths and set_cheapest on
7101 : * the returned upper relation.
7102 : *
7103 : * All paths for this new upper relation -- both partial and non-partial --
7104 : * have been partially aggregated but require a subsequent FinalizeAggregate
7105 : * step.
7106 : *
7107 : * NB: This function is allowed to return NULL if it determines that there is
7108 : * no real need to create a new RelOptInfo.
7109 : */
7110 : static RelOptInfo *
7111 31750 : create_partial_grouping_paths(PlannerInfo *root,
7112 : RelOptInfo *grouped_rel,
7113 : RelOptInfo *input_rel,
7114 : grouping_sets_data *gd,
7115 : GroupPathExtraData *extra,
7116 : bool force_rel_creation)
7117 : {
7118 31750 : Query *parse = root->parse;
7119 : RelOptInfo *partially_grouped_rel;
7120 31750 : AggClauseCosts *agg_partial_costs = &extra->agg_partial_costs;
7121 31750 : AggClauseCosts *agg_final_costs = &extra->agg_final_costs;
7122 31750 : Path *cheapest_partial_path = NULL;
7123 31750 : Path *cheapest_total_path = NULL;
7124 31750 : double dNumPartialGroups = 0;
7125 31750 : double dNumPartialPartialGroups = 0;
7126 : ListCell *lc;
7127 31750 : bool can_hash = (extra->flags & GROUPING_CAN_USE_HASH) != 0;
7128 31750 : bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0;
7129 :
7130 : /*
7131 : * Consider whether we should generate partially aggregated non-partial
7132 : * paths. We can only do this if we have a non-partial path, and only if
7133 : * the parent of the input rel is performing partial partitionwise
7134 : * aggregation. (Note that extra->patype is the type of partitionwise
7135 : * aggregation being used at the parent level, not this level.)
7136 : */
7137 31750 : if (input_rel->pathlist != NIL &&
7138 31750 : extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL)
7139 594 : cheapest_total_path = input_rel->cheapest_total_path;
7140 :
7141 : /*
7142 : * If parallelism is possible for grouped_rel, then we should consider
7143 : * generating partially-grouped partial paths. However, if the input rel
7144 : * has no partial paths, then we can't.
7145 : */
7146 31750 : if (grouped_rel->consider_parallel && input_rel->partial_pathlist != NIL)
7147 1820 : cheapest_partial_path = linitial(input_rel->partial_pathlist);
7148 :
7149 : /*
7150 : * If we can't partially aggregate partial paths, and we can't partially
7151 : * aggregate non-partial paths, then don't bother creating the new
7152 : * RelOptInfo at all, unless the caller specified force_rel_creation.
7153 : */
7154 31750 : if (cheapest_total_path == NULL &&
7155 29672 : cheapest_partial_path == NULL &&
7156 29672 : !force_rel_creation)
7157 29586 : return NULL;
7158 :
7159 : /*
7160 : * Build a new upper relation to represent the result of partially
7161 : * aggregating the rows from the input relation.
7162 : */
7163 2164 : partially_grouped_rel = fetch_upper_rel(root,
7164 : UPPERREL_PARTIAL_GROUP_AGG,
7165 : grouped_rel->relids);
7166 2164 : partially_grouped_rel->consider_parallel =
7167 2164 : grouped_rel->consider_parallel;
7168 2164 : partially_grouped_rel->reloptkind = grouped_rel->reloptkind;
7169 2164 : partially_grouped_rel->serverid = grouped_rel->serverid;
7170 2164 : partially_grouped_rel->userid = grouped_rel->userid;
7171 2164 : partially_grouped_rel->useridiscurrent = grouped_rel->useridiscurrent;
7172 2164 : partially_grouped_rel->fdwroutine = grouped_rel->fdwroutine;
7173 :
7174 : /*
7175 : * Build target list for partial aggregate paths. These paths cannot just
7176 : * emit the same tlist as regular aggregate paths, because (1) we must
7177 : * include Vars and Aggrefs needed in HAVING, which might not appear in
7178 : * the result tlist, and (2) the Aggrefs must be set in partial mode.
7179 : */
7180 2164 : partially_grouped_rel->reltarget =
7181 2164 : make_partial_grouping_target(root, grouped_rel->reltarget,
7182 : extra->havingQual);
7183 :
7184 2164 : if (!extra->partial_costs_set)
7185 : {
7186 : /*
7187 : * Collect statistics about aggregates for estimating costs of
7188 : * performing aggregation in parallel.
7189 : */
7190 7692 : MemSet(agg_partial_costs, 0, sizeof(AggClauseCosts));
7191 7692 : MemSet(agg_final_costs, 0, sizeof(AggClauseCosts));
7192 1282 : if (parse->hasAggs)
7193 : {
7194 : /* partial phase */
7195 1154 : get_agg_clause_costs(root, AGGSPLIT_INITIAL_SERIAL,
7196 : agg_partial_costs);
7197 :
7198 : /* final phase */
7199 1154 : get_agg_clause_costs(root, AGGSPLIT_FINAL_DESERIAL,
7200 : agg_final_costs);
7201 : }
7202 :
7203 1282 : extra->partial_costs_set = true;
7204 : }
7205 :
7206 : /* Estimate number of partial groups. */
7207 2164 : if (cheapest_total_path != NULL)
7208 : dNumPartialGroups =
7209 594 : get_number_of_groups(root,
7210 : cheapest_total_path->rows,
7211 : gd,
7212 : extra->targetList);
7213 2164 : if (cheapest_partial_path != NULL)
7214 : dNumPartialPartialGroups =
7215 1820 : get_number_of_groups(root,
7216 : cheapest_partial_path->rows,
7217 : gd,
7218 : extra->targetList);
7219 :
7220 2164 : if (can_sort && cheapest_total_path != NULL)
7221 : {
7222 : /* This should have been checked previously */
7223 : Assert(parse->hasAggs || parse->groupClause);
7224 :
7225 : /*
7226 : * Use any available suitably-sorted path as input, and also consider
7227 : * sorting the cheapest partial path.
7228 : */
7229 1188 : foreach(lc, input_rel->pathlist)
7230 : {
7231 : ListCell *lc2;
7232 594 : Path *path = (Path *) lfirst(lc);
7233 594 : Path *path_save = path;
7234 594 : List *pathkey_orderings = NIL;
7235 :
7236 : /* generate alternative group orderings that might be useful */
7237 594 : pathkey_orderings = get_useful_group_keys_orderings(root, path);
7238 :
7239 : Assert(list_length(pathkey_orderings) > 0);
7240 :
7241 : /* process all potentially interesting grouping reorderings */
7242 1188 : foreach(lc2, pathkey_orderings)
7243 : {
7244 594 : PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
7245 :
7246 : /* restore the path (we replace it in the loop) */
7247 594 : path = path_save;
7248 :
7249 594 : path = make_ordered_path(root,
7250 : partially_grouped_rel,
7251 : path,
7252 : cheapest_total_path,
7253 : info->pathkeys);
7254 :
7255 594 : if (path == NULL)
7256 0 : continue;
7257 :
7258 594 : if (parse->hasAggs)
7259 522 : add_path(partially_grouped_rel, (Path *)
7260 522 : create_agg_path(root,
7261 : partially_grouped_rel,
7262 : path,
7263 522 : partially_grouped_rel->reltarget,
7264 522 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
7265 : AGGSPLIT_INITIAL_SERIAL,
7266 : info->clauses,
7267 : NIL,
7268 : agg_partial_costs,
7269 : dNumPartialGroups));
7270 : else
7271 72 : add_path(partially_grouped_rel, (Path *)
7272 72 : create_group_path(root,
7273 : partially_grouped_rel,
7274 : path,
7275 : info->clauses,
7276 : NIL,
7277 : dNumPartialGroups));
7278 : }
7279 : }
7280 : }
7281 :
7282 2164 : if (can_sort && cheapest_partial_path != NULL)
7283 : {
7284 : /* Similar to above logic, but for partial paths. */
7285 3652 : foreach(lc, input_rel->partial_pathlist)
7286 : {
7287 : ListCell *lc2;
7288 1832 : Path *path = (Path *) lfirst(lc);
7289 1832 : Path *path_save = path;
7290 1832 : List *pathkey_orderings = NIL;
7291 :
7292 : /* generate alternative group orderings that might be useful */
7293 1832 : pathkey_orderings = get_useful_group_keys_orderings(root, path);
7294 :
7295 : Assert(list_length(pathkey_orderings) > 0);
7296 :
7297 : /* process all potentially interesting grouping reorderings */
7298 3664 : foreach(lc2, pathkey_orderings)
7299 : {
7300 1832 : PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
7301 :
7302 :
7303 : /* restore the path (we replace it in the loop) */
7304 1832 : path = path_save;
7305 :
7306 1832 : path = make_ordered_path(root,
7307 : partially_grouped_rel,
7308 : path,
7309 : cheapest_partial_path,
7310 : info->pathkeys);
7311 :
7312 1832 : if (path == NULL)
7313 6 : continue;
7314 :
7315 1826 : if (parse->hasAggs)
7316 1674 : add_partial_path(partially_grouped_rel, (Path *)
7317 1674 : create_agg_path(root,
7318 : partially_grouped_rel,
7319 : path,
7320 1674 : partially_grouped_rel->reltarget,
7321 1674 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
7322 : AGGSPLIT_INITIAL_SERIAL,
7323 : info->clauses,
7324 : NIL,
7325 : agg_partial_costs,
7326 : dNumPartialPartialGroups));
7327 : else
7328 152 : add_partial_path(partially_grouped_rel, (Path *)
7329 152 : create_group_path(root,
7330 : partially_grouped_rel,
7331 : path,
7332 : info->clauses,
7333 : NIL,
7334 : dNumPartialPartialGroups));
7335 : }
7336 : }
7337 : }
7338 :
7339 : /*
7340 : * Add a partially-grouped HashAgg Path where possible
7341 : */
7342 2164 : if (can_hash && cheapest_total_path != NULL)
7343 : {
7344 : /* Checked above */
7345 : Assert(parse->hasAggs || parse->groupClause);
7346 :
7347 594 : add_path(partially_grouped_rel, (Path *)
7348 594 : create_agg_path(root,
7349 : partially_grouped_rel,
7350 : cheapest_total_path,
7351 594 : partially_grouped_rel->reltarget,
7352 : AGG_HASHED,
7353 : AGGSPLIT_INITIAL_SERIAL,
7354 : root->processed_groupClause,
7355 : NIL,
7356 : agg_partial_costs,
7357 : dNumPartialGroups));
7358 : }
7359 :
7360 : /*
7361 : * Now add a partially-grouped HashAgg partial Path where possible
7362 : */
7363 2164 : if (can_hash && cheapest_partial_path != NULL)
7364 : {
7365 1016 : add_partial_path(partially_grouped_rel, (Path *)
7366 1016 : create_agg_path(root,
7367 : partially_grouped_rel,
7368 : cheapest_partial_path,
7369 1016 : partially_grouped_rel->reltarget,
7370 : AGG_HASHED,
7371 : AGGSPLIT_INITIAL_SERIAL,
7372 : root->processed_groupClause,
7373 : NIL,
7374 : agg_partial_costs,
7375 : dNumPartialPartialGroups));
7376 : }
7377 :
7378 : /*
7379 : * If there is an FDW that's responsible for all baserels of the query,
7380 : * let it consider adding partially grouped ForeignPaths.
7381 : */
7382 2164 : if (partially_grouped_rel->fdwroutine &&
7383 6 : partially_grouped_rel->fdwroutine->GetForeignUpperPaths)
7384 : {
7385 6 : FdwRoutine *fdwroutine = partially_grouped_rel->fdwroutine;
7386 :
7387 6 : fdwroutine->GetForeignUpperPaths(root,
7388 : UPPERREL_PARTIAL_GROUP_AGG,
7389 : input_rel, partially_grouped_rel,
7390 : extra);
7391 : }
7392 :
7393 2164 : return partially_grouped_rel;
7394 : }
7395 :
7396 : /*
7397 : * Generate Gather and Gather Merge paths for a grouping relation or partial
7398 : * grouping relation.
7399 : *
7400 : * generate_useful_gather_paths does most of the work, but we also consider a
7401 : * special case: we could try sorting the data by the group_pathkeys and then
7402 : * applying Gather Merge.
7403 : *
7404 : * NB: This function shouldn't be used for anything other than a grouped or
7405 : * partially grouped relation not only because of the fact that it explicitly
7406 : * references group_pathkeys but we pass "true" as the third argument to
7407 : * generate_useful_gather_paths().
7408 : */
7409 : static void
7410 1646 : gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
7411 : {
7412 : ListCell *lc;
7413 : Path *cheapest_partial_path;
7414 : List *groupby_pathkeys;
7415 :
7416 : /*
7417 : * This occurs after any partial aggregation has taken place, so trim off
7418 : * any pathkeys added for ORDER BY / DISTINCT aggregates.
7419 : */
7420 1646 : if (list_length(root->group_pathkeys) > root->num_groupby_pathkeys)
7421 18 : groupby_pathkeys = list_copy_head(root->group_pathkeys,
7422 : root->num_groupby_pathkeys);
7423 : else
7424 1628 : groupby_pathkeys = root->group_pathkeys;
7425 :
7426 : /* Try Gather for unordered paths and Gather Merge for ordered ones. */
7427 1646 : generate_useful_gather_paths(root, rel, true);
7428 :
7429 1646 : cheapest_partial_path = linitial(rel->partial_pathlist);
7430 :
7431 : /* XXX Shouldn't this also consider the group-key-reordering? */
7432 3904 : foreach(lc, rel->partial_pathlist)
7433 : {
7434 2258 : Path *path = (Path *) lfirst(lc);
7435 : bool is_sorted;
7436 : int presorted_keys;
7437 : double total_groups;
7438 :
7439 2258 : is_sorted = pathkeys_count_contained_in(groupby_pathkeys,
7440 : path->pathkeys,
7441 : &presorted_keys);
7442 :
7443 2258 : if (is_sorted)
7444 1484 : continue;
7445 :
7446 : /*
7447 : * Try at least sorting the cheapest path and also try incrementally
7448 : * sorting any path which is partially sorted already (no need to deal
7449 : * with paths which have presorted keys when incremental sort is
7450 : * disabled unless it's the cheapest input path).
7451 : */
7452 774 : if (path != cheapest_partial_path &&
7453 0 : (presorted_keys == 0 || !enable_incremental_sort))
7454 0 : continue;
7455 :
7456 774 : total_groups = path->rows * path->parallel_workers;
7457 :
7458 : /*
7459 : * We've no need to consider both a sort and incremental sort. We'll
7460 : * just do a sort if there are no presorted keys and an incremental
7461 : * sort when there are presorted keys.
7462 : */
7463 774 : if (presorted_keys == 0 || !enable_incremental_sort)
7464 774 : path = (Path *) create_sort_path(root, rel, path,
7465 : groupby_pathkeys,
7466 : -1.0);
7467 : else
7468 0 : path = (Path *) create_incremental_sort_path(root,
7469 : rel,
7470 : path,
7471 : groupby_pathkeys,
7472 : presorted_keys,
7473 : -1.0);
7474 :
7475 : path = (Path *)
7476 774 : create_gather_merge_path(root,
7477 : rel,
7478 : path,
7479 774 : rel->reltarget,
7480 : groupby_pathkeys,
7481 : NULL,
7482 : &total_groups);
7483 :
7484 774 : add_path(rel, path);
7485 : }
7486 1646 : }
7487 :
7488 : /*
7489 : * can_partial_agg
7490 : *
7491 : * Determines whether or not partial grouping and/or aggregation is possible.
7492 : * Returns true when possible, false otherwise.
7493 : */
7494 : static bool
7495 34136 : can_partial_agg(PlannerInfo *root)
7496 : {
7497 34136 : Query *parse = root->parse;
7498 :
7499 34136 : if (!parse->hasAggs && parse->groupClause == NIL)
7500 : {
7501 : /*
7502 : * We don't know how to do parallel aggregation unless we have either
7503 : * some aggregates or a grouping clause.
7504 : */
7505 0 : return false;
7506 : }
7507 34136 : else if (parse->groupingSets)
7508 : {
7509 : /* We don't know how to do grouping sets in parallel. */
7510 728 : return false;
7511 : }
7512 33408 : else if (root->hasNonPartialAggs || root->hasNonSerialAggs)
7513 : {
7514 : /* Insufficient support for partial mode. */
7515 2984 : return false;
7516 : }
7517 :
7518 : /* Everything looks good. */
7519 30424 : return true;
7520 : }
7521 :
7522 : /*
7523 : * apply_scanjoin_target_to_paths
7524 : *
7525 : * Adjust the final scan/join relation, and recursively all of its children,
7526 : * to generate the final scan/join target. It would be more correct to model
7527 : * this as a separate planning step with a new RelOptInfo at the toplevel and
7528 : * for each child relation, but doing it this way is noticeably cheaper.
7529 : * Maybe that problem can be solved at some point, but for now we do this.
7530 : *
7531 : * If tlist_same_exprs is true, then the scan/join target to be applied has
7532 : * the same expressions as the existing reltarget, so we need only insert the
7533 : * appropriate sortgroupref information. By avoiding the creation of
7534 : * projection paths we save effort both immediately and at plan creation time.
7535 : */
7536 : static void
7537 504366 : apply_scanjoin_target_to_paths(PlannerInfo *root,
7538 : RelOptInfo *rel,
7539 : List *scanjoin_targets,
7540 : List *scanjoin_targets_contain_srfs,
7541 : bool scanjoin_target_parallel_safe,
7542 : bool tlist_same_exprs)
7543 : {
7544 504366 : bool rel_is_partitioned = IS_PARTITIONED_REL(rel);
7545 : PathTarget *scanjoin_target;
7546 : ListCell *lc;
7547 :
7548 : /* This recurses, so be paranoid. */
7549 504366 : check_stack_depth();
7550 :
7551 : /*
7552 : * If the rel is partitioned, we want to drop its existing paths and
7553 : * generate new ones. This function would still be correct if we kept the
7554 : * existing paths: we'd modify them to generate the correct target above
7555 : * the partitioning Append, and then they'd compete on cost with paths
7556 : * generating the target below the Append. However, in our current cost
7557 : * model the latter way is always the same or cheaper cost, so modifying
7558 : * the existing paths would just be useless work. Moreover, when the cost
7559 : * is the same, varying roundoff errors might sometimes allow an existing
7560 : * path to be picked, resulting in undesirable cross-platform plan
7561 : * variations. So we drop old paths and thereby force the work to be done
7562 : * below the Append, except in the case of a non-parallel-safe target.
7563 : *
7564 : * Some care is needed, because we have to allow
7565 : * generate_useful_gather_paths to see the old partial paths in the next
7566 : * stanza. Hence, zap the main pathlist here, then allow
7567 : * generate_useful_gather_paths to add path(s) to the main list, and
7568 : * finally zap the partial pathlist.
7569 : */
7570 504366 : if (rel_is_partitioned)
7571 12300 : rel->pathlist = NIL;
7572 :
7573 : /*
7574 : * If the scan/join target is not parallel-safe, partial paths cannot
7575 : * generate it.
7576 : */
7577 504366 : if (!scanjoin_target_parallel_safe)
7578 : {
7579 : /*
7580 : * Since we can't generate the final scan/join target in parallel
7581 : * workers, this is our last opportunity to use any partial paths that
7582 : * exist; so build Gather path(s) that use them and emit whatever the
7583 : * current reltarget is. We don't do this in the case where the
7584 : * target is parallel-safe, since we will be able to generate superior
7585 : * paths by doing it after the final scan/join target has been
7586 : * applied.
7587 : */
7588 75550 : generate_useful_gather_paths(root, rel, false);
7589 :
7590 : /* Can't use parallel query above this level. */
7591 75550 : rel->partial_pathlist = NIL;
7592 75550 : rel->consider_parallel = false;
7593 : }
7594 :
7595 : /* Finish dropping old paths for a partitioned rel, per comment above */
7596 504366 : if (rel_is_partitioned)
7597 12300 : rel->partial_pathlist = NIL;
7598 :
7599 : /* Extract SRF-free scan/join target. */
7600 504366 : scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
7601 :
7602 : /*
7603 : * Apply the SRF-free scan/join target to each existing path.
7604 : *
7605 : * If the tlist exprs are the same, we can just inject the sortgroupref
7606 : * information into the existing pathtargets. Otherwise, replace each
7607 : * path with a projection path that generates the SRF-free scan/join
7608 : * target. This can't change the ordering of paths within rel->pathlist,
7609 : * so we just modify the list in place.
7610 : */
7611 1036594 : foreach(lc, rel->pathlist)
7612 : {
7613 532228 : Path *subpath = (Path *) lfirst(lc);
7614 :
7615 : /* Shouldn't have any parameterized paths anymore */
7616 : Assert(subpath->param_info == NULL);
7617 :
7618 532228 : if (tlist_same_exprs)
7619 178638 : subpath->pathtarget->sortgrouprefs =
7620 178638 : scanjoin_target->sortgrouprefs;
7621 : else
7622 : {
7623 : Path *newpath;
7624 :
7625 353590 : newpath = (Path *) create_projection_path(root, rel, subpath,
7626 : scanjoin_target);
7627 353590 : lfirst(lc) = newpath;
7628 : }
7629 : }
7630 :
7631 : /* Likewise adjust the targets for any partial paths. */
7632 524194 : foreach(lc, rel->partial_pathlist)
7633 : {
7634 19828 : Path *subpath = (Path *) lfirst(lc);
7635 :
7636 : /* Shouldn't have any parameterized paths anymore */
7637 : Assert(subpath->param_info == NULL);
7638 :
7639 19828 : if (tlist_same_exprs)
7640 16266 : subpath->pathtarget->sortgrouprefs =
7641 16266 : scanjoin_target->sortgrouprefs;
7642 : else
7643 : {
7644 : Path *newpath;
7645 :
7646 3562 : newpath = (Path *) create_projection_path(root, rel, subpath,
7647 : scanjoin_target);
7648 3562 : lfirst(lc) = newpath;
7649 : }
7650 : }
7651 :
7652 : /*
7653 : * Now, if final scan/join target contains SRFs, insert ProjectSetPath(s)
7654 : * atop each existing path. (Note that this function doesn't look at the
7655 : * cheapest-path fields, which is a good thing because they're bogus right
7656 : * now.)
7657 : */
7658 504366 : if (root->parse->hasTargetSRFs)
7659 8410 : adjust_paths_for_srfs(root, rel,
7660 : scanjoin_targets,
7661 : scanjoin_targets_contain_srfs);
7662 :
7663 : /*
7664 : * Update the rel's target to be the final (with SRFs) scan/join target.
7665 : * This now matches the actual output of all the paths, and we might get
7666 : * confused in createplan.c if they don't agree. We must do this now so
7667 : * that any append paths made in the next part will use the correct
7668 : * pathtarget (cf. create_append_path).
7669 : *
7670 : * Note that this is also necessary if GetForeignUpperPaths() gets called
7671 : * on the final scan/join relation or on any of its children, since the
7672 : * FDW might look at the rel's target to create ForeignPaths.
7673 : */
7674 504366 : rel->reltarget = llast_node(PathTarget, scanjoin_targets);
7675 :
7676 : /*
7677 : * If the relation is partitioned, recursively apply the scan/join target
7678 : * to all partitions, and generate brand-new Append paths in which the
7679 : * scan/join target is computed below the Append rather than above it.
7680 : * Since Append is not projection-capable, that might save a separate
7681 : * Result node, and it also is important for partitionwise aggregate.
7682 : */
7683 504366 : if (rel_is_partitioned)
7684 : {
7685 12300 : List *live_children = NIL;
7686 : int i;
7687 :
7688 : /* Adjust each partition. */
7689 12300 : i = -1;
7690 34956 : while ((i = bms_next_member(rel->live_parts, i)) >= 0)
7691 : {
7692 22656 : RelOptInfo *child_rel = rel->part_rels[i];
7693 : AppendRelInfo **appinfos;
7694 : int nappinfos;
7695 22656 : List *child_scanjoin_targets = NIL;
7696 :
7697 : Assert(child_rel != NULL);
7698 :
7699 : /* Dummy children can be ignored. */
7700 22656 : if (IS_DUMMY_REL(child_rel))
7701 42 : continue;
7702 :
7703 : /* Translate scan/join targets for this child. */
7704 22614 : appinfos = find_appinfos_by_relids(root, child_rel->relids,
7705 : &nappinfos);
7706 45228 : foreach(lc, scanjoin_targets)
7707 : {
7708 22614 : PathTarget *target = lfirst_node(PathTarget, lc);
7709 :
7710 22614 : target = copy_pathtarget(target);
7711 22614 : target->exprs = (List *)
7712 22614 : adjust_appendrel_attrs(root,
7713 22614 : (Node *) target->exprs,
7714 : nappinfos, appinfos);
7715 22614 : child_scanjoin_targets = lappend(child_scanjoin_targets,
7716 : target);
7717 : }
7718 22614 : pfree(appinfos);
7719 :
7720 : /* Recursion does the real work. */
7721 22614 : apply_scanjoin_target_to_paths(root, child_rel,
7722 : child_scanjoin_targets,
7723 : scanjoin_targets_contain_srfs,
7724 : scanjoin_target_parallel_safe,
7725 : tlist_same_exprs);
7726 :
7727 : /* Save non-dummy children for Append paths. */
7728 22614 : if (!IS_DUMMY_REL(child_rel))
7729 22614 : live_children = lappend(live_children, child_rel);
7730 : }
7731 :
7732 : /* Build new paths for this relation by appending child paths. */
7733 12300 : add_paths_to_append_rel(root, rel, live_children);
7734 : }
7735 :
7736 : /*
7737 : * Consider generating Gather or Gather Merge paths. We must only do this
7738 : * if the relation is parallel safe, and we don't do it for child rels to
7739 : * avoid creating multiple Gather nodes within the same plan. We must do
7740 : * this after all paths have been generated and before set_cheapest, since
7741 : * one of the generated paths may turn out to be the cheapest one.
7742 : */
7743 504366 : if (rel->consider_parallel && !IS_OTHER_REL(rel))
7744 146158 : generate_useful_gather_paths(root, rel, false);
7745 :
7746 : /*
7747 : * Reassess which paths are the cheapest, now that we've potentially added
7748 : * new Gather (or Gather Merge) and/or Append (or MergeAppend) paths to
7749 : * this relation.
7750 : */
7751 504366 : set_cheapest(rel);
7752 504366 : }
7753 :
7754 : /*
7755 : * create_partitionwise_grouping_paths
7756 : *
7757 : * If the partition keys of input relation are part of the GROUP BY clause, all
7758 : * the rows belonging to a given group come from a single partition. This
7759 : * allows aggregation/grouping over a partitioned relation to be broken down
7760 : * into aggregation/grouping on each partition. This should be no worse, and
7761 : * often better, than the normal approach.
7762 : *
7763 : * However, if the GROUP BY clause does not contain all the partition keys,
7764 : * rows from a given group may be spread across multiple partitions. In that
7765 : * case, we perform partial aggregation for each group, append the results,
7766 : * and then finalize aggregation. This is less certain to win than the
7767 : * previous case. It may win if the PartialAggregate stage greatly reduces
7768 : * the number of groups, because fewer rows will pass through the Append node.
7769 : * It may lose if we have lots of small groups.
7770 : */
7771 : static void
7772 514 : create_partitionwise_grouping_paths(PlannerInfo *root,
7773 : RelOptInfo *input_rel,
7774 : RelOptInfo *grouped_rel,
7775 : RelOptInfo *partially_grouped_rel,
7776 : const AggClauseCosts *agg_costs,
7777 : grouping_sets_data *gd,
7778 : PartitionwiseAggregateType patype,
7779 : GroupPathExtraData *extra)
7780 : {
7781 514 : List *grouped_live_children = NIL;
7782 514 : List *partially_grouped_live_children = NIL;
7783 514 : PathTarget *target = grouped_rel->reltarget;
7784 514 : bool partial_grouping_valid = true;
7785 : int i;
7786 :
7787 : Assert(patype != PARTITIONWISE_AGGREGATE_NONE);
7788 : Assert(patype != PARTITIONWISE_AGGREGATE_PARTIAL ||
7789 : partially_grouped_rel != NULL);
7790 :
7791 : /* Add paths for partitionwise aggregation/grouping. */
7792 514 : i = -1;
7793 1912 : while ((i = bms_next_member(input_rel->live_parts, i)) >= 0)
7794 : {
7795 1398 : RelOptInfo *child_input_rel = input_rel->part_rels[i];
7796 : PathTarget *child_target;
7797 : AppendRelInfo **appinfos;
7798 : int nappinfos;
7799 : GroupPathExtraData child_extra;
7800 : RelOptInfo *child_grouped_rel;
7801 : RelOptInfo *child_partially_grouped_rel;
7802 :
7803 : Assert(child_input_rel != NULL);
7804 :
7805 : /* Dummy children can be ignored. */
7806 1398 : if (IS_DUMMY_REL(child_input_rel))
7807 0 : continue;
7808 :
7809 1398 : child_target = copy_pathtarget(target);
7810 :
7811 : /*
7812 : * Copy the given "extra" structure as is and then override the
7813 : * members specific to this child.
7814 : */
7815 1398 : memcpy(&child_extra, extra, sizeof(child_extra));
7816 :
7817 1398 : appinfos = find_appinfos_by_relids(root, child_input_rel->relids,
7818 : &nappinfos);
7819 :
7820 1398 : child_target->exprs = (List *)
7821 1398 : adjust_appendrel_attrs(root,
7822 1398 : (Node *) target->exprs,
7823 : nappinfos, appinfos);
7824 :
7825 : /* Translate havingQual and targetList. */
7826 1398 : child_extra.havingQual = (Node *)
7827 : adjust_appendrel_attrs(root,
7828 : extra->havingQual,
7829 : nappinfos, appinfos);
7830 1398 : child_extra.targetList = (List *)
7831 1398 : adjust_appendrel_attrs(root,
7832 1398 : (Node *) extra->targetList,
7833 : nappinfos, appinfos);
7834 :
7835 : /*
7836 : * extra->patype was the value computed for our parent rel; patype is
7837 : * the value for this relation. For the child, our value is its
7838 : * parent rel's value.
7839 : */
7840 1398 : child_extra.patype = patype;
7841 :
7842 : /*
7843 : * Create grouping relation to hold fully aggregated grouping and/or
7844 : * aggregation paths for the child.
7845 : */
7846 1398 : child_grouped_rel = make_grouping_rel(root, child_input_rel,
7847 : child_target,
7848 1398 : extra->target_parallel_safe,
7849 : child_extra.havingQual);
7850 :
7851 : /* Create grouping paths for this child relation. */
7852 1398 : create_ordinary_grouping_paths(root, child_input_rel,
7853 : child_grouped_rel,
7854 : agg_costs, gd, &child_extra,
7855 : &child_partially_grouped_rel);
7856 :
7857 1398 : if (child_partially_grouped_rel)
7858 : {
7859 : partially_grouped_live_children =
7860 882 : lappend(partially_grouped_live_children,
7861 : child_partially_grouped_rel);
7862 : }
7863 : else
7864 516 : partial_grouping_valid = false;
7865 :
7866 1398 : if (patype == PARTITIONWISE_AGGREGATE_FULL)
7867 : {
7868 804 : set_cheapest(child_grouped_rel);
7869 804 : grouped_live_children = lappend(grouped_live_children,
7870 : child_grouped_rel);
7871 : }
7872 :
7873 1398 : pfree(appinfos);
7874 : }
7875 :
7876 : /*
7877 : * Try to create append paths for partially grouped children. For full
7878 : * partitionwise aggregation, we might have paths in the partial_pathlist
7879 : * if parallel aggregation is possible. For partial partitionwise
7880 : * aggregation, we may have paths in both pathlist and partial_pathlist.
7881 : *
7882 : * NB: We must have a partially grouped path for every child in order to
7883 : * generate a partially grouped path for this relation.
7884 : */
7885 514 : if (partially_grouped_rel && partial_grouping_valid)
7886 : {
7887 : Assert(partially_grouped_live_children != NIL);
7888 :
7889 338 : add_paths_to_append_rel(root, partially_grouped_rel,
7890 : partially_grouped_live_children);
7891 :
7892 : /*
7893 : * We need call set_cheapest, since the finalization step will use the
7894 : * cheapest path from the rel.
7895 : */
7896 338 : if (partially_grouped_rel->pathlist)
7897 338 : set_cheapest(partially_grouped_rel);
7898 : }
7899 :
7900 : /* If possible, create append paths for fully grouped children. */
7901 514 : if (patype == PARTITIONWISE_AGGREGATE_FULL)
7902 : {
7903 : Assert(grouped_live_children != NIL);
7904 :
7905 284 : add_paths_to_append_rel(root, grouped_rel, grouped_live_children);
7906 : }
7907 514 : }
7908 :
7909 : /*
7910 : * group_by_has_partkey
7911 : *
7912 : * Returns true, if all the partition keys of the given relation are part of
7913 : * the GROUP BY clauses, false otherwise.
7914 : */
7915 : static bool
7916 508 : group_by_has_partkey(RelOptInfo *input_rel,
7917 : List *targetList,
7918 : List *groupClause)
7919 : {
7920 508 : List *groupexprs = get_sortgrouplist_exprs(groupClause, targetList);
7921 508 : int cnt = 0;
7922 : int partnatts;
7923 :
7924 : /* Input relation should be partitioned. */
7925 : Assert(input_rel->part_scheme);
7926 :
7927 : /* Rule out early, if there are no partition keys present. */
7928 508 : if (!input_rel->partexprs)
7929 0 : return false;
7930 :
7931 508 : partnatts = input_rel->part_scheme->partnatts;
7932 :
7933 828 : for (cnt = 0; cnt < partnatts; cnt++)
7934 : {
7935 544 : List *partexprs = input_rel->partexprs[cnt];
7936 : ListCell *lc;
7937 544 : bool found = false;
7938 :
7939 762 : foreach(lc, partexprs)
7940 : {
7941 538 : Expr *partexpr = lfirst(lc);
7942 :
7943 538 : if (list_member(groupexprs, partexpr))
7944 : {
7945 320 : found = true;
7946 320 : break;
7947 : }
7948 : }
7949 :
7950 : /*
7951 : * If none of the partition key expressions match with any of the
7952 : * GROUP BY expression, return false.
7953 : */
7954 544 : if (!found)
7955 224 : return false;
7956 : }
7957 :
7958 284 : return true;
7959 : }
7960 :
7961 : /*
7962 : * generate_setop_child_grouplist
7963 : * Build a SortGroupClause list defining the sort/grouping properties
7964 : * of the child of a set operation.
7965 : *
7966 : * This is similar to generate_setop_grouplist() but differs as the setop
7967 : * child query's targetlist entries may already have a tleSortGroupRef
7968 : * assigned for other purposes, such as GROUP BYs. Here we keep the
7969 : * SortGroupClause list in the same order as 'op' groupClauses and just adjust
7970 : * the tleSortGroupRef to reference the TargetEntry's 'ressortgroupref'.
7971 : */
7972 : static List *
7973 10474 : generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist)
7974 : {
7975 10474 : List *grouplist = copyObject(op->groupClauses);
7976 : ListCell *lg;
7977 : ListCell *lt;
7978 :
7979 10474 : lg = list_head(grouplist);
7980 41168 : foreach(lt, targetlist)
7981 : {
7982 30694 : TargetEntry *tle = (TargetEntry *) lfirst(lt);
7983 : SortGroupClause *sgc;
7984 :
7985 : /* resjunk columns could have sortgrouprefs. Leave these alone */
7986 30694 : if (tle->resjunk)
7987 0 : continue;
7988 :
7989 : /* we expect every non-resjunk target to have a SortGroupClause */
7990 : Assert(lg != NULL);
7991 30694 : sgc = (SortGroupClause *) lfirst(lg);
7992 30694 : lg = lnext(grouplist, lg);
7993 :
7994 : /* assign a tleSortGroupRef, or reuse the existing one */
7995 30694 : sgc->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
7996 : }
7997 : Assert(lg == NULL);
7998 10474 : return grouplist;
7999 : }
|