Line data Source code
1 : /*------------------------------------------------------------------------
2 : *
3 : * geqo_eval.c
4 : * Routines to evaluate query trees
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * src/backend/optimizer/geqo/geqo_eval.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 :
14 : /* contributed by:
15 : =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
16 : * Martin Utesch * Institute of Automatic Control *
17 : = = University of Mining and Technology =
18 : * utesch@aut.tu-freiberg.de * Freiberg, Germany *
19 : =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
20 : */
21 :
22 : #include "postgres.h"
23 :
24 : #include <float.h>
25 : #include <limits.h>
26 :
27 : #include "optimizer/geqo.h"
28 : #include "optimizer/joininfo.h"
29 : #include "optimizer/pathnode.h"
30 : #include "optimizer/paths.h"
31 : #include "utils/memutils.h"
32 :
33 :
34 : /* A "clump" of already-joined relations within gimme_tree */
35 : typedef struct
36 : {
37 : RelOptInfo *joinrel; /* joinrel for the set of relations */
38 : int size; /* number of input relations in clump */
39 : } Clump;
40 :
41 : static List *merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump,
42 : int num_gene, bool force);
43 : static bool desirable_join(PlannerInfo *root,
44 : RelOptInfo *outer_rel, RelOptInfo *inner_rel);
45 :
46 :
47 : /*
48 : * geqo_eval
49 : *
50 : * Returns cost of a query tree as an individual of the population.
51 : *
52 : * If no legal join order can be extracted from the proposed tour,
53 : * returns DBL_MAX.
54 : */
55 : Cost
56 4368 : geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
57 : {
58 : MemoryContext mycontext;
59 : MemoryContext oldcxt;
60 : RelOptInfo *joinrel;
61 : Cost fitness;
62 : int savelength;
63 : struct HTAB *savehash;
64 :
65 : /*
66 : * Create a private memory context that will hold all temp storage
67 : * allocated inside gimme_tree().
68 : *
69 : * Since geqo_eval() will be called many times, we can't afford to let all
70 : * that memory go unreclaimed until end of statement. Note we make the
71 : * temp context a child of the planner's normal context, so that it will
72 : * be freed even if we abort via ereport(ERROR).
73 : */
74 4368 : mycontext = AllocSetContextCreate(CurrentMemoryContext,
75 : "GEQO",
76 : ALLOCSET_DEFAULT_SIZES);
77 4368 : oldcxt = MemoryContextSwitchTo(mycontext);
78 :
79 : /*
80 : * gimme_tree will add entries to root->join_rel_list, which may or may
81 : * not already contain some entries. The newly added entries will be
82 : * recycled by the MemoryContextDelete below, so we must ensure that the
83 : * list is restored to its former state before exiting. We can do this by
84 : * truncating the list to its original length. NOTE this assumes that any
85 : * added entries are appended at the end!
86 : *
87 : * We also must take care not to mess up the outer join_rel_hash, if there
88 : * is one. We can do this by just temporarily setting the link to NULL.
89 : * (If we are dealing with enough join rels, which we very likely are, a
90 : * new hash table will get built and used locally.)
91 : *
92 : * join_rel_level[] shouldn't be in use, so just Assert it isn't.
93 : */
94 4368 : savelength = list_length(root->join_rel_list);
95 4368 : savehash = root->join_rel_hash;
96 : Assert(root->join_rel_level == NULL);
97 :
98 4368 : root->join_rel_hash = NULL;
99 :
100 : /* construct the best path for the given combination of relations */
101 4368 : joinrel = gimme_tree(root, tour, num_gene);
102 :
103 : /*
104 : * compute fitness, if we found a valid join
105 : *
106 : * XXX geqo does not currently support optimization for partial result
107 : * retrieval, nor do we take any cognizance of possible use of
108 : * parameterized paths --- how to fix?
109 : */
110 4368 : if (joinrel)
111 : {
112 4368 : Path *best_path = joinrel->cheapest_total_path;
113 :
114 4368 : fitness = best_path->total_cost;
115 : }
116 : else
117 0 : fitness = DBL_MAX;
118 :
119 : /*
120 : * Restore join_rel_list to its former state, and put back original
121 : * hashtable if any.
122 : */
123 4368 : root->join_rel_list = list_truncate(root->join_rel_list,
124 : savelength);
125 4368 : root->join_rel_hash = savehash;
126 :
127 : /* release all the memory acquired within gimme_tree */
128 4368 : MemoryContextSwitchTo(oldcxt);
129 4368 : MemoryContextDelete(mycontext);
130 :
131 4368 : return fitness;
132 : }
133 :
134 : /*
135 : * gimme_tree
136 : * Form planner estimates for a join tree constructed in the specified
137 : * order.
138 : *
139 : * 'tour' is the proposed join order, of length 'num_gene'
140 : *
141 : * Returns a new join relation whose cheapest path is the best plan for
142 : * this join order. NB: will return NULL if join order is invalid and
143 : * we can't modify it into a valid order.
144 : *
145 : * The original implementation of this routine always joined in the specified
146 : * order, and so could only build left-sided plans (and right-sided and
147 : * mixtures, as a byproduct of the fact that make_join_rel() is symmetric).
148 : * It could never produce a "bushy" plan. This had a couple of big problems,
149 : * of which the worst was that there are situations involving join order
150 : * restrictions where the only valid plans are bushy.
151 : *
152 : * The present implementation takes the given tour as a guideline, but
153 : * postpones joins that are illegal or seem unsuitable according to some
154 : * heuristic rules. This allows correct bushy plans to be generated at need,
155 : * and as a nice side-effect it seems to materially improve the quality of the
156 : * generated plans. Note however that since it's just a heuristic, it can
157 : * still fail in some cases. (In particular, we might clump together
158 : * relations that actually mustn't be joined yet due to LATERAL restrictions;
159 : * since there's no provision for un-clumping, this must lead to failure.)
160 : */
161 : RelOptInfo *
162 4410 : gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
163 : {
164 4410 : GeqoPrivateData *private = GetGeqoPrivateData(root);
165 : List *clumps;
166 : int rel_count;
167 :
168 : /*
169 : * Sometimes, a relation can't yet be joined to others due to heuristics
170 : * or actual semantic restrictions. We maintain a list of "clumps" of
171 : * successfully joined relations, with larger clumps at the front. Each
172 : * new relation from the tour is added to the first clump it can be joined
173 : * to; if there is none then it becomes a new clump of its own. When we
174 : * enlarge an existing clump we check to see if it can now be merged with
175 : * any other clumps. After the tour is all scanned, we forget about the
176 : * heuristics and try to forcibly join any remaining clumps. If we are
177 : * unable to merge all the clumps into one, fail.
178 : */
179 4410 : clumps = NIL;
180 :
181 15552 : for (rel_count = 0; rel_count < num_gene; rel_count++)
182 : {
183 : int cur_rel_index;
184 : RelOptInfo *cur_rel;
185 : Clump *cur_clump;
186 :
187 : /* Get the next input relation */
188 11142 : cur_rel_index = (int) tour[rel_count];
189 11142 : cur_rel = (RelOptInfo *) list_nth(private->initial_rels,
190 : cur_rel_index - 1);
191 :
192 : /* Make it into a single-rel clump */
193 11142 : cur_clump = palloc_object(Clump);
194 11142 : cur_clump->joinrel = cur_rel;
195 11142 : cur_clump->size = 1;
196 :
197 : /* Merge it into the clumps list, using only desirable joins */
198 11142 : clumps = merge_clump(root, clumps, cur_clump, num_gene, false);
199 : }
200 :
201 4410 : if (list_length(clumps) > 1)
202 : {
203 : /* Force-join the remaining clumps in some legal order */
204 : List *fclumps;
205 : ListCell *lc;
206 :
207 0 : fclumps = NIL;
208 0 : foreach(lc, clumps)
209 : {
210 0 : Clump *clump = (Clump *) lfirst(lc);
211 :
212 0 : fclumps = merge_clump(root, fclumps, clump, num_gene, true);
213 : }
214 0 : clumps = fclumps;
215 : }
216 :
217 : /* Did we succeed in forming a single join relation? */
218 4410 : if (list_length(clumps) != 1)
219 0 : return NULL;
220 :
221 4410 : return ((Clump *) linitial(clumps))->joinrel;
222 : }
223 :
224 : /*
225 : * Merge a "clump" into the list of existing clumps for gimme_tree.
226 : *
227 : * We try to merge the clump into some existing clump, and repeat if
228 : * successful. When no more merging is possible, insert the clump
229 : * into the list, preserving the list ordering rule (namely, that
230 : * clumps of larger size appear earlier).
231 : *
232 : * If force is true, merge anywhere a join is legal, even if it causes
233 : * a cartesian join to be performed. When force is false, do only
234 : * "desirable" joins.
235 : */
236 : static List *
237 17874 : merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, int num_gene,
238 : bool force)
239 : {
240 : ListCell *lc;
241 : int pos;
242 :
243 : /* Look for a clump that new_clump can join to */
244 21810 : foreach(lc, clumps)
245 : {
246 10668 : Clump *old_clump = (Clump *) lfirst(lc);
247 :
248 21336 : if (force ||
249 10668 : desirable_join(root, old_clump->joinrel, new_clump->joinrel))
250 : {
251 : RelOptInfo *joinrel;
252 :
253 : /*
254 : * Construct a RelOptInfo representing the join of these two input
255 : * relations. Note that we expect the joinrel not to exist in
256 : * root->join_rel_list yet, and so the paths constructed for it
257 : * will only include the ones we want.
258 : */
259 8388 : joinrel = make_join_rel(root,
260 : old_clump->joinrel,
261 : new_clump->joinrel);
262 :
263 : /* Keep searching if join order is not valid */
264 8388 : if (joinrel)
265 : {
266 6732 : bool is_top_rel = bms_equal(joinrel->relids,
267 6732 : root->all_query_rels);
268 :
269 : /* Create paths for partitionwise joins. */
270 6732 : generate_partitionwise_join_paths(root, joinrel);
271 :
272 : /*
273 : * Except for the topmost scan/join rel, consider gathering
274 : * partial paths. We'll do the same for the topmost scan/join
275 : * rel once we know the final targetlist (see
276 : * grouping_planner).
277 : */
278 6732 : if (!is_top_rel)
279 2322 : generate_useful_gather_paths(root, joinrel, false);
280 :
281 : /* Find and save the cheapest paths for this joinrel */
282 6732 : set_cheapest(joinrel);
283 :
284 : /*
285 : * Except for the topmost scan/join rel, consider generating
286 : * partial aggregation paths for the grouped relation on top
287 : * of the paths of this rel. After that, we're done creating
288 : * paths for the grouped relation, so run set_cheapest().
289 : */
290 6732 : if (joinrel->grouped_rel != NULL && !is_top_rel)
291 : {
292 0 : RelOptInfo *grouped_rel = joinrel->grouped_rel;
293 :
294 : Assert(IS_GROUPED_REL(grouped_rel));
295 :
296 0 : generate_grouped_paths(root, grouped_rel, joinrel);
297 0 : set_cheapest(grouped_rel);
298 : }
299 :
300 : /* Absorb new clump into old */
301 6732 : old_clump->joinrel = joinrel;
302 6732 : old_clump->size += new_clump->size;
303 6732 : pfree(new_clump);
304 :
305 : /* Remove old_clump from list */
306 6732 : clumps = foreach_delete_current(clumps, lc);
307 :
308 : /*
309 : * Recursively try to merge the enlarged old_clump with
310 : * others. When no further merge is possible, we'll reinsert
311 : * it into the list.
312 : */
313 6732 : return merge_clump(root, clumps, old_clump, num_gene, force);
314 : }
315 : }
316 : }
317 :
318 : /*
319 : * No merging is possible, so add new_clump as an independent clump, in
320 : * proper order according to size. We can be fast for the common case
321 : * where it has size 1 --- it should always go at the end.
322 : */
323 11142 : if (clumps == NIL || new_clump->size == 1)
324 10272 : return lappend(clumps, new_clump);
325 :
326 : /* Else search for the place to insert it */
327 1020 : for (pos = 0; pos < list_length(clumps); pos++)
328 : {
329 870 : Clump *old_clump = (Clump *) list_nth(clumps, pos);
330 :
331 870 : if (new_clump->size > old_clump->size)
332 720 : break; /* new_clump belongs before old_clump */
333 : }
334 870 : clumps = list_insert_nth(clumps, pos, new_clump);
335 :
336 870 : return clumps;
337 : }
338 :
339 : /*
340 : * Heuristics for gimme_tree: do we want to join these two relations?
341 : */
342 : static bool
343 10668 : desirable_join(PlannerInfo *root,
344 : RelOptInfo *outer_rel, RelOptInfo *inner_rel)
345 : {
346 : /*
347 : * Join if there is an applicable join clause, or if there is a join order
348 : * restriction forcing these rels to be joined.
349 : */
350 12948 : if (have_relevant_joinclause(root, outer_rel, inner_rel) ||
351 2280 : have_join_order_restriction(root, outer_rel, inner_rel))
352 8388 : return true;
353 :
354 : /* Otherwise postpone the join till later. */
355 2280 : return false;
356 : }
|