Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * joinpath.c
4 : * Routines to find all possible paths for processing a set of joins
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/path/joinpath.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "executor/executor.h"
18 : #include "foreign/fdwapi.h"
19 : #include "nodes/nodeFuncs.h"
20 : #include "optimizer/cost.h"
21 : #include "optimizer/optimizer.h"
22 : #include "optimizer/pathnode.h"
23 : #include "optimizer/paths.h"
24 : #include "optimizer/placeholder.h"
25 : #include "optimizer/planmain.h"
26 : #include "optimizer/restrictinfo.h"
27 : #include "utils/lsyscache.h"
28 : #include "utils/typcache.h"
29 :
30 : /* Hooks for plugins to get control in add_paths_to_joinrel() */
31 : set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
32 : join_path_setup_hook_type join_path_setup_hook = NULL;
33 :
34 : /*
35 : * Paths parameterized by a parent rel can be considered to be parameterized
36 : * by any of its children, when we are performing partitionwise joins. These
37 : * macros simplify checking for such cases. Beware multiple eval of args.
38 : */
39 : #define PATH_PARAM_BY_PARENT(path, rel) \
40 : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
41 : (rel)->top_parent_relids))
42 : #define PATH_PARAM_BY_REL_SELF(path, rel) \
43 : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
44 :
45 : #define PATH_PARAM_BY_REL(path, rel) \
46 : (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
47 :
48 : static void try_partial_mergejoin_path(PlannerInfo *root,
49 : RelOptInfo *joinrel,
50 : Path *outer_path,
51 : Path *inner_path,
52 : List *pathkeys,
53 : List *mergeclauses,
54 : List *outersortkeys,
55 : List *innersortkeys,
56 : JoinType jointype,
57 : JoinPathExtraData *extra);
58 : static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
59 : RelOptInfo *outerrel, RelOptInfo *innerrel,
60 : JoinType jointype, JoinPathExtraData *extra);
61 : static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
62 : RelOptInfo *outerrel, RelOptInfo *innerrel,
63 : JoinType jointype, JoinPathExtraData *extra);
64 : static void consider_parallel_nestloop(PlannerInfo *root,
65 : RelOptInfo *joinrel,
66 : RelOptInfo *outerrel,
67 : RelOptInfo *innerrel,
68 : JoinType jointype,
69 : JoinPathExtraData *extra);
70 : static void consider_parallel_mergejoin(PlannerInfo *root,
71 : RelOptInfo *joinrel,
72 : RelOptInfo *outerrel,
73 : RelOptInfo *innerrel,
74 : JoinType jointype,
75 : JoinPathExtraData *extra,
76 : Path *inner_cheapest_total);
77 : static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
78 : RelOptInfo *outerrel, RelOptInfo *innerrel,
79 : JoinType jointype, JoinPathExtraData *extra);
80 : static List *select_mergejoin_clauses(PlannerInfo *root,
81 : RelOptInfo *joinrel,
82 : RelOptInfo *outerrel,
83 : RelOptInfo *innerrel,
84 : List *restrictlist,
85 : JoinType jointype,
86 : bool *mergejoin_allowed);
87 : static void generate_mergejoin_paths(PlannerInfo *root,
88 : RelOptInfo *joinrel,
89 : RelOptInfo *innerrel,
90 : Path *outerpath,
91 : JoinType jointype,
92 : JoinPathExtraData *extra,
93 : bool useallclauses,
94 : Path *inner_cheapest_total,
95 : List *merge_pathkeys,
96 : bool is_partial);
97 :
98 :
99 : /*
100 : * add_paths_to_joinrel
101 : * Given a join relation and two component rels from which it can be made,
102 : * consider all possible paths that use the two component rels as outer
103 : * and inner rel respectively. Add these paths to the join rel's pathlist
104 : * if they survive comparison with other paths (and remove any existing
105 : * paths that are dominated by these paths).
106 : *
107 : * Modifies the pathlist field of the joinrel node to contain the best
108 : * paths found so far.
109 : *
110 : * jointype is not necessarily the same as sjinfo->jointype; it might be
111 : * "flipped around" if we are considering joining the rels in the opposite
112 : * direction from what's indicated in sjinfo.
113 : *
114 : * Also, this routine accepts the special JoinTypes JOIN_UNIQUE_OUTER and
115 : * JOIN_UNIQUE_INNER to indicate that the outer or inner relation has been
116 : * unique-ified and a regular inner join should then be applied. These values
117 : * are not allowed to propagate outside this routine, however. Path cost
118 : * estimation code, as well as match_unsorted_outer, may need to recognize that
119 : * it's dealing with such a case --- the combination of nominal jointype INNER
120 : * with sjinfo->jointype == JOIN_SEMI indicates that.
121 : */
122 : void
123 628346 : add_paths_to_joinrel(PlannerInfo *root,
124 : RelOptInfo *joinrel,
125 : RelOptInfo *outerrel,
126 : RelOptInfo *innerrel,
127 : JoinType jointype,
128 : SpecialJoinInfo *sjinfo,
129 : List *restrictlist)
130 : {
131 628346 : JoinType save_jointype = jointype;
132 : JoinPathExtraData extra;
133 628346 : bool mergejoin_allowed = true;
134 : ListCell *lc;
135 : Relids joinrelids;
136 :
137 : /*
138 : * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
139 : * between child relations, even if there is a SpecialJoinInfo node for
140 : * the join between the topmost parents. So, while calculating Relids set
141 : * representing the restriction, consider relids of topmost parent of
142 : * partitions.
143 : */
144 628346 : if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
145 54278 : joinrelids = joinrel->top_parent_relids;
146 : else
147 574068 : joinrelids = joinrel->relids;
148 :
149 628346 : extra.restrictlist = restrictlist;
150 628346 : extra.mergeclause_list = NIL;
151 628346 : extra.sjinfo = sjinfo;
152 628346 : extra.param_source_rels = NULL;
153 628346 : extra.pgs_mask = joinrel->pgs_mask;
154 :
155 : /*
156 : * Give extensions a chance to take control. In particular, an extension
157 : * might want to modify extra.pgs_mask. It's possible to override pgs_mask
158 : * on a query-wide basis using join_search_hook, or for a particular
159 : * relation using joinrel_setup_hook, but extensions that want to provide
160 : * different advice for the same joinrel based on the choice of innerrel
161 : * and outerrel will need to use this hook.
162 : *
163 : * A very simple way for an extension to use this hook is to set
164 : * extra.pgs_mask &= ~PGS_JOIN_ANY, if it simply doesn't want any of the
165 : * paths generated by this call to add_paths_to_joinrel() to be selected.
166 : * An extension could use this technique to constrain the join order,
167 : * since it could thereby arrange to reject all paths from join orders
168 : * that it does not like. An extension can also selectively clear bits
169 : * from extra.pgs_mask to rule out specific techniques for specific joins,
170 : * or could even set additional bits to re-allow methods disabled at some
171 : * higher level.
172 : *
173 : * NB: Below this point, this function should be careful to reference
174 : * extra.pgs_mask rather than rel->pgs_mask to avoid disregarding any
175 : * changes made by the hook we're about to call.
176 : */
177 628346 : if (join_path_setup_hook)
178 155296 : join_path_setup_hook(root, joinrel, outerrel, innerrel,
179 : jointype, &extra);
180 :
181 : /*
182 : * See if the inner relation is provably unique for this outer rel.
183 : *
184 : * We have some special cases: for JOIN_SEMI, it doesn't matter since the
185 : * executor can make the equivalent optimization anyway. It also doesn't
186 : * help enable use of Memoize, since a semijoin with a provably unique
187 : * inner side should have been reduced to an inner join in that case.
188 : * Therefore, we need not expend planner cycles on proofs. (For
189 : * JOIN_ANTI, although it doesn't help the executor for the same reason,
190 : * it can benefit Memoize paths.) For JOIN_UNIQUE_INNER, we must be
191 : * considering a semijoin whose inner side is not provably unique (else
192 : * reduce_unique_semijoins would've simplified it), so there's no point in
193 : * calling innerrel_is_unique. However, if the LHS covers all of the
194 : * semijoin's min_lefthand, then it's appropriate to set inner_unique
195 : * because the unique relation produced by create_unique_paths will be
196 : * unique relative to the LHS. (If we have an LHS that's only part of the
197 : * min_lefthand, that is *not* true.) For JOIN_UNIQUE_OUTER, pass
198 : * JOIN_INNER to avoid letting that value escape this module.
199 : */
200 628346 : switch (jointype)
201 : {
202 5944 : case JOIN_SEMI:
203 5944 : extra.inner_unique = false; /* well, unproven */
204 5944 : break;
205 4804 : case JOIN_UNIQUE_INNER:
206 9608 : extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
207 4804 : outerrel->relids);
208 4804 : break;
209 4804 : case JOIN_UNIQUE_OUTER:
210 4804 : extra.inner_unique = innerrel_is_unique(root,
211 : joinrel->relids,
212 : outerrel->relids,
213 : innerrel,
214 : JOIN_INNER,
215 : restrictlist,
216 : false);
217 4804 : break;
218 612794 : default:
219 612794 : extra.inner_unique = innerrel_is_unique(root,
220 : joinrel->relids,
221 : outerrel->relids,
222 : innerrel,
223 : jointype,
224 : restrictlist,
225 : false);
226 612794 : break;
227 : }
228 :
229 : /*
230 : * If the outer or inner relation has been unique-ified, handle as a plain
231 : * inner join.
232 : */
233 628346 : if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
234 9608 : jointype = JOIN_INNER;
235 :
236 : /*
237 : * Find potential mergejoin clauses. We can skip this if we are not
238 : * interested in doing a mergejoin. However, mergejoin may be our only
239 : * way of implementing a full outer join, so in that case we don't care
240 : * whether mergejoins are disabled.
241 : */
242 628346 : if ((extra.pgs_mask & PGS_MERGEJOIN_ANY) != 0 || jointype == JOIN_FULL)
243 553763 : extra.mergeclause_list = select_mergejoin_clauses(root,
244 : joinrel,
245 : outerrel,
246 : innerrel,
247 : restrictlist,
248 : jointype,
249 : &mergejoin_allowed);
250 :
251 : /*
252 : * If it's SEMI, ANTI, or inner_unique join, compute correction factors
253 : * for cost estimation. These will be the same for all paths.
254 : */
255 628346 : if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
256 184520 : compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel,
257 : jointype, sjinfo, restrictlist,
258 : &extra.semifactors);
259 :
260 : /*
261 : * Decide whether it's sensible to generate parameterized paths for this
262 : * joinrel, and if so, which relations such paths should require. There
263 : * is usually no need to create a parameterized result path unless there
264 : * is a join order restriction that prevents joining one of our input rels
265 : * directly to the parameter source rel instead of joining to the other
266 : * input rel. (But see allow_star_schema_join().) This restriction
267 : * reduces the number of parameterized paths we have to deal with at
268 : * higher join levels, without compromising the quality of the resulting
269 : * plan. We express the restriction as a Relids set that must overlap the
270 : * parameterization of any proposed join path. Note: param_source_rels
271 : * should contain only baserels, not OJ relids, so starting from
272 : * all_baserels not all_query_rels is correct.
273 : */
274 1152136 : foreach(lc, root->join_info_list)
275 : {
276 523790 : SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
277 :
278 : /*
279 : * SJ is relevant to this join if we have some part of its RHS
280 : * (possibly not all of it), and haven't yet joined to its LHS. (This
281 : * test is pretty simplistic, but should be sufficient considering the
282 : * join has already been proven legal.) If the SJ is relevant, it
283 : * presents constraints for joining to anything not in its RHS.
284 : */
285 523790 : if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
286 346886 : !bms_overlap(joinrelids, sjinfo2->min_lefthand))
287 15274 : extra.param_source_rels = bms_join(extra.param_source_rels,
288 15274 : bms_difference(root->all_baserels,
289 15274 : sjinfo2->min_righthand));
290 :
291 : /* full joins constrain both sides symmetrically */
292 527728 : if (sjinfo2->jointype == JOIN_FULL &&
293 3938 : bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
294 3902 : !bms_overlap(joinrelids, sjinfo2->min_righthand))
295 536 : extra.param_source_rels = bms_join(extra.param_source_rels,
296 536 : bms_difference(root->all_baserels,
297 536 : sjinfo2->min_lefthand));
298 : }
299 :
300 : /*
301 : * However, when a LATERAL subquery is involved, there will simply not be
302 : * any paths for the joinrel that aren't parameterized by whatever the
303 : * subquery is parameterized by, unless its parameterization is resolved
304 : * within the joinrel. So we might as well allow additional dependencies
305 : * on whatever residual lateral dependencies the joinrel will have.
306 : */
307 1256692 : extra.param_source_rels = bms_add_members(extra.param_source_rels,
308 628346 : joinrel->lateral_relids);
309 :
310 : /*
311 : * 1. Consider mergejoin paths where both relations must be explicitly
312 : * sorted. Skip this if we can't mergejoin.
313 : */
314 628346 : if (mergejoin_allowed)
315 616778 : sort_inner_and_outer(root, joinrel, outerrel, innerrel,
316 : jointype, &extra);
317 :
318 : /*
319 : * 2. Consider paths where the outer relation need not be explicitly
320 : * sorted. This includes both nestloops and mergejoins where the outer
321 : * path is already ordered. Again, skip this if we can't mergejoin.
322 : * (That's okay because we know that nestloop can't handle
323 : * right/right-anti/right-semi/full joins at all, so it wouldn't work in
324 : * the prohibited cases either.)
325 : */
326 628346 : if (mergejoin_allowed)
327 616778 : match_unsorted_outer(root, joinrel, outerrel, innerrel,
328 : jointype, &extra);
329 :
330 : #ifdef NOT_USED
331 :
332 : /*
333 : * 3. Consider paths where the inner relation need not be explicitly
334 : * sorted. This includes mergejoins only (nestloops were already built in
335 : * match_unsorted_outer).
336 : *
337 : * Diked out as redundant 2/13/2000 -- tgl. There isn't any really
338 : * significant difference between the inner and outer side of a mergejoin,
339 : * so match_unsorted_inner creates no paths that aren't equivalent to
340 : * those made by match_unsorted_outer when add_paths_to_joinrel() is
341 : * invoked with the two rels given in the other order.
342 : */
343 : if (mergejoin_allowed)
344 : match_unsorted_inner(root, joinrel, outerrel, innerrel,
345 : jointype, &extra);
346 : #endif
347 :
348 : /*
349 : * 4. Consider paths where both outer and inner relations must be hashed
350 : * before being joined. As above, when it's a full join, we must try this
351 : * even when the path type is disabled, because it may be our only option.
352 : */
353 628346 : if ((extra.pgs_mask & PGS_HASHJOIN) != 0 || jointype == JOIN_FULL)
354 560614 : hash_inner_and_outer(root, joinrel, outerrel, innerrel,
355 : jointype, &extra);
356 :
357 : /*
358 : * 5. If inner and outer relations are foreign tables (or joins) belonging
359 : * to the same server and assigned to the same user to check access
360 : * permissions as, give the FDW a chance to push down joins.
361 : */
362 628346 : if ((extra.pgs_mask & PGS_FOREIGNJOIN) != 0 && joinrel->fdwroutine &&
363 1362 : joinrel->fdwroutine->GetForeignJoinPaths)
364 1360 : joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
365 : outerrel, innerrel,
366 : save_jointype, &extra);
367 :
368 : /*
369 : * 6. Finally, give extensions a chance to manipulate the path list. They
370 : * could add new paths (such as CustomPaths) by calling add_path(), or
371 : * add_partial_path() if parallel aware.
372 : *
373 : * In theory, extensions could also use this hook to delete or modify
374 : * paths added by the core code, but in practice this is difficult to make
375 : * work, since it's too late to get back any paths that have already been
376 : * discarded by add_path() or add_partial_path(). If you're trying to
377 : * suppress paths, consider using join_path_setup_hook instead.
378 : */
379 628346 : if (set_join_pathlist_hook)
380 0 : set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
381 : save_jointype, &extra);
382 628346 : }
383 :
384 : /*
385 : * We override the param_source_rels heuristic to accept nestloop paths in
386 : * which the outer rel satisfies some but not all of the inner path's
387 : * parameterization. This is necessary to get good plans for star-schema
388 : * scenarios, in which a parameterized path for a large table may require
389 : * parameters from multiple small tables that will not get joined directly to
390 : * each other. We can handle that by stacking nestloops that have the small
391 : * tables on the outside; but this breaks the rule the param_source_rels
392 : * heuristic is based on, namely that parameters should not be passed down
393 : * across joins unless there's a join-order-constraint-based reason to do so.
394 : * So we ignore the param_source_rels restriction when this case applies.
395 : *
396 : * allow_star_schema_join() returns true if the param_source_rels restriction
397 : * should be overridden, ie, it's okay to perform this join.
398 : */
399 : static inline bool
400 248586 : allow_star_schema_join(PlannerInfo *root,
401 : Relids outerrelids,
402 : Relids inner_paramrels)
403 : {
404 : /*
405 : * It's a star-schema case if the outer rel provides some but not all of
406 : * the inner rel's parameterization.
407 : */
408 291298 : return (bms_overlap(inner_paramrels, outerrelids) &&
409 42712 : bms_nonempty_difference(inner_paramrels, outerrelids));
410 : }
411 :
412 : /*
413 : * If the parameterization is only partly satisfied by the outer rel,
414 : * the unsatisfied part can't include any outer-join relids that could
415 : * null rels of the satisfied part. That would imply that we're trying
416 : * to use a clause involving a Var with nonempty varnullingrels at
417 : * a join level where that value isn't yet computable.
418 : *
419 : * In practice, this test never finds a problem because earlier join order
420 : * restrictions prevent us from attempting a join that would cause a problem.
421 : * (That's unsurprising, because the code worked before we ever added
422 : * outer-join relids to expression relids.) It still seems worth checking
423 : * as a backstop, but we only do so in assert-enabled builds.
424 : */
425 : #ifdef USE_ASSERT_CHECKING
426 : static inline bool
427 : have_unsafe_outer_join_ref(PlannerInfo *root,
428 : Relids outerrelids,
429 : Relids inner_paramrels)
430 : {
431 : bool result = false;
432 : Relids unsatisfied = bms_difference(inner_paramrels, outerrelids);
433 : Relids satisfied = bms_intersect(inner_paramrels, outerrelids);
434 :
435 : if (bms_overlap(unsatisfied, root->outer_join_rels))
436 : {
437 : ListCell *lc;
438 :
439 : foreach(lc, root->join_info_list)
440 : {
441 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
442 :
443 : if (!bms_is_member(sjinfo->ojrelid, unsatisfied))
444 : continue; /* not relevant */
445 : if (bms_overlap(satisfied, sjinfo->min_righthand) ||
446 : (sjinfo->jointype == JOIN_FULL &&
447 : bms_overlap(satisfied, sjinfo->min_lefthand)))
448 : {
449 : result = true; /* doesn't work */
450 : break;
451 : }
452 : }
453 : }
454 :
455 : /* Waste no memory when we reject a path here */
456 : bms_free(unsatisfied);
457 : bms_free(satisfied);
458 :
459 : return result;
460 : }
461 : #endif /* USE_ASSERT_CHECKING */
462 :
463 : /*
464 : * paraminfo_get_equal_hashops
465 : * Determine if the clauses in param_info and innerrel's lateral vars
466 : * can be hashed.
467 : * Returns true if hashing is possible, otherwise false.
468 : *
469 : * Additionally, on success we collect the outer expressions and the
470 : * appropriate equality operators for each hashable parameter to innerrel.
471 : * These are returned in parallel lists in *param_exprs and *operators.
472 : * We also set *binary_mode to indicate whether strict binary matching is
473 : * required.
474 : */
475 : static bool
476 279098 : paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
477 : RelOptInfo *outerrel, RelOptInfo *innerrel,
478 : List *ph_lateral_vars, List **param_exprs,
479 : List **operators, bool *binary_mode)
480 :
481 : {
482 : List *lateral_vars;
483 : ListCell *lc;
484 :
485 279098 : *param_exprs = NIL;
486 279098 : *operators = NIL;
487 279098 : *binary_mode = false;
488 :
489 : /* Add join clauses from param_info to the hash key */
490 279098 : if (param_info != NULL)
491 : {
492 279098 : List *clauses = param_info->ppi_clauses;
493 :
494 524373 : foreach(lc, clauses)
495 : {
496 302932 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
497 : OpExpr *opexpr;
498 : Node *expr;
499 : Oid hasheqoperator;
500 :
501 302932 : opexpr = (OpExpr *) rinfo->clause;
502 :
503 : /*
504 : * Bail if the rinfo is not compatible. We need a join OpExpr
505 : * with 2 args.
506 : */
507 302932 : if (!IsA(opexpr, OpExpr) || list_length(opexpr->args) != 2 ||
508 290858 : !clause_sides_match_join(rinfo, outerrel->relids,
509 : innerrel->relids))
510 : {
511 57597 : list_free(*operators);
512 57597 : list_free(*param_exprs);
513 57657 : return false;
514 : }
515 :
516 245335 : if (rinfo->outer_is_left)
517 : {
518 138837 : expr = (Node *) linitial(opexpr->args);
519 138837 : hasheqoperator = rinfo->left_hasheqoperator;
520 : }
521 : else
522 : {
523 106498 : expr = (Node *) lsecond(opexpr->args);
524 106498 : hasheqoperator = rinfo->right_hasheqoperator;
525 : }
526 :
527 : /* can't do memoize if we can't hash the outer type */
528 245335 : if (!OidIsValid(hasheqoperator))
529 : {
530 60 : list_free(*operators);
531 60 : list_free(*param_exprs);
532 60 : return false;
533 : }
534 :
535 : /*
536 : * 'expr' may already exist as a parameter from a previous item in
537 : * ppi_clauses. No need to include it again, however we'd better
538 : * ensure we do switch into binary mode if required. See below.
539 : */
540 245275 : if (!list_member(*param_exprs, expr))
541 : {
542 245271 : *operators = lappend_oid(*operators, hasheqoperator);
543 245271 : *param_exprs = lappend(*param_exprs, expr);
544 : }
545 :
546 : /*
547 : * When the join operator is not hashable then it's possible that
548 : * the operator will be able to distinguish something that the
549 : * hash equality operator could not. For example with floating
550 : * point types -0.0 and +0.0 are classed as equal by the hash
551 : * function and equality function, but some other operator may be
552 : * able to tell those values apart. This means that we must put
553 : * memoize into binary comparison mode so that it does bit-by-bit
554 : * comparisons rather than a "logical" comparison as it would
555 : * using the hash equality operator.
556 : */
557 245275 : if (!OidIsValid(rinfo->hashjoinoperator))
558 747 : *binary_mode = true;
559 : }
560 : }
561 :
562 : /* Now add any lateral vars to the cache key too */
563 221441 : lateral_vars = list_concat(ph_lateral_vars, innerrel->lateral_vars);
564 226931 : foreach(lc, lateral_vars)
565 : {
566 6254 : Node *expr = (Node *) lfirst(lc);
567 : TypeCacheEntry *typentry;
568 :
569 : /* Reject if there are any volatile functions in lateral vars */
570 6254 : if (contain_volatile_functions(expr))
571 : {
572 0 : list_free(*operators);
573 0 : list_free(*param_exprs);
574 764 : return false;
575 : }
576 :
577 6254 : typentry = lookup_type_cache(exprType(expr),
578 : TYPECACHE_HASH_PROC | TYPECACHE_EQ_OPR);
579 :
580 : /* can't use memoize without a valid hash proc and equals operator */
581 6254 : if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr))
582 : {
583 764 : list_free(*operators);
584 764 : list_free(*param_exprs);
585 764 : return false;
586 : }
587 :
588 : /*
589 : * 'expr' may already exist as a parameter from the ppi_clauses. No
590 : * need to include it again, however we'd better ensure we do switch
591 : * into binary mode.
592 : */
593 5490 : if (!list_member(*param_exprs, expr))
594 : {
595 5164 : *operators = lappend_oid(*operators, typentry->eq_opr);
596 5164 : *param_exprs = lappend(*param_exprs, expr);
597 : }
598 :
599 : /*
600 : * We must go into binary mode as we don't have too much of an idea of
601 : * how these lateral Vars are being used. See comment above when we
602 : * set *binary_mode for the non-lateral Var case. This could be
603 : * relaxed a bit if we had the RestrictInfos and knew the operators
604 : * being used, however for cases like Vars that are arguments to
605 : * functions we must operate in binary mode as we don't have
606 : * visibility into what the function is doing with the Vars.
607 : */
608 5490 : *binary_mode = true;
609 : }
610 :
611 : /* We're okay to use memoize */
612 220677 : return true;
613 : }
614 :
615 : /*
616 : * extract_lateral_vars_from_PHVs
617 : * Extract lateral references within PlaceHolderVars that are due to be
618 : * evaluated at 'innerrelids'.
619 : */
620 : static List *
621 1074016 : extract_lateral_vars_from_PHVs(PlannerInfo *root, Relids innerrelids)
622 : {
623 1074016 : List *ph_lateral_vars = NIL;
624 : ListCell *lc;
625 :
626 : /* Nothing would be found if the query contains no LATERAL RTEs */
627 1074016 : if (!root->hasLateralRTEs)
628 1029153 : return NIL;
629 :
630 : /*
631 : * No need to consider PHVs that are due to be evaluated at joinrels,
632 : * since we do not add Memoize nodes on top of joinrel paths.
633 : */
634 44863 : if (bms_membership(innerrelids) == BMS_MULTIPLE)
635 18484 : return NIL;
636 :
637 28301 : foreach(lc, root->placeholder_list)
638 : {
639 1922 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
640 : List *vars;
641 : ListCell *cell;
642 :
643 : /* PHV is uninteresting if no lateral refs */
644 1922 : if (phinfo->ph_lateral == NULL)
645 1102 : continue;
646 :
647 : /* PHV is uninteresting if not due to be evaluated at innerrelids */
648 820 : if (!bms_equal(phinfo->ph_eval_at, innerrelids))
649 654 : continue;
650 :
651 : /*
652 : * If the PHV does not reference any rels in innerrelids, use its
653 : * contained expression as a cache key rather than extracting the
654 : * Vars/PHVs from it and using those. This can be beneficial in cases
655 : * where the expression results in fewer distinct values to cache
656 : * tuples for.
657 : */
658 166 : if (!bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
659 : innerrelids))
660 : {
661 158 : ph_lateral_vars = lappend(ph_lateral_vars, phinfo->ph_var->phexpr);
662 158 : continue;
663 : }
664 :
665 : /* Fetch Vars and PHVs of lateral references within PlaceHolderVars */
666 8 : vars = pull_vars_of_level((Node *) phinfo->ph_var->phexpr, 0);
667 24 : foreach(cell, vars)
668 : {
669 16 : Node *node = (Node *) lfirst(cell);
670 :
671 16 : if (IsA(node, Var))
672 : {
673 8 : Var *var = (Var *) node;
674 :
675 : Assert(var->varlevelsup == 0);
676 :
677 8 : if (bms_is_member(var->varno, phinfo->ph_lateral))
678 0 : ph_lateral_vars = lappend(ph_lateral_vars, node);
679 : }
680 8 : else if (IsA(node, PlaceHolderVar))
681 : {
682 8 : PlaceHolderVar *phv = (PlaceHolderVar *) node;
683 :
684 : Assert(phv->phlevelsup == 0);
685 :
686 8 : if (bms_is_subset(find_placeholder_info(root, phv)->ph_eval_at,
687 8 : phinfo->ph_lateral))
688 8 : ph_lateral_vars = lappend(ph_lateral_vars, node);
689 : }
690 : else
691 : Assert(false);
692 : }
693 :
694 8 : list_free(vars);
695 : }
696 :
697 26379 : return ph_lateral_vars;
698 : }
699 :
700 : /*
701 : * get_memoize_path
702 : * If possible, make and return a Memoize path atop of 'inner_path'.
703 : * Otherwise return NULL.
704 : *
705 : * Note that currently we do not add Memoize nodes on top of join relation
706 : * paths. This is because the ParamPathInfos for join relation paths do not
707 : * maintain ppi_clauses, as the set of relevant clauses varies depending on how
708 : * the join is formed. In addition, joinrels do not maintain lateral_vars. So
709 : * we do not have a way to extract cache keys from joinrels.
710 : */
711 : static Path *
712 1673127 : get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
713 : RelOptInfo *outerrel, Path *inner_path,
714 : Path *outer_path, JoinType jointype,
715 : JoinPathExtraData *extra)
716 : {
717 : List *param_exprs;
718 : List *hash_operators;
719 : ListCell *lc;
720 : bool binary_mode;
721 : List *ph_lateral_vars;
722 :
723 : /* Obviously not if it's disabled */
724 1673127 : if ((extra->pgs_mask & PGS_NESTLOOP_MEMOIZE) == 0)
725 164168 : return NULL;
726 :
727 : /*
728 : * We can safely not bother with all this unless we expect to perform more
729 : * than one inner scan. The first scan is always going to be a cache
730 : * miss. This would likely fail later anyway based on costs, so this is
731 : * really just to save some wasted effort.
732 : *
733 : * However, if the "plain nested loop" strategy is disabled, then it is no
734 : * longer certain that any path we'd construct here would lose on cost.
735 : * So, in that case, continue and let cost comparison sort things out.
736 : */
737 1508959 : if (outer_path->parent->rows < 2 &&
738 434943 : (extra->pgs_mask & PGS_NESTLOOP_PLAIN) != 0)
739 434943 : return NULL;
740 :
741 : /*
742 : * Extract lateral Vars/PHVs within PlaceHolderVars that are due to be
743 : * evaluated at innerrel. These lateral Vars/PHVs could be used as
744 : * memoize cache keys.
745 : */
746 1074016 : ph_lateral_vars = extract_lateral_vars_from_PHVs(root, innerrel->relids);
747 :
748 : /*
749 : * We can only have a memoize node when there's some kind of cache key,
750 : * either parameterized path clauses or lateral Vars. No cache key sounds
751 : * more like something a Materialize node might be more useful for.
752 : */
753 1074016 : if ((inner_path->param_info == NULL ||
754 385008 : inner_path->param_info->ppi_clauses == NIL) &&
755 714723 : innerrel->lateral_vars == NIL &&
756 : ph_lateral_vars == NIL)
757 710009 : return NULL;
758 :
759 : /*
760 : * Currently we don't do this for SEMI and ANTI joins, because nested loop
761 : * SEMI/ANTI joins don't scan the inner node to completion, which means
762 : * memoize cannot mark the cache entry as complete. Nor can we mark the
763 : * cache entry as complete after fetching the first inner tuple, because
764 : * if that tuple and the current outer tuple don't satisfy the join
765 : * clauses, a second inner tuple that satisfies the parameters would find
766 : * the cache entry already marked as complete. The only exception is when
767 : * the inner relation is provably unique, as in that case, there won't be
768 : * a second matching tuple and we can safely mark the cache entry as
769 : * complete after fetching the first inner tuple. Note that in such
770 : * cases, the SEMI join should have been reduced to an inner join by
771 : * reduce_unique_semijoins.
772 : */
773 364007 : if ((jointype == JOIN_SEMI || jointype == JOIN_ANTI) &&
774 10732 : !extra->inner_unique)
775 4718 : return NULL;
776 :
777 : /*
778 : * Memoize normally marks cache entries as complete when it runs out of
779 : * tuples to read from its subplan. However, with unique joins, Nested
780 : * Loop will skip to the next outer tuple after finding the first matching
781 : * inner tuple. This means that we may not read the inner side of the
782 : * join to completion which leaves no opportunity to mark the cache entry
783 : * as complete. To work around that, when the join is unique we
784 : * automatically mark cache entries as complete after fetching the first
785 : * tuple. This works when the entire join condition is parameterized.
786 : * Otherwise, when the parameterization is only a subset of the join
787 : * condition, we can't be sure which part of it causes the join to be
788 : * unique. This means there are no guarantees that only 1 tuple will be
789 : * read. We cannot mark the cache entry as complete after reading the
790 : * first tuple without that guarantee. This means the scope of Memoize
791 : * node's usefulness is limited to only outer rows that have no join
792 : * partner as this is the only case where Nested Loop would exhaust the
793 : * inner scan of a unique join. Since the scope is limited to that, we
794 : * just don't bother making a memoize path in this case.
795 : *
796 : * Lateral vars needn't be considered here as they're not considered when
797 : * determining if the join is unique.
798 : */
799 359289 : if (extra->inner_unique)
800 : {
801 : Bitmapset *ppi_serials;
802 :
803 217336 : if (inner_path->param_info == NULL)
804 0 : return NULL;
805 :
806 217336 : ppi_serials = inner_path->param_info->ppi_serials;
807 :
808 518437 : foreach_node(RestrictInfo, rinfo, extra->restrictlist)
809 : {
810 243937 : if (!bms_is_member(rinfo->rinfo_serial, ppi_serials))
811 80086 : return NULL;
812 : }
813 : }
814 :
815 : /*
816 : * We can't use a memoize node if there are volatile functions in the
817 : * inner rel's target list or restrict list. A cache hit could reduce the
818 : * number of calls to these functions.
819 : */
820 279203 : if (contain_volatile_functions((Node *) innerrel->reltarget))
821 0 : return NULL;
822 :
823 461046 : foreach(lc, innerrel->baserestrictinfo)
824 : {
825 181947 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
826 :
827 181947 : if (contain_volatile_functions((Node *) rinfo))
828 104 : return NULL;
829 : }
830 :
831 : /*
832 : * Also check the parameterized path restrictinfos for volatile functions.
833 : * Indexed functions must be immutable so shouldn't have any volatile
834 : * functions, however, with a lateral join the inner scan may not be an
835 : * index scan.
836 : */
837 279099 : if (inner_path->param_info != NULL)
838 : {
839 603218 : foreach(lc, inner_path->param_info->ppi_clauses)
840 : {
841 324120 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
842 :
843 324120 : if (contain_volatile_functions((Node *) rinfo))
844 1 : return NULL;
845 : }
846 : }
847 :
848 : /* Check if we have hash ops for each parameter to the path */
849 279098 : if (paraminfo_get_equal_hashops(root,
850 : inner_path->param_info,
851 279098 : outerrel->top_parent ?
852 : outerrel->top_parent : outerrel,
853 : innerrel,
854 : ph_lateral_vars,
855 : ¶m_exprs,
856 : &hash_operators,
857 : &binary_mode))
858 : {
859 220677 : return (Path *) create_memoize_path(root,
860 : innerrel,
861 : inner_path,
862 : param_exprs,
863 : hash_operators,
864 220677 : extra->inner_unique,
865 : binary_mode,
866 : outer_path->rows);
867 : }
868 :
869 58421 : return NULL;
870 : }
871 :
872 : /*
873 : * try_nestloop_path
874 : * Consider a nestloop join path; if it appears useful, push it into
875 : * the joinrel's pathlist via add_path().
876 : */
877 : static void
878 2669967 : try_nestloop_path(PlannerInfo *root,
879 : RelOptInfo *joinrel,
880 : Path *outer_path,
881 : Path *inner_path,
882 : List *pathkeys,
883 : JoinType jointype,
884 : uint64 nestloop_subtype,
885 : JoinPathExtraData *extra)
886 : {
887 : Relids required_outer;
888 : JoinCostWorkspace workspace;
889 2669967 : RelOptInfo *innerrel = inner_path->parent;
890 2669967 : RelOptInfo *outerrel = outer_path->parent;
891 : Relids innerrelids;
892 : Relids outerrelids;
893 2669967 : Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
894 2669967 : Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
895 :
896 : /*
897 : * If we are forming an outer join at this join, it's nonsensical to use
898 : * an input path that uses the outer join as part of its parameterization.
899 : * (This can happen despite our join order restrictions, since those apply
900 : * to what is in an input relation not what its parameters are.)
901 : */
902 3124995 : if (extra->sjinfo->ojrelid != 0 &&
903 910056 : (bms_is_member(extra->sjinfo->ojrelid, inner_paramrels) ||
904 455028 : bms_is_member(extra->sjinfo->ojrelid, outer_paramrels)))
905 245216 : return;
906 :
907 : /*
908 : * Any parameterization of the input paths refers to topmost parents of
909 : * the relevant relations, because reparameterize_path_by_child() hasn't
910 : * been called yet. So we must consider topmost parents of the relations
911 : * being joined, too, while determining parameterization of the result and
912 : * checking for disallowed parameterization cases.
913 : */
914 2669867 : if (innerrel->top_parent_relids)
915 134037 : innerrelids = innerrel->top_parent_relids;
916 : else
917 2535830 : innerrelids = innerrel->relids;
918 :
919 2669867 : if (outerrel->top_parent_relids)
920 134037 : outerrelids = outerrel->top_parent_relids;
921 : else
922 2535830 : outerrelids = outerrel->relids;
923 :
924 : /*
925 : * Check to see if proposed path is still parameterized, and reject if the
926 : * parameterization wouldn't be sensible --- unless allow_star_schema_join
927 : * says to allow it anyway.
928 : */
929 2669867 : required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
930 : innerrelids, inner_paramrels);
931 2669867 : if (required_outer &&
932 286957 : !bms_overlap(required_outer, extra->param_source_rels) &&
933 248586 : !allow_star_schema_join(root, outerrelids, inner_paramrels))
934 : {
935 : /* Waste no memory when we reject a path here */
936 245116 : bms_free(required_outer);
937 245116 : return;
938 : }
939 :
940 : /* If we got past that, we shouldn't have any unsafe outer-join refs */
941 : Assert(!have_unsafe_outer_join_ref(root, outerrelids, inner_paramrels));
942 :
943 : /*
944 : * If the inner path is parameterized, it is parameterized by the topmost
945 : * parent of the outer rel, not the outer rel itself. We will need to
946 : * translate the parameterization, if this path is chosen, during
947 : * create_plan(). Here we just check whether we will be able to perform
948 : * the translation, and if not avoid creating a nestloop path.
949 : */
950 2424751 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
951 10645 : !path_is_reparameterizable_by_child(inner_path, outer_path->parent))
952 : {
953 0 : bms_free(required_outer);
954 0 : return;
955 : }
956 :
957 : /*
958 : * Do a precheck to quickly eliminate obviously-inferior paths. We
959 : * calculate a cheap lower bound on the path's cost and then use
960 : * add_path_precheck() to see if the path is clearly going to be dominated
961 : * by some existing path for the joinrel. If not, do the full pushup with
962 : * creating a fully valid path structure and submitting it to add_path().
963 : * The latter two steps are expensive enough to make this two-phase
964 : * methodology worthwhile.
965 : */
966 2424751 : initial_cost_nestloop(root, &workspace, jointype,
967 : nestloop_subtype | PGS_CONSIDER_NONPARTIAL,
968 : outer_path, inner_path, extra);
969 :
970 2424751 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
971 : workspace.startup_cost, workspace.total_cost,
972 : pathkeys, required_outer))
973 : {
974 1112003 : add_path(joinrel, (Path *)
975 1112003 : create_nestloop_path(root,
976 : joinrel,
977 : jointype,
978 : &workspace,
979 : extra,
980 : outer_path,
981 : inner_path,
982 : extra->restrictlist,
983 : pathkeys,
984 : required_outer));
985 : }
986 : else
987 : {
988 : /* Waste no memory when we reject a path here */
989 1312748 : bms_free(required_outer);
990 : }
991 : }
992 :
993 : /*
994 : * try_partial_nestloop_path
995 : * Consider a partial nestloop join path; if it appears useful, push it into
996 : * the joinrel's partial_pathlist via add_partial_path().
997 : */
998 : static void
999 157752 : try_partial_nestloop_path(PlannerInfo *root,
1000 : RelOptInfo *joinrel,
1001 : Path *outer_path,
1002 : Path *inner_path,
1003 : List *pathkeys,
1004 : JoinType jointype,
1005 : uint64 nestloop_subtype,
1006 : JoinPathExtraData *extra)
1007 : {
1008 : JoinCostWorkspace workspace;
1009 :
1010 : /*
1011 : * If the inner path is parameterized, the parameterization must be fully
1012 : * satisfied by the proposed outer path. Parameterized partial paths are
1013 : * not supported. The caller should already have verified that no lateral
1014 : * rels are required here.
1015 : */
1016 : Assert(bms_is_empty(joinrel->lateral_relids));
1017 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
1018 157752 : if (inner_path->param_info != NULL)
1019 : {
1020 12329 : Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
1021 12329 : RelOptInfo *outerrel = outer_path->parent;
1022 : Relids outerrelids;
1023 :
1024 : /*
1025 : * The inner and outer paths are parameterized, if at all, by the top
1026 : * level parents, not the child relations, so we must use those relids
1027 : * for our parameterization tests.
1028 : */
1029 12329 : if (outerrel->top_parent_relids)
1030 8708 : outerrelids = outerrel->top_parent_relids;
1031 : else
1032 3621 : outerrelids = outerrel->relids;
1033 :
1034 12329 : if (!bms_is_subset(inner_paramrels, outerrelids))
1035 119462 : return;
1036 : }
1037 :
1038 : /*
1039 : * If the inner path is parameterized, it is parameterized by the topmost
1040 : * parent of the outer rel, not the outer rel itself. We will need to
1041 : * translate the parameterization, if this path is chosen, during
1042 : * create_plan(). Here we just check whether we will be able to perform
1043 : * the translation, and if not avoid creating a nestloop path.
1044 : */
1045 156579 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
1046 7920 : !path_is_reparameterizable_by_child(inner_path, outer_path->parent))
1047 0 : return;
1048 :
1049 : /*
1050 : * Before creating a path, get a quick lower bound on what it is likely to
1051 : * cost. Bail out right away if it looks terrible.
1052 : */
1053 156579 : initial_cost_nestloop(root, &workspace, jointype, nestloop_subtype,
1054 : outer_path, inner_path, extra);
1055 156579 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1056 : workspace.startup_cost,
1057 : workspace.total_cost, pathkeys))
1058 118289 : return;
1059 :
1060 : /* Might be good enough to be worth trying, so let's try it. */
1061 38290 : add_partial_path(joinrel, (Path *)
1062 38290 : create_nestloop_path(root,
1063 : joinrel,
1064 : jointype,
1065 : &workspace,
1066 : extra,
1067 : outer_path,
1068 : inner_path,
1069 : extra->restrictlist,
1070 : pathkeys,
1071 : NULL));
1072 : }
1073 :
1074 : /*
1075 : * try_mergejoin_path
1076 : * Consider a merge join path; if it appears useful, push it into
1077 : * the joinrel's pathlist via add_path().
1078 : */
1079 : static void
1080 1097556 : try_mergejoin_path(PlannerInfo *root,
1081 : RelOptInfo *joinrel,
1082 : Path *outer_path,
1083 : Path *inner_path,
1084 : List *pathkeys,
1085 : List *mergeclauses,
1086 : List *outersortkeys,
1087 : List *innersortkeys,
1088 : JoinType jointype,
1089 : JoinPathExtraData *extra,
1090 : bool is_partial)
1091 : {
1092 : Relids required_outer;
1093 1097556 : int outer_presorted_keys = 0;
1094 : JoinCostWorkspace workspace;
1095 :
1096 1097556 : if (is_partial)
1097 : {
1098 16425 : try_partial_mergejoin_path(root,
1099 : joinrel,
1100 : outer_path,
1101 : inner_path,
1102 : pathkeys,
1103 : mergeclauses,
1104 : outersortkeys,
1105 : innersortkeys,
1106 : jointype,
1107 : extra);
1108 45819 : return;
1109 : }
1110 :
1111 : /*
1112 : * If we are forming an outer join at this join, it's nonsensical to use
1113 : * an input path that uses the outer join as part of its parameterization.
1114 : * (This can happen despite our join order restrictions, since those apply
1115 : * to what is in an input relation not what its parameters are.)
1116 : */
1117 1332063 : if (extra->sjinfo->ojrelid != 0 &&
1118 501864 : (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
1119 250932 : bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
1120 10 : return;
1121 :
1122 : /*
1123 : * Check to see if proposed path is still parameterized, and reject if the
1124 : * parameterization wouldn't be sensible.
1125 : */
1126 1081121 : required_outer = calc_non_nestloop_required_outer(outer_path,
1127 : inner_path);
1128 1081121 : if (required_outer &&
1129 35008 : !bms_overlap(required_outer, extra->param_source_rels))
1130 : {
1131 : /* Waste no memory when we reject a path here */
1132 29384 : bms_free(required_outer);
1133 29384 : return;
1134 : }
1135 :
1136 : /*
1137 : * If the given paths are already well enough ordered, we can skip doing
1138 : * an explicit sort.
1139 : *
1140 : * We need to determine the number of presorted keys of the outer path to
1141 : * decide whether explicit incremental sort can be applied when
1142 : * outersortkeys is not NIL. We do not need to do the same for the inner
1143 : * path though, as incremental sort currently does not support
1144 : * mark/restore.
1145 : */
1146 1584039 : if (outersortkeys &&
1147 532302 : pathkeys_count_contained_in(outersortkeys, outer_path->pathkeys,
1148 : &outer_presorted_keys))
1149 13379 : outersortkeys = NIL;
1150 1906500 : if (innersortkeys &&
1151 854763 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1152 22991 : innersortkeys = NIL;
1153 :
1154 : /*
1155 : * See comments in try_nestloop_path().
1156 : */
1157 1051737 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
1158 : outer_path, inner_path,
1159 : outersortkeys, innersortkeys,
1160 : outer_presorted_keys,
1161 : extra);
1162 :
1163 1051737 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
1164 : workspace.startup_cost, workspace.total_cost,
1165 : pathkeys, required_outer))
1166 : {
1167 298841 : add_path(joinrel, (Path *)
1168 298841 : create_mergejoin_path(root,
1169 : joinrel,
1170 : jointype,
1171 : &workspace,
1172 : extra,
1173 : outer_path,
1174 : inner_path,
1175 : extra->restrictlist,
1176 : pathkeys,
1177 : required_outer,
1178 : mergeclauses,
1179 : outersortkeys,
1180 : innersortkeys,
1181 : outer_presorted_keys));
1182 : }
1183 : else
1184 : {
1185 : /* Waste no memory when we reject a path here */
1186 752896 : bms_free(required_outer);
1187 : }
1188 : }
1189 :
1190 : /*
1191 : * try_partial_mergejoin_path
1192 : * Consider a partial merge join path; if it appears useful, push it into
1193 : * the joinrel's pathlist via add_partial_path().
1194 : */
1195 : static void
1196 71647 : try_partial_mergejoin_path(PlannerInfo *root,
1197 : RelOptInfo *joinrel,
1198 : Path *outer_path,
1199 : Path *inner_path,
1200 : List *pathkeys,
1201 : List *mergeclauses,
1202 : List *outersortkeys,
1203 : List *innersortkeys,
1204 : JoinType jointype,
1205 : JoinPathExtraData *extra)
1206 : {
1207 71647 : int outer_presorted_keys = 0;
1208 : JoinCostWorkspace workspace;
1209 :
1210 : /*
1211 : * See comments in try_partial_hashjoin_path().
1212 : */
1213 : Assert(bms_is_empty(joinrel->lateral_relids));
1214 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
1215 71647 : if (!bms_is_empty(PATH_REQ_OUTER(inner_path)))
1216 24447 : return;
1217 :
1218 : /*
1219 : * If the given paths are already well enough ordered, we can skip doing
1220 : * an explicit sort.
1221 : *
1222 : * We need to determine the number of presorted keys of the outer path to
1223 : * decide whether explicit incremental sort can be applied when
1224 : * outersortkeys is not NIL. We do not need to do the same for the inner
1225 : * path though, as incremental sort currently does not support
1226 : * mark/restore.
1227 : */
1228 126869 : if (outersortkeys &&
1229 55222 : pathkeys_count_contained_in(outersortkeys, outer_path->pathkeys,
1230 : &outer_presorted_keys))
1231 129 : outersortkeys = NIL;
1232 141158 : if (innersortkeys &&
1233 69511 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1234 450 : innersortkeys = NIL;
1235 :
1236 : /*
1237 : * See comments in try_partial_nestloop_path().
1238 : */
1239 71647 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
1240 : outer_path, inner_path,
1241 : outersortkeys, innersortkeys,
1242 : outer_presorted_keys,
1243 : extra);
1244 :
1245 71647 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1246 : workspace.startup_cost,
1247 : workspace.total_cost, pathkeys))
1248 24447 : return;
1249 :
1250 : /* Might be good enough to be worth trying, so let's try it. */
1251 47200 : add_partial_path(joinrel, (Path *)
1252 47200 : create_mergejoin_path(root,
1253 : joinrel,
1254 : jointype,
1255 : &workspace,
1256 : extra,
1257 : outer_path,
1258 : inner_path,
1259 : extra->restrictlist,
1260 : pathkeys,
1261 : NULL,
1262 : mergeclauses,
1263 : outersortkeys,
1264 : innersortkeys,
1265 : outer_presorted_keys));
1266 : }
1267 :
1268 : /*
1269 : * try_hashjoin_path
1270 : * Consider a hash join path; if it appears useful, push it into
1271 : * the joinrel's pathlist via add_path().
1272 : */
1273 : static void
1274 639109 : try_hashjoin_path(PlannerInfo *root,
1275 : RelOptInfo *joinrel,
1276 : Path *outer_path,
1277 : Path *inner_path,
1278 : List *hashclauses,
1279 : JoinType jointype,
1280 : JoinPathExtraData *extra)
1281 : {
1282 : Relids required_outer;
1283 : JoinCostWorkspace workspace;
1284 :
1285 : /*
1286 : * If we are forming an outer join at this join, it's nonsensical to use
1287 : * an input path that uses the outer join as part of its parameterization.
1288 : * (This can happen despite our join order restrictions, since those apply
1289 : * to what is in an input relation not what its parameters are.)
1290 : */
1291 800592 : if (extra->sjinfo->ojrelid != 0 &&
1292 322941 : (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
1293 161458 : bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
1294 94142 : return;
1295 :
1296 : /*
1297 : * Check to see if proposed path is still parameterized, and reject if the
1298 : * parameterization wouldn't be sensible.
1299 : */
1300 639059 : required_outer = calc_non_nestloop_required_outer(outer_path,
1301 : inner_path);
1302 639059 : if (required_outer &&
1303 105806 : !bms_overlap(required_outer, extra->param_source_rels))
1304 : {
1305 : /* Waste no memory when we reject a path here */
1306 94092 : bms_free(required_outer);
1307 94092 : return;
1308 : }
1309 :
1310 : /*
1311 : * See comments in try_nestloop_path(). Also note that hashjoin paths
1312 : * never have any output pathkeys, per comments in create_hashjoin_path.
1313 : */
1314 544967 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1315 : outer_path, inner_path, extra, false);
1316 :
1317 544967 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
1318 : workspace.startup_cost, workspace.total_cost,
1319 : NIL, required_outer))
1320 : {
1321 266295 : add_path(joinrel, (Path *)
1322 266295 : create_hashjoin_path(root,
1323 : joinrel,
1324 : jointype,
1325 : &workspace,
1326 : extra,
1327 : outer_path,
1328 : inner_path,
1329 : false, /* parallel_hash */
1330 : extra->restrictlist,
1331 : required_outer,
1332 : hashclauses));
1333 : }
1334 : else
1335 : {
1336 : /* Waste no memory when we reject a path here */
1337 278672 : bms_free(required_outer);
1338 : }
1339 : }
1340 :
1341 : /*
1342 : * try_partial_hashjoin_path
1343 : * Consider a partial hashjoin join path; if it appears useful, push it into
1344 : * the joinrel's partial_pathlist via add_partial_path().
1345 : * The outer side is partial. If parallel_hash is true, then the inner path
1346 : * must be partial and will be run in parallel to create one or more shared
1347 : * hash tables; otherwise the inner path must be complete and a copy of it
1348 : * is run in every process to create separate identical private hash tables.
1349 : */
1350 : static void
1351 114004 : try_partial_hashjoin_path(PlannerInfo *root,
1352 : RelOptInfo *joinrel,
1353 : Path *outer_path,
1354 : Path *inner_path,
1355 : List *hashclauses,
1356 : JoinType jointype,
1357 : JoinPathExtraData *extra,
1358 : bool parallel_hash)
1359 : {
1360 : JoinCostWorkspace workspace;
1361 :
1362 : /*
1363 : * If the inner path is parameterized, we can't use a partial hashjoin.
1364 : * Parameterized partial paths are not supported. The caller should
1365 : * already have verified that no lateral rels are required here.
1366 : */
1367 : Assert(bms_is_empty(joinrel->lateral_relids));
1368 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
1369 114004 : if (!bms_is_empty(PATH_REQ_OUTER(inner_path)))
1370 32137 : return;
1371 :
1372 : /*
1373 : * Before creating a path, get a quick lower bound on what it is likely to
1374 : * cost. Bail out right away if it looks terrible.
1375 : */
1376 114004 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1377 : outer_path, inner_path, extra, parallel_hash);
1378 114004 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1379 : workspace.startup_cost,
1380 : workspace.total_cost, NIL))
1381 32137 : return;
1382 :
1383 : /* Might be good enough to be worth trying, so let's try it. */
1384 81867 : add_partial_path(joinrel, (Path *)
1385 81867 : create_hashjoin_path(root,
1386 : joinrel,
1387 : jointype,
1388 : &workspace,
1389 : extra,
1390 : outer_path,
1391 : inner_path,
1392 : parallel_hash,
1393 : extra->restrictlist,
1394 : NULL,
1395 : hashclauses));
1396 : }
1397 :
1398 : /*
1399 : * sort_inner_and_outer
1400 : * Create mergejoin join paths by explicitly sorting both the outer and
1401 : * inner join relations on each available merge ordering.
1402 : *
1403 : * 'joinrel' is the join relation
1404 : * 'outerrel' is the outer join relation
1405 : * 'innerrel' is the inner join relation
1406 : * 'jointype' is the type of join to do
1407 : * 'extra' contains additional input values
1408 : */
1409 : static void
1410 616778 : sort_inner_and_outer(PlannerInfo *root,
1411 : RelOptInfo *joinrel,
1412 : RelOptInfo *outerrel,
1413 : RelOptInfo *innerrel,
1414 : JoinType jointype,
1415 : JoinPathExtraData *extra)
1416 : {
1417 : Path *outer_path;
1418 : Path *inner_path;
1419 616778 : Path *cheapest_partial_outer = NULL;
1420 616778 : Path *cheapest_safe_inner = NULL;
1421 : List *all_pathkeys;
1422 : ListCell *l;
1423 :
1424 : /* Nothing to do if there are no available mergejoin clauses */
1425 616778 : if (extra->mergeclause_list == NIL)
1426 150930 : return;
1427 :
1428 : /*
1429 : * We only consider the cheapest-total-cost input paths, since we are
1430 : * assuming here that a sort is required. We will consider
1431 : * cheapest-startup-cost input paths later, and only if they don't need a
1432 : * sort.
1433 : *
1434 : * This function intentionally does not consider parameterized input
1435 : * paths, except when the cheapest-total is parameterized. If we did so,
1436 : * we'd have a combinatorial explosion of mergejoin paths of dubious
1437 : * value. This interacts with decisions elsewhere that also discriminate
1438 : * against mergejoins with parameterized inputs; see comments in
1439 : * src/backend/optimizer/README.
1440 : */
1441 465848 : outer_path = outerrel->cheapest_total_path;
1442 465848 : inner_path = innerrel->cheapest_total_path;
1443 :
1444 : /*
1445 : * If either cheapest-total path is parameterized by the other rel, we
1446 : * can't use a mergejoin. (There's no use looking for alternative input
1447 : * paths, since these should already be the least-parameterized available
1448 : * paths.)
1449 : */
1450 465848 : if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
1451 463918 : PATH_PARAM_BY_REL(inner_path, outerrel))
1452 3884 : return;
1453 :
1454 : /*
1455 : * If the joinrel is parallel-safe, we may be able to consider a partial
1456 : * merge join. However, we can't handle JOIN_FULL, JOIN_RIGHT and
1457 : * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
1458 : * Also, the resulting path must not be parameterized.
1459 : */
1460 461964 : if (joinrel->consider_parallel &&
1461 417396 : jointype != JOIN_FULL &&
1462 365219 : jointype != JOIN_RIGHT &&
1463 362092 : jointype != JOIN_RIGHT_ANTI &&
1464 362092 : outerrel->partial_pathlist != NIL &&
1465 52641 : bms_is_empty(joinrel->lateral_relids))
1466 : {
1467 52641 : cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
1468 :
1469 52641 : if (inner_path->parallel_safe)
1470 52510 : cheapest_safe_inner = inner_path;
1471 : else
1472 : cheapest_safe_inner =
1473 131 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1474 : }
1475 :
1476 : /*
1477 : * Each possible ordering of the available mergejoin clauses will generate
1478 : * a differently-sorted result path at essentially the same cost. We have
1479 : * no basis for choosing one over another at this level of joining, but
1480 : * some sort orders may be more useful than others for higher-level
1481 : * mergejoins, so it's worth considering multiple orderings.
1482 : *
1483 : * Actually, it's not quite true that every mergeclause ordering will
1484 : * generate a different path order, because some of the clauses may be
1485 : * partially redundant (refer to the same EquivalenceClasses). Therefore,
1486 : * what we do is convert the mergeclause list to a list of canonical
1487 : * pathkeys, and then consider different orderings of the pathkeys.
1488 : *
1489 : * Generating a path for *every* permutation of the pathkeys doesn't seem
1490 : * like a winning strategy; the cost in planning time is too high. For
1491 : * now, we generate one path for each pathkey, listing that pathkey first
1492 : * and the rest in random order. This should allow at least a one-clause
1493 : * mergejoin without re-sorting against any other possible mergejoin
1494 : * partner path. But if we've not guessed the right ordering of secondary
1495 : * keys, we may end up evaluating clauses as qpquals when they could have
1496 : * been done as mergeclauses. (In practice, it's rare that there's more
1497 : * than two or three mergeclauses, so expending a huge amount of thought
1498 : * on that is probably not worth it.)
1499 : *
1500 : * The pathkey order returned by select_outer_pathkeys_for_merge() has
1501 : * some heuristics behind it (see that function), so be sure to try it
1502 : * exactly as-is as well as making variants.
1503 : */
1504 461964 : all_pathkeys = select_outer_pathkeys_for_merge(root,
1505 : extra->mergeclause_list,
1506 : joinrel);
1507 :
1508 994266 : foreach(l, all_pathkeys)
1509 : {
1510 532302 : PathKey *front_pathkey = (PathKey *) lfirst(l);
1511 : List *cur_mergeclauses;
1512 : List *outerkeys;
1513 : List *innerkeys;
1514 : List *merge_pathkeys;
1515 :
1516 : /* Make a pathkey list with this guy first */
1517 532302 : if (l != list_head(all_pathkeys))
1518 70338 : outerkeys = lcons(front_pathkey,
1519 : list_delete_nth_cell(list_copy(all_pathkeys),
1520 : foreach_current_index(l)));
1521 : else
1522 461964 : outerkeys = all_pathkeys; /* no work at first one... */
1523 :
1524 : /* Sort the mergeclauses into the corresponding ordering */
1525 : cur_mergeclauses =
1526 532302 : find_mergeclauses_for_outer_pathkeys(root,
1527 : outerkeys,
1528 : extra->mergeclause_list);
1529 :
1530 : /* Should have used them all... */
1531 : Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
1532 :
1533 : /* Build sort pathkeys for the inner side */
1534 532302 : innerkeys = make_inner_pathkeys_for_merge(root,
1535 : cur_mergeclauses,
1536 : outerkeys);
1537 :
1538 : /* Build pathkeys representing output sort order */
1539 532302 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1540 : outerkeys);
1541 :
1542 : /*
1543 : * And now we can make the path.
1544 : *
1545 : * Note: it's possible that the cheapest paths will already be sorted
1546 : * properly. try_mergejoin_path will detect that case and suppress an
1547 : * explicit sort step, so we needn't do so here.
1548 : */
1549 532302 : try_mergejoin_path(root,
1550 : joinrel,
1551 : outer_path,
1552 : inner_path,
1553 : merge_pathkeys,
1554 : cur_mergeclauses,
1555 : outerkeys,
1556 : innerkeys,
1557 : jointype,
1558 : extra,
1559 : false);
1560 :
1561 : /*
1562 : * If we have partial outer and parallel safe inner path then try
1563 : * partial mergejoin path.
1564 : */
1565 532302 : if (cheapest_partial_outer && cheapest_safe_inner)
1566 55222 : try_partial_mergejoin_path(root,
1567 : joinrel,
1568 : cheapest_partial_outer,
1569 : cheapest_safe_inner,
1570 : merge_pathkeys,
1571 : cur_mergeclauses,
1572 : outerkeys,
1573 : innerkeys,
1574 : jointype,
1575 : extra);
1576 : }
1577 : }
1578 :
1579 : /*
1580 : * generate_mergejoin_paths
1581 : * Creates possible mergejoin paths for input outerpath.
1582 : *
1583 : * We generate mergejoins if mergejoin clauses are available. We have
1584 : * two ways to generate the inner path for a mergejoin: sort the cheapest
1585 : * inner path, or use an inner path that is already suitably ordered for the
1586 : * merge. If we have several mergeclauses, it could be that there is no inner
1587 : * path (or only a very expensive one) for the full list of mergeclauses, but
1588 : * better paths exist if we truncate the mergeclause list (thereby discarding
1589 : * some sort key requirements). So, we consider truncations of the
1590 : * mergeclause list as well as the full list. (Ideally we'd consider all
1591 : * subsets of the mergeclause list, but that seems way too expensive.)
1592 : */
1593 : static void
1594 1218058 : generate_mergejoin_paths(PlannerInfo *root,
1595 : RelOptInfo *joinrel,
1596 : RelOptInfo *innerrel,
1597 : Path *outerpath,
1598 : JoinType jointype,
1599 : JoinPathExtraData *extra,
1600 : bool useallclauses,
1601 : Path *inner_cheapest_total,
1602 : List *merge_pathkeys,
1603 : bool is_partial)
1604 : {
1605 : List *mergeclauses;
1606 : List *innersortkeys;
1607 : List *trialsortkeys;
1608 : Path *cheapest_startup_inner;
1609 : Path *cheapest_total_inner;
1610 : int num_sortkeys;
1611 : int sortkeycnt;
1612 :
1613 : /* Look for useful mergeclauses (if any) */
1614 : mergeclauses =
1615 1218058 : find_mergeclauses_for_outer_pathkeys(root,
1616 : outerpath->pathkeys,
1617 : extra->mergeclause_list);
1618 :
1619 : /*
1620 : * Done with this outer path if no chance for a mergejoin.
1621 : *
1622 : * Special corner case: for "x FULL JOIN y ON true", there will be no join
1623 : * clauses at all. Ordinarily we'd generate a clauseless nestloop path,
1624 : * but since mergejoin is our only join type that supports FULL JOIN
1625 : * without any join clauses, it's necessary to generate a clauseless
1626 : * mergejoin path instead.
1627 : */
1628 1218058 : if (mergeclauses == NIL)
1629 : {
1630 856894 : if (jointype == JOIN_FULL)
1631 : /* okay to try for mergejoin */ ;
1632 : else
1633 854184 : return;
1634 : }
1635 422924 : if (useallclauses &&
1636 59050 : list_length(mergeclauses) != list_length(extra->mergeclause_list))
1637 9916 : return;
1638 :
1639 : /* Compute the required ordering of the inner path */
1640 353958 : innersortkeys = make_inner_pathkeys_for_merge(root,
1641 : mergeclauses,
1642 : outerpath->pathkeys);
1643 :
1644 : /*
1645 : * Generate a mergejoin on the basis of sorting the cheapest inner. Since
1646 : * a sort will be needed, only cheapest total cost matters. (But
1647 : * try_mergejoin_path will do the right thing if inner_cheapest_total is
1648 : * already correctly sorted.)
1649 : */
1650 353958 : try_mergejoin_path(root,
1651 : joinrel,
1652 : outerpath,
1653 : inner_cheapest_total,
1654 : merge_pathkeys,
1655 : mergeclauses,
1656 : NIL,
1657 : innersortkeys,
1658 : jointype,
1659 : extra,
1660 : is_partial);
1661 :
1662 : /*
1663 : * Look for presorted inner paths that satisfy the innersortkey list ---
1664 : * or any truncation thereof, if we are allowed to build a mergejoin using
1665 : * a subset of the merge clauses. Here, we consider both cheap startup
1666 : * cost and cheap total cost.
1667 : *
1668 : * Currently we do not consider parameterized inner paths here. This
1669 : * interacts with decisions elsewhere that also discriminate against
1670 : * mergejoins with parameterized inputs; see comments in
1671 : * src/backend/optimizer/README.
1672 : *
1673 : * As we shorten the sortkey list, we should consider only paths that are
1674 : * strictly cheaper than (in particular, not the same as) any path found
1675 : * in an earlier iteration. Otherwise we'd be intentionally using fewer
1676 : * merge keys than a given path allows (treating the rest as plain
1677 : * joinquals), which is unlikely to be a good idea. Also, eliminating
1678 : * paths here on the basis of compare_path_costs is a lot cheaper than
1679 : * building the mergejoin path only to throw it away.
1680 : *
1681 : * If inner_cheapest_total is well enough sorted to have not required a
1682 : * sort in the path made above, we shouldn't make a duplicate path with
1683 : * it, either. We handle that case with the same logic that handles the
1684 : * previous consideration, by initializing the variables that track
1685 : * cheapest-so-far properly. Note that we do NOT reject
1686 : * inner_cheapest_total if we find it matches some shorter set of
1687 : * pathkeys. That case corresponds to using fewer mergekeys to avoid
1688 : * sorting inner_cheapest_total, whereas we did sort it above, so the
1689 : * plans being considered are different.
1690 : */
1691 353958 : if (pathkeys_contained_in(innersortkeys,
1692 : inner_cheapest_total->pathkeys))
1693 : {
1694 : /* inner_cheapest_total didn't require a sort */
1695 11222 : cheapest_startup_inner = inner_cheapest_total;
1696 11222 : cheapest_total_inner = inner_cheapest_total;
1697 : }
1698 : else
1699 : {
1700 : /* it did require a sort, at least for the full set of keys */
1701 342736 : cheapest_startup_inner = NULL;
1702 342736 : cheapest_total_inner = NULL;
1703 : }
1704 353958 : num_sortkeys = list_length(innersortkeys);
1705 353958 : if (num_sortkeys > 1 && !useallclauses)
1706 20757 : trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
1707 : else
1708 333201 : trialsortkeys = innersortkeys; /* won't really truncate */
1709 :
1710 680148 : for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
1711 : {
1712 : Path *innerpath;
1713 375212 : List *newclauses = NIL;
1714 :
1715 : /*
1716 : * Look for an inner path ordered well enough for the first
1717 : * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
1718 : * destructively, which is why we made a copy...
1719 : */
1720 375212 : trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
1721 375212 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1722 : trialsortkeys,
1723 : NULL,
1724 : TOTAL_COST,
1725 : is_partial);
1726 375212 : if (innerpath != NULL &&
1727 18072 : (cheapest_total_inner == NULL ||
1728 18072 : compare_path_costs(innerpath, cheapest_total_inner,
1729 : TOTAL_COST) < 0))
1730 : {
1731 : /* Found a cheap (or even-cheaper) sorted path */
1732 : /* Select the right mergeclauses, if we didn't already */
1733 209449 : if (sortkeycnt < num_sortkeys)
1734 : {
1735 : newclauses =
1736 10367 : trim_mergeclauses_for_inner_pathkeys(root,
1737 : mergeclauses,
1738 : trialsortkeys);
1739 : Assert(newclauses != NIL);
1740 : }
1741 : else
1742 199082 : newclauses = mergeclauses;
1743 209449 : try_mergejoin_path(root,
1744 : joinrel,
1745 : outerpath,
1746 : innerpath,
1747 : merge_pathkeys,
1748 : newclauses,
1749 : NIL,
1750 : NIL,
1751 : jointype,
1752 : extra,
1753 : is_partial);
1754 209449 : cheapest_total_inner = innerpath;
1755 : }
1756 : /* Same on the basis of cheapest startup cost ... */
1757 375212 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1758 : trialsortkeys,
1759 : NULL,
1760 : STARTUP_COST,
1761 : is_partial);
1762 375212 : if (innerpath != NULL &&
1763 18072 : (cheapest_startup_inner == NULL ||
1764 18072 : compare_path_costs(innerpath, cheapest_startup_inner,
1765 : STARTUP_COST) < 0))
1766 : {
1767 : /* Found a cheap (or even-cheaper) sorted path */
1768 207752 : if (innerpath != cheapest_total_inner)
1769 : {
1770 : /*
1771 : * Avoid rebuilding clause list if we already made one; saves
1772 : * memory in big join trees...
1773 : */
1774 1847 : if (newclauses == NIL)
1775 : {
1776 102 : if (sortkeycnt < num_sortkeys)
1777 : {
1778 : newclauses =
1779 61 : trim_mergeclauses_for_inner_pathkeys(root,
1780 : mergeclauses,
1781 : trialsortkeys);
1782 : Assert(newclauses != NIL);
1783 : }
1784 : else
1785 41 : newclauses = mergeclauses;
1786 : }
1787 1847 : try_mergejoin_path(root,
1788 : joinrel,
1789 : outerpath,
1790 : innerpath,
1791 : merge_pathkeys,
1792 : newclauses,
1793 : NIL,
1794 : NIL,
1795 : jointype,
1796 : extra,
1797 : is_partial);
1798 : }
1799 207752 : cheapest_startup_inner = innerpath;
1800 : }
1801 :
1802 : /*
1803 : * Don't consider truncated sortkeys if we need all clauses.
1804 : */
1805 375212 : if (useallclauses)
1806 49022 : break;
1807 : }
1808 : }
1809 :
1810 : /*
1811 : * match_unsorted_outer
1812 : * Creates possible join paths for processing a single join relation
1813 : * 'joinrel' by employing either iterative substitution or
1814 : * mergejoining on each of its possible outer paths (considering
1815 : * only outer paths that are already ordered well enough for merging).
1816 : *
1817 : * We always generate a nestloop path for each available outer path.
1818 : * In fact we may generate as many as five: one on the cheapest-total-cost
1819 : * inner path, one on the same with materialization, one on the
1820 : * cheapest-startup-cost inner path (if different), one on the
1821 : * cheapest-total inner-indexscan path (if any), and one on the
1822 : * cheapest-startup inner-indexscan path (if different).
1823 : *
1824 : * We also consider mergejoins if mergejoin clauses are available. See
1825 : * detailed comments in generate_mergejoin_paths.
1826 : *
1827 : * 'joinrel' is the join relation
1828 : * 'outerrel' is the outer join relation
1829 : * 'innerrel' is the inner join relation
1830 : * 'jointype' is the type of join to do
1831 : * 'extra' contains additional input values
1832 : */
1833 : static void
1834 616778 : match_unsorted_outer(PlannerInfo *root,
1835 : RelOptInfo *joinrel,
1836 : RelOptInfo *outerrel,
1837 : RelOptInfo *innerrel,
1838 : JoinType jointype,
1839 : JoinPathExtraData *extra)
1840 : {
1841 : bool nestjoinOK;
1842 : bool useallclauses;
1843 616778 : Path *inner_cheapest_total = innerrel->cheapest_total_path;
1844 616778 : Path *matpath = NULL;
1845 : ListCell *lc1;
1846 :
1847 : /*
1848 : * For now we do not support RIGHT_SEMI join in mergejoin or nestloop
1849 : * join.
1850 : */
1851 616778 : if (jointype == JOIN_RIGHT_SEMI)
1852 1106 : return;
1853 :
1854 : /*
1855 : * Nestloop only supports inner, left, semi, and anti joins. Also, if we
1856 : * are doing a right, right-anti or full mergejoin, we must use *all* the
1857 : * mergeclauses as join clauses, else we will not have a valid plan.
1858 : * (Although these two flags are currently inverses, keep them separate
1859 : * for clarity and possible future changes.)
1860 : */
1861 615672 : switch (jointype)
1862 : {
1863 544644 : case JOIN_INNER:
1864 : case JOIN_LEFT:
1865 : case JOIN_SEMI:
1866 : case JOIN_ANTI:
1867 544644 : nestjoinOK = true;
1868 544644 : useallclauses = false;
1869 544644 : break;
1870 71028 : case JOIN_RIGHT:
1871 : case JOIN_RIGHT_ANTI:
1872 : case JOIN_FULL:
1873 71028 : nestjoinOK = false;
1874 71028 : useallclauses = true;
1875 71028 : break;
1876 0 : default:
1877 0 : elog(ERROR, "unrecognized join type: %d",
1878 : (int) jointype);
1879 : nestjoinOK = false; /* keep compiler quiet */
1880 : useallclauses = false;
1881 : break;
1882 : }
1883 :
1884 : /*
1885 : * If inner_cheapest_total is parameterized by the outer rel, ignore it;
1886 : * we will consider it below as a member of cheapest_parameterized_paths,
1887 : * but the other possibilities considered in this routine aren't usable.
1888 : *
1889 : * Furthermore, if the inner side is a unique-ified relation, we cannot
1890 : * generate any valid paths here, because the inner rel's dependency on
1891 : * the outer rel makes unique-ification meaningless.
1892 : */
1893 615672 : if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
1894 : {
1895 12085 : inner_cheapest_total = NULL;
1896 :
1897 12085 : if (RELATION_WAS_MADE_UNIQUE(innerrel, extra->sjinfo, jointype))
1898 30 : return;
1899 : }
1900 :
1901 615642 : if (nestjoinOK)
1902 : {
1903 : /*
1904 : * Consider materializing the cheapest inner path, unless that is
1905 : * disabled or the path in question materializes its output anyway.
1906 : *
1907 : * At present, we only consider materialization for non-partial outer
1908 : * paths, so it's correct to test PGS_CONSIDER_NONPARTIAL here. If we
1909 : * ever want to consider materialization for partial paths, we'll need
1910 : * to create matpath whenever PGS_NESTLOOP_MATERIALIZE is set, use it
1911 : * for partial paths either way, and use it for non-partial paths only
1912 : * when PGS_CONSIDER_NONPARTIAL is also set.
1913 : */
1914 544614 : if ((extra->pgs_mask &
1915 : (PGS_NESTLOOP_MATERIALIZE | PGS_CONSIDER_NONPARTIAL)) ==
1916 479270 : (PGS_NESTLOOP_MATERIALIZE | PGS_CONSIDER_NONPARTIAL) &&
1917 468093 : inner_cheapest_total != NULL &&
1918 468093 : !ExecMaterializesOutput(inner_cheapest_total->pathtype))
1919 : matpath = (Path *)
1920 450682 : create_material_path(innerrel, inner_cheapest_total, true);
1921 : }
1922 :
1923 1985013 : foreach(lc1, outerrel->pathlist)
1924 : {
1925 1369371 : Path *outerpath = (Path *) lfirst(lc1);
1926 : List *merge_pathkeys;
1927 :
1928 : /*
1929 : * We cannot use an outer path that is parameterized by the inner rel.
1930 : */
1931 1369371 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
1932 210948 : continue;
1933 :
1934 : /*
1935 : * The result will have this sort order (even if it is implemented as
1936 : * a nestloop, and even if some of the mergeclauses are implemented by
1937 : * qpquals rather than as true mergeclauses):
1938 : */
1939 1158423 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1940 : outerpath->pathkeys);
1941 :
1942 1158423 : if (nestjoinOK)
1943 : {
1944 : /*
1945 : * Consider nestloop joins using this outer path and various
1946 : * available paths for the inner relation. We consider the
1947 : * cheapest-total paths for each available parameterization of the
1948 : * inner relation, including the unparameterized case.
1949 : */
1950 : ListCell *lc2;
1951 :
1952 2613500 : foreach(lc2, innerrel->cheapest_parameterized_paths)
1953 : {
1954 1586353 : Path *innerpath = (Path *) lfirst(lc2);
1955 : Path *mpath;
1956 :
1957 1586353 : try_nestloop_path(root,
1958 : joinrel,
1959 : outerpath,
1960 : innerpath,
1961 : merge_pathkeys,
1962 : jointype,
1963 : PGS_NESTLOOP_PLAIN,
1964 : extra);
1965 :
1966 : /*
1967 : * Try generating a memoize path and see if that makes the
1968 : * nested loop any cheaper.
1969 : */
1970 1586353 : mpath = get_memoize_path(root, innerrel, outerrel,
1971 : innerpath, outerpath, jointype,
1972 : extra);
1973 1586353 : if (mpath != NULL)
1974 216666 : try_nestloop_path(root,
1975 : joinrel,
1976 : outerpath,
1977 : mpath,
1978 : merge_pathkeys,
1979 : jointype,
1980 : PGS_NESTLOOP_MEMOIZE,
1981 : extra);
1982 : }
1983 :
1984 : /* Also consider materialized form of the cheapest inner path */
1985 1027147 : if (matpath != NULL)
1986 866948 : try_nestloop_path(root,
1987 : joinrel,
1988 : outerpath,
1989 : matpath,
1990 : merge_pathkeys,
1991 : jointype,
1992 : PGS_NESTLOOP_MATERIALIZE,
1993 : extra);
1994 : }
1995 :
1996 : /* Can't do anything else if inner rel is parameterized by outer */
1997 1158423 : if (inner_cheapest_total == NULL)
1998 19151 : continue;
1999 :
2000 : /* Generate merge join paths */
2001 1139272 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
2002 : jointype, extra, useallclauses,
2003 : inner_cheapest_total, merge_pathkeys,
2004 : false);
2005 : }
2006 :
2007 : /*
2008 : * Consider partial nestloop and mergejoin plan if outerrel has any
2009 : * partial path and the joinrel is parallel-safe. However, we can't
2010 : * handle joins needing lateral rels, since partial paths must not be
2011 : * parameterized. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT and
2012 : * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
2013 : */
2014 615642 : if (joinrel->consider_parallel &&
2015 530488 : jointype != JOIN_FULL &&
2016 471912 : jointype != JOIN_RIGHT &&
2017 468136 : jointype != JOIN_RIGHT_ANTI &&
2018 468136 : outerrel->partial_pathlist != NIL &&
2019 62867 : bms_is_empty(joinrel->lateral_relids))
2020 : {
2021 62867 : if (nestjoinOK)
2022 62867 : consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
2023 : jointype, extra);
2024 :
2025 : /*
2026 : * If inner_cheapest_total is NULL or non parallel-safe then find the
2027 : * cheapest total parallel safe path.
2028 : */
2029 62867 : if (inner_cheapest_total == NULL ||
2030 62517 : !inner_cheapest_total->parallel_safe)
2031 : {
2032 : inner_cheapest_total =
2033 688 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
2034 : }
2035 :
2036 62867 : if (inner_cheapest_total)
2037 62485 : consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
2038 : jointype, extra,
2039 : inner_cheapest_total);
2040 : }
2041 : }
2042 :
2043 : /*
2044 : * consider_parallel_mergejoin
2045 : * Try to build partial paths for a joinrel by joining a partial path
2046 : * for the outer relation to a complete path for the inner relation.
2047 : *
2048 : * 'joinrel' is the join relation
2049 : * 'outerrel' is the outer join relation
2050 : * 'innerrel' is the inner join relation
2051 : * 'jointype' is the type of join to do
2052 : * 'extra' contains additional input values
2053 : * 'inner_cheapest_total' cheapest total path for innerrel
2054 : */
2055 : static void
2056 62485 : consider_parallel_mergejoin(PlannerInfo *root,
2057 : RelOptInfo *joinrel,
2058 : RelOptInfo *outerrel,
2059 : RelOptInfo *innerrel,
2060 : JoinType jointype,
2061 : JoinPathExtraData *extra,
2062 : Path *inner_cheapest_total)
2063 : {
2064 : ListCell *lc1;
2065 :
2066 : /* generate merge join path for each partial outer path */
2067 141271 : foreach(lc1, outerrel->partial_pathlist)
2068 : {
2069 78786 : Path *outerpath = (Path *) lfirst(lc1);
2070 : List *merge_pathkeys;
2071 :
2072 : /*
2073 : * Figure out what useful ordering any paths we create will have.
2074 : */
2075 78786 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
2076 : outerpath->pathkeys);
2077 :
2078 78786 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
2079 : extra, false, inner_cheapest_total,
2080 : merge_pathkeys, true);
2081 : }
2082 62485 : }
2083 :
2084 : /*
2085 : * consider_parallel_nestloop
2086 : * Try to build partial paths for a joinrel by joining a partial path for the
2087 : * outer relation to a complete path for the inner relation.
2088 : *
2089 : * 'joinrel' is the join relation
2090 : * 'outerrel' is the outer join relation
2091 : * 'innerrel' is the inner join relation
2092 : * 'jointype' is the type of join to do
2093 : * 'extra' contains additional input values
2094 : */
2095 : static void
2096 62867 : consider_parallel_nestloop(PlannerInfo *root,
2097 : RelOptInfo *joinrel,
2098 : RelOptInfo *outerrel,
2099 : RelOptInfo *innerrel,
2100 : JoinType jointype,
2101 : JoinPathExtraData *extra)
2102 : {
2103 62867 : Path *inner_cheapest_total = innerrel->cheapest_total_path;
2104 62867 : Path *matpath = NULL;
2105 : ListCell *lc1;
2106 :
2107 : /*
2108 : * Consider materializing the cheapest inner path, unless: 1)
2109 : * materialization is disabled here, 2) the cheapest inner path is not
2110 : * parallel-safe, 3) the cheapest inner path is parameterized by the outer
2111 : * rel, or 4) the cheapest inner path materializes its output anyway.
2112 : */
2113 62867 : if ((extra->pgs_mask & PGS_NESTLOOP_MATERIALIZE) != 0 &&
2114 53509 : inner_cheapest_total->parallel_safe &&
2115 53308 : !PATH_PARAM_BY_REL(inner_cheapest_total, outerrel) &&
2116 53048 : !ExecMaterializesOutput(inner_cheapest_total->pathtype))
2117 : {
2118 : matpath = (Path *)
2119 52984 : create_material_path(innerrel, inner_cheapest_total, true);
2120 : Assert(matpath->parallel_safe);
2121 : }
2122 :
2123 142150 : foreach(lc1, outerrel->partial_pathlist)
2124 : {
2125 79283 : Path *outerpath = (Path *) lfirst(lc1);
2126 : List *pathkeys;
2127 : ListCell *lc2;
2128 :
2129 : /* Figure out what useful ordering any paths we create will have. */
2130 79283 : pathkeys = build_join_pathkeys(root, joinrel, jointype,
2131 : outerpath->pathkeys);
2132 :
2133 : /*
2134 : * Try the cheapest parameterized paths; only those which will produce
2135 : * an unparameterized path when joined to this outerrel will survive
2136 : * try_partial_nestloop_path. The cheapest unparameterized path is
2137 : * also in this list.
2138 : */
2139 166454 : foreach(lc2, innerrel->cheapest_parameterized_paths)
2140 : {
2141 87171 : Path *innerpath = (Path *) lfirst(lc2);
2142 : Path *mpath;
2143 :
2144 : /* Can't join to an inner path that is not parallel-safe */
2145 87171 : if (!innerpath->parallel_safe)
2146 397 : continue;
2147 :
2148 86774 : try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
2149 : pathkeys, jointype,
2150 : PGS_NESTLOOP_PLAIN, extra);
2151 :
2152 : /*
2153 : * Try generating a memoize path and see if that makes the nested
2154 : * loop any cheaper.
2155 : */
2156 86774 : mpath = get_memoize_path(root, innerrel, outerrel,
2157 : innerpath, outerpath, jointype,
2158 : extra);
2159 86774 : if (mpath != NULL)
2160 4011 : try_partial_nestloop_path(root, joinrel, outerpath, mpath,
2161 : pathkeys, jointype,
2162 : PGS_NESTLOOP_MEMOIZE, extra);
2163 : }
2164 :
2165 : /* Also consider materialized form of the cheapest inner path */
2166 79283 : if (matpath != NULL)
2167 66967 : try_partial_nestloop_path(root, joinrel, outerpath, matpath,
2168 : pathkeys, jointype,
2169 : PGS_NESTLOOP_MATERIALIZE, extra);
2170 : }
2171 62867 : }
2172 :
2173 : /*
2174 : * hash_inner_and_outer
2175 : * Create hashjoin join paths by explicitly hashing both the outer and
2176 : * inner keys of each available hash clause.
2177 : *
2178 : * 'joinrel' is the join relation
2179 : * 'outerrel' is the outer join relation
2180 : * 'innerrel' is the inner join relation
2181 : * 'jointype' is the type of join to do
2182 : * 'extra' contains additional input values
2183 : */
2184 : static void
2185 560614 : hash_inner_and_outer(PlannerInfo *root,
2186 : RelOptInfo *joinrel,
2187 : RelOptInfo *outerrel,
2188 : RelOptInfo *innerrel,
2189 : JoinType jointype,
2190 : JoinPathExtraData *extra)
2191 : {
2192 560614 : bool isouterjoin = IS_OUTER_JOIN(jointype);
2193 : List *hashclauses;
2194 : ListCell *l;
2195 :
2196 : /*
2197 : * We need to build only one hashclauses list for any given pair of outer
2198 : * and inner relations; all of the hashable clauses will be used as keys.
2199 : *
2200 : * Scan the join's restrictinfo list to find hashjoinable clauses that are
2201 : * usable with this pair of sub-relations.
2202 : */
2203 560614 : hashclauses = NIL;
2204 1191892 : foreach(l, extra->restrictlist)
2205 : {
2206 631278 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2207 :
2208 : /*
2209 : * If processing an outer join, only use its own join clauses for
2210 : * hashing. For inner joins we need not be so picky.
2211 : */
2212 631278 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
2213 11694 : continue;
2214 :
2215 619584 : if (!restrictinfo->can_join ||
2216 565907 : restrictinfo->hashjoinoperator == InvalidOid)
2217 66630 : continue; /* not hashjoinable */
2218 :
2219 : /*
2220 : * Check if clause has the form "outer op inner" or "inner op outer".
2221 : */
2222 552954 : if (!clause_sides_match_join(restrictinfo, outerrel->relids,
2223 : innerrel->relids))
2224 144 : continue; /* no good for these input relations */
2225 :
2226 : /*
2227 : * If clause has the form "inner op outer", check if its operator has
2228 : * valid commutator. This is necessary because hashclauses in this
2229 : * form will get commuted in createplan.c to put the outer var on the
2230 : * left (see get_switched_clauses). This probably shouldn't ever
2231 : * fail, since hashable operators ought to have commutators, but be
2232 : * paranoid.
2233 : *
2234 : * The clause being hashjoinable indicates that it's an OpExpr.
2235 : */
2236 828779 : if (!restrictinfo->outer_is_left &&
2237 275969 : !OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
2238 5 : continue;
2239 :
2240 552805 : hashclauses = lappend(hashclauses, restrictinfo);
2241 : }
2242 :
2243 : /* If we found any usable hashclauses, make paths */
2244 560614 : if (hashclauses)
2245 : {
2246 : /*
2247 : * We consider both the cheapest-total-cost and cheapest-startup-cost
2248 : * outer paths. There's no need to consider any but the
2249 : * cheapest-total-cost inner path, however.
2250 : */
2251 481624 : Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
2252 481624 : Path *cheapest_total_outer = outerrel->cheapest_total_path;
2253 481624 : Path *cheapest_total_inner = innerrel->cheapest_total_path;
2254 : ListCell *lc1;
2255 : ListCell *lc2;
2256 :
2257 : /*
2258 : * If either cheapest-total path is parameterized by the other rel, we
2259 : * can't use a hashjoin. (There's no use looking for alternative
2260 : * input paths, since these should already be the least-parameterized
2261 : * available paths.)
2262 : */
2263 481624 : if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
2264 479686 : PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
2265 3876 : return;
2266 :
2267 : /*
2268 : * Consider the cheapest startup outer together with the cheapest
2269 : * total inner, and then consider pairings of cheapest-total paths
2270 : * including parameterized ones. There is no use in generating
2271 : * parameterized paths on the basis of possibly cheap startup cost, so
2272 : * this is sufficient.
2273 : */
2274 477748 : if (cheapest_startup_outer != NULL)
2275 475999 : try_hashjoin_path(root,
2276 : joinrel,
2277 : cheapest_startup_outer,
2278 : cheapest_total_inner,
2279 : hashclauses,
2280 : jointype,
2281 : extra);
2282 :
2283 1185648 : foreach(lc1, outerrel->cheapest_parameterized_paths)
2284 : {
2285 707900 : Path *outerpath = (Path *) lfirst(lc1);
2286 :
2287 : /*
2288 : * We cannot use an outer path that is parameterized by the inner
2289 : * rel.
2290 : */
2291 707900 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
2292 184679 : continue;
2293 :
2294 1311877 : foreach(lc2, innerrel->cheapest_parameterized_paths)
2295 : {
2296 788656 : Path *innerpath = (Path *) lfirst(lc2);
2297 :
2298 : /*
2299 : * We cannot use an inner path that is parameterized by the
2300 : * outer rel, either.
2301 : */
2302 788656 : if (PATH_PARAM_BY_REL(innerpath, outerrel))
2303 208600 : continue;
2304 :
2305 580056 : if (outerpath == cheapest_startup_outer &&
2306 : innerpath == cheapest_total_inner)
2307 416946 : continue; /* already tried it */
2308 :
2309 163110 : try_hashjoin_path(root,
2310 : joinrel,
2311 : outerpath,
2312 : innerpath,
2313 : hashclauses,
2314 : jointype,
2315 : extra);
2316 : }
2317 : }
2318 :
2319 : /*
2320 : * If the joinrel is parallel-safe, we may be able to consider a
2321 : * partial hash join.
2322 : *
2323 : * However, we can't handle JOIN_RIGHT_SEMI, because the hash table is
2324 : * either a shared hash table or a private hash table per backend. In
2325 : * the shared case, there is no concurrency protection for the match
2326 : * flags, so multiple workers could inspect and set the flags
2327 : * concurrently, potentially producing incorrect results. In the
2328 : * private case, each worker has its own copy of the hash table, so no
2329 : * single process has all the match flags.
2330 : *
2331 : * Also, the resulting path must not be parameterized.
2332 : */
2333 477748 : if (joinrel->consider_parallel &&
2334 430759 : jointype != JOIN_RIGHT_SEMI &&
2335 430759 : outerrel->partial_pathlist != NIL &&
2336 59412 : bms_is_empty(joinrel->lateral_relids))
2337 : {
2338 : Path *cheapest_partial_outer;
2339 59412 : Path *cheapest_partial_inner = NULL;
2340 59412 : Path *cheapest_safe_inner = NULL;
2341 :
2342 59412 : cheapest_partial_outer =
2343 59412 : (Path *) linitial(outerrel->partial_pathlist);
2344 :
2345 : /*
2346 : * Can we use a partial inner plan too, so that we can build a
2347 : * shared hash table in parallel?
2348 : */
2349 59412 : if (innerrel->partial_pathlist != NIL &&
2350 : enable_parallel_hash)
2351 : {
2352 57878 : cheapest_partial_inner =
2353 57878 : (Path *) linitial(innerrel->partial_pathlist);
2354 57878 : try_partial_hashjoin_path(root, joinrel,
2355 : cheapest_partial_outer,
2356 : cheapest_partial_inner,
2357 : hashclauses, jointype, extra,
2358 : true /* parallel_hash */ );
2359 : }
2360 :
2361 : /*
2362 : * Normally, given that the joinrel is parallel-safe, the cheapest
2363 : * total inner path will also be parallel-safe, but if not, we'll
2364 : * have to search for the cheapest safe, unparameterized inner
2365 : * path. If full, right, or right-anti join, we can't use
2366 : * parallelism (building the hash table in each backend) because
2367 : * no one process has all the match bits.
2368 : */
2369 59412 : if (jointype == JOIN_FULL ||
2370 56411 : jointype == JOIN_RIGHT ||
2371 : jointype == JOIN_RIGHT_ANTI)
2372 3258 : cheapest_safe_inner = NULL;
2373 56154 : else if (cheapest_total_inner->parallel_safe)
2374 55935 : cheapest_safe_inner = cheapest_total_inner;
2375 : else
2376 : cheapest_safe_inner =
2377 219 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
2378 :
2379 59412 : if (cheapest_safe_inner != NULL)
2380 56126 : try_partial_hashjoin_path(root, joinrel,
2381 : cheapest_partial_outer,
2382 : cheapest_safe_inner,
2383 : hashclauses, jointype, extra,
2384 : false /* parallel_hash */ );
2385 : }
2386 : }
2387 : }
2388 :
2389 : /*
2390 : * select_mergejoin_clauses
2391 : * Select mergejoin clauses that are usable for a particular join.
2392 : * Returns a list of RestrictInfo nodes for those clauses.
2393 : *
2394 : * *mergejoin_allowed is normally set to true, but it is set to false if
2395 : * this is a right-semi join, or this is a right/right-anti/full join and
2396 : * there are nonmergejoinable join clauses. The executor's mergejoin
2397 : * machinery cannot handle such cases, so we have to avoid generating a
2398 : * mergejoin plan. (Note that this flag does NOT consider whether there are
2399 : * actually any mergejoinable clauses. This is correct because in some
2400 : * cases we need to build a clauseless mergejoin. Simply returning NIL is
2401 : * therefore not enough to distinguish safe from unsafe cases.)
2402 : *
2403 : * We also mark each selected RestrictInfo to show which side is currently
2404 : * being considered as outer. These are transient markings that are only
2405 : * good for the duration of the current add_paths_to_joinrel() call!
2406 : *
2407 : * We examine each restrictinfo clause known for the join to see
2408 : * if it is mergejoinable and involves vars from the two sub-relations
2409 : * currently of interest.
2410 : */
2411 : static List *
2412 553763 : select_mergejoin_clauses(PlannerInfo *root,
2413 : RelOptInfo *joinrel,
2414 : RelOptInfo *outerrel,
2415 : RelOptInfo *innerrel,
2416 : List *restrictlist,
2417 : JoinType jointype,
2418 : bool *mergejoin_allowed)
2419 : {
2420 553763 : List *result_list = NIL;
2421 553763 : bool isouterjoin = IS_OUTER_JOIN(jointype);
2422 553763 : bool have_nonmergeable_joinclause = false;
2423 : ListCell *l;
2424 :
2425 : /*
2426 : * For now we do not support RIGHT_SEMI join in mergejoin: the benefit of
2427 : * swapping inputs tends to be small here.
2428 : */
2429 553763 : if (jointype == JOIN_RIGHT_SEMI)
2430 : {
2431 4838 : *mergejoin_allowed = false;
2432 4838 : return NIL;
2433 : }
2434 :
2435 1168673 : foreach(l, restrictlist)
2436 : {
2437 619748 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2438 :
2439 : /*
2440 : * If processing an outer join, only use its own join clauses in the
2441 : * merge. For inner joins we can use pushed-down clauses too. (Note:
2442 : * we don't set have_nonmergeable_joinclause here because pushed-down
2443 : * clauses will become otherquals not joinquals.)
2444 : */
2445 619748 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
2446 11335 : continue;
2447 :
2448 : /* Check that clause is a mergeable operator clause */
2449 608413 : if (!restrictinfo->can_join ||
2450 554770 : restrictinfo->mergeopfamilies == NIL)
2451 : {
2452 : /*
2453 : * The executor can handle extra joinquals that are constants, but
2454 : * not anything else, when doing right/right-anti/full merge join.
2455 : * (The reason to support constants is so we can do FULL JOIN ON
2456 : * FALSE.)
2457 : */
2458 65611 : if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
2459 58753 : have_nonmergeable_joinclause = true;
2460 65611 : continue; /* not mergejoinable */
2461 : }
2462 :
2463 : /*
2464 : * Check if clause has the form "outer op inner" or "inner op outer".
2465 : */
2466 542802 : if (!clause_sides_match_join(restrictinfo, outerrel->relids,
2467 : innerrel->relids))
2468 : {
2469 672 : have_nonmergeable_joinclause = true;
2470 672 : continue; /* no good for these input relations */
2471 : }
2472 :
2473 : /*
2474 : * If clause has the form "inner op outer", check if its operator has
2475 : * valid commutator. This is necessary because mergejoin clauses in
2476 : * this form will get commuted in createplan.c to put the outer var on
2477 : * the left (see get_switched_clauses). This probably shouldn't ever
2478 : * fail, since mergejoinable operators ought to have commutators, but
2479 : * be paranoid.
2480 : *
2481 : * The clause being mergejoinable indicates that it's an OpExpr.
2482 : */
2483 811130 : if (!restrictinfo->outer_is_left &&
2484 269000 : !OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
2485 : {
2486 26 : have_nonmergeable_joinclause = true;
2487 26 : continue;
2488 : }
2489 :
2490 : /*
2491 : * Insist that each side have a non-redundant eclass. This
2492 : * restriction is needed because various bits of the planner expect
2493 : * that each clause in a merge be associable with some pathkey in a
2494 : * canonical pathkey list, but redundant eclasses can't appear in
2495 : * canonical sort orderings. (XXX it might be worth relaxing this,
2496 : * but not enough time to address it for 8.3.)
2497 : */
2498 542104 : update_mergeclause_eclasses(root, restrictinfo);
2499 :
2500 542104 : if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
2501 542072 : EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
2502 : {
2503 84 : have_nonmergeable_joinclause = true;
2504 84 : continue; /* can't handle redundant eclasses */
2505 : }
2506 :
2507 542020 : result_list = lappend(result_list, restrictinfo);
2508 : }
2509 :
2510 : /*
2511 : * Report whether mergejoin is allowed (see comment at top of function).
2512 : */
2513 548925 : switch (jointype)
2514 : {
2515 70905 : case JOIN_RIGHT:
2516 : case JOIN_RIGHT_ANTI:
2517 : case JOIN_FULL:
2518 70905 : *mergejoin_allowed = !have_nonmergeable_joinclause;
2519 70905 : break;
2520 478020 : default:
2521 478020 : *mergejoin_allowed = true;
2522 478020 : break;
2523 : }
2524 :
2525 548925 : return result_list;
2526 : }
|