Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * joinpath.c
4 : * Routines to find all possible paths for processing a set of joins
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/path/joinpath.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <math.h>
18 :
19 : #include "executor/executor.h"
20 : #include "foreign/fdwapi.h"
21 : #include "nodes/nodeFuncs.h"
22 : #include "optimizer/cost.h"
23 : #include "optimizer/optimizer.h"
24 : #include "optimizer/pathnode.h"
25 : #include "optimizer/paths.h"
26 : #include "optimizer/placeholder.h"
27 : #include "optimizer/planmain.h"
28 : #include "utils/lsyscache.h"
29 : #include "utils/typcache.h"
30 :
31 : /* Hook for plugins to get control in add_paths_to_joinrel() */
32 : set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
33 :
34 : /*
35 : * Paths parameterized by a parent rel can be considered to be parameterized
36 : * by any of its children, when we are performing partitionwise joins. These
37 : * macros simplify checking for such cases. Beware multiple eval of args.
38 : */
39 : #define PATH_PARAM_BY_PARENT(path, rel) \
40 : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
41 : (rel)->top_parent_relids))
42 : #define PATH_PARAM_BY_REL_SELF(path, rel) \
43 : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
44 :
45 : #define PATH_PARAM_BY_REL(path, rel) \
46 : (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
47 :
48 : static void try_partial_mergejoin_path(PlannerInfo *root,
49 : RelOptInfo *joinrel,
50 : Path *outer_path,
51 : Path *inner_path,
52 : List *pathkeys,
53 : List *mergeclauses,
54 : List *outersortkeys,
55 : List *innersortkeys,
56 : JoinType jointype,
57 : JoinPathExtraData *extra);
58 : static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
59 : RelOptInfo *outerrel, RelOptInfo *innerrel,
60 : JoinType jointype, JoinPathExtraData *extra);
61 : static inline bool clause_sides_match_join(RestrictInfo *rinfo,
62 : RelOptInfo *outerrel,
63 : RelOptInfo *innerrel);
64 : static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
65 : RelOptInfo *outerrel, RelOptInfo *innerrel,
66 : JoinType jointype, JoinPathExtraData *extra);
67 : static void consider_parallel_nestloop(PlannerInfo *root,
68 : RelOptInfo *joinrel,
69 : RelOptInfo *outerrel,
70 : RelOptInfo *innerrel,
71 : JoinType jointype,
72 : JoinPathExtraData *extra);
73 : static void consider_parallel_mergejoin(PlannerInfo *root,
74 : RelOptInfo *joinrel,
75 : RelOptInfo *outerrel,
76 : RelOptInfo *innerrel,
77 : JoinType jointype,
78 : JoinPathExtraData *extra,
79 : Path *inner_cheapest_total);
80 : static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
81 : RelOptInfo *outerrel, RelOptInfo *innerrel,
82 : JoinType jointype, JoinPathExtraData *extra);
83 : static List *select_mergejoin_clauses(PlannerInfo *root,
84 : RelOptInfo *joinrel,
85 : RelOptInfo *outerrel,
86 : RelOptInfo *innerrel,
87 : List *restrictlist,
88 : JoinType jointype,
89 : bool *mergejoin_allowed);
90 : static void generate_mergejoin_paths(PlannerInfo *root,
91 : RelOptInfo *joinrel,
92 : RelOptInfo *innerrel,
93 : Path *outerpath,
94 : JoinType jointype,
95 : JoinPathExtraData *extra,
96 : bool useallclauses,
97 : Path *inner_cheapest_total,
98 : List *merge_pathkeys,
99 : bool is_partial);
100 :
101 :
102 : /*
103 : * add_paths_to_joinrel
104 : * Given a join relation and two component rels from which it can be made,
105 : * consider all possible paths that use the two component rels as outer
106 : * and inner rel respectively. Add these paths to the join rel's pathlist
107 : * if they survive comparison with other paths (and remove any existing
108 : * paths that are dominated by these paths).
109 : *
110 : * Modifies the pathlist field of the joinrel node to contain the best
111 : * paths found so far.
112 : *
113 : * jointype is not necessarily the same as sjinfo->jointype; it might be
114 : * "flipped around" if we are considering joining the rels in the opposite
115 : * direction from what's indicated in sjinfo.
116 : *
117 : * Also, this routine and others in this module accept the special JoinTypes
118 : * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
119 : * unique-ify the outer or inner relation and then apply a regular inner
120 : * join. These values are not allowed to propagate outside this module,
121 : * however. Path cost estimation code may need to recognize that it's
122 : * dealing with such a case --- the combination of nominal jointype INNER
123 : * with sjinfo->jointype == JOIN_SEMI indicates that.
124 : */
125 : void
126 525716 : add_paths_to_joinrel(PlannerInfo *root,
127 : RelOptInfo *joinrel,
128 : RelOptInfo *outerrel,
129 : RelOptInfo *innerrel,
130 : JoinType jointype,
131 : SpecialJoinInfo *sjinfo,
132 : List *restrictlist)
133 : {
134 : JoinPathExtraData extra;
135 525716 : bool mergejoin_allowed = true;
136 : ListCell *lc;
137 : Relids joinrelids;
138 :
139 : /*
140 : * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
141 : * between child relations, even if there is a SpecialJoinInfo node for
142 : * the join between the topmost parents. So, while calculating Relids set
143 : * representing the restriction, consider relids of topmost parent of
144 : * partitions.
145 : */
146 525716 : if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
147 11176 : joinrelids = joinrel->top_parent_relids;
148 : else
149 514540 : joinrelids = joinrel->relids;
150 :
151 525716 : extra.restrictlist = restrictlist;
152 525716 : extra.mergeclause_list = NIL;
153 525716 : extra.sjinfo = sjinfo;
154 525716 : extra.param_source_rels = NULL;
155 :
156 : /*
157 : * See if the inner relation is provably unique for this outer rel.
158 : *
159 : * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
160 : * matter since the executor can make the equivalent optimization anyway;
161 : * we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we
162 : * must be considering a semijoin whose inner side is not provably unique
163 : * (else reduce_unique_semijoins would've simplified it), so there's no
164 : * point in calling innerrel_is_unique. However, if the LHS covers all of
165 : * the semijoin's min_lefthand, then it's appropriate to set inner_unique
166 : * because the path produced by create_unique_path will be unique relative
167 : * to the LHS. (If we have an LHS that's only part of the min_lefthand,
168 : * that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
169 : * letting that value escape this module.
170 : */
171 525716 : switch (jointype)
172 : {
173 8688 : case JOIN_SEMI:
174 : case JOIN_ANTI:
175 :
176 : /*
177 : * XXX it may be worth proving this to allow a Memoize to be
178 : * considered for Nested Loop Semi/Anti Joins.
179 : */
180 8688 : extra.inner_unique = false; /* well, unproven */
181 8688 : break;
182 4258 : case JOIN_UNIQUE_INNER:
183 8516 : extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
184 4258 : outerrel->relids);
185 4258 : break;
186 4258 : case JOIN_UNIQUE_OUTER:
187 4258 : extra.inner_unique = innerrel_is_unique(root,
188 : joinrel->relids,
189 : outerrel->relids,
190 : innerrel,
191 : JOIN_INNER,
192 : restrictlist,
193 : false);
194 4258 : break;
195 508512 : default:
196 508512 : extra.inner_unique = innerrel_is_unique(root,
197 : joinrel->relids,
198 : outerrel->relids,
199 : innerrel,
200 : jointype,
201 : restrictlist,
202 : false);
203 508512 : break;
204 : }
205 :
206 : /*
207 : * Find potential mergejoin clauses. We can skip this if we are not
208 : * interested in doing a mergejoin. However, mergejoin may be our only
209 : * way of implementing a full outer join, so override enable_mergejoin if
210 : * it's a full join.
211 : */
212 525716 : if (enable_mergejoin || jointype == JOIN_FULL)
213 524188 : extra.mergeclause_list = select_mergejoin_clauses(root,
214 : joinrel,
215 : outerrel,
216 : innerrel,
217 : restrictlist,
218 : jointype,
219 : &mergejoin_allowed);
220 :
221 : /*
222 : * If it's SEMI, ANTI, or inner_unique join, compute correction factors
223 : * for cost estimation. These will be the same for all paths.
224 : */
225 525716 : if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
226 160974 : compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel,
227 : jointype, sjinfo, restrictlist,
228 : &extra.semifactors);
229 :
230 : /*
231 : * Decide whether it's sensible to generate parameterized paths for this
232 : * joinrel, and if so, which relations such paths should require. There
233 : * is usually no need to create a parameterized result path unless there
234 : * is a join order restriction that prevents joining one of our input rels
235 : * directly to the parameter source rel instead of joining to the other
236 : * input rel. (But see allow_star_schema_join().) This restriction
237 : * reduces the number of parameterized paths we have to deal with at
238 : * higher join levels, without compromising the quality of the resulting
239 : * plan. We express the restriction as a Relids set that must overlap the
240 : * parameterization of any proposed join path. Note: param_source_rels
241 : * should contain only baserels, not OJ relids, so starting from
242 : * all_baserels not all_query_rels is correct.
243 : */
244 1085948 : foreach(lc, root->join_info_list)
245 : {
246 560232 : SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
247 :
248 : /*
249 : * SJ is relevant to this join if we have some part of its RHS
250 : * (possibly not all of it), and haven't yet joined to its LHS. (This
251 : * test is pretty simplistic, but should be sufficient considering the
252 : * join has already been proven legal.) If the SJ is relevant, it
253 : * presents constraints for joining to anything not in its RHS.
254 : */
255 560232 : if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
256 372788 : !bms_overlap(joinrelids, sjinfo2->min_lefthand))
257 19664 : extra.param_source_rels = bms_join(extra.param_source_rels,
258 19664 : bms_difference(root->all_baserels,
259 19664 : sjinfo2->min_righthand));
260 :
261 : /* full joins constrain both sides symmetrically */
262 565088 : if (sjinfo2->jointype == JOIN_FULL &&
263 4856 : bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
264 4800 : !bms_overlap(joinrelids, sjinfo2->min_righthand))
265 672 : extra.param_source_rels = bms_join(extra.param_source_rels,
266 672 : bms_difference(root->all_baserels,
267 672 : sjinfo2->min_lefthand));
268 : }
269 :
270 : /*
271 : * However, when a LATERAL subquery is involved, there will simply not be
272 : * any paths for the joinrel that aren't parameterized by whatever the
273 : * subquery is parameterized by, unless its parameterization is resolved
274 : * within the joinrel. So we might as well allow additional dependencies
275 : * on whatever residual lateral dependencies the joinrel will have.
276 : */
277 1051432 : extra.param_source_rels = bms_add_members(extra.param_source_rels,
278 525716 : joinrel->lateral_relids);
279 :
280 : /*
281 : * 1. Consider mergejoin paths where both relations must be explicitly
282 : * sorted. Skip this if we can't mergejoin.
283 : */
284 525716 : if (mergejoin_allowed)
285 514284 : sort_inner_and_outer(root, joinrel, outerrel, innerrel,
286 : jointype, &extra);
287 :
288 : /*
289 : * 2. Consider paths where the outer relation need not be explicitly
290 : * sorted. This includes both nestloops and mergejoins where the outer
291 : * path is already ordered. Again, skip this if we can't mergejoin.
292 : * (That's okay because we know that nestloop can't handle
293 : * right/right-anti/right-semi/full joins at all, so it wouldn't work in
294 : * the prohibited cases either.)
295 : */
296 525716 : if (mergejoin_allowed)
297 514284 : match_unsorted_outer(root, joinrel, outerrel, innerrel,
298 : jointype, &extra);
299 :
300 : #ifdef NOT_USED
301 :
302 : /*
303 : * 3. Consider paths where the inner relation need not be explicitly
304 : * sorted. This includes mergejoins only (nestloops were already built in
305 : * match_unsorted_outer).
306 : *
307 : * Diked out as redundant 2/13/2000 -- tgl. There isn't any really
308 : * significant difference between the inner and outer side of a mergejoin,
309 : * so match_unsorted_inner creates no paths that aren't equivalent to
310 : * those made by match_unsorted_outer when add_paths_to_joinrel() is
311 : * invoked with the two rels given in the other order.
312 : */
313 : if (mergejoin_allowed)
314 : match_unsorted_inner(root, joinrel, outerrel, innerrel,
315 : jointype, &extra);
316 : #endif
317 :
318 : /*
319 : * 4. Consider paths where both outer and inner relations must be hashed
320 : * before being joined. As above, disregard enable_hashjoin for full
321 : * joins, because there may be no other alternative.
322 : */
323 525716 : if (enable_hashjoin || jointype == JOIN_FULL)
324 522548 : hash_inner_and_outer(root, joinrel, outerrel, innerrel,
325 : jointype, &extra);
326 :
327 : /*
328 : * 5. If inner and outer relations are foreign tables (or joins) belonging
329 : * to the same server and assigned to the same user to check access
330 : * permissions as, give the FDW a chance to push down joins.
331 : */
332 525716 : if (joinrel->fdwroutine &&
333 2548 : joinrel->fdwroutine->GetForeignJoinPaths)
334 2544 : joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
335 : outerrel, innerrel,
336 : jointype, &extra);
337 :
338 : /*
339 : * 6. Finally, give extensions a chance to manipulate the path list. They
340 : * could add new paths (such as CustomPaths) by calling add_path(), or
341 : * add_partial_path() if parallel aware. They could also delete or modify
342 : * paths added by the core code.
343 : */
344 525716 : if (set_join_pathlist_hook)
345 0 : set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
346 : jointype, &extra);
347 525716 : }
348 :
349 : /*
350 : * We override the param_source_rels heuristic to accept nestloop paths in
351 : * which the outer rel satisfies some but not all of the inner path's
352 : * parameterization. This is necessary to get good plans for star-schema
353 : * scenarios, in which a parameterized path for a large table may require
354 : * parameters from multiple small tables that will not get joined directly to
355 : * each other. We can handle that by stacking nestloops that have the small
356 : * tables on the outside; but this breaks the rule the param_source_rels
357 : * heuristic is based on, namely that parameters should not be passed down
358 : * across joins unless there's a join-order-constraint-based reason to do so.
359 : * So we ignore the param_source_rels restriction when this case applies.
360 : *
361 : * allow_star_schema_join() returns true if the param_source_rels restriction
362 : * should be overridden, ie, it's okay to perform this join.
363 : */
364 : static inline bool
365 217960 : allow_star_schema_join(PlannerInfo *root,
366 : Relids outerrelids,
367 : Relids inner_paramrels)
368 : {
369 : /*
370 : * It's a star-schema case if the outer rel provides some but not all of
371 : * the inner rel's parameterization.
372 : */
373 254220 : return (bms_overlap(inner_paramrels, outerrelids) &&
374 36260 : bms_nonempty_difference(inner_paramrels, outerrelids));
375 : }
376 :
377 : /*
378 : * If the parameterization is only partly satisfied by the outer rel,
379 : * the unsatisfied part can't include any outer-join relids that could
380 : * null rels of the satisfied part. That would imply that we're trying
381 : * to use a clause involving a Var with nonempty varnullingrels at
382 : * a join level where that value isn't yet computable.
383 : *
384 : * In practice, this test never finds a problem because earlier join order
385 : * restrictions prevent us from attempting a join that would cause a problem.
386 : * (That's unsurprising, because the code worked before we ever added
387 : * outer-join relids to expression relids.) It still seems worth checking
388 : * as a backstop, but we only do so in assert-enabled builds.
389 : */
390 : #ifdef USE_ASSERT_CHECKING
391 : static inline bool
392 : have_unsafe_outer_join_ref(PlannerInfo *root,
393 : Relids outerrelids,
394 : Relids inner_paramrels)
395 : {
396 : bool result = false;
397 : Relids unsatisfied = bms_difference(inner_paramrels, outerrelids);
398 : Relids satisfied = bms_intersect(inner_paramrels, outerrelids);
399 :
400 : if (bms_overlap(unsatisfied, root->outer_join_rels))
401 : {
402 : ListCell *lc;
403 :
404 : foreach(lc, root->join_info_list)
405 : {
406 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
407 :
408 : if (!bms_is_member(sjinfo->ojrelid, unsatisfied))
409 : continue; /* not relevant */
410 : if (bms_overlap(satisfied, sjinfo->min_righthand) ||
411 : (sjinfo->jointype == JOIN_FULL &&
412 : bms_overlap(satisfied, sjinfo->min_lefthand)))
413 : {
414 : result = true; /* doesn't work */
415 : break;
416 : }
417 : }
418 : }
419 :
420 : /* Waste no memory when we reject a path here */
421 : bms_free(unsatisfied);
422 : bms_free(satisfied);
423 :
424 : return result;
425 : }
426 : #endif /* USE_ASSERT_CHECKING */
427 :
428 : /*
429 : * paraminfo_get_equal_hashops
430 : * Determine if the clauses in param_info and innerrel's lateral vars
431 : * can be hashed.
432 : * Returns true if hashing is possible, otherwise false.
433 : *
434 : * Additionally, on success we collect the outer expressions and the
435 : * appropriate equality operators for each hashable parameter to innerrel.
436 : * These are returned in parallel lists in *param_exprs and *operators.
437 : * We also set *binary_mode to indicate whether strict binary matching is
438 : * required.
439 : */
440 : static bool
441 320430 : paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
442 : RelOptInfo *outerrel, RelOptInfo *innerrel,
443 : List *ph_lateral_vars, List **param_exprs,
444 : List **operators, bool *binary_mode)
445 :
446 : {
447 : List *lateral_vars;
448 : ListCell *lc;
449 :
450 320430 : *param_exprs = NIL;
451 320430 : *operators = NIL;
452 320430 : *binary_mode = false;
453 :
454 : /* Add join clauses from param_info to the hash key */
455 320430 : if (param_info != NULL)
456 : {
457 320430 : List *clauses = param_info->ppi_clauses;
458 :
459 575960 : foreach(lc, clauses)
460 : {
461 339658 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
462 : OpExpr *opexpr;
463 : Node *expr;
464 : Oid hasheqoperator;
465 :
466 339658 : opexpr = (OpExpr *) rinfo->clause;
467 :
468 : /*
469 : * Bail if the rinfo is not compatible. We need a join OpExpr
470 : * with 2 args.
471 : */
472 339658 : if (!IsA(opexpr, OpExpr) || list_length(opexpr->args) != 2 ||
473 316824 : !clause_sides_match_join(rinfo, outerrel, innerrel))
474 : {
475 83972 : list_free(*operators);
476 83972 : list_free(*param_exprs);
477 84128 : return false;
478 : }
479 :
480 255686 : if (rinfo->outer_is_left)
481 : {
482 145740 : expr = (Node *) linitial(opexpr->args);
483 145740 : hasheqoperator = rinfo->left_hasheqoperator;
484 : }
485 : else
486 : {
487 109946 : expr = (Node *) lsecond(opexpr->args);
488 109946 : hasheqoperator = rinfo->right_hasheqoperator;
489 : }
490 :
491 : /* can't do memoize if we can't hash the outer type */
492 255686 : if (!OidIsValid(hasheqoperator))
493 : {
494 156 : list_free(*operators);
495 156 : list_free(*param_exprs);
496 156 : return false;
497 : }
498 :
499 : /*
500 : * 'expr' may already exist as a parameter from a previous item in
501 : * ppi_clauses. No need to include it again, however we'd better
502 : * ensure we do switch into binary mode if required. See below.
503 : */
504 255530 : if (!list_member(*param_exprs, expr))
505 : {
506 255524 : *operators = lappend_oid(*operators, hasheqoperator);
507 255524 : *param_exprs = lappend(*param_exprs, expr);
508 : }
509 :
510 : /*
511 : * When the join operator is not hashable then it's possible that
512 : * the operator will be able to distinguish something that the
513 : * hash equality operator could not. For example with floating
514 : * point types -0.0 and +0.0 are classed as equal by the hash
515 : * function and equality function, but some other operator may be
516 : * able to tell those values apart. This means that we must put
517 : * memoize into binary comparison mode so that it does bit-by-bit
518 : * comparisons rather than a "logical" comparison as it would
519 : * using the hash equality operator.
520 : */
521 255530 : if (!OidIsValid(rinfo->hashjoinoperator))
522 1070 : *binary_mode = true;
523 : }
524 : }
525 :
526 : /* Now add any lateral vars to the cache key too */
527 236302 : lateral_vars = list_concat(ph_lateral_vars, innerrel->lateral_vars);
528 240424 : foreach(lc, lateral_vars)
529 : {
530 4320 : Node *expr = (Node *) lfirst(lc);
531 : TypeCacheEntry *typentry;
532 :
533 : /* Reject if there are any volatile functions in lateral vars */
534 4320 : if (contain_volatile_functions(expr))
535 : {
536 0 : list_free(*operators);
537 0 : list_free(*param_exprs);
538 198 : return false;
539 : }
540 :
541 4320 : typentry = lookup_type_cache(exprType(expr),
542 : TYPECACHE_HASH_PROC | TYPECACHE_EQ_OPR);
543 :
544 : /* can't use memoize without a valid hash proc and equals operator */
545 4320 : if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr))
546 : {
547 198 : list_free(*operators);
548 198 : list_free(*param_exprs);
549 198 : return false;
550 : }
551 :
552 : /*
553 : * 'expr' may already exist as a parameter from the ppi_clauses. No
554 : * need to include it again, however we'd better ensure we do switch
555 : * into binary mode.
556 : */
557 4122 : if (!list_member(*param_exprs, expr))
558 : {
559 3684 : *operators = lappend_oid(*operators, typentry->eq_opr);
560 3684 : *param_exprs = lappend(*param_exprs, expr);
561 : }
562 :
563 : /*
564 : * We must go into binary mode as we don't have too much of an idea of
565 : * how these lateral Vars are being used. See comment above when we
566 : * set *binary_mode for the non-lateral Var case. This could be
567 : * relaxed a bit if we had the RestrictInfos and knew the operators
568 : * being used, however for cases like Vars that are arguments to
569 : * functions we must operate in binary mode as we don't have
570 : * visibility into what the function is doing with the Vars.
571 : */
572 4122 : *binary_mode = true;
573 : }
574 :
575 : /* We're okay to use memoize */
576 236104 : return true;
577 : }
578 :
579 : /*
580 : * extract_lateral_vars_from_PHVs
581 : * Extract lateral references within PlaceHolderVars that are due to be
582 : * evaluated at 'innerrelids'.
583 : */
584 : static List *
585 953964 : extract_lateral_vars_from_PHVs(PlannerInfo *root, Relids innerrelids)
586 : {
587 953964 : List *ph_lateral_vars = NIL;
588 : ListCell *lc;
589 :
590 : /* Nothing would be found if the query contains no LATERAL RTEs */
591 953964 : if (!root->hasLateralRTEs)
592 935138 : return NIL;
593 :
594 : /*
595 : * No need to consider PHVs that are due to be evaluated at joinrels,
596 : * since we do not add Memoize nodes on top of joinrel paths.
597 : */
598 18826 : if (bms_membership(innerrelids) == BMS_MULTIPLE)
599 5734 : return NIL;
600 :
601 15236 : foreach(lc, root->placeholder_list)
602 : {
603 2144 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
604 : List *vars;
605 : ListCell *cell;
606 :
607 : /* PHV is uninteresting if no lateral refs */
608 2144 : if (phinfo->ph_lateral == NULL)
609 978 : continue;
610 :
611 : /* PHV is uninteresting if not due to be evaluated at innerrelids */
612 1166 : if (!bms_equal(phinfo->ph_eval_at, innerrelids))
613 970 : continue;
614 :
615 : /*
616 : * If the PHV does not reference any rels in innerrelids, use its
617 : * contained expression as a cache key rather than extracting the
618 : * Vars/PHVs from it and using those. This can be beneficial in cases
619 : * where the expression results in fewer distinct values to cache
620 : * tuples for.
621 : */
622 196 : if (!bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
623 : innerrelids))
624 : {
625 184 : ph_lateral_vars = lappend(ph_lateral_vars, phinfo->ph_var->phexpr);
626 184 : continue;
627 : }
628 :
629 : /* Fetch Vars and PHVs of lateral references within PlaceHolderVars */
630 12 : vars = pull_vars_of_level((Node *) phinfo->ph_var->phexpr, 0);
631 36 : foreach(cell, vars)
632 : {
633 24 : Node *node = (Node *) lfirst(cell);
634 :
635 24 : if (IsA(node, Var))
636 : {
637 12 : Var *var = (Var *) node;
638 :
639 : Assert(var->varlevelsup == 0);
640 :
641 12 : if (bms_is_member(var->varno, phinfo->ph_lateral))
642 0 : ph_lateral_vars = lappend(ph_lateral_vars, node);
643 : }
644 12 : else if (IsA(node, PlaceHolderVar))
645 : {
646 12 : PlaceHolderVar *phv = (PlaceHolderVar *) node;
647 :
648 : Assert(phv->phlevelsup == 0);
649 :
650 12 : if (bms_is_subset(find_placeholder_info(root, phv)->ph_eval_at,
651 12 : phinfo->ph_lateral))
652 12 : ph_lateral_vars = lappend(ph_lateral_vars, node);
653 : }
654 : else
655 : Assert(false);
656 : }
657 :
658 12 : list_free(vars);
659 : }
660 :
661 13092 : return ph_lateral_vars;
662 : }
663 :
664 : /*
665 : * get_memoize_path
666 : * If possible, make and return a Memoize path atop of 'inner_path'.
667 : * Otherwise return NULL.
668 : *
669 : * Note that currently we do not add Memoize nodes on top of join relation
670 : * paths. This is because the ParamPathInfos for join relation paths do not
671 : * maintain ppi_clauses, as the set of relevant clauses varies depending on how
672 : * the join is formed. In addition, joinrels do not maintain lateral_vars. So
673 : * we do not have a way to extract cache keys from joinrels.
674 : */
675 : static Path *
676 1352230 : get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
677 : RelOptInfo *outerrel, Path *inner_path,
678 : Path *outer_path, JoinType jointype,
679 : JoinPathExtraData *extra)
680 : {
681 : List *param_exprs;
682 : List *hash_operators;
683 : ListCell *lc;
684 : bool binary_mode;
685 : List *ph_lateral_vars;
686 :
687 : /* Obviously not if it's disabled */
688 1352230 : if (!enable_memoize)
689 388 : return NULL;
690 :
691 : /*
692 : * We can safely not bother with all this unless we expect to perform more
693 : * than one inner scan. The first scan is always going to be a cache
694 : * miss. This would likely fail later anyway based on costs, so this is
695 : * really just to save some wasted effort.
696 : */
697 1351842 : if (outer_path->parent->rows < 2)
698 397878 : return NULL;
699 :
700 : /*
701 : * Extract lateral Vars/PHVs within PlaceHolderVars that are due to be
702 : * evaluated at innerrel. These lateral Vars/PHVs could be used as
703 : * memoize cache keys.
704 : */
705 953964 : ph_lateral_vars = extract_lateral_vars_from_PHVs(root, innerrel->relids);
706 :
707 : /*
708 : * We can only have a memoize node when there's some kind of cache key,
709 : * either parameterized path clauses or lateral Vars. No cache key sounds
710 : * more like something a Materialize node might be more useful for.
711 : */
712 953964 : if ((inner_path->param_info == NULL ||
713 395740 : inner_path->param_info->ppi_clauses == NIL) &&
714 576656 : innerrel->lateral_vars == NIL &&
715 : ph_lateral_vars == NIL)
716 573708 : return NULL;
717 :
718 : /*
719 : * Currently we don't do this for SEMI and ANTI joins unless they're
720 : * marked as inner_unique. This is because nested loop SEMI/ANTI joins
721 : * don't scan the inner node to completion, which will mean memoize cannot
722 : * mark the cache entry as complete.
723 : *
724 : * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique
725 : * = true. Should we? See add_paths_to_joinrel()
726 : */
727 380256 : if (!extra->inner_unique && (jointype == JOIN_SEMI ||
728 : jointype == JOIN_ANTI))
729 8480 : return NULL;
730 :
731 : /*
732 : * Memoize normally marks cache entries as complete when it runs out of
733 : * tuples to read from its subplan. However, with unique joins, Nested
734 : * Loop will skip to the next outer tuple after finding the first matching
735 : * inner tuple. This means that we may not read the inner side of the
736 : * join to completion which leaves no opportunity to mark the cache entry
737 : * as complete. To work around that, when the join is unique we
738 : * automatically mark cache entries as complete after fetching the first
739 : * tuple. This works when the entire join condition is parameterized.
740 : * Otherwise, when the parameterization is only a subset of the join
741 : * condition, we can't be sure which part of it causes the join to be
742 : * unique. This means there are no guarantees that only 1 tuple will be
743 : * read. We cannot mark the cache entry as complete after reading the
744 : * first tuple without that guarantee. This means the scope of Memoize
745 : * node's usefulness is limited to only outer rows that have no join
746 : * partner as this is the only case where Nested Loop would exhaust the
747 : * inner scan of a unique join. Since the scope is limited to that, we
748 : * just don't bother making a memoize path in this case.
749 : *
750 : * Lateral vars needn't be considered here as they're not considered when
751 : * determining if the join is unique.
752 : *
753 : * XXX this could be enabled if the remaining join quals were made part of
754 : * the inner scan's filter instead of the join filter. Maybe it's worth
755 : * considering doing that?
756 : */
757 371776 : if (extra->inner_unique &&
758 451592 : (inner_path->param_info == NULL ||
759 225796 : bms_num_members(inner_path->param_info->ppi_serials) <
760 225796 : list_length(extra->restrictlist)))
761 51332 : return NULL;
762 :
763 : /*
764 : * We can't use a memoize node if there are volatile functions in the
765 : * inner rel's target list or restrict list. A cache hit could reduce the
766 : * number of calls to these functions.
767 : */
768 320444 : if (contain_volatile_functions((Node *) innerrel->reltarget))
769 0 : return NULL;
770 :
771 530178 : foreach(lc, innerrel->baserestrictinfo)
772 : {
773 209746 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
774 :
775 209746 : if (contain_volatile_functions((Node *) rinfo))
776 12 : return NULL;
777 : }
778 :
779 : /*
780 : * Also check the parameterized path restrictinfos for volatile functions.
781 : * Indexed functions must be immutable so shouldn't have any volatile
782 : * functions, however, with a lateral join the inner scan may not be an
783 : * index scan.
784 : */
785 320432 : if (inner_path->param_info != NULL)
786 : {
787 687266 : foreach(lc, inner_path->param_info->ppi_clauses)
788 : {
789 366836 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
790 :
791 366836 : if (contain_volatile_functions((Node *) rinfo))
792 2 : return NULL;
793 : }
794 : }
795 :
796 : /* Check if we have hash ops for each parameter to the path */
797 320430 : if (paraminfo_get_equal_hashops(root,
798 : inner_path->param_info,
799 320430 : outerrel->top_parent ?
800 : outerrel->top_parent : outerrel,
801 : innerrel,
802 : ph_lateral_vars,
803 : ¶m_exprs,
804 : &hash_operators,
805 : &binary_mode))
806 : {
807 236104 : return (Path *) create_memoize_path(root,
808 : innerrel,
809 : inner_path,
810 : param_exprs,
811 : hash_operators,
812 236104 : extra->inner_unique,
813 : binary_mode,
814 : outer_path->rows);
815 : }
816 :
817 84326 : return NULL;
818 : }
819 :
820 : /*
821 : * try_nestloop_path
822 : * Consider a nestloop join path; if it appears useful, push it into
823 : * the joinrel's pathlist via add_path().
824 : */
825 : static void
826 2335072 : try_nestloop_path(PlannerInfo *root,
827 : RelOptInfo *joinrel,
828 : Path *outer_path,
829 : Path *inner_path,
830 : List *pathkeys,
831 : JoinType jointype,
832 : JoinPathExtraData *extra)
833 : {
834 : Relids required_outer;
835 : JoinCostWorkspace workspace;
836 2335072 : RelOptInfo *innerrel = inner_path->parent;
837 2335072 : RelOptInfo *outerrel = outer_path->parent;
838 : Relids innerrelids;
839 : Relids outerrelids;
840 2335072 : Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
841 2335072 : Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
842 :
843 : /*
844 : * If we are forming an outer join at this join, it's nonsensical to use
845 : * an input path that uses the outer join as part of its parameterization.
846 : * (This can happen despite our join order restrictions, since those apply
847 : * to what is in an input relation not what its parameters are.)
848 : */
849 2860794 : if (extra->sjinfo->ojrelid != 0 &&
850 1051444 : (bms_is_member(extra->sjinfo->ojrelid, inner_paramrels) ||
851 525722 : bms_is_member(extra->sjinfo->ojrelid, outer_paramrels)))
852 217748 : return;
853 :
854 : /*
855 : * Any parameterization of the input paths refers to topmost parents of
856 : * the relevant relations, because reparameterize_path_by_child() hasn't
857 : * been called yet. So we must consider topmost parents of the relations
858 : * being joined, too, while determining parameterization of the result and
859 : * checking for disallowed parameterization cases.
860 : */
861 2334952 : if (innerrel->top_parent_relids)
862 33820 : innerrelids = innerrel->top_parent_relids;
863 : else
864 2301132 : innerrelids = innerrel->relids;
865 :
866 2334952 : if (outerrel->top_parent_relids)
867 33820 : outerrelids = outerrel->top_parent_relids;
868 : else
869 2301132 : outerrelids = outerrel->relids;
870 :
871 : /*
872 : * Check to see if proposed path is still parameterized, and reject if the
873 : * parameterization wouldn't be sensible --- unless allow_star_schema_join
874 : * says to allow it anyway. Also, we must reject if have_dangerous_phv
875 : * doesn't like the look of it, which could only happen if the nestloop is
876 : * still parameterized.
877 : */
878 2334952 : required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
879 : innerrelids, inner_paramrels);
880 2334952 : if (required_outer &&
881 248868 : ((!bms_overlap(required_outer, extra->param_source_rels) &&
882 249296 : !allow_star_schema_join(root, outerrelids, inner_paramrels)) ||
883 31336 : have_dangerous_phv(root, outerrelids, inner_paramrels)))
884 : {
885 : /* Waste no memory when we reject a path here */
886 217628 : bms_free(required_outer);
887 217628 : return;
888 : }
889 :
890 : /* If we got past that, we shouldn't have any unsafe outer-join refs */
891 : Assert(!have_unsafe_outer_join_ref(root, outerrelids, inner_paramrels));
892 :
893 : /*
894 : * If the inner path is parameterized, it is parameterized by the topmost
895 : * parent of the outer rel, not the outer rel itself. We will need to
896 : * translate the parameterization, if this path is chosen, during
897 : * create_plan(). Here we just check whether we will be able to perform
898 : * the translation, and if not avoid creating a nestloop path.
899 : */
900 2117324 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
901 10888 : !path_is_reparameterizable_by_child(inner_path, outer_path->parent))
902 : {
903 0 : bms_free(required_outer);
904 0 : return;
905 : }
906 :
907 : /*
908 : * Do a precheck to quickly eliminate obviously-inferior paths. We
909 : * calculate a cheap lower bound on the path's cost and then use
910 : * add_path_precheck() to see if the path is clearly going to be dominated
911 : * by some existing path for the joinrel. If not, do the full pushup with
912 : * creating a fully valid path structure and submitting it to add_path().
913 : * The latter two steps are expensive enough to make this two-phase
914 : * methodology worthwhile.
915 : */
916 2117324 : initial_cost_nestloop(root, &workspace, jointype,
917 : outer_path, inner_path, extra);
918 :
919 2117324 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
920 : workspace.startup_cost, workspace.total_cost,
921 : pathkeys, required_outer))
922 : {
923 1038124 : add_path(joinrel, (Path *)
924 1038124 : create_nestloop_path(root,
925 : joinrel,
926 : jointype,
927 : &workspace,
928 : extra,
929 : outer_path,
930 : inner_path,
931 : extra->restrictlist,
932 : pathkeys,
933 : required_outer));
934 : }
935 : else
936 : {
937 : /* Waste no memory when we reject a path here */
938 1079200 : bms_free(required_outer);
939 : }
940 : }
941 :
942 : /*
943 : * try_partial_nestloop_path
944 : * Consider a partial nestloop join path; if it appears useful, push it into
945 : * the joinrel's partial_pathlist via add_partial_path().
946 : */
947 : static void
948 39832 : try_partial_nestloop_path(PlannerInfo *root,
949 : RelOptInfo *joinrel,
950 : Path *outer_path,
951 : Path *inner_path,
952 : List *pathkeys,
953 : JoinType jointype,
954 : JoinPathExtraData *extra)
955 : {
956 : JoinCostWorkspace workspace;
957 :
958 : /*
959 : * If the inner path is parameterized, the parameterization must be fully
960 : * satisfied by the proposed outer path. Parameterized partial paths are
961 : * not supported. The caller should already have verified that no lateral
962 : * rels are required here.
963 : */
964 : Assert(bms_is_empty(joinrel->lateral_relids));
965 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
966 39832 : if (inner_path->param_info != NULL)
967 : {
968 13686 : Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
969 13686 : RelOptInfo *outerrel = outer_path->parent;
970 : Relids outerrelids;
971 :
972 : /*
973 : * The inner and outer paths are parameterized, if at all, by the top
974 : * level parents, not the child relations, so we must use those relids
975 : * for our parameterization tests.
976 : */
977 13686 : if (outerrel->top_parent_relids)
978 10188 : outerrelids = outerrel->top_parent_relids;
979 : else
980 3498 : outerrelids = outerrel->relids;
981 :
982 13686 : if (!bms_is_subset(inner_paramrels, outerrelids))
983 27586 : return;
984 : }
985 :
986 : /*
987 : * If the inner path is parameterized, it is parameterized by the topmost
988 : * parent of the outer rel, not the outer rel itself. We will need to
989 : * translate the parameterization, if this path is chosen, during
990 : * create_plan(). Here we just check whether we will be able to perform
991 : * the translation, and if not avoid creating a nestloop path.
992 : */
993 38740 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
994 9396 : !path_is_reparameterizable_by_child(inner_path, outer_path->parent))
995 0 : return;
996 :
997 : /*
998 : * Before creating a path, get a quick lower bound on what it is likely to
999 : * cost. Bail out right away if it looks terrible.
1000 : */
1001 38740 : initial_cost_nestloop(root, &workspace, jointype,
1002 : outer_path, inner_path, extra);
1003 38740 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1004 : workspace.total_cost, pathkeys))
1005 26494 : return;
1006 :
1007 : /* Might be good enough to be worth trying, so let's try it. */
1008 12246 : add_partial_path(joinrel, (Path *)
1009 12246 : create_nestloop_path(root,
1010 : joinrel,
1011 : jointype,
1012 : &workspace,
1013 : extra,
1014 : outer_path,
1015 : inner_path,
1016 : extra->restrictlist,
1017 : pathkeys,
1018 : NULL));
1019 : }
1020 :
1021 : /*
1022 : * try_mergejoin_path
1023 : * Consider a merge join path; if it appears useful, push it into
1024 : * the joinrel's pathlist via add_path().
1025 : */
1026 : static void
1027 940822 : try_mergejoin_path(PlannerInfo *root,
1028 : RelOptInfo *joinrel,
1029 : Path *outer_path,
1030 : Path *inner_path,
1031 : List *pathkeys,
1032 : List *mergeclauses,
1033 : List *outersortkeys,
1034 : List *innersortkeys,
1035 : JoinType jointype,
1036 : JoinPathExtraData *extra,
1037 : bool is_partial)
1038 : {
1039 : Relids required_outer;
1040 : JoinCostWorkspace workspace;
1041 :
1042 940822 : if (is_partial)
1043 : {
1044 6314 : try_partial_mergejoin_path(root,
1045 : joinrel,
1046 : outer_path,
1047 : inner_path,
1048 : pathkeys,
1049 : mergeclauses,
1050 : outersortkeys,
1051 : innersortkeys,
1052 : jointype,
1053 : extra);
1054 32160 : return;
1055 : }
1056 :
1057 : /*
1058 : * If we are forming an outer join at this join, it's nonsensical to use
1059 : * an input path that uses the outer join as part of its parameterization.
1060 : * (This can happen despite our join order restrictions, since those apply
1061 : * to what is in an input relation not what its parameters are.)
1062 : */
1063 1229866 : if (extra->sjinfo->ojrelid != 0 &&
1064 590716 : (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
1065 295358 : bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
1066 12 : return;
1067 :
1068 : /*
1069 : * Check to see if proposed path is still parameterized, and reject if the
1070 : * parameterization wouldn't be sensible.
1071 : */
1072 934496 : required_outer = calc_non_nestloop_required_outer(outer_path,
1073 : inner_path);
1074 934496 : if (required_outer &&
1075 28318 : !bms_overlap(required_outer, extra->param_source_rels))
1076 : {
1077 : /* Waste no memory when we reject a path here */
1078 25834 : bms_free(required_outer);
1079 25834 : return;
1080 : }
1081 :
1082 : /*
1083 : * If the given paths are already well enough ordered, we can skip doing
1084 : * an explicit sort.
1085 : */
1086 1356820 : if (outersortkeys &&
1087 448158 : pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
1088 17350 : outersortkeys = NIL;
1089 1649286 : if (innersortkeys &&
1090 740624 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1091 31868 : innersortkeys = NIL;
1092 :
1093 : /*
1094 : * See comments in try_nestloop_path().
1095 : */
1096 908662 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
1097 : outer_path, inner_path,
1098 : outersortkeys, innersortkeys,
1099 : extra);
1100 :
1101 908662 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
1102 : workspace.startup_cost, workspace.total_cost,
1103 : pathkeys, required_outer))
1104 : {
1105 231010 : add_path(joinrel, (Path *)
1106 231010 : create_mergejoin_path(root,
1107 : joinrel,
1108 : jointype,
1109 : &workspace,
1110 : extra,
1111 : outer_path,
1112 : inner_path,
1113 : extra->restrictlist,
1114 : pathkeys,
1115 : required_outer,
1116 : mergeclauses,
1117 : outersortkeys,
1118 : innersortkeys));
1119 : }
1120 : else
1121 : {
1122 : /* Waste no memory when we reject a path here */
1123 677652 : bms_free(required_outer);
1124 : }
1125 : }
1126 :
1127 : /*
1128 : * try_partial_mergejoin_path
1129 : * Consider a partial merge join path; if it appears useful, push it into
1130 : * the joinrel's pathlist via add_partial_path().
1131 : */
1132 : static void
1133 19034 : try_partial_mergejoin_path(PlannerInfo *root,
1134 : RelOptInfo *joinrel,
1135 : Path *outer_path,
1136 : Path *inner_path,
1137 : List *pathkeys,
1138 : List *mergeclauses,
1139 : List *outersortkeys,
1140 : List *innersortkeys,
1141 : JoinType jointype,
1142 : JoinPathExtraData *extra)
1143 : {
1144 : JoinCostWorkspace workspace;
1145 :
1146 : /*
1147 : * See comments in try_partial_hashjoin_path().
1148 : */
1149 : Assert(bms_is_empty(joinrel->lateral_relids));
1150 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
1151 19034 : if (!bms_is_empty(PATH_REQ_OUTER(inner_path)))
1152 9876 : return;
1153 :
1154 : /*
1155 : * If the given paths are already well enough ordered, we can skip doing
1156 : * an explicit sort.
1157 : */
1158 31754 : if (outersortkeys &&
1159 12720 : pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
1160 138 : outersortkeys = NIL;
1161 35478 : if (innersortkeys &&
1162 16444 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1163 252 : innersortkeys = NIL;
1164 :
1165 : /*
1166 : * See comments in try_partial_nestloop_path().
1167 : */
1168 19034 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
1169 : outer_path, inner_path,
1170 : outersortkeys, innersortkeys,
1171 : extra);
1172 :
1173 19034 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1174 : workspace.total_cost, pathkeys))
1175 9876 : return;
1176 :
1177 : /* Might be good enough to be worth trying, so let's try it. */
1178 9158 : add_partial_path(joinrel, (Path *)
1179 9158 : create_mergejoin_path(root,
1180 : joinrel,
1181 : jointype,
1182 : &workspace,
1183 : extra,
1184 : outer_path,
1185 : inner_path,
1186 : extra->restrictlist,
1187 : pathkeys,
1188 : NULL,
1189 : mergeclauses,
1190 : outersortkeys,
1191 : innersortkeys));
1192 : }
1193 :
1194 : /*
1195 : * try_hashjoin_path
1196 : * Consider a hash join path; if it appears useful, push it into
1197 : * the joinrel's pathlist via add_path().
1198 : */
1199 : static void
1200 578660 : try_hashjoin_path(PlannerInfo *root,
1201 : RelOptInfo *joinrel,
1202 : Path *outer_path,
1203 : Path *inner_path,
1204 : List *hashclauses,
1205 : JoinType jointype,
1206 : JoinPathExtraData *extra)
1207 : {
1208 : Relids required_outer;
1209 : JoinCostWorkspace workspace;
1210 :
1211 : /*
1212 : * If we are forming an outer join at this join, it's nonsensical to use
1213 : * an input path that uses the outer join as part of its parameterization.
1214 : * (This can happen despite our join order restrictions, since those apply
1215 : * to what is in an input relation not what its parameters are.)
1216 : */
1217 783250 : if (extra->sjinfo->ojrelid != 0 &&
1218 409150 : (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
1219 204560 : bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
1220 84252 : return;
1221 :
1222 : /*
1223 : * Check to see if proposed path is still parameterized, and reject if the
1224 : * parameterization wouldn't be sensible.
1225 : */
1226 578600 : required_outer = calc_non_nestloop_required_outer(outer_path,
1227 : inner_path);
1228 578600 : if (required_outer &&
1229 96650 : !bms_overlap(required_outer, extra->param_source_rels))
1230 : {
1231 : /* Waste no memory when we reject a path here */
1232 84192 : bms_free(required_outer);
1233 84192 : return;
1234 : }
1235 :
1236 : /*
1237 : * See comments in try_nestloop_path(). Also note that hashjoin paths
1238 : * never have any output pathkeys, per comments in create_hashjoin_path.
1239 : */
1240 494408 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1241 : outer_path, inner_path, extra, false);
1242 :
1243 494408 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
1244 : workspace.startup_cost, workspace.total_cost,
1245 : NIL, required_outer))
1246 : {
1247 207420 : add_path(joinrel, (Path *)
1248 207420 : create_hashjoin_path(root,
1249 : joinrel,
1250 : jointype,
1251 : &workspace,
1252 : extra,
1253 : outer_path,
1254 : inner_path,
1255 : false, /* parallel_hash */
1256 : extra->restrictlist,
1257 : required_outer,
1258 : hashclauses));
1259 : }
1260 : else
1261 : {
1262 : /* Waste no memory when we reject a path here */
1263 286988 : bms_free(required_outer);
1264 : }
1265 : }
1266 :
1267 : /*
1268 : * try_partial_hashjoin_path
1269 : * Consider a partial hashjoin join path; if it appears useful, push it into
1270 : * the joinrel's partial_pathlist via add_partial_path().
1271 : * The outer side is partial. If parallel_hash is true, then the inner path
1272 : * must be partial and will be run in parallel to create one or more shared
1273 : * hash tables; otherwise the inner path must be complete and a copy of it
1274 : * is run in every process to create separate identical private hash tables.
1275 : */
1276 : static void
1277 21146 : try_partial_hashjoin_path(PlannerInfo *root,
1278 : RelOptInfo *joinrel,
1279 : Path *outer_path,
1280 : Path *inner_path,
1281 : List *hashclauses,
1282 : JoinType jointype,
1283 : JoinPathExtraData *extra,
1284 : bool parallel_hash)
1285 : {
1286 : JoinCostWorkspace workspace;
1287 :
1288 : /*
1289 : * If the inner path is parameterized, we can't use a partial hashjoin.
1290 : * Parameterized partial paths are not supported. The caller should
1291 : * already have verified that no lateral rels are required here.
1292 : */
1293 : Assert(bms_is_empty(joinrel->lateral_relids));
1294 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
1295 21146 : if (!bms_is_empty(PATH_REQ_OUTER(inner_path)))
1296 10208 : return;
1297 :
1298 : /*
1299 : * Before creating a path, get a quick lower bound on what it is likely to
1300 : * cost. Bail out right away if it looks terrible.
1301 : */
1302 21146 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1303 : outer_path, inner_path, extra, parallel_hash);
1304 21146 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1305 : workspace.total_cost, NIL))
1306 10208 : return;
1307 :
1308 : /* Might be good enough to be worth trying, so let's try it. */
1309 10938 : add_partial_path(joinrel, (Path *)
1310 10938 : create_hashjoin_path(root,
1311 : joinrel,
1312 : jointype,
1313 : &workspace,
1314 : extra,
1315 : outer_path,
1316 : inner_path,
1317 : parallel_hash,
1318 : extra->restrictlist,
1319 : NULL,
1320 : hashclauses));
1321 : }
1322 :
1323 : /*
1324 : * clause_sides_match_join
1325 : * Determine whether a join clause is of the right form to use in this join.
1326 : *
1327 : * We already know that the clause is a binary opclause referencing only the
1328 : * rels in the current join. The point here is to check whether it has the
1329 : * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
1330 : * rather than mixing outer and inner vars on either side. If it matches,
1331 : * we set the transient flag outer_is_left to identify which side is which.
1332 : */
1333 : static inline bool
1334 1232236 : clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
1335 : RelOptInfo *innerrel)
1336 : {
1337 1836846 : if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
1338 604610 : bms_is_subset(rinfo->right_relids, innerrel->relids))
1339 : {
1340 : /* lefthand side is outer */
1341 604262 : rinfo->outer_is_left = true;
1342 604262 : return true;
1343 : }
1344 1238226 : else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
1345 610252 : bms_is_subset(rinfo->right_relids, outerrel->relids))
1346 : {
1347 : /* righthand side is outer */
1348 565924 : rinfo->outer_is_left = false;
1349 565924 : return true;
1350 : }
1351 62050 : return false; /* no good for these input relations */
1352 : }
1353 :
1354 : /*
1355 : * sort_inner_and_outer
1356 : * Create mergejoin join paths by explicitly sorting both the outer and
1357 : * inner join relations on each available merge ordering.
1358 : *
1359 : * 'joinrel' is the join relation
1360 : * 'outerrel' is the outer join relation
1361 : * 'innerrel' is the inner join relation
1362 : * 'jointype' is the type of join to do
1363 : * 'extra' contains additional input values
1364 : */
1365 : static void
1366 514284 : sort_inner_and_outer(PlannerInfo *root,
1367 : RelOptInfo *joinrel,
1368 : RelOptInfo *outerrel,
1369 : RelOptInfo *innerrel,
1370 : JoinType jointype,
1371 : JoinPathExtraData *extra)
1372 : {
1373 514284 : JoinType save_jointype = jointype;
1374 : Path *outer_path;
1375 : Path *inner_path;
1376 514284 : Path *cheapest_partial_outer = NULL;
1377 514284 : Path *cheapest_safe_inner = NULL;
1378 : List *all_pathkeys;
1379 : ListCell *l;
1380 :
1381 : /* Nothing to do if there are no available mergejoin clauses */
1382 514284 : if (extra->mergeclause_list == NIL)
1383 103750 : return;
1384 :
1385 : /*
1386 : * We only consider the cheapest-total-cost input paths, since we are
1387 : * assuming here that a sort is required. We will consider
1388 : * cheapest-startup-cost input paths later, and only if they don't need a
1389 : * sort.
1390 : *
1391 : * This function intentionally does not consider parameterized input
1392 : * paths, except when the cheapest-total is parameterized. If we did so,
1393 : * we'd have a combinatorial explosion of mergejoin paths of dubious
1394 : * value. This interacts with decisions elsewhere that also discriminate
1395 : * against mergejoins with parameterized inputs; see comments in
1396 : * src/backend/optimizer/README.
1397 : */
1398 410534 : outer_path = outerrel->cheapest_total_path;
1399 410534 : inner_path = innerrel->cheapest_total_path;
1400 :
1401 : /*
1402 : * If either cheapest-total path is parameterized by the other rel, we
1403 : * can't use a mergejoin. (There's no use looking for alternative input
1404 : * paths, since these should already be the least-parameterized available
1405 : * paths.)
1406 : */
1407 410534 : if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
1408 409846 : PATH_PARAM_BY_REL(inner_path, outerrel))
1409 1388 : return;
1410 :
1411 : /*
1412 : * If unique-ification is requested, do it and then handle as a plain
1413 : * inner join.
1414 : */
1415 409146 : if (jointype == JOIN_UNIQUE_OUTER)
1416 : {
1417 2662 : outer_path = (Path *) create_unique_path(root, outerrel,
1418 : outer_path, extra->sjinfo);
1419 : Assert(outer_path);
1420 2662 : jointype = JOIN_INNER;
1421 : }
1422 406484 : else if (jointype == JOIN_UNIQUE_INNER)
1423 : {
1424 2662 : inner_path = (Path *) create_unique_path(root, innerrel,
1425 : inner_path, extra->sjinfo);
1426 : Assert(inner_path);
1427 2662 : jointype = JOIN_INNER;
1428 : }
1429 :
1430 : /*
1431 : * If the joinrel is parallel-safe, we may be able to consider a partial
1432 : * merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the
1433 : * outer path will be partial, and therefore we won't be able to properly
1434 : * guarantee uniqueness. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT
1435 : * and JOIN_RIGHT_ANTI, because they can produce false null extended rows.
1436 : * Also, the resulting path must not be parameterized.
1437 : */
1438 409146 : if (joinrel->consider_parallel &&
1439 361864 : save_jointype != JOIN_UNIQUE_OUTER &&
1440 359520 : save_jointype != JOIN_FULL &&
1441 294962 : save_jointype != JOIN_RIGHT &&
1442 293664 : save_jointype != JOIN_RIGHT_ANTI &&
1443 293664 : outerrel->partial_pathlist != NIL &&
1444 9176 : bms_is_empty(joinrel->lateral_relids))
1445 : {
1446 9176 : cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
1447 :
1448 9176 : if (inner_path->parallel_safe)
1449 9080 : cheapest_safe_inner = inner_path;
1450 96 : else if (save_jointype != JOIN_UNIQUE_INNER)
1451 : cheapest_safe_inner =
1452 90 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1453 : }
1454 :
1455 : /*
1456 : * Each possible ordering of the available mergejoin clauses will generate
1457 : * a differently-sorted result path at essentially the same cost. We have
1458 : * no basis for choosing one over another at this level of joining, but
1459 : * some sort orders may be more useful than others for higher-level
1460 : * mergejoins, so it's worth considering multiple orderings.
1461 : *
1462 : * Actually, it's not quite true that every mergeclause ordering will
1463 : * generate a different path order, because some of the clauses may be
1464 : * partially redundant (refer to the same EquivalenceClasses). Therefore,
1465 : * what we do is convert the mergeclause list to a list of canonical
1466 : * pathkeys, and then consider different orderings of the pathkeys.
1467 : *
1468 : * Generating a path for *every* permutation of the pathkeys doesn't seem
1469 : * like a winning strategy; the cost in planning time is too high. For
1470 : * now, we generate one path for each pathkey, listing that pathkey first
1471 : * and the rest in random order. This should allow at least a one-clause
1472 : * mergejoin without re-sorting against any other possible mergejoin
1473 : * partner path. But if we've not guessed the right ordering of secondary
1474 : * keys, we may end up evaluating clauses as qpquals when they could have
1475 : * been done as mergeclauses. (In practice, it's rare that there's more
1476 : * than two or three mergeclauses, so expending a huge amount of thought
1477 : * on that is probably not worth it.)
1478 : *
1479 : * The pathkey order returned by select_outer_pathkeys_for_merge() has
1480 : * some heuristics behind it (see that function), so be sure to try it
1481 : * exactly as-is as well as making variants.
1482 : */
1483 409146 : all_pathkeys = select_outer_pathkeys_for_merge(root,
1484 : extra->mergeclause_list,
1485 : joinrel);
1486 :
1487 857304 : foreach(l, all_pathkeys)
1488 : {
1489 448158 : PathKey *front_pathkey = (PathKey *) lfirst(l);
1490 : List *cur_mergeclauses;
1491 : List *outerkeys;
1492 : List *innerkeys;
1493 : List *merge_pathkeys;
1494 :
1495 : /* Make a pathkey list with this guy first */
1496 448158 : if (l != list_head(all_pathkeys))
1497 39012 : outerkeys = lcons(front_pathkey,
1498 : list_delete_nth_cell(list_copy(all_pathkeys),
1499 : foreach_current_index(l)));
1500 : else
1501 409146 : outerkeys = all_pathkeys; /* no work at first one... */
1502 :
1503 : /* Sort the mergeclauses into the corresponding ordering */
1504 : cur_mergeclauses =
1505 448158 : find_mergeclauses_for_outer_pathkeys(root,
1506 : outerkeys,
1507 : extra->mergeclause_list);
1508 :
1509 : /* Should have used them all... */
1510 : Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
1511 :
1512 : /* Build sort pathkeys for the inner side */
1513 448158 : innerkeys = make_inner_pathkeys_for_merge(root,
1514 : cur_mergeclauses,
1515 : outerkeys);
1516 :
1517 : /* Build pathkeys representing output sort order */
1518 448158 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1519 : outerkeys);
1520 :
1521 : /*
1522 : * And now we can make the path.
1523 : *
1524 : * Note: it's possible that the cheapest paths will already be sorted
1525 : * properly. try_mergejoin_path will detect that case and suppress an
1526 : * explicit sort step, so we needn't do so here.
1527 : */
1528 448158 : try_mergejoin_path(root,
1529 : joinrel,
1530 : outer_path,
1531 : inner_path,
1532 : merge_pathkeys,
1533 : cur_mergeclauses,
1534 : outerkeys,
1535 : innerkeys,
1536 : jointype,
1537 : extra,
1538 : false);
1539 :
1540 : /*
1541 : * If we have partial outer and parallel safe inner path then try
1542 : * partial mergejoin path.
1543 : */
1544 448158 : if (cheapest_partial_outer && cheapest_safe_inner)
1545 12720 : try_partial_mergejoin_path(root,
1546 : joinrel,
1547 : cheapest_partial_outer,
1548 : cheapest_safe_inner,
1549 : merge_pathkeys,
1550 : cur_mergeclauses,
1551 : outerkeys,
1552 : innerkeys,
1553 : jointype,
1554 : extra);
1555 : }
1556 : }
1557 :
1558 : /*
1559 : * generate_mergejoin_paths
1560 : * Creates possible mergejoin paths for input outerpath.
1561 : *
1562 : * We generate mergejoins if mergejoin clauses are available. We have
1563 : * two ways to generate the inner path for a mergejoin: sort the cheapest
1564 : * inner path, or use an inner path that is already suitably ordered for the
1565 : * merge. If we have several mergeclauses, it could be that there is no inner
1566 : * path (or only a very expensive one) for the full list of mergeclauses, but
1567 : * better paths exist if we truncate the mergeclause list (thereby discarding
1568 : * some sort key requirements). So, we consider truncations of the
1569 : * mergeclause list as well as the full list. (Ideally we'd consider all
1570 : * subsets of the mergeclause list, but that seems way too expensive.)
1571 : */
1572 : static void
1573 966584 : generate_mergejoin_paths(PlannerInfo *root,
1574 : RelOptInfo *joinrel,
1575 : RelOptInfo *innerrel,
1576 : Path *outerpath,
1577 : JoinType jointype,
1578 : JoinPathExtraData *extra,
1579 : bool useallclauses,
1580 : Path *inner_cheapest_total,
1581 : List *merge_pathkeys,
1582 : bool is_partial)
1583 : {
1584 : List *mergeclauses;
1585 : List *innersortkeys;
1586 : List *trialsortkeys;
1587 : Path *cheapest_startup_inner;
1588 : Path *cheapest_total_inner;
1589 966584 : JoinType save_jointype = jointype;
1590 : int num_sortkeys;
1591 : int sortkeycnt;
1592 :
1593 966584 : if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
1594 8748 : jointype = JOIN_INNER;
1595 :
1596 : /* Look for useful mergeclauses (if any) */
1597 : mergeclauses =
1598 966584 : find_mergeclauses_for_outer_pathkeys(root,
1599 : outerpath->pathkeys,
1600 : extra->mergeclause_list);
1601 :
1602 : /*
1603 : * Done with this outer path if no chance for a mergejoin.
1604 : *
1605 : * Special corner case: for "x FULL JOIN y ON true", there will be no join
1606 : * clauses at all. Ordinarily we'd generate a clauseless nestloop path,
1607 : * but since mergejoin is our only join type that supports FULL JOIN
1608 : * without any join clauses, it's necessary to generate a clauseless
1609 : * mergejoin path instead.
1610 : */
1611 966584 : if (mergeclauses == NIL)
1612 : {
1613 649078 : if (jointype == JOIN_FULL)
1614 : /* okay to try for mergejoin */ ;
1615 : else
1616 645718 : return;
1617 : }
1618 391604 : if (useallclauses &&
1619 70738 : list_length(mergeclauses) != list_length(extra->mergeclause_list))
1620 9484 : return;
1621 :
1622 : /* Compute the required ordering of the inner path */
1623 311382 : innersortkeys = make_inner_pathkeys_for_merge(root,
1624 : mergeclauses,
1625 : outerpath->pathkeys);
1626 :
1627 : /*
1628 : * Generate a mergejoin on the basis of sorting the cheapest inner. Since
1629 : * a sort will be needed, only cheapest total cost matters. (But
1630 : * try_mergejoin_path will do the right thing if inner_cheapest_total is
1631 : * already correctly sorted.)
1632 : */
1633 311382 : try_mergejoin_path(root,
1634 : joinrel,
1635 : outerpath,
1636 : inner_cheapest_total,
1637 : merge_pathkeys,
1638 : mergeclauses,
1639 : NIL,
1640 : innersortkeys,
1641 : jointype,
1642 : extra,
1643 : is_partial);
1644 :
1645 : /* Can't do anything else if inner path needs to be unique'd */
1646 311382 : if (save_jointype == JOIN_UNIQUE_INNER)
1647 1798 : return;
1648 :
1649 : /*
1650 : * Look for presorted inner paths that satisfy the innersortkey list ---
1651 : * or any truncation thereof, if we are allowed to build a mergejoin using
1652 : * a subset of the merge clauses. Here, we consider both cheap startup
1653 : * cost and cheap total cost.
1654 : *
1655 : * Currently we do not consider parameterized inner paths here. This
1656 : * interacts with decisions elsewhere that also discriminate against
1657 : * mergejoins with parameterized inputs; see comments in
1658 : * src/backend/optimizer/README.
1659 : *
1660 : * As we shorten the sortkey list, we should consider only paths that are
1661 : * strictly cheaper than (in particular, not the same as) any path found
1662 : * in an earlier iteration. Otherwise we'd be intentionally using fewer
1663 : * merge keys than a given path allows (treating the rest as plain
1664 : * joinquals), which is unlikely to be a good idea. Also, eliminating
1665 : * paths here on the basis of compare_path_costs is a lot cheaper than
1666 : * building the mergejoin path only to throw it away.
1667 : *
1668 : * If inner_cheapest_total is well enough sorted to have not required a
1669 : * sort in the path made above, we shouldn't make a duplicate path with
1670 : * it, either. We handle that case with the same logic that handles the
1671 : * previous consideration, by initializing the variables that track
1672 : * cheapest-so-far properly. Note that we do NOT reject
1673 : * inner_cheapest_total if we find it matches some shorter set of
1674 : * pathkeys. That case corresponds to using fewer mergekeys to avoid
1675 : * sorting inner_cheapest_total, whereas we did sort it above, so the
1676 : * plans being considered are different.
1677 : */
1678 309584 : if (pathkeys_contained_in(innersortkeys,
1679 : inner_cheapest_total->pathkeys))
1680 : {
1681 : /* inner_cheapest_total didn't require a sort */
1682 15512 : cheapest_startup_inner = inner_cheapest_total;
1683 15512 : cheapest_total_inner = inner_cheapest_total;
1684 : }
1685 : else
1686 : {
1687 : /* it did require a sort, at least for the full set of keys */
1688 294072 : cheapest_startup_inner = NULL;
1689 294072 : cheapest_total_inner = NULL;
1690 : }
1691 309584 : num_sortkeys = list_length(innersortkeys);
1692 309584 : if (num_sortkeys > 1 && !useallclauses)
1693 8952 : trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
1694 : else
1695 300632 : trialsortkeys = innersortkeys; /* won't really truncate */
1696 :
1697 567002 : for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
1698 : {
1699 : Path *innerpath;
1700 318528 : List *newclauses = NIL;
1701 :
1702 : /*
1703 : * Look for an inner path ordered well enough for the first
1704 : * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
1705 : * destructively, which is why we made a copy...
1706 : */
1707 318528 : trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
1708 318528 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1709 : trialsortkeys,
1710 : NULL,
1711 : TOTAL_COST,
1712 : is_partial);
1713 318528 : if (innerpath != NULL &&
1714 20506 : (cheapest_total_inner == NULL ||
1715 20506 : compare_path_costs(innerpath, cheapest_total_inner,
1716 : TOTAL_COST) < 0))
1717 : {
1718 : /* Found a cheap (or even-cheaper) sorted path */
1719 : /* Select the right mergeclauses, if we didn't already */
1720 180180 : if (sortkeycnt < num_sortkeys)
1721 : {
1722 : newclauses =
1723 2612 : trim_mergeclauses_for_inner_pathkeys(root,
1724 : mergeclauses,
1725 : trialsortkeys);
1726 : Assert(newclauses != NIL);
1727 : }
1728 : else
1729 177568 : newclauses = mergeclauses;
1730 180180 : try_mergejoin_path(root,
1731 : joinrel,
1732 : outerpath,
1733 : innerpath,
1734 : merge_pathkeys,
1735 : newclauses,
1736 : NIL,
1737 : NIL,
1738 : jointype,
1739 : extra,
1740 : is_partial);
1741 180180 : cheapest_total_inner = innerpath;
1742 : }
1743 : /* Same on the basis of cheapest startup cost ... */
1744 318528 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1745 : trialsortkeys,
1746 : NULL,
1747 : STARTUP_COST,
1748 : is_partial);
1749 318528 : if (innerpath != NULL &&
1750 20506 : (cheapest_startup_inner == NULL ||
1751 20506 : compare_path_costs(innerpath, cheapest_startup_inner,
1752 : STARTUP_COST) < 0))
1753 : {
1754 : /* Found a cheap (or even-cheaper) sorted path */
1755 178818 : if (innerpath != cheapest_total_inner)
1756 : {
1757 : /*
1758 : * Avoid rebuilding clause list if we already made one; saves
1759 : * memory in big join trees...
1760 : */
1761 1102 : if (newclauses == NIL)
1762 : {
1763 18 : if (sortkeycnt < num_sortkeys)
1764 : {
1765 : newclauses =
1766 0 : trim_mergeclauses_for_inner_pathkeys(root,
1767 : mergeclauses,
1768 : trialsortkeys);
1769 : Assert(newclauses != NIL);
1770 : }
1771 : else
1772 18 : newclauses = mergeclauses;
1773 : }
1774 1102 : try_mergejoin_path(root,
1775 : joinrel,
1776 : outerpath,
1777 : innerpath,
1778 : merge_pathkeys,
1779 : newclauses,
1780 : NIL,
1781 : NIL,
1782 : jointype,
1783 : extra,
1784 : is_partial);
1785 : }
1786 178818 : cheapest_startup_inner = innerpath;
1787 : }
1788 :
1789 : /*
1790 : * Don't consider truncated sortkeys if we need all clauses.
1791 : */
1792 318528 : if (useallclauses)
1793 61110 : break;
1794 : }
1795 : }
1796 :
1797 : /*
1798 : * match_unsorted_outer
1799 : * Creates possible join paths for processing a single join relation
1800 : * 'joinrel' by employing either iterative substitution or
1801 : * mergejoining on each of its possible outer paths (considering
1802 : * only outer paths that are already ordered well enough for merging).
1803 : *
1804 : * We always generate a nestloop path for each available outer path.
1805 : * In fact we may generate as many as five: one on the cheapest-total-cost
1806 : * inner path, one on the same with materialization, one on the
1807 : * cheapest-startup-cost inner path (if different), one on the
1808 : * cheapest-total inner-indexscan path (if any), and one on the
1809 : * cheapest-startup inner-indexscan path (if different).
1810 : *
1811 : * We also consider mergejoins if mergejoin clauses are available. See
1812 : * detailed comments in generate_mergejoin_paths.
1813 : *
1814 : * 'joinrel' is the join relation
1815 : * 'outerrel' is the outer join relation
1816 : * 'innerrel' is the inner join relation
1817 : * 'jointype' is the type of join to do
1818 : * 'extra' contains additional input values
1819 : */
1820 : static void
1821 514284 : match_unsorted_outer(PlannerInfo *root,
1822 : RelOptInfo *joinrel,
1823 : RelOptInfo *outerrel,
1824 : RelOptInfo *innerrel,
1825 : JoinType jointype,
1826 : JoinPathExtraData *extra)
1827 : {
1828 514284 : JoinType save_jointype = jointype;
1829 : bool nestjoinOK;
1830 : bool useallclauses;
1831 514284 : Path *inner_cheapest_total = innerrel->cheapest_total_path;
1832 514284 : Path *matpath = NULL;
1833 : ListCell *lc1;
1834 :
1835 : /*
1836 : * For now we do not support RIGHT_SEMI join in mergejoin or nestloop
1837 : * join.
1838 : */
1839 514284 : if (jointype == JOIN_RIGHT_SEMI)
1840 0 : return;
1841 :
1842 : /*
1843 : * Nestloop only supports inner, left, semi, and anti joins. Also, if we
1844 : * are doing a right, right-anti or full mergejoin, we must use *all* the
1845 : * mergeclauses as join clauses, else we will not have a valid plan.
1846 : * (Although these two flags are currently inverses, keep them separate
1847 : * for clarity and possible future changes.)
1848 : */
1849 514284 : switch (jointype)
1850 : {
1851 428686 : case JOIN_INNER:
1852 : case JOIN_LEFT:
1853 : case JOIN_SEMI:
1854 : case JOIN_ANTI:
1855 428686 : nestjoinOK = true;
1856 428686 : useallclauses = false;
1857 428686 : break;
1858 77082 : case JOIN_RIGHT:
1859 : case JOIN_RIGHT_ANTI:
1860 : case JOIN_FULL:
1861 77082 : nestjoinOK = false;
1862 77082 : useallclauses = true;
1863 77082 : break;
1864 8516 : case JOIN_UNIQUE_OUTER:
1865 : case JOIN_UNIQUE_INNER:
1866 8516 : jointype = JOIN_INNER;
1867 8516 : nestjoinOK = true;
1868 8516 : useallclauses = false;
1869 8516 : break;
1870 0 : default:
1871 0 : elog(ERROR, "unrecognized join type: %d",
1872 : (int) jointype);
1873 : nestjoinOK = false; /* keep compiler quiet */
1874 : useallclauses = false;
1875 : break;
1876 : }
1877 :
1878 : /*
1879 : * If inner_cheapest_total is parameterized by the outer rel, ignore it;
1880 : * we will consider it below as a member of cheapest_parameterized_paths,
1881 : * but the other possibilities considered in this routine aren't usable.
1882 : */
1883 514284 : if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
1884 11402 : inner_cheapest_total = NULL;
1885 :
1886 : /*
1887 : * If we need to unique-ify the inner path, we will consider only the
1888 : * cheapest-total inner.
1889 : */
1890 514284 : if (save_jointype == JOIN_UNIQUE_INNER)
1891 : {
1892 : /* No way to do this with an inner path parameterized by outer rel */
1893 4258 : if (inner_cheapest_total == NULL)
1894 12 : return;
1895 : inner_cheapest_total = (Path *)
1896 4246 : create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
1897 : Assert(inner_cheapest_total);
1898 : }
1899 510026 : else if (nestjoinOK)
1900 : {
1901 : /*
1902 : * Consider materializing the cheapest inner path, unless
1903 : * enable_material is off or the path in question materializes its
1904 : * output anyway.
1905 : */
1906 432944 : if (enable_material && inner_cheapest_total != NULL &&
1907 421120 : !ExecMaterializesOutput(inner_cheapest_total->pathtype))
1908 : matpath = (Path *)
1909 401448 : create_material_path(innerrel, inner_cheapest_total);
1910 : }
1911 :
1912 1681540 : foreach(lc1, outerrel->pathlist)
1913 : {
1914 1167268 : Path *outerpath = (Path *) lfirst(lc1);
1915 : List *merge_pathkeys;
1916 :
1917 : /*
1918 : * We cannot use an outer path that is parameterized by the inner rel.
1919 : */
1920 1167268 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
1921 197420 : continue;
1922 :
1923 : /*
1924 : * If we need to unique-ify the outer path, it's pointless to consider
1925 : * any but the cheapest outer. (XXX we don't consider parameterized
1926 : * outers, nor inners, for unique-ified cases. Should we?)
1927 : */
1928 969848 : if (save_jointype == JOIN_UNIQUE_OUTER)
1929 : {
1930 4932 : if (outerpath != outerrel->cheapest_total_path)
1931 686 : continue;
1932 4246 : outerpath = (Path *) create_unique_path(root, outerrel,
1933 : outerpath, extra->sjinfo);
1934 : Assert(outerpath);
1935 : }
1936 :
1937 : /*
1938 : * The result will have this sort order (even if it is implemented as
1939 : * a nestloop, and even if some of the mergeclauses are implemented by
1940 : * qpquals rather than as true mergeclauses):
1941 : */
1942 969162 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1943 : outerpath->pathkeys);
1944 :
1945 969162 : if (save_jointype == JOIN_UNIQUE_INNER)
1946 : {
1947 : /*
1948 : * Consider nestloop join, but only with the unique-ified cheapest
1949 : * inner path
1950 : */
1951 7902 : try_nestloop_path(root,
1952 : joinrel,
1953 : outerpath,
1954 : inner_cheapest_total,
1955 : merge_pathkeys,
1956 : jointype,
1957 : extra);
1958 : }
1959 961260 : else if (nestjoinOK)
1960 : {
1961 : /*
1962 : * Consider nestloop joins using this outer path and various
1963 : * available paths for the inner relation. We consider the
1964 : * cheapest-total paths for each available parameterization of the
1965 : * inner relation, including the unparameterized case.
1966 : */
1967 : ListCell *lc2;
1968 :
1969 2147316 : foreach(lc2, innerrel->cheapest_parameterized_paths)
1970 : {
1971 1330402 : Path *innerpath = (Path *) lfirst(lc2);
1972 : Path *mpath;
1973 :
1974 1330402 : try_nestloop_path(root,
1975 : joinrel,
1976 : outerpath,
1977 : innerpath,
1978 : merge_pathkeys,
1979 : jointype,
1980 : extra);
1981 :
1982 : /*
1983 : * Try generating a memoize path and see if that makes the
1984 : * nested loop any cheaper.
1985 : */
1986 1330402 : mpath = get_memoize_path(root, innerrel, outerrel,
1987 : innerpath, outerpath, jointype,
1988 : extra);
1989 1330402 : if (mpath != NULL)
1990 230696 : try_nestloop_path(root,
1991 : joinrel,
1992 : outerpath,
1993 : mpath,
1994 : merge_pathkeys,
1995 : jointype,
1996 : extra);
1997 : }
1998 :
1999 : /* Also consider materialized form of the cheapest inner path */
2000 816914 : if (matpath != NULL)
2001 766072 : try_nestloop_path(root,
2002 : joinrel,
2003 : outerpath,
2004 : matpath,
2005 : merge_pathkeys,
2006 : jointype,
2007 : extra);
2008 : }
2009 :
2010 : /* Can't do anything else if outer path needs to be unique'd */
2011 969162 : if (save_jointype == JOIN_UNIQUE_OUTER)
2012 4246 : continue;
2013 :
2014 : /* Can't do anything else if inner rel is parameterized by outer */
2015 964916 : if (inner_cheapest_total == NULL)
2016 12266 : continue;
2017 :
2018 : /* Generate merge join paths */
2019 952650 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
2020 : save_jointype, extra, useallclauses,
2021 : inner_cheapest_total, merge_pathkeys,
2022 : false);
2023 : }
2024 :
2025 : /*
2026 : * Consider partial nestloop and mergejoin plan if outerrel has any
2027 : * partial path and the joinrel is parallel-safe. However, we can't
2028 : * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
2029 : * therefore we won't be able to properly guarantee uniqueness. Nor can
2030 : * we handle joins needing lateral rels, since partial paths must not be
2031 : * parameterized. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT and
2032 : * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
2033 : */
2034 514272 : if (joinrel->consider_parallel &&
2035 428834 : save_jointype != JOIN_UNIQUE_OUTER &&
2036 426370 : save_jointype != JOIN_FULL &&
2037 360344 : save_jointype != JOIN_RIGHT &&
2038 359046 : save_jointype != JOIN_RIGHT_ANTI &&
2039 359046 : outerrel->partial_pathlist != NIL &&
2040 10796 : bms_is_empty(joinrel->lateral_relids))
2041 : {
2042 10796 : if (nestjoinOK)
2043 10796 : consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
2044 : save_jointype, extra);
2045 :
2046 : /*
2047 : * If inner_cheapest_total is NULL or non parallel-safe then find the
2048 : * cheapest total parallel safe path. If doing JOIN_UNIQUE_INNER, we
2049 : * can't use any alternative inner path.
2050 : */
2051 10796 : if (inner_cheapest_total == NULL ||
2052 10388 : !inner_cheapest_total->parallel_safe)
2053 : {
2054 780 : if (save_jointype == JOIN_UNIQUE_INNER)
2055 6 : return;
2056 :
2057 774 : inner_cheapest_total = get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
2058 : }
2059 :
2060 10790 : if (inner_cheapest_total)
2061 10376 : consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
2062 : save_jointype, extra,
2063 : inner_cheapest_total);
2064 : }
2065 : }
2066 :
2067 : /*
2068 : * consider_parallel_mergejoin
2069 : * Try to build partial paths for a joinrel by joining a partial path
2070 : * for the outer relation to a complete path for the inner relation.
2071 : *
2072 : * 'joinrel' is the join relation
2073 : * 'outerrel' is the outer join relation
2074 : * 'innerrel' is the inner join relation
2075 : * 'jointype' is the type of join to do
2076 : * 'extra' contains additional input values
2077 : * 'inner_cheapest_total' cheapest total path for innerrel
2078 : */
2079 : static void
2080 10376 : consider_parallel_mergejoin(PlannerInfo *root,
2081 : RelOptInfo *joinrel,
2082 : RelOptInfo *outerrel,
2083 : RelOptInfo *innerrel,
2084 : JoinType jointype,
2085 : JoinPathExtraData *extra,
2086 : Path *inner_cheapest_total)
2087 : {
2088 : ListCell *lc1;
2089 :
2090 : /* generate merge join path for each partial outer path */
2091 24310 : foreach(lc1, outerrel->partial_pathlist)
2092 : {
2093 13934 : Path *outerpath = (Path *) lfirst(lc1);
2094 : List *merge_pathkeys;
2095 :
2096 : /*
2097 : * Figure out what useful ordering any paths we create will have.
2098 : */
2099 13934 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
2100 : outerpath->pathkeys);
2101 :
2102 13934 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
2103 : extra, false, inner_cheapest_total,
2104 : merge_pathkeys, true);
2105 : }
2106 10376 : }
2107 :
2108 : /*
2109 : * consider_parallel_nestloop
2110 : * Try to build partial paths for a joinrel by joining a partial path for the
2111 : * outer relation to a complete path for the inner relation.
2112 : *
2113 : * 'joinrel' is the join relation
2114 : * 'outerrel' is the outer join relation
2115 : * 'innerrel' is the inner join relation
2116 : * 'jointype' is the type of join to do
2117 : * 'extra' contains additional input values
2118 : */
2119 : static void
2120 10796 : consider_parallel_nestloop(PlannerInfo *root,
2121 : RelOptInfo *joinrel,
2122 : RelOptInfo *outerrel,
2123 : RelOptInfo *innerrel,
2124 : JoinType jointype,
2125 : JoinPathExtraData *extra)
2126 : {
2127 10796 : JoinType save_jointype = jointype;
2128 10796 : Path *inner_cheapest_total = innerrel->cheapest_total_path;
2129 10796 : Path *matpath = NULL;
2130 : ListCell *lc1;
2131 :
2132 10796 : if (jointype == JOIN_UNIQUE_INNER)
2133 564 : jointype = JOIN_INNER;
2134 :
2135 : /*
2136 : * Consider materializing the cheapest inner path, unless: 1) we're doing
2137 : * JOIN_UNIQUE_INNER, because in this case we have to unique-ify the
2138 : * cheapest inner path, 2) enable_material is off, 3) the cheapest inner
2139 : * path is not parallel-safe, 4) the cheapest inner path is parameterized
2140 : * by the outer rel, or 5) the cheapest inner path materializes its output
2141 : * anyway.
2142 : */
2143 10796 : if (save_jointype != JOIN_UNIQUE_INNER &&
2144 10028 : enable_material && inner_cheapest_total->parallel_safe &&
2145 9836 : !PATH_PARAM_BY_REL(inner_cheapest_total, outerrel) &&
2146 9446 : !ExecMaterializesOutput(inner_cheapest_total->pathtype))
2147 : {
2148 : matpath = (Path *)
2149 9350 : create_material_path(innerrel, inner_cheapest_total);
2150 : Assert(matpath->parallel_safe);
2151 : }
2152 :
2153 25288 : foreach(lc1, outerrel->partial_pathlist)
2154 : {
2155 14492 : Path *outerpath = (Path *) lfirst(lc1);
2156 : List *pathkeys;
2157 : ListCell *lc2;
2158 :
2159 : /* Figure out what useful ordering any paths we create will have. */
2160 14492 : pathkeys = build_join_pathkeys(root, joinrel, jointype,
2161 : outerpath->pathkeys);
2162 :
2163 : /*
2164 : * Try the cheapest parameterized paths; only those which will produce
2165 : * an unparameterized path when joined to this outerrel will survive
2166 : * try_partial_nestloop_path. The cheapest unparameterized path is
2167 : * also in this list.
2168 : */
2169 37622 : foreach(lc2, innerrel->cheapest_parameterized_paths)
2170 : {
2171 23130 : Path *innerpath = (Path *) lfirst(lc2);
2172 : Path *mpath;
2173 :
2174 : /* Can't join to an inner path that is not parallel-safe */
2175 23130 : if (!innerpath->parallel_safe)
2176 426 : continue;
2177 :
2178 : /*
2179 : * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
2180 : * cheapest_total_path, and we have to unique-ify it. (We might
2181 : * be able to relax this to allow other safe, unparameterized
2182 : * inner paths, but right now create_unique_path is not on board
2183 : * with that.)
2184 : */
2185 22704 : if (save_jointype == JOIN_UNIQUE_INNER)
2186 : {
2187 1722 : if (innerpath != innerrel->cheapest_total_path)
2188 876 : continue;
2189 846 : innerpath = (Path *) create_unique_path(root, innerrel,
2190 : innerpath,
2191 : extra->sjinfo);
2192 : Assert(innerpath);
2193 : }
2194 :
2195 21828 : try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
2196 : pathkeys, jointype, extra);
2197 :
2198 : /*
2199 : * Try generating a memoize path and see if that makes the nested
2200 : * loop any cheaper.
2201 : */
2202 21828 : mpath = get_memoize_path(root, innerrel, outerrel,
2203 : innerpath, outerpath, jointype,
2204 : extra);
2205 21828 : if (mpath != NULL)
2206 5408 : try_partial_nestloop_path(root, joinrel, outerpath, mpath,
2207 : pathkeys, jointype, extra);
2208 : }
2209 :
2210 : /* Also consider materialized form of the cheapest inner path */
2211 14492 : if (matpath != NULL)
2212 12596 : try_partial_nestloop_path(root, joinrel, outerpath, matpath,
2213 : pathkeys, jointype, extra);
2214 : }
2215 10796 : }
2216 :
2217 : /*
2218 : * hash_inner_and_outer
2219 : * Create hashjoin join paths by explicitly hashing both the outer and
2220 : * inner keys of each available hash clause.
2221 : *
2222 : * 'joinrel' is the join relation
2223 : * 'outerrel' is the outer join relation
2224 : * 'innerrel' is the inner join relation
2225 : * 'jointype' is the type of join to do
2226 : * 'extra' contains additional input values
2227 : */
2228 : static void
2229 522548 : hash_inner_and_outer(PlannerInfo *root,
2230 : RelOptInfo *joinrel,
2231 : RelOptInfo *outerrel,
2232 : RelOptInfo *innerrel,
2233 : JoinType jointype,
2234 : JoinPathExtraData *extra)
2235 : {
2236 522548 : JoinType save_jointype = jointype;
2237 522548 : bool isouterjoin = IS_OUTER_JOIN(jointype);
2238 : List *hashclauses;
2239 : ListCell *l;
2240 :
2241 : /*
2242 : * We need to build only one hashclauses list for any given pair of outer
2243 : * and inner relations; all of the hashable clauses will be used as keys.
2244 : *
2245 : * Scan the join's restrictinfo list to find hashjoinable clauses that are
2246 : * usable with this pair of sub-relations.
2247 : */
2248 522548 : hashclauses = NIL;
2249 1064268 : foreach(l, extra->restrictlist)
2250 : {
2251 541720 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2252 :
2253 : /*
2254 : * If processing an outer join, only use its own join clauses for
2255 : * hashing. For inner joins we need not be so picky.
2256 : */
2257 541720 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
2258 9092 : continue;
2259 :
2260 532628 : if (!restrictinfo->can_join ||
2261 474276 : restrictinfo->hashjoinoperator == InvalidOid)
2262 74952 : continue; /* not hashjoinable */
2263 :
2264 : /*
2265 : * Check if clause has the form "outer op inner" or "inner op outer".
2266 : */
2267 457676 : if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
2268 144 : continue; /* no good for these input relations */
2269 :
2270 : /*
2271 : * If clause has the form "inner op outer", check if its operator has
2272 : * valid commutator. This is necessary because hashclauses in this
2273 : * form will get commuted in createplan.c to put the outer var on the
2274 : * left (see get_switched_clauses). This probably shouldn't ever
2275 : * fail, since hashable operators ought to have commutators, but be
2276 : * paranoid.
2277 : *
2278 : * The clause being hashjoinable indicates that it's an OpExpr.
2279 : */
2280 686298 : if (!restrictinfo->outer_is_left &&
2281 228766 : !OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
2282 6 : continue;
2283 :
2284 457526 : hashclauses = lappend(hashclauses, restrictinfo);
2285 : }
2286 :
2287 : /* If we found any usable hashclauses, make paths */
2288 522548 : if (hashclauses)
2289 : {
2290 : /*
2291 : * We consider both the cheapest-total-cost and cheapest-startup-cost
2292 : * outer paths. There's no need to consider any but the
2293 : * cheapest-total-cost inner path, however.
2294 : */
2295 417738 : Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
2296 417738 : Path *cheapest_total_outer = outerrel->cheapest_total_path;
2297 417738 : Path *cheapest_total_inner = innerrel->cheapest_total_path;
2298 :
2299 : /*
2300 : * If either cheapest-total path is parameterized by the other rel, we
2301 : * can't use a hashjoin. (There's no use looking for alternative
2302 : * input paths, since these should already be the least-parameterized
2303 : * available paths.)
2304 : */
2305 417738 : if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
2306 417062 : PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
2307 1352 : return;
2308 :
2309 : /* Unique-ify if need be; we ignore parameterized possibilities */
2310 416386 : if (jointype == JOIN_UNIQUE_OUTER)
2311 : {
2312 : cheapest_total_outer = (Path *)
2313 2560 : create_unique_path(root, outerrel,
2314 : cheapest_total_outer, extra->sjinfo);
2315 : Assert(cheapest_total_outer);
2316 2560 : jointype = JOIN_INNER;
2317 2560 : try_hashjoin_path(root,
2318 : joinrel,
2319 : cheapest_total_outer,
2320 : cheapest_total_inner,
2321 : hashclauses,
2322 : jointype,
2323 : extra);
2324 : /* no possibility of cheap startup here */
2325 : }
2326 413826 : else if (jointype == JOIN_UNIQUE_INNER)
2327 : {
2328 : cheapest_total_inner = (Path *)
2329 2560 : create_unique_path(root, innerrel,
2330 : cheapest_total_inner, extra->sjinfo);
2331 : Assert(cheapest_total_inner);
2332 2560 : jointype = JOIN_INNER;
2333 2560 : try_hashjoin_path(root,
2334 : joinrel,
2335 : cheapest_total_outer,
2336 : cheapest_total_inner,
2337 : hashclauses,
2338 : jointype,
2339 : extra);
2340 2560 : if (cheapest_startup_outer != NULL &&
2341 : cheapest_startup_outer != cheapest_total_outer)
2342 324 : try_hashjoin_path(root,
2343 : joinrel,
2344 : cheapest_startup_outer,
2345 : cheapest_total_inner,
2346 : hashclauses,
2347 : jointype,
2348 : extra);
2349 : }
2350 : else
2351 : {
2352 : /*
2353 : * For other jointypes, we consider the cheapest startup outer
2354 : * together with the cheapest total inner, and then consider
2355 : * pairings of cheapest-total paths including parameterized ones.
2356 : * There is no use in generating parameterized paths on the basis
2357 : * of possibly cheap startup cost, so this is sufficient.
2358 : */
2359 : ListCell *lc1;
2360 : ListCell *lc2;
2361 :
2362 411266 : if (cheapest_startup_outer != NULL)
2363 410636 : try_hashjoin_path(root,
2364 : joinrel,
2365 : cheapest_startup_outer,
2366 : cheapest_total_inner,
2367 : hashclauses,
2368 : jointype,
2369 : extra);
2370 :
2371 1054532 : foreach(lc1, outerrel->cheapest_parameterized_paths)
2372 : {
2373 643266 : Path *outerpath = (Path *) lfirst(lc1);
2374 :
2375 : /*
2376 : * We cannot use an outer path that is parameterized by the
2377 : * inner rel.
2378 : */
2379 643266 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
2380 189552 : continue;
2381 :
2382 1175528 : foreach(lc2, innerrel->cheapest_parameterized_paths)
2383 : {
2384 721814 : Path *innerpath = (Path *) lfirst(lc2);
2385 :
2386 : /*
2387 : * We cannot use an inner path that is parameterized by
2388 : * the outer rel, either.
2389 : */
2390 721814 : if (PATH_PARAM_BY_REL(innerpath, outerrel))
2391 214924 : continue;
2392 :
2393 506890 : if (outerpath == cheapest_startup_outer &&
2394 : innerpath == cheapest_total_inner)
2395 344310 : continue; /* already tried it */
2396 :
2397 162580 : try_hashjoin_path(root,
2398 : joinrel,
2399 : outerpath,
2400 : innerpath,
2401 : hashclauses,
2402 : jointype,
2403 : extra);
2404 : }
2405 : }
2406 : }
2407 :
2408 : /*
2409 : * If the joinrel is parallel-safe, we may be able to consider a
2410 : * partial hash join. However, we can't handle JOIN_UNIQUE_OUTER,
2411 : * because the outer path will be partial, and therefore we won't be
2412 : * able to properly guarantee uniqueness. Also, the resulting path
2413 : * must not be parameterized.
2414 : */
2415 416386 : if (joinrel->consider_parallel &&
2416 369058 : save_jointype != JOIN_UNIQUE_OUTER &&
2417 369058 : outerrel->partial_pathlist != NIL &&
2418 13916 : bms_is_empty(joinrel->lateral_relids))
2419 : {
2420 : Path *cheapest_partial_outer;
2421 13916 : Path *cheapest_partial_inner = NULL;
2422 13916 : Path *cheapest_safe_inner = NULL;
2423 :
2424 13916 : cheapest_partial_outer =
2425 13916 : (Path *) linitial(outerrel->partial_pathlist);
2426 :
2427 : /*
2428 : * Can we use a partial inner plan too, so that we can build a
2429 : * shared hash table in parallel? We can't handle
2430 : * JOIN_UNIQUE_INNER because we can't guarantee uniqueness.
2431 : */
2432 13916 : if (innerrel->partial_pathlist != NIL &&
2433 12540 : save_jointype != JOIN_UNIQUE_INNER &&
2434 : enable_parallel_hash)
2435 : {
2436 12264 : cheapest_partial_inner =
2437 12264 : (Path *) linitial(innerrel->partial_pathlist);
2438 12264 : try_partial_hashjoin_path(root, joinrel,
2439 : cheapest_partial_outer,
2440 : cheapest_partial_inner,
2441 : hashclauses, jointype, extra,
2442 : true /* parallel_hash */ );
2443 : }
2444 :
2445 : /*
2446 : * Normally, given that the joinrel is parallel-safe, the cheapest
2447 : * total inner path will also be parallel-safe, but if not, we'll
2448 : * have to search for the cheapest safe, unparameterized inner
2449 : * path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
2450 : * inner path. If full, right, right-semi or right-anti join, we
2451 : * can't use parallelism (building the hash table in each backend)
2452 : * because no one process has all the match bits.
2453 : */
2454 13916 : if (save_jointype == JOIN_FULL ||
2455 10100 : save_jointype == JOIN_RIGHT ||
2456 9224 : save_jointype == JOIN_RIGHT_SEMI ||
2457 : save_jointype == JOIN_RIGHT_ANTI)
2458 5022 : cheapest_safe_inner = NULL;
2459 8894 : else if (cheapest_total_inner->parallel_safe)
2460 8666 : cheapest_safe_inner = cheapest_total_inner;
2461 228 : else if (save_jointype != JOIN_UNIQUE_INNER)
2462 : cheapest_safe_inner =
2463 222 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
2464 :
2465 13916 : if (cheapest_safe_inner != NULL)
2466 8882 : try_partial_hashjoin_path(root, joinrel,
2467 : cheapest_partial_outer,
2468 : cheapest_safe_inner,
2469 : hashclauses, jointype, extra,
2470 : false /* parallel_hash */ );
2471 : }
2472 : }
2473 : }
2474 :
2475 : /*
2476 : * select_mergejoin_clauses
2477 : * Select mergejoin clauses that are usable for a particular join.
2478 : * Returns a list of RestrictInfo nodes for those clauses.
2479 : *
2480 : * *mergejoin_allowed is normally set to true, but it is set to false if
2481 : * this is a right-semi join, or this is a right/right-anti/full join and
2482 : * there are nonmergejoinable join clauses. The executor's mergejoin
2483 : * machinery cannot handle such cases, so we have to avoid generating a
2484 : * mergejoin plan. (Note that this flag does NOT consider whether there are
2485 : * actually any mergejoinable clauses. This is correct because in some
2486 : * cases we need to build a clauseless mergejoin. Simply returning NIL is
2487 : * therefore not enough to distinguish safe from unsafe cases.)
2488 : *
2489 : * We also mark each selected RestrictInfo to show which side is currently
2490 : * being considered as outer. These are transient markings that are only
2491 : * good for the duration of the current add_paths_to_joinrel() call!
2492 : *
2493 : * We examine each restrictinfo clause known for the join to see
2494 : * if it is mergejoinable and involves vars from the two sub-relations
2495 : * currently of interest.
2496 : */
2497 : static List *
2498 524188 : select_mergejoin_clauses(PlannerInfo *root,
2499 : RelOptInfo *joinrel,
2500 : RelOptInfo *outerrel,
2501 : RelOptInfo *innerrel,
2502 : List *restrictlist,
2503 : JoinType jointype,
2504 : bool *mergejoin_allowed)
2505 : {
2506 524188 : List *result_list = NIL;
2507 524188 : bool isouterjoin = IS_OUTER_JOIN(jointype);
2508 524188 : bool have_nonmergeable_joinclause = false;
2509 : ListCell *l;
2510 :
2511 : /*
2512 : * For now we do not support RIGHT_SEMI join in mergejoin: the benefit of
2513 : * swapping inputs tends to be small here.
2514 : */
2515 524188 : if (jointype == JOIN_RIGHT_SEMI)
2516 : {
2517 4146 : *mergejoin_allowed = false;
2518 4146 : return NIL;
2519 : }
2520 :
2521 1060542 : foreach(l, restrictlist)
2522 : {
2523 540500 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2524 :
2525 : /*
2526 : * If processing an outer join, only use its own join clauses in the
2527 : * merge. For inner joins we can use pushed-down clauses too. (Note:
2528 : * we don't set have_nonmergeable_joinclause here because pushed-down
2529 : * clauses will become otherquals not joinquals.)
2530 : */
2531 540500 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
2532 9116 : continue;
2533 :
2534 : /* Check that clause is a mergeable operator clause */
2535 531384 : if (!restrictinfo->can_join ||
2536 473050 : restrictinfo->mergeopfamilies == NIL)
2537 : {
2538 : /*
2539 : * The executor can handle extra joinquals that are constants, but
2540 : * not anything else, when doing right/right-anti/full merge join.
2541 : * (The reason to support constants is so we can do FULL JOIN ON
2542 : * FALSE.)
2543 : */
2544 73648 : if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
2545 65724 : have_nonmergeable_joinclause = true;
2546 73648 : continue; /* not mergejoinable */
2547 : }
2548 :
2549 : /*
2550 : * Check if clause has the form "outer op inner" or "inner op outer".
2551 : */
2552 457736 : if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
2553 : {
2554 768 : have_nonmergeable_joinclause = true;
2555 768 : continue; /* no good for these input relations */
2556 : }
2557 :
2558 : /*
2559 : * If clause has the form "inner op outer", check if its operator has
2560 : * valid commutator. This is necessary because mergejoin clauses in
2561 : * this form will get commuted in createplan.c to put the outer var on
2562 : * the left (see get_switched_clauses). This probably shouldn't ever
2563 : * fail, since mergejoinable operators ought to have commutators, but
2564 : * be paranoid.
2565 : *
2566 : * The clause being mergejoinable indicates that it's an OpExpr.
2567 : */
2568 684180 : if (!restrictinfo->outer_is_left &&
2569 227212 : !OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
2570 : {
2571 36 : have_nonmergeable_joinclause = true;
2572 36 : continue;
2573 : }
2574 :
2575 : /*
2576 : * Insist that each side have a non-redundant eclass. This
2577 : * restriction is needed because various bits of the planner expect
2578 : * that each clause in a merge be associable with some pathkey in a
2579 : * canonical pathkey list, but redundant eclasses can't appear in
2580 : * canonical sort orderings. (XXX it might be worth relaxing this,
2581 : * but not enough time to address it for 8.3.)
2582 : */
2583 456932 : update_mergeclause_eclasses(root, restrictinfo);
2584 :
2585 456932 : if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
2586 456884 : EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
2587 : {
2588 128 : have_nonmergeable_joinclause = true;
2589 128 : continue; /* can't handle redundant eclasses */
2590 : }
2591 :
2592 456804 : result_list = lappend(result_list, restrictinfo);
2593 : }
2594 :
2595 : /*
2596 : * Report whether mergejoin is allowed (see comment at top of function).
2597 : */
2598 520042 : switch (jointype)
2599 : {
2600 84260 : case JOIN_RIGHT:
2601 : case JOIN_RIGHT_ANTI:
2602 : case JOIN_FULL:
2603 84260 : *mergejoin_allowed = !have_nonmergeable_joinclause;
2604 84260 : break;
2605 435782 : default:
2606 435782 : *mergejoin_allowed = true;
2607 435782 : break;
2608 : }
2609 :
2610 520042 : return result_list;
2611 : }
|