Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * joinpath.c
4 : * Routines to find all possible paths for processing a set of joins
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/path/joinpath.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <math.h>
18 :
19 : #include "executor/executor.h"
20 : #include "foreign/fdwapi.h"
21 : #include "nodes/nodeFuncs.h"
22 : #include "optimizer/cost.h"
23 : #include "optimizer/optimizer.h"
24 : #include "optimizer/pathnode.h"
25 : #include "optimizer/paths.h"
26 : #include "optimizer/placeholder.h"
27 : #include "optimizer/planmain.h"
28 : #include "optimizer/restrictinfo.h"
29 : #include "utils/lsyscache.h"
30 : #include "utils/typcache.h"
31 :
32 : /* Hook for plugins to get control in add_paths_to_joinrel() */
33 : set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
34 :
35 : /*
36 : * Paths parameterized by a parent rel can be considered to be parameterized
37 : * by any of its children, when we are performing partitionwise joins. These
38 : * macros simplify checking for such cases. Beware multiple eval of args.
39 : */
40 : #define PATH_PARAM_BY_PARENT(path, rel) \
41 : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
42 : (rel)->top_parent_relids))
43 : #define PATH_PARAM_BY_REL_SELF(path, rel) \
44 : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
45 :
46 : #define PATH_PARAM_BY_REL(path, rel) \
47 : (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
48 :
49 : static void try_partial_mergejoin_path(PlannerInfo *root,
50 : RelOptInfo *joinrel,
51 : Path *outer_path,
52 : Path *inner_path,
53 : List *pathkeys,
54 : List *mergeclauses,
55 : List *outersortkeys,
56 : List *innersortkeys,
57 : JoinType jointype,
58 : JoinPathExtraData *extra);
59 : static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
60 : RelOptInfo *outerrel, RelOptInfo *innerrel,
61 : JoinType jointype, JoinPathExtraData *extra);
62 : static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
63 : RelOptInfo *outerrel, RelOptInfo *innerrel,
64 : JoinType jointype, JoinPathExtraData *extra);
65 : static void consider_parallel_nestloop(PlannerInfo *root,
66 : RelOptInfo *joinrel,
67 : RelOptInfo *outerrel,
68 : RelOptInfo *innerrel,
69 : JoinType jointype,
70 : JoinPathExtraData *extra);
71 : static void consider_parallel_mergejoin(PlannerInfo *root,
72 : RelOptInfo *joinrel,
73 : RelOptInfo *outerrel,
74 : RelOptInfo *innerrel,
75 : JoinType jointype,
76 : JoinPathExtraData *extra,
77 : Path *inner_cheapest_total);
78 : static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
79 : RelOptInfo *outerrel, RelOptInfo *innerrel,
80 : JoinType jointype, JoinPathExtraData *extra);
81 : static List *select_mergejoin_clauses(PlannerInfo *root,
82 : RelOptInfo *joinrel,
83 : RelOptInfo *outerrel,
84 : RelOptInfo *innerrel,
85 : List *restrictlist,
86 : JoinType jointype,
87 : bool *mergejoin_allowed);
88 : static void generate_mergejoin_paths(PlannerInfo *root,
89 : RelOptInfo *joinrel,
90 : RelOptInfo *innerrel,
91 : Path *outerpath,
92 : JoinType jointype,
93 : JoinPathExtraData *extra,
94 : bool useallclauses,
95 : Path *inner_cheapest_total,
96 : List *merge_pathkeys,
97 : bool is_partial);
98 :
99 :
100 : /*
101 : * add_paths_to_joinrel
102 : * Given a join relation and two component rels from which it can be made,
103 : * consider all possible paths that use the two component rels as outer
104 : * and inner rel respectively. Add these paths to the join rel's pathlist
105 : * if they survive comparison with other paths (and remove any existing
106 : * paths that are dominated by these paths).
107 : *
108 : * Modifies the pathlist field of the joinrel node to contain the best
109 : * paths found so far.
110 : *
111 : * jointype is not necessarily the same as sjinfo->jointype; it might be
112 : * "flipped around" if we are considering joining the rels in the opposite
113 : * direction from what's indicated in sjinfo.
114 : *
115 : * Also, this routine and others in this module accept the special JoinTypes
116 : * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
117 : * unique-ify the outer or inner relation and then apply a regular inner
118 : * join. These values are not allowed to propagate outside this module,
119 : * however. Path cost estimation code may need to recognize that it's
120 : * dealing with such a case --- the combination of nominal jointype INNER
121 : * with sjinfo->jointype == JOIN_SEMI indicates that.
122 : */
123 : void
124 691984 : add_paths_to_joinrel(PlannerInfo *root,
125 : RelOptInfo *joinrel,
126 : RelOptInfo *outerrel,
127 : RelOptInfo *innerrel,
128 : JoinType jointype,
129 : SpecialJoinInfo *sjinfo,
130 : List *restrictlist)
131 : {
132 : JoinPathExtraData extra;
133 691984 : bool mergejoin_allowed = true;
134 : ListCell *lc;
135 : Relids joinrelids;
136 :
137 : /*
138 : * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
139 : * between child relations, even if there is a SpecialJoinInfo node for
140 : * the join between the topmost parents. So, while calculating Relids set
141 : * representing the restriction, consider relids of topmost parent of
142 : * partitions.
143 : */
144 691984 : if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
145 11728 : joinrelids = joinrel->top_parent_relids;
146 : else
147 680256 : joinrelids = joinrel->relids;
148 :
149 691984 : extra.restrictlist = restrictlist;
150 691984 : extra.mergeclause_list = NIL;
151 691984 : extra.sjinfo = sjinfo;
152 691984 : extra.param_source_rels = NULL;
153 :
154 : /*
155 : * See if the inner relation is provably unique for this outer rel.
156 : *
157 : * We have some special cases: for JOIN_SEMI, it doesn't matter since the
158 : * executor can make the equivalent optimization anyway. It also doesn't
159 : * help enable use of Memoize, since a semijoin with a provably unique
160 : * inner side should have been reduced to an inner join in that case.
161 : * Therefore, we need not expend planner cycles on proofs. (For
162 : * JOIN_ANTI, although it doesn't help the executor for the same reason,
163 : * it can benefit Memoize paths.) For JOIN_UNIQUE_INNER, we must be
164 : * considering a semijoin whose inner side is not provably unique (else
165 : * reduce_unique_semijoins would've simplified it), so there's no point in
166 : * calling innerrel_is_unique. However, if the LHS covers all of the
167 : * semijoin's min_lefthand, then it's appropriate to set inner_unique
168 : * because the path produced by create_unique_path will be unique relative
169 : * to the LHS. (If we have an LHS that's only part of the min_lefthand,
170 : * that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
171 : * letting that value escape this module.
172 : */
173 691984 : switch (jointype)
174 : {
175 7326 : case JOIN_SEMI:
176 7326 : extra.inner_unique = false; /* well, unproven */
177 7326 : break;
178 7534 : case JOIN_UNIQUE_INNER:
179 15068 : extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
180 7534 : outerrel->relids);
181 7534 : break;
182 7534 : case JOIN_UNIQUE_OUTER:
183 7534 : extra.inner_unique = innerrel_is_unique(root,
184 : joinrel->relids,
185 : outerrel->relids,
186 : innerrel,
187 : JOIN_INNER,
188 : restrictlist,
189 : false);
190 7534 : break;
191 669590 : default:
192 669590 : extra.inner_unique = innerrel_is_unique(root,
193 : joinrel->relids,
194 : outerrel->relids,
195 : innerrel,
196 : jointype,
197 : restrictlist,
198 : false);
199 669590 : break;
200 : }
201 :
202 : /*
203 : * Find potential mergejoin clauses. We can skip this if we are not
204 : * interested in doing a mergejoin. However, mergejoin may be our only
205 : * way of implementing a full outer join, so override enable_mergejoin if
206 : * it's a full join.
207 : */
208 691984 : if (enable_mergejoin || jointype == JOIN_FULL)
209 688388 : extra.mergeclause_list = select_mergejoin_clauses(root,
210 : joinrel,
211 : outerrel,
212 : innerrel,
213 : restrictlist,
214 : jointype,
215 : &mergejoin_allowed);
216 :
217 : /*
218 : * If it's SEMI, ANTI, or inner_unique join, compute correction factors
219 : * for cost estimation. These will be the same for all paths.
220 : */
221 691984 : if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
222 213616 : compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel,
223 : jointype, sjinfo, restrictlist,
224 : &extra.semifactors);
225 :
226 : /*
227 : * Decide whether it's sensible to generate parameterized paths for this
228 : * joinrel, and if so, which relations such paths should require. There
229 : * is usually no need to create a parameterized result path unless there
230 : * is a join order restriction that prevents joining one of our input rels
231 : * directly to the parameter source rel instead of joining to the other
232 : * input rel. (But see allow_star_schema_join().) This restriction
233 : * reduces the number of parameterized paths we have to deal with at
234 : * higher join levels, without compromising the quality of the resulting
235 : * plan. We express the restriction as a Relids set that must overlap the
236 : * parameterization of any proposed join path. Note: param_source_rels
237 : * should contain only baserels, not OJ relids, so starting from
238 : * all_baserels not all_query_rels is correct.
239 : */
240 1485820 : foreach(lc, root->join_info_list)
241 : {
242 793836 : SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
243 :
244 : /*
245 : * SJ is relevant to this join if we have some part of its RHS
246 : * (possibly not all of it), and haven't yet joined to its LHS. (This
247 : * test is pretty simplistic, but should be sufficient considering the
248 : * join has already been proven legal.) If the SJ is relevant, it
249 : * presents constraints for joining to anything not in its RHS.
250 : */
251 793836 : if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
252 525788 : !bms_overlap(joinrelids, sjinfo2->min_lefthand))
253 22120 : extra.param_source_rels = bms_join(extra.param_source_rels,
254 22120 : bms_difference(root->all_baserels,
255 22120 : sjinfo2->min_righthand));
256 :
257 : /* full joins constrain both sides symmetrically */
258 798740 : if (sjinfo2->jointype == JOIN_FULL &&
259 4904 : bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
260 4848 : !bms_overlap(joinrelids, sjinfo2->min_righthand))
261 672 : extra.param_source_rels = bms_join(extra.param_source_rels,
262 672 : bms_difference(root->all_baserels,
263 672 : sjinfo2->min_lefthand));
264 : }
265 :
266 : /*
267 : * However, when a LATERAL subquery is involved, there will simply not be
268 : * any paths for the joinrel that aren't parameterized by whatever the
269 : * subquery is parameterized by, unless its parameterization is resolved
270 : * within the joinrel. So we might as well allow additional dependencies
271 : * on whatever residual lateral dependencies the joinrel will have.
272 : */
273 1383968 : extra.param_source_rels = bms_add_members(extra.param_source_rels,
274 691984 : joinrel->lateral_relids);
275 :
276 : /*
277 : * 1. Consider mergejoin paths where both relations must be explicitly
278 : * sorted. Skip this if we can't mergejoin.
279 : */
280 691984 : if (mergejoin_allowed)
281 674796 : sort_inner_and_outer(root, joinrel, outerrel, innerrel,
282 : jointype, &extra);
283 :
284 : /*
285 : * 2. Consider paths where the outer relation need not be explicitly
286 : * sorted. This includes both nestloops and mergejoins where the outer
287 : * path is already ordered. Again, skip this if we can't mergejoin.
288 : * (That's okay because we know that nestloop can't handle
289 : * right/right-anti/right-semi/full joins at all, so it wouldn't work in
290 : * the prohibited cases either.)
291 : */
292 691984 : if (mergejoin_allowed)
293 674796 : match_unsorted_outer(root, joinrel, outerrel, innerrel,
294 : jointype, &extra);
295 :
296 : #ifdef NOT_USED
297 :
298 : /*
299 : * 3. Consider paths where the inner relation need not be explicitly
300 : * sorted. This includes mergejoins only (nestloops were already built in
301 : * match_unsorted_outer).
302 : *
303 : * Diked out as redundant 2/13/2000 -- tgl. There isn't any really
304 : * significant difference between the inner and outer side of a mergejoin,
305 : * so match_unsorted_inner creates no paths that aren't equivalent to
306 : * those made by match_unsorted_outer when add_paths_to_joinrel() is
307 : * invoked with the two rels given in the other order.
308 : */
309 : if (mergejoin_allowed)
310 : match_unsorted_inner(root, joinrel, outerrel, innerrel,
311 : jointype, &extra);
312 : #endif
313 :
314 : /*
315 : * 4. Consider paths where both outer and inner relations must be hashed
316 : * before being joined. As above, disregard enable_hashjoin for full
317 : * joins, because there may be no other alternative.
318 : */
319 691984 : if (enable_hashjoin || jointype == JOIN_FULL)
320 686652 : hash_inner_and_outer(root, joinrel, outerrel, innerrel,
321 : jointype, &extra);
322 :
323 : /*
324 : * 5. If inner and outer relations are foreign tables (or joins) belonging
325 : * to the same server and assigned to the same user to check access
326 : * permissions as, give the FDW a chance to push down joins.
327 : */
328 691984 : if (joinrel->fdwroutine &&
329 2708 : joinrel->fdwroutine->GetForeignJoinPaths)
330 2704 : joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
331 : outerrel, innerrel,
332 : jointype, &extra);
333 :
334 : /*
335 : * 6. Finally, give extensions a chance to manipulate the path list. They
336 : * could add new paths (such as CustomPaths) by calling add_path(), or
337 : * add_partial_path() if parallel aware. They could also delete or modify
338 : * paths added by the core code.
339 : */
340 691984 : if (set_join_pathlist_hook)
341 0 : set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
342 : jointype, &extra);
343 691984 : }
344 :
345 : /*
346 : * We override the param_source_rels heuristic to accept nestloop paths in
347 : * which the outer rel satisfies some but not all of the inner path's
348 : * parameterization. This is necessary to get good plans for star-schema
349 : * scenarios, in which a parameterized path for a large table may require
350 : * parameters from multiple small tables that will not get joined directly to
351 : * each other. We can handle that by stacking nestloops that have the small
352 : * tables on the outside; but this breaks the rule the param_source_rels
353 : * heuristic is based on, namely that parameters should not be passed down
354 : * across joins unless there's a join-order-constraint-based reason to do so.
355 : * So we ignore the param_source_rels restriction when this case applies.
356 : *
357 : * allow_star_schema_join() returns true if the param_source_rels restriction
358 : * should be overridden, ie, it's okay to perform this join.
359 : */
360 : static inline bool
361 300612 : allow_star_schema_join(PlannerInfo *root,
362 : Relids outerrelids,
363 : Relids inner_paramrels)
364 : {
365 : /*
366 : * It's a star-schema case if the outer rel provides some but not all of
367 : * the inner rel's parameterization.
368 : */
369 350090 : return (bms_overlap(inner_paramrels, outerrelids) &&
370 49478 : bms_nonempty_difference(inner_paramrels, outerrelids));
371 : }
372 :
373 : /*
374 : * If the parameterization is only partly satisfied by the outer rel,
375 : * the unsatisfied part can't include any outer-join relids that could
376 : * null rels of the satisfied part. That would imply that we're trying
377 : * to use a clause involving a Var with nonempty varnullingrels at
378 : * a join level where that value isn't yet computable.
379 : *
380 : * In practice, this test never finds a problem because earlier join order
381 : * restrictions prevent us from attempting a join that would cause a problem.
382 : * (That's unsurprising, because the code worked before we ever added
383 : * outer-join relids to expression relids.) It still seems worth checking
384 : * as a backstop, but we only do so in assert-enabled builds.
385 : */
386 : #ifdef USE_ASSERT_CHECKING
387 : static inline bool
388 : have_unsafe_outer_join_ref(PlannerInfo *root,
389 : Relids outerrelids,
390 : Relids inner_paramrels)
391 : {
392 : bool result = false;
393 : Relids unsatisfied = bms_difference(inner_paramrels, outerrelids);
394 : Relids satisfied = bms_intersect(inner_paramrels, outerrelids);
395 :
396 : if (bms_overlap(unsatisfied, root->outer_join_rels))
397 : {
398 : ListCell *lc;
399 :
400 : foreach(lc, root->join_info_list)
401 : {
402 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
403 :
404 : if (!bms_is_member(sjinfo->ojrelid, unsatisfied))
405 : continue; /* not relevant */
406 : if (bms_overlap(satisfied, sjinfo->min_righthand) ||
407 : (sjinfo->jointype == JOIN_FULL &&
408 : bms_overlap(satisfied, sjinfo->min_lefthand)))
409 : {
410 : result = true; /* doesn't work */
411 : break;
412 : }
413 : }
414 : }
415 :
416 : /* Waste no memory when we reject a path here */
417 : bms_free(unsatisfied);
418 : bms_free(satisfied);
419 :
420 : return result;
421 : }
422 : #endif /* USE_ASSERT_CHECKING */
423 :
424 : /*
425 : * paraminfo_get_equal_hashops
426 : * Determine if the clauses in param_info and innerrel's lateral vars
427 : * can be hashed.
428 : * Returns true if hashing is possible, otherwise false.
429 : *
430 : * Additionally, on success we collect the outer expressions and the
431 : * appropriate equality operators for each hashable parameter to innerrel.
432 : * These are returned in parallel lists in *param_exprs and *operators.
433 : * We also set *binary_mode to indicate whether strict binary matching is
434 : * required.
435 : */
436 : static bool
437 398218 : paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
438 : RelOptInfo *outerrel, RelOptInfo *innerrel,
439 : List *ph_lateral_vars, List **param_exprs,
440 : List **operators, bool *binary_mode)
441 :
442 : {
443 : List *lateral_vars;
444 : ListCell *lc;
445 :
446 398218 : *param_exprs = NIL;
447 398218 : *operators = NIL;
448 398218 : *binary_mode = false;
449 :
450 : /* Add join clauses from param_info to the hash key */
451 398218 : if (param_info != NULL)
452 : {
453 398218 : List *clauses = param_info->ppi_clauses;
454 :
455 761954 : foreach(lc, clauses)
456 : {
457 441494 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
458 : OpExpr *opexpr;
459 : Node *expr;
460 : Oid hasheqoperator;
461 :
462 441494 : opexpr = (OpExpr *) rinfo->clause;
463 :
464 : /*
465 : * Bail if the rinfo is not compatible. We need a join OpExpr
466 : * with 2 args.
467 : */
468 441494 : if (!IsA(opexpr, OpExpr) || list_length(opexpr->args) != 2 ||
469 420642 : !clause_sides_match_join(rinfo, outerrel->relids,
470 : innerrel->relids))
471 : {
472 77602 : list_free(*operators);
473 77602 : list_free(*param_exprs);
474 77758 : return false;
475 : }
476 :
477 363892 : if (rinfo->outer_is_left)
478 : {
479 206988 : expr = (Node *) linitial(opexpr->args);
480 206988 : hasheqoperator = rinfo->left_hasheqoperator;
481 : }
482 : else
483 : {
484 156904 : expr = (Node *) lsecond(opexpr->args);
485 156904 : hasheqoperator = rinfo->right_hasheqoperator;
486 : }
487 :
488 : /* can't do memoize if we can't hash the outer type */
489 363892 : if (!OidIsValid(hasheqoperator))
490 : {
491 156 : list_free(*operators);
492 156 : list_free(*param_exprs);
493 156 : return false;
494 : }
495 :
496 : /*
497 : * 'expr' may already exist as a parameter from a previous item in
498 : * ppi_clauses. No need to include it again, however we'd better
499 : * ensure we do switch into binary mode if required. See below.
500 : */
501 363736 : if (!list_member(*param_exprs, expr))
502 : {
503 363730 : *operators = lappend_oid(*operators, hasheqoperator);
504 363730 : *param_exprs = lappend(*param_exprs, expr);
505 : }
506 :
507 : /*
508 : * When the join operator is not hashable then it's possible that
509 : * the operator will be able to distinguish something that the
510 : * hash equality operator could not. For example with floating
511 : * point types -0.0 and +0.0 are classed as equal by the hash
512 : * function and equality function, but some other operator may be
513 : * able to tell those values apart. This means that we must put
514 : * memoize into binary comparison mode so that it does bit-by-bit
515 : * comparisons rather than a "logical" comparison as it would
516 : * using the hash equality operator.
517 : */
518 363736 : if (!OidIsValid(rinfo->hashjoinoperator))
519 1240 : *binary_mode = true;
520 : }
521 : }
522 :
523 : /* Now add any lateral vars to the cache key too */
524 320460 : lateral_vars = list_concat(ph_lateral_vars, innerrel->lateral_vars);
525 325204 : foreach(lc, lateral_vars)
526 : {
527 4942 : Node *expr = (Node *) lfirst(lc);
528 : TypeCacheEntry *typentry;
529 :
530 : /* Reject if there are any volatile functions in lateral vars */
531 4942 : if (contain_volatile_functions(expr))
532 : {
533 0 : list_free(*operators);
534 0 : list_free(*param_exprs);
535 198 : return false;
536 : }
537 :
538 4942 : typentry = lookup_type_cache(exprType(expr),
539 : TYPECACHE_HASH_PROC | TYPECACHE_EQ_OPR);
540 :
541 : /* can't use memoize without a valid hash proc and equals operator */
542 4942 : if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr))
543 : {
544 198 : list_free(*operators);
545 198 : list_free(*param_exprs);
546 198 : return false;
547 : }
548 :
549 : /*
550 : * 'expr' may already exist as a parameter from the ppi_clauses. No
551 : * need to include it again, however we'd better ensure we do switch
552 : * into binary mode.
553 : */
554 4744 : if (!list_member(*param_exprs, expr))
555 : {
556 4294 : *operators = lappend_oid(*operators, typentry->eq_opr);
557 4294 : *param_exprs = lappend(*param_exprs, expr);
558 : }
559 :
560 : /*
561 : * We must go into binary mode as we don't have too much of an idea of
562 : * how these lateral Vars are being used. See comment above when we
563 : * set *binary_mode for the non-lateral Var case. This could be
564 : * relaxed a bit if we had the RestrictInfos and knew the operators
565 : * being used, however for cases like Vars that are arguments to
566 : * functions we must operate in binary mode as we don't have
567 : * visibility into what the function is doing with the Vars.
568 : */
569 4744 : *binary_mode = true;
570 : }
571 :
572 : /* We're okay to use memoize */
573 320262 : return true;
574 : }
575 :
576 : /*
577 : * extract_lateral_vars_from_PHVs
578 : * Extract lateral references within PlaceHolderVars that are due to be
579 : * evaluated at 'innerrelids'.
580 : */
581 : static List *
582 1306384 : extract_lateral_vars_from_PHVs(PlannerInfo *root, Relids innerrelids)
583 : {
584 1306384 : List *ph_lateral_vars = NIL;
585 : ListCell *lc;
586 :
587 : /* Nothing would be found if the query contains no LATERAL RTEs */
588 1306384 : if (!root->hasLateralRTEs)
589 1286758 : return NIL;
590 :
591 : /*
592 : * No need to consider PHVs that are due to be evaluated at joinrels,
593 : * since we do not add Memoize nodes on top of joinrel paths.
594 : */
595 19626 : if (bms_membership(innerrelids) == BMS_MULTIPLE)
596 6154 : return NIL;
597 :
598 16336 : foreach(lc, root->placeholder_list)
599 : {
600 2864 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
601 : List *vars;
602 : ListCell *cell;
603 :
604 : /* PHV is uninteresting if no lateral refs */
605 2864 : if (phinfo->ph_lateral == NULL)
606 1638 : continue;
607 :
608 : /* PHV is uninteresting if not due to be evaluated at innerrelids */
609 1226 : if (!bms_equal(phinfo->ph_eval_at, innerrelids))
610 982 : continue;
611 :
612 : /*
613 : * If the PHV does not reference any rels in innerrelids, use its
614 : * contained expression as a cache key rather than extracting the
615 : * Vars/PHVs from it and using those. This can be beneficial in cases
616 : * where the expression results in fewer distinct values to cache
617 : * tuples for.
618 : */
619 244 : if (!bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
620 : innerrelids))
621 : {
622 232 : ph_lateral_vars = lappend(ph_lateral_vars, phinfo->ph_var->phexpr);
623 232 : continue;
624 : }
625 :
626 : /* Fetch Vars and PHVs of lateral references within PlaceHolderVars */
627 12 : vars = pull_vars_of_level((Node *) phinfo->ph_var->phexpr, 0);
628 36 : foreach(cell, vars)
629 : {
630 24 : Node *node = (Node *) lfirst(cell);
631 :
632 24 : if (IsA(node, Var))
633 : {
634 12 : Var *var = (Var *) node;
635 :
636 : Assert(var->varlevelsup == 0);
637 :
638 12 : if (bms_is_member(var->varno, phinfo->ph_lateral))
639 0 : ph_lateral_vars = lappend(ph_lateral_vars, node);
640 : }
641 12 : else if (IsA(node, PlaceHolderVar))
642 : {
643 12 : PlaceHolderVar *phv = (PlaceHolderVar *) node;
644 :
645 : Assert(phv->phlevelsup == 0);
646 :
647 12 : if (bms_is_subset(find_placeholder_info(root, phv)->ph_eval_at,
648 12 : phinfo->ph_lateral))
649 12 : ph_lateral_vars = lappend(ph_lateral_vars, node);
650 : }
651 : else
652 : Assert(false);
653 : }
654 :
655 12 : list_free(vars);
656 : }
657 :
658 13472 : return ph_lateral_vars;
659 : }
660 :
661 : /*
662 : * get_memoize_path
663 : * If possible, make and return a Memoize path atop of 'inner_path'.
664 : * Otherwise return NULL.
665 : *
666 : * Note that currently we do not add Memoize nodes on top of join relation
667 : * paths. This is because the ParamPathInfos for join relation paths do not
668 : * maintain ppi_clauses, as the set of relevant clauses varies depending on how
669 : * the join is formed. In addition, joinrels do not maintain lateral_vars. So
670 : * we do not have a way to extract cache keys from joinrels.
671 : */
672 : static Path *
673 1834048 : get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
674 : RelOptInfo *outerrel, Path *inner_path,
675 : Path *outer_path, JoinType jointype,
676 : JoinPathExtraData *extra)
677 : {
678 : List *param_exprs;
679 : List *hash_operators;
680 : ListCell *lc;
681 : bool binary_mode;
682 : List *ph_lateral_vars;
683 :
684 : /* Obviously not if it's disabled */
685 1834048 : if (!enable_memoize)
686 388 : return NULL;
687 :
688 : /*
689 : * We can safely not bother with all this unless we expect to perform more
690 : * than one inner scan. The first scan is always going to be a cache
691 : * miss. This would likely fail later anyway based on costs, so this is
692 : * really just to save some wasted effort.
693 : */
694 1833660 : if (outer_path->parent->rows < 2)
695 527276 : return NULL;
696 :
697 : /*
698 : * Extract lateral Vars/PHVs within PlaceHolderVars that are due to be
699 : * evaluated at innerrel. These lateral Vars/PHVs could be used as
700 : * memoize cache keys.
701 : */
702 1306384 : ph_lateral_vars = extract_lateral_vars_from_PHVs(root, innerrel->relids);
703 :
704 : /*
705 : * We can only have a memoize node when there's some kind of cache key,
706 : * either parameterized path clauses or lateral Vars. No cache key sounds
707 : * more like something a Materialize node might be more useful for.
708 : */
709 1306384 : if ((inner_path->param_info == NULL ||
710 546692 : inner_path->param_info->ppi_clauses == NIL) &&
711 785874 : innerrel->lateral_vars == NIL &&
712 : ph_lateral_vars == NIL)
713 782608 : return NULL;
714 :
715 : /*
716 : * Currently we don't do this for SEMI and ANTI joins, because nested loop
717 : * SEMI/ANTI joins don't scan the inner node to completion, which means
718 : * memoize cannot mark the cache entry as complete. Nor can we mark the
719 : * cache entry as complete after fetching the first inner tuple, because
720 : * if that tuple and the current outer tuple don't satisfy the join
721 : * clauses, a second inner tuple that satisfies the parameters would find
722 : * the cache entry already marked as complete. The only exception is when
723 : * the inner relation is provably unique, as in that case, there won't be
724 : * a second matching tuple and we can safely mark the cache entry as
725 : * complete after fetching the first inner tuple. Note that in such
726 : * cases, the SEMI join should have been reduced to an inner join by
727 : * reduce_unique_semijoins.
728 : */
729 523776 : if ((jointype == JOIN_SEMI || jointype == JOIN_ANTI) &&
730 10570 : !extra->inner_unique)
731 6590 : return NULL;
732 :
733 : /*
734 : * Memoize normally marks cache entries as complete when it runs out of
735 : * tuples to read from its subplan. However, with unique joins, Nested
736 : * Loop will skip to the next outer tuple after finding the first matching
737 : * inner tuple. This means that we may not read the inner side of the
738 : * join to completion which leaves no opportunity to mark the cache entry
739 : * as complete. To work around that, when the join is unique we
740 : * automatically mark cache entries as complete after fetching the first
741 : * tuple. This works when the entire join condition is parameterized.
742 : * Otherwise, when the parameterization is only a subset of the join
743 : * condition, we can't be sure which part of it causes the join to be
744 : * unique. This means there are no guarantees that only 1 tuple will be
745 : * read. We cannot mark the cache entry as complete after reading the
746 : * first tuple without that guarantee. This means the scope of Memoize
747 : * node's usefulness is limited to only outer rows that have no join
748 : * partner as this is the only case where Nested Loop would exhaust the
749 : * inner scan of a unique join. Since the scope is limited to that, we
750 : * just don't bother making a memoize path in this case.
751 : *
752 : * Lateral vars needn't be considered here as they're not considered when
753 : * determining if the join is unique.
754 : */
755 517186 : if (extra->inner_unique)
756 : {
757 : Bitmapset *ppi_serials;
758 :
759 315908 : if (inner_path->param_info == NULL)
760 0 : return NULL;
761 :
762 315908 : ppi_serials = inner_path->param_info->ppi_serials;
763 :
764 753816 : foreach_node(RestrictInfo, rinfo, extra->restrictlist)
765 : {
766 359908 : if (!bms_is_member(rinfo->rinfo_serial, ppi_serials))
767 118954 : return NULL;
768 : }
769 : }
770 :
771 : /*
772 : * We can't use a memoize node if there are volatile functions in the
773 : * inner rel's target list or restrict list. A cache hit could reduce the
774 : * number of calls to these functions.
775 : */
776 398232 : if (contain_volatile_functions((Node *) innerrel->reltarget))
777 0 : return NULL;
778 :
779 663618 : foreach(lc, innerrel->baserestrictinfo)
780 : {
781 265398 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
782 :
783 265398 : if (contain_volatile_functions((Node *) rinfo))
784 12 : return NULL;
785 : }
786 :
787 : /*
788 : * Also check the parameterized path restrictinfos for volatile functions.
789 : * Indexed functions must be immutable so shouldn't have any volatile
790 : * functions, however, with a lateral join the inner scan may not be an
791 : * index scan.
792 : */
793 398220 : if (inner_path->param_info != NULL)
794 : {
795 867660 : foreach(lc, inner_path->param_info->ppi_clauses)
796 : {
797 469442 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
798 :
799 469442 : if (contain_volatile_functions((Node *) rinfo))
800 2 : return NULL;
801 : }
802 : }
803 :
804 : /* Check if we have hash ops for each parameter to the path */
805 398218 : if (paraminfo_get_equal_hashops(root,
806 : inner_path->param_info,
807 398218 : outerrel->top_parent ?
808 : outerrel->top_parent : outerrel,
809 : innerrel,
810 : ph_lateral_vars,
811 : ¶m_exprs,
812 : &hash_operators,
813 : &binary_mode))
814 : {
815 320262 : return (Path *) create_memoize_path(root,
816 : innerrel,
817 : inner_path,
818 : param_exprs,
819 : hash_operators,
820 320262 : extra->inner_unique,
821 : binary_mode,
822 : outer_path->rows);
823 : }
824 :
825 77956 : return NULL;
826 : }
827 :
828 : /*
829 : * try_nestloop_path
830 : * Consider a nestloop join path; if it appears useful, push it into
831 : * the joinrel's pathlist via add_path().
832 : */
833 : static void
834 3172260 : try_nestloop_path(PlannerInfo *root,
835 : RelOptInfo *joinrel,
836 : Path *outer_path,
837 : Path *inner_path,
838 : List *pathkeys,
839 : JoinType jointype,
840 : JoinPathExtraData *extra)
841 : {
842 : Relids required_outer;
843 : JoinCostWorkspace workspace;
844 3172260 : RelOptInfo *innerrel = inner_path->parent;
845 3172260 : RelOptInfo *outerrel = outer_path->parent;
846 : Relids innerrelids;
847 : Relids outerrelids;
848 3172260 : Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
849 3172260 : Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
850 :
851 : /*
852 : * If we are forming an outer join at this join, it's nonsensical to use
853 : * an input path that uses the outer join as part of its parameterization.
854 : * (This can happen despite our join order restrictions, since those apply
855 : * to what is in an input relation not what its parameters are.)
856 : */
857 3932554 : if (extra->sjinfo->ojrelid != 0 &&
858 1520588 : (bms_is_member(extra->sjinfo->ojrelid, inner_paramrels) ||
859 760294 : bms_is_member(extra->sjinfo->ojrelid, outer_paramrels)))
860 300316 : return;
861 :
862 : /*
863 : * Any parameterization of the input paths refers to topmost parents of
864 : * the relevant relations, because reparameterize_path_by_child() hasn't
865 : * been called yet. So we must consider topmost parents of the relations
866 : * being joined, too, while determining parameterization of the result and
867 : * checking for disallowed parameterization cases.
868 : */
869 3172140 : if (innerrel->top_parent_relids)
870 37132 : innerrelids = innerrel->top_parent_relids;
871 : else
872 3135008 : innerrelids = innerrel->relids;
873 :
874 3172140 : if (outerrel->top_parent_relids)
875 37132 : outerrelids = outerrel->top_parent_relids;
876 : else
877 3135008 : outerrelids = outerrel->relids;
878 :
879 : /*
880 : * Check to see if proposed path is still parameterized, and reject if the
881 : * parameterization wouldn't be sensible --- unless allow_star_schema_join
882 : * says to allow it anyway.
883 : */
884 3172140 : required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
885 : innerrelids, inner_paramrels);
886 3172140 : if (required_outer &&
887 336512 : !bms_overlap(required_outer, extra->param_source_rels) &&
888 300612 : !allow_star_schema_join(root, outerrelids, inner_paramrels))
889 : {
890 : /* Waste no memory when we reject a path here */
891 300196 : bms_free(required_outer);
892 300196 : return;
893 : }
894 :
895 : /* If we got past that, we shouldn't have any unsafe outer-join refs */
896 : Assert(!have_unsafe_outer_join_ref(root, outerrelids, inner_paramrels));
897 :
898 : /*
899 : * If the inner path is parameterized, it is parameterized by the topmost
900 : * parent of the outer rel, not the outer rel itself. We will need to
901 : * translate the parameterization, if this path is chosen, during
902 : * create_plan(). Here we just check whether we will be able to perform
903 : * the translation, and if not avoid creating a nestloop path.
904 : */
905 2871944 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
906 12160 : !path_is_reparameterizable_by_child(inner_path, outer_path->parent))
907 : {
908 0 : bms_free(required_outer);
909 0 : return;
910 : }
911 :
912 : /*
913 : * Do a precheck to quickly eliminate obviously-inferior paths. We
914 : * calculate a cheap lower bound on the path's cost and then use
915 : * add_path_precheck() to see if the path is clearly going to be dominated
916 : * by some existing path for the joinrel. If not, do the full pushup with
917 : * creating a fully valid path structure and submitting it to add_path().
918 : * The latter two steps are expensive enough to make this two-phase
919 : * methodology worthwhile.
920 : */
921 2871944 : initial_cost_nestloop(root, &workspace, jointype,
922 : outer_path, inner_path, extra);
923 :
924 2871944 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
925 : workspace.startup_cost, workspace.total_cost,
926 : pathkeys, required_outer))
927 : {
928 1393746 : add_path(joinrel, (Path *)
929 1393746 : create_nestloop_path(root,
930 : joinrel,
931 : jointype,
932 : &workspace,
933 : extra,
934 : outer_path,
935 : inner_path,
936 : extra->restrictlist,
937 : pathkeys,
938 : required_outer));
939 : }
940 : else
941 : {
942 : /* Waste no memory when we reject a path here */
943 1478198 : bms_free(required_outer);
944 : }
945 : }
946 :
947 : /*
948 : * try_partial_nestloop_path
949 : * Consider a partial nestloop join path; if it appears useful, push it into
950 : * the joinrel's partial_pathlist via add_partial_path().
951 : */
952 : static void
953 42706 : try_partial_nestloop_path(PlannerInfo *root,
954 : RelOptInfo *joinrel,
955 : Path *outer_path,
956 : Path *inner_path,
957 : List *pathkeys,
958 : JoinType jointype,
959 : JoinPathExtraData *extra)
960 : {
961 : JoinCostWorkspace workspace;
962 :
963 : /*
964 : * If the inner path is parameterized, the parameterization must be fully
965 : * satisfied by the proposed outer path. Parameterized partial paths are
966 : * not supported. The caller should already have verified that no lateral
967 : * rels are required here.
968 : */
969 : Assert(bms_is_empty(joinrel->lateral_relids));
970 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
971 42706 : if (inner_path->param_info != NULL)
972 : {
973 15054 : Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
974 15054 : RelOptInfo *outerrel = outer_path->parent;
975 : Relids outerrelids;
976 :
977 : /*
978 : * The inner and outer paths are parameterized, if at all, by the top
979 : * level parents, not the child relations, so we must use those relids
980 : * for our parameterization tests.
981 : */
982 15054 : if (outerrel->top_parent_relids)
983 11016 : outerrelids = outerrel->top_parent_relids;
984 : else
985 4038 : outerrelids = outerrel->relids;
986 :
987 15054 : if (!bms_is_subset(inner_paramrels, outerrelids))
988 29978 : return;
989 : }
990 :
991 : /*
992 : * If the inner path is parameterized, it is parameterized by the topmost
993 : * parent of the outer rel, not the outer rel itself. We will need to
994 : * translate the parameterization, if this path is chosen, during
995 : * create_plan(). Here we just check whether we will be able to perform
996 : * the translation, and if not avoid creating a nestloop path.
997 : */
998 41402 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
999 10116 : !path_is_reparameterizable_by_child(inner_path, outer_path->parent))
1000 0 : return;
1001 :
1002 : /*
1003 : * Before creating a path, get a quick lower bound on what it is likely to
1004 : * cost. Bail out right away if it looks terrible.
1005 : */
1006 41402 : initial_cost_nestloop(root, &workspace, jointype,
1007 : outer_path, inner_path, extra);
1008 41402 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1009 : workspace.total_cost, pathkeys))
1010 28674 : return;
1011 :
1012 : /* Might be good enough to be worth trying, so let's try it. */
1013 12728 : add_partial_path(joinrel, (Path *)
1014 12728 : create_nestloop_path(root,
1015 : joinrel,
1016 : jointype,
1017 : &workspace,
1018 : extra,
1019 : outer_path,
1020 : inner_path,
1021 : extra->restrictlist,
1022 : pathkeys,
1023 : NULL));
1024 : }
1025 :
1026 : /*
1027 : * try_mergejoin_path
1028 : * Consider a merge join path; if it appears useful, push it into
1029 : * the joinrel's pathlist via add_path().
1030 : */
1031 : static void
1032 1313804 : try_mergejoin_path(PlannerInfo *root,
1033 : RelOptInfo *joinrel,
1034 : Path *outer_path,
1035 : Path *inner_path,
1036 : List *pathkeys,
1037 : List *mergeclauses,
1038 : List *outersortkeys,
1039 : List *innersortkeys,
1040 : JoinType jointype,
1041 : JoinPathExtraData *extra,
1042 : bool is_partial)
1043 : {
1044 : Relids required_outer;
1045 1313804 : int outer_presorted_keys = 0;
1046 : JoinCostWorkspace workspace;
1047 :
1048 1313804 : if (is_partial)
1049 : {
1050 6994 : try_partial_mergejoin_path(root,
1051 : joinrel,
1052 : outer_path,
1053 : inner_path,
1054 : pathkeys,
1055 : mergeclauses,
1056 : outersortkeys,
1057 : innersortkeys,
1058 : jointype,
1059 : extra);
1060 48894 : return;
1061 : }
1062 :
1063 : /*
1064 : * If we are forming an outer join at this join, it's nonsensical to use
1065 : * an input path that uses the outer join as part of its parameterization.
1066 : * (This can happen despite our join order restrictions, since those apply
1067 : * to what is in an input relation not what its parameters are.)
1068 : */
1069 1729316 : if (extra->sjinfo->ojrelid != 0 &&
1070 845012 : (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
1071 422506 : bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
1072 12 : return;
1073 :
1074 : /*
1075 : * Check to see if proposed path is still parameterized, and reject if the
1076 : * parameterization wouldn't be sensible.
1077 : */
1078 1306798 : required_outer = calc_non_nestloop_required_outer(outer_path,
1079 : inner_path);
1080 1306798 : if (required_outer &&
1081 44392 : !bms_overlap(required_outer, extra->param_source_rels))
1082 : {
1083 : /* Waste no memory when we reject a path here */
1084 41888 : bms_free(required_outer);
1085 41888 : return;
1086 : }
1087 :
1088 : /*
1089 : * If the given paths are already well enough ordered, we can skip doing
1090 : * an explicit sort.
1091 : *
1092 : * We need to determine the number of presorted keys of the outer path to
1093 : * decide whether explicit incremental sort can be applied when
1094 : * outersortkeys is not NIL. We do not need to do the same for the inner
1095 : * path though, as incremental sort currently does not support
1096 : * mark/restore.
1097 : */
1098 1885362 : if (outersortkeys &&
1099 620452 : pathkeys_count_contained_in(outersortkeys, outer_path->pathkeys,
1100 : &outer_presorted_keys))
1101 17186 : outersortkeys = NIL;
1102 2284606 : if (innersortkeys &&
1103 1019696 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1104 30010 : innersortkeys = NIL;
1105 :
1106 : /*
1107 : * See comments in try_nestloop_path().
1108 : */
1109 1264910 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
1110 : outer_path, inner_path,
1111 : outersortkeys, innersortkeys,
1112 : outer_presorted_keys,
1113 : extra);
1114 :
1115 1264910 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
1116 : workspace.startup_cost, workspace.total_cost,
1117 : pathkeys, required_outer))
1118 : {
1119 318146 : add_path(joinrel, (Path *)
1120 318146 : create_mergejoin_path(root,
1121 : joinrel,
1122 : jointype,
1123 : &workspace,
1124 : extra,
1125 : outer_path,
1126 : inner_path,
1127 : extra->restrictlist,
1128 : pathkeys,
1129 : required_outer,
1130 : mergeclauses,
1131 : outersortkeys,
1132 : innersortkeys,
1133 : outer_presorted_keys));
1134 : }
1135 : else
1136 : {
1137 : /* Waste no memory when we reject a path here */
1138 946764 : bms_free(required_outer);
1139 : }
1140 : }
1141 :
1142 : /*
1143 : * try_partial_mergejoin_path
1144 : * Consider a partial merge join path; if it appears useful, push it into
1145 : * the joinrel's pathlist via add_partial_path().
1146 : */
1147 : static void
1148 20278 : try_partial_mergejoin_path(PlannerInfo *root,
1149 : RelOptInfo *joinrel,
1150 : Path *outer_path,
1151 : Path *inner_path,
1152 : List *pathkeys,
1153 : List *mergeclauses,
1154 : List *outersortkeys,
1155 : List *innersortkeys,
1156 : JoinType jointype,
1157 : JoinPathExtraData *extra)
1158 : {
1159 20278 : int outer_presorted_keys = 0;
1160 : JoinCostWorkspace workspace;
1161 :
1162 : /*
1163 : * See comments in try_partial_hashjoin_path().
1164 : */
1165 : Assert(bms_is_empty(joinrel->lateral_relids));
1166 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
1167 20278 : if (!bms_is_empty(PATH_REQ_OUTER(inner_path)))
1168 10802 : return;
1169 :
1170 : /*
1171 : * If the given paths are already well enough ordered, we can skip doing
1172 : * an explicit sort.
1173 : *
1174 : * We need to determine the number of presorted keys of the outer path to
1175 : * decide whether explicit incremental sort can be applied when
1176 : * outersortkeys is not NIL. We do not need to do the same for the inner
1177 : * path though, as incremental sort currently does not support
1178 : * mark/restore.
1179 : */
1180 33562 : if (outersortkeys &&
1181 13284 : pathkeys_count_contained_in(outersortkeys, outer_path->pathkeys,
1182 : &outer_presorted_keys))
1183 234 : outersortkeys = NIL;
1184 37656 : if (innersortkeys &&
1185 17378 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1186 252 : innersortkeys = NIL;
1187 :
1188 : /*
1189 : * See comments in try_partial_nestloop_path().
1190 : */
1191 20278 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
1192 : outer_path, inner_path,
1193 : outersortkeys, innersortkeys,
1194 : outer_presorted_keys,
1195 : extra);
1196 :
1197 20278 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1198 : workspace.total_cost, pathkeys))
1199 10802 : return;
1200 :
1201 : /* Might be good enough to be worth trying, so let's try it. */
1202 9476 : add_partial_path(joinrel, (Path *)
1203 9476 : create_mergejoin_path(root,
1204 : joinrel,
1205 : jointype,
1206 : &workspace,
1207 : extra,
1208 : outer_path,
1209 : inner_path,
1210 : extra->restrictlist,
1211 : pathkeys,
1212 : NULL,
1213 : mergeclauses,
1214 : outersortkeys,
1215 : innersortkeys,
1216 : outer_presorted_keys));
1217 : }
1218 :
1219 : /*
1220 : * try_hashjoin_path
1221 : * Consider a hash join path; if it appears useful, push it into
1222 : * the joinrel's pathlist via add_path().
1223 : */
1224 : static void
1225 787992 : try_hashjoin_path(PlannerInfo *root,
1226 : RelOptInfo *joinrel,
1227 : Path *outer_path,
1228 : Path *inner_path,
1229 : List *hashclauses,
1230 : JoinType jointype,
1231 : JoinPathExtraData *extra)
1232 : {
1233 : Relids required_outer;
1234 : JoinCostWorkspace workspace;
1235 :
1236 : /*
1237 : * If we are forming an outer join at this join, it's nonsensical to use
1238 : * an input path that uses the outer join as part of its parameterization.
1239 : * (This can happen despite our join order restrictions, since those apply
1240 : * to what is in an input relation not what its parameters are.)
1241 : */
1242 1065770 : if (extra->sjinfo->ojrelid != 0 &&
1243 555526 : (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
1244 277748 : bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
1245 118756 : return;
1246 :
1247 : /*
1248 : * Check to see if proposed path is still parameterized, and reject if the
1249 : * parameterization wouldn't be sensible.
1250 : */
1251 787932 : required_outer = calc_non_nestloop_required_outer(outer_path,
1252 : inner_path);
1253 787932 : if (required_outer &&
1254 132582 : !bms_overlap(required_outer, extra->param_source_rels))
1255 : {
1256 : /* Waste no memory when we reject a path here */
1257 118696 : bms_free(required_outer);
1258 118696 : return;
1259 : }
1260 :
1261 : /*
1262 : * See comments in try_nestloop_path(). Also note that hashjoin paths
1263 : * never have any output pathkeys, per comments in create_hashjoin_path.
1264 : */
1265 669236 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1266 : outer_path, inner_path, extra, false);
1267 :
1268 669236 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
1269 : workspace.startup_cost, workspace.total_cost,
1270 : NIL, required_outer))
1271 : {
1272 285592 : add_path(joinrel, (Path *)
1273 285592 : create_hashjoin_path(root,
1274 : joinrel,
1275 : jointype,
1276 : &workspace,
1277 : extra,
1278 : outer_path,
1279 : inner_path,
1280 : false, /* parallel_hash */
1281 : extra->restrictlist,
1282 : required_outer,
1283 : hashclauses));
1284 : }
1285 : else
1286 : {
1287 : /* Waste no memory when we reject a path here */
1288 383644 : bms_free(required_outer);
1289 : }
1290 : }
1291 :
1292 : /*
1293 : * try_partial_hashjoin_path
1294 : * Consider a partial hashjoin join path; if it appears useful, push it into
1295 : * the joinrel's partial_pathlist via add_partial_path().
1296 : * The outer side is partial. If parallel_hash is true, then the inner path
1297 : * must be partial and will be run in parallel to create one or more shared
1298 : * hash tables; otherwise the inner path must be complete and a copy of it
1299 : * is run in every process to create separate identical private hash tables.
1300 : */
1301 : static void
1302 21958 : try_partial_hashjoin_path(PlannerInfo *root,
1303 : RelOptInfo *joinrel,
1304 : Path *outer_path,
1305 : Path *inner_path,
1306 : List *hashclauses,
1307 : JoinType jointype,
1308 : JoinPathExtraData *extra,
1309 : bool parallel_hash)
1310 : {
1311 : JoinCostWorkspace workspace;
1312 :
1313 : /*
1314 : * If the inner path is parameterized, we can't use a partial hashjoin.
1315 : * Parameterized partial paths are not supported. The caller should
1316 : * already have verified that no lateral rels are required here.
1317 : */
1318 : Assert(bms_is_empty(joinrel->lateral_relids));
1319 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
1320 21958 : if (!bms_is_empty(PATH_REQ_OUTER(inner_path)))
1321 10626 : return;
1322 :
1323 : /*
1324 : * Before creating a path, get a quick lower bound on what it is likely to
1325 : * cost. Bail out right away if it looks terrible.
1326 : */
1327 21958 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1328 : outer_path, inner_path, extra, parallel_hash);
1329 21958 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1330 : workspace.total_cost, NIL))
1331 10626 : return;
1332 :
1333 : /* Might be good enough to be worth trying, so let's try it. */
1334 11332 : add_partial_path(joinrel, (Path *)
1335 11332 : create_hashjoin_path(root,
1336 : joinrel,
1337 : jointype,
1338 : &workspace,
1339 : extra,
1340 : outer_path,
1341 : inner_path,
1342 : parallel_hash,
1343 : extra->restrictlist,
1344 : NULL,
1345 : hashclauses));
1346 : }
1347 :
1348 : /*
1349 : * sort_inner_and_outer
1350 : * Create mergejoin join paths by explicitly sorting both the outer and
1351 : * inner join relations on each available merge ordering.
1352 : *
1353 : * 'joinrel' is the join relation
1354 : * 'outerrel' is the outer join relation
1355 : * 'innerrel' is the inner join relation
1356 : * 'jointype' is the type of join to do
1357 : * 'extra' contains additional input values
1358 : */
1359 : static void
1360 674796 : sort_inner_and_outer(PlannerInfo *root,
1361 : RelOptInfo *joinrel,
1362 : RelOptInfo *outerrel,
1363 : RelOptInfo *innerrel,
1364 : JoinType jointype,
1365 : JoinPathExtraData *extra)
1366 : {
1367 674796 : JoinType save_jointype = jointype;
1368 : Path *outer_path;
1369 : Path *inner_path;
1370 674796 : Path *cheapest_partial_outer = NULL;
1371 674796 : Path *cheapest_safe_inner = NULL;
1372 : List *all_pathkeys;
1373 : ListCell *l;
1374 :
1375 : /* Nothing to do if there are no available mergejoin clauses */
1376 674796 : if (extra->mergeclause_list == NIL)
1377 118790 : return;
1378 :
1379 : /*
1380 : * We only consider the cheapest-total-cost input paths, since we are
1381 : * assuming here that a sort is required. We will consider
1382 : * cheapest-startup-cost input paths later, and only if they don't need a
1383 : * sort.
1384 : *
1385 : * This function intentionally does not consider parameterized input
1386 : * paths, except when the cheapest-total is parameterized. If we did so,
1387 : * we'd have a combinatorial explosion of mergejoin paths of dubious
1388 : * value. This interacts with decisions elsewhere that also discriminate
1389 : * against mergejoins with parameterized inputs; see comments in
1390 : * src/backend/optimizer/README.
1391 : */
1392 556006 : outer_path = outerrel->cheapest_total_path;
1393 556006 : inner_path = innerrel->cheapest_total_path;
1394 :
1395 : /*
1396 : * If either cheapest-total path is parameterized by the other rel, we
1397 : * can't use a mergejoin. (There's no use looking for alternative input
1398 : * paths, since these should already be the least-parameterized available
1399 : * paths.)
1400 : */
1401 556006 : if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
1402 555114 : PATH_PARAM_BY_REL(inner_path, outerrel))
1403 1820 : return;
1404 :
1405 : /*
1406 : * If unique-ification is requested, do it and then handle as a plain
1407 : * inner join.
1408 : */
1409 554186 : if (jointype == JOIN_UNIQUE_OUTER)
1410 : {
1411 5740 : outer_path = (Path *) create_unique_path(root, outerrel,
1412 : outer_path, extra->sjinfo);
1413 : Assert(outer_path);
1414 5740 : jointype = JOIN_INNER;
1415 : }
1416 548446 : else if (jointype == JOIN_UNIQUE_INNER)
1417 : {
1418 5740 : inner_path = (Path *) create_unique_path(root, innerrel,
1419 : inner_path, extra->sjinfo);
1420 : Assert(inner_path);
1421 5740 : jointype = JOIN_INNER;
1422 : }
1423 :
1424 : /*
1425 : * If the joinrel is parallel-safe, we may be able to consider a partial
1426 : * merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the
1427 : * outer path will be partial, and therefore we won't be able to properly
1428 : * guarantee uniqueness. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT
1429 : * and JOIN_RIGHT_ANTI, because they can produce false null extended rows.
1430 : * Also, the resulting path must not be parameterized.
1431 : */
1432 554186 : if (joinrel->consider_parallel &&
1433 490990 : save_jointype != JOIN_UNIQUE_OUTER &&
1434 488622 : save_jointype != JOIN_FULL &&
1435 400004 : save_jointype != JOIN_RIGHT &&
1436 398348 : save_jointype != JOIN_RIGHT_ANTI &&
1437 398348 : outerrel->partial_pathlist != NIL &&
1438 9700 : bms_is_empty(joinrel->lateral_relids))
1439 : {
1440 9700 : cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
1441 :
1442 9700 : if (inner_path->parallel_safe)
1443 9604 : cheapest_safe_inner = inner_path;
1444 96 : else if (save_jointype != JOIN_UNIQUE_INNER)
1445 : cheapest_safe_inner =
1446 90 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1447 : }
1448 :
1449 : /*
1450 : * Each possible ordering of the available mergejoin clauses will generate
1451 : * a differently-sorted result path at essentially the same cost. We have
1452 : * no basis for choosing one over another at this level of joining, but
1453 : * some sort orders may be more useful than others for higher-level
1454 : * mergejoins, so it's worth considering multiple orderings.
1455 : *
1456 : * Actually, it's not quite true that every mergeclause ordering will
1457 : * generate a different path order, because some of the clauses may be
1458 : * partially redundant (refer to the same EquivalenceClasses). Therefore,
1459 : * what we do is convert the mergeclause list to a list of canonical
1460 : * pathkeys, and then consider different orderings of the pathkeys.
1461 : *
1462 : * Generating a path for *every* permutation of the pathkeys doesn't seem
1463 : * like a winning strategy; the cost in planning time is too high. For
1464 : * now, we generate one path for each pathkey, listing that pathkey first
1465 : * and the rest in random order. This should allow at least a one-clause
1466 : * mergejoin without re-sorting against any other possible mergejoin
1467 : * partner path. But if we've not guessed the right ordering of secondary
1468 : * keys, we may end up evaluating clauses as qpquals when they could have
1469 : * been done as mergeclauses. (In practice, it's rare that there's more
1470 : * than two or three mergeclauses, so expending a huge amount of thought
1471 : * on that is probably not worth it.)
1472 : *
1473 : * The pathkey order returned by select_outer_pathkeys_for_merge() has
1474 : * some heuristics behind it (see that function), so be sure to try it
1475 : * exactly as-is as well as making variants.
1476 : */
1477 554186 : all_pathkeys = select_outer_pathkeys_for_merge(root,
1478 : extra->mergeclause_list,
1479 : joinrel);
1480 :
1481 1174638 : foreach(l, all_pathkeys)
1482 : {
1483 620452 : PathKey *front_pathkey = (PathKey *) lfirst(l);
1484 : List *cur_mergeclauses;
1485 : List *outerkeys;
1486 : List *innerkeys;
1487 : List *merge_pathkeys;
1488 :
1489 : /* Make a pathkey list with this guy first */
1490 620452 : if (l != list_head(all_pathkeys))
1491 66266 : outerkeys = lcons(front_pathkey,
1492 : list_delete_nth_cell(list_copy(all_pathkeys),
1493 : foreach_current_index(l)));
1494 : else
1495 554186 : outerkeys = all_pathkeys; /* no work at first one... */
1496 :
1497 : /* Sort the mergeclauses into the corresponding ordering */
1498 : cur_mergeclauses =
1499 620452 : find_mergeclauses_for_outer_pathkeys(root,
1500 : outerkeys,
1501 : extra->mergeclause_list);
1502 :
1503 : /* Should have used them all... */
1504 : Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
1505 :
1506 : /* Build sort pathkeys for the inner side */
1507 620452 : innerkeys = make_inner_pathkeys_for_merge(root,
1508 : cur_mergeclauses,
1509 : outerkeys);
1510 :
1511 : /* Build pathkeys representing output sort order */
1512 620452 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1513 : outerkeys);
1514 :
1515 : /*
1516 : * And now we can make the path.
1517 : *
1518 : * Note: it's possible that the cheapest paths will already be sorted
1519 : * properly. try_mergejoin_path will detect that case and suppress an
1520 : * explicit sort step, so we needn't do so here.
1521 : */
1522 620452 : try_mergejoin_path(root,
1523 : joinrel,
1524 : outer_path,
1525 : inner_path,
1526 : merge_pathkeys,
1527 : cur_mergeclauses,
1528 : outerkeys,
1529 : innerkeys,
1530 : jointype,
1531 : extra,
1532 : false);
1533 :
1534 : /*
1535 : * If we have partial outer and parallel safe inner path then try
1536 : * partial mergejoin path.
1537 : */
1538 620452 : if (cheapest_partial_outer && cheapest_safe_inner)
1539 13284 : try_partial_mergejoin_path(root,
1540 : joinrel,
1541 : cheapest_partial_outer,
1542 : cheapest_safe_inner,
1543 : merge_pathkeys,
1544 : cur_mergeclauses,
1545 : outerkeys,
1546 : innerkeys,
1547 : jointype,
1548 : extra);
1549 : }
1550 : }
1551 :
1552 : /*
1553 : * generate_mergejoin_paths
1554 : * Creates possible mergejoin paths for input outerpath.
1555 : *
1556 : * We generate mergejoins if mergejoin clauses are available. We have
1557 : * two ways to generate the inner path for a mergejoin: sort the cheapest
1558 : * inner path, or use an inner path that is already suitably ordered for the
1559 : * merge. If we have several mergeclauses, it could be that there is no inner
1560 : * path (or only a very expensive one) for the full list of mergeclauses, but
1561 : * better paths exist if we truncate the mergeclause list (thereby discarding
1562 : * some sort key requirements). So, we consider truncations of the
1563 : * mergeclause list as well as the full list. (Ideally we'd consider all
1564 : * subsets of the mergeclause list, but that seems way too expensive.)
1565 : */
1566 : static void
1567 1310612 : generate_mergejoin_paths(PlannerInfo *root,
1568 : RelOptInfo *joinrel,
1569 : RelOptInfo *innerrel,
1570 : Path *outerpath,
1571 : JoinType jointype,
1572 : JoinPathExtraData *extra,
1573 : bool useallclauses,
1574 : Path *inner_cheapest_total,
1575 : List *merge_pathkeys,
1576 : bool is_partial)
1577 : {
1578 : List *mergeclauses;
1579 : List *innersortkeys;
1580 : List *trialsortkeys;
1581 : Path *cheapest_startup_inner;
1582 : Path *cheapest_total_inner;
1583 1310612 : JoinType save_jointype = jointype;
1584 : int num_sortkeys;
1585 : int sortkeycnt;
1586 :
1587 1310612 : if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
1588 15532 : jointype = JOIN_INNER;
1589 :
1590 : /* Look for useful mergeclauses (if any) */
1591 : mergeclauses =
1592 1310612 : find_mergeclauses_for_outer_pathkeys(root,
1593 : outerpath->pathkeys,
1594 : extra->mergeclause_list);
1595 :
1596 : /*
1597 : * Done with this outer path if no chance for a mergejoin.
1598 : *
1599 : * Special corner case: for "x FULL JOIN y ON true", there will be no join
1600 : * clauses at all. Ordinarily we'd generate a clauseless nestloop path,
1601 : * but since mergejoin is our only join type that supports FULL JOIN
1602 : * without any join clauses, it's necessary to generate a clauseless
1603 : * mergejoin path instead.
1604 : */
1605 1310612 : if (mergeclauses == NIL)
1606 : {
1607 870800 : if (jointype == JOIN_FULL)
1608 : /* okay to try for mergejoin */ ;
1609 : else
1610 867416 : return;
1611 : }
1612 538524 : if (useallclauses &&
1613 95328 : list_length(mergeclauses) != list_length(extra->mergeclause_list))
1614 16064 : return;
1615 :
1616 : /* Compute the required ordering of the inner path */
1617 427132 : innersortkeys = make_inner_pathkeys_for_merge(root,
1618 : mergeclauses,
1619 : outerpath->pathkeys);
1620 :
1621 : /*
1622 : * Generate a mergejoin on the basis of sorting the cheapest inner. Since
1623 : * a sort will be needed, only cheapest total cost matters. (But
1624 : * try_mergejoin_path will do the right thing if inner_cheapest_total is
1625 : * already correctly sorted.)
1626 : */
1627 427132 : try_mergejoin_path(root,
1628 : joinrel,
1629 : outerpath,
1630 : inner_cheapest_total,
1631 : merge_pathkeys,
1632 : mergeclauses,
1633 : NIL,
1634 : innersortkeys,
1635 : jointype,
1636 : extra,
1637 : is_partial);
1638 :
1639 : /* Can't do anything else if inner path needs to be unique'd */
1640 427132 : if (save_jointype == JOIN_UNIQUE_INNER)
1641 2140 : return;
1642 :
1643 : /*
1644 : * Look for presorted inner paths that satisfy the innersortkey list ---
1645 : * or any truncation thereof, if we are allowed to build a mergejoin using
1646 : * a subset of the merge clauses. Here, we consider both cheap startup
1647 : * cost and cheap total cost.
1648 : *
1649 : * Currently we do not consider parameterized inner paths here. This
1650 : * interacts with decisions elsewhere that also discriminate against
1651 : * mergejoins with parameterized inputs; see comments in
1652 : * src/backend/optimizer/README.
1653 : *
1654 : * As we shorten the sortkey list, we should consider only paths that are
1655 : * strictly cheaper than (in particular, not the same as) any path found
1656 : * in an earlier iteration. Otherwise we'd be intentionally using fewer
1657 : * merge keys than a given path allows (treating the rest as plain
1658 : * joinquals), which is unlikely to be a good idea. Also, eliminating
1659 : * paths here on the basis of compare_path_costs is a lot cheaper than
1660 : * building the mergejoin path only to throw it away.
1661 : *
1662 : * If inner_cheapest_total is well enough sorted to have not required a
1663 : * sort in the path made above, we shouldn't make a duplicate path with
1664 : * it, either. We handle that case with the same logic that handles the
1665 : * previous consideration, by initializing the variables that track
1666 : * cheapest-so-far properly. Note that we do NOT reject
1667 : * inner_cheapest_total if we find it matches some shorter set of
1668 : * pathkeys. That case corresponds to using fewer mergekeys to avoid
1669 : * sorting inner_cheapest_total, whereas we did sort it above, so the
1670 : * plans being considered are different.
1671 : */
1672 424992 : if (pathkeys_contained_in(innersortkeys,
1673 : inner_cheapest_total->pathkeys))
1674 : {
1675 : /* inner_cheapest_total didn't require a sort */
1676 14530 : cheapest_startup_inner = inner_cheapest_total;
1677 14530 : cheapest_total_inner = inner_cheapest_total;
1678 : }
1679 : else
1680 : {
1681 : /* it did require a sort, at least for the full set of keys */
1682 410462 : cheapest_startup_inner = NULL;
1683 410462 : cheapest_total_inner = NULL;
1684 : }
1685 424992 : num_sortkeys = list_length(innersortkeys);
1686 424992 : if (num_sortkeys > 1 && !useallclauses)
1687 15336 : trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
1688 : else
1689 409656 : trialsortkeys = innersortkeys; /* won't really truncate */
1690 :
1691 786192 : for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
1692 : {
1693 : Path *innerpath;
1694 440320 : List *newclauses = NIL;
1695 :
1696 : /*
1697 : * Look for an inner path ordered well enough for the first
1698 : * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
1699 : * destructively, which is why we made a copy...
1700 : */
1701 440320 : trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
1702 440320 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1703 : trialsortkeys,
1704 : NULL,
1705 : TOTAL_COST,
1706 : is_partial);
1707 440320 : if (innerpath != NULL &&
1708 24974 : (cheapest_total_inner == NULL ||
1709 24974 : compare_path_costs(innerpath, cheapest_total_inner,
1710 : TOTAL_COST) < 0))
1711 : {
1712 : /* Found a cheap (or even-cheaper) sorted path */
1713 : /* Select the right mergeclauses, if we didn't already */
1714 263794 : if (sortkeycnt < num_sortkeys)
1715 : {
1716 : newclauses =
1717 5554 : trim_mergeclauses_for_inner_pathkeys(root,
1718 : mergeclauses,
1719 : trialsortkeys);
1720 : Assert(newclauses != NIL);
1721 : }
1722 : else
1723 258240 : newclauses = mergeclauses;
1724 263794 : try_mergejoin_path(root,
1725 : joinrel,
1726 : outerpath,
1727 : innerpath,
1728 : merge_pathkeys,
1729 : newclauses,
1730 : NIL,
1731 : NIL,
1732 : jointype,
1733 : extra,
1734 : is_partial);
1735 263794 : cheapest_total_inner = innerpath;
1736 : }
1737 : /* Same on the basis of cheapest startup cost ... */
1738 440320 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1739 : trialsortkeys,
1740 : NULL,
1741 : STARTUP_COST,
1742 : is_partial);
1743 440320 : if (innerpath != NULL &&
1744 24974 : (cheapest_startup_inner == NULL ||
1745 24974 : compare_path_costs(innerpath, cheapest_startup_inner,
1746 : STARTUP_COST) < 0))
1747 : {
1748 : /* Found a cheap (or even-cheaper) sorted path */
1749 261242 : if (innerpath != cheapest_total_inner)
1750 : {
1751 : /*
1752 : * Avoid rebuilding clause list if we already made one; saves
1753 : * memory in big join trees...
1754 : */
1755 2426 : if (newclauses == NIL)
1756 : {
1757 80 : if (sortkeycnt < num_sortkeys)
1758 : {
1759 : newclauses =
1760 12 : trim_mergeclauses_for_inner_pathkeys(root,
1761 : mergeclauses,
1762 : trialsortkeys);
1763 : Assert(newclauses != NIL);
1764 : }
1765 : else
1766 68 : newclauses = mergeclauses;
1767 : }
1768 2426 : try_mergejoin_path(root,
1769 : joinrel,
1770 : outerpath,
1771 : innerpath,
1772 : merge_pathkeys,
1773 : newclauses,
1774 : NIL,
1775 : NIL,
1776 : jointype,
1777 : extra,
1778 : is_partial);
1779 : }
1780 261242 : cheapest_startup_inner = innerpath;
1781 : }
1782 :
1783 : /*
1784 : * Don't consider truncated sortkeys if we need all clauses.
1785 : */
1786 440320 : if (useallclauses)
1787 79120 : break;
1788 : }
1789 : }
1790 :
1791 : /*
1792 : * match_unsorted_outer
1793 : * Creates possible join paths for processing a single join relation
1794 : * 'joinrel' by employing either iterative substitution or
1795 : * mergejoining on each of its possible outer paths (considering
1796 : * only outer paths that are already ordered well enough for merging).
1797 : *
1798 : * We always generate a nestloop path for each available outer path.
1799 : * In fact we may generate as many as five: one on the cheapest-total-cost
1800 : * inner path, one on the same with materialization, one on the
1801 : * cheapest-startup-cost inner path (if different), one on the
1802 : * cheapest-total inner-indexscan path (if any), and one on the
1803 : * cheapest-startup inner-indexscan path (if different).
1804 : *
1805 : * We also consider mergejoins if mergejoin clauses are available. See
1806 : * detailed comments in generate_mergejoin_paths.
1807 : *
1808 : * 'joinrel' is the join relation
1809 : * 'outerrel' is the outer join relation
1810 : * 'innerrel' is the inner join relation
1811 : * 'jointype' is the type of join to do
1812 : * 'extra' contains additional input values
1813 : */
1814 : static void
1815 674796 : match_unsorted_outer(PlannerInfo *root,
1816 : RelOptInfo *joinrel,
1817 : RelOptInfo *outerrel,
1818 : RelOptInfo *innerrel,
1819 : JoinType jointype,
1820 : JoinPathExtraData *extra)
1821 : {
1822 674796 : JoinType save_jointype = jointype;
1823 : bool nestjoinOK;
1824 : bool useallclauses;
1825 674796 : Path *inner_cheapest_total = innerrel->cheapest_total_path;
1826 674796 : Path *matpath = NULL;
1827 : ListCell *lc1;
1828 :
1829 : /*
1830 : * For now we do not support RIGHT_SEMI join in mergejoin or nestloop
1831 : * join.
1832 : */
1833 674796 : if (jointype == JOIN_RIGHT_SEMI)
1834 102 : return;
1835 :
1836 : /*
1837 : * Nestloop only supports inner, left, semi, and anti joins. Also, if we
1838 : * are doing a right, right-anti or full mergejoin, we must use *all* the
1839 : * mergeclauses as join clauses, else we will not have a valid plan.
1840 : * (Although these two flags are currently inverses, keep them separate
1841 : * for clarity and possible future changes.)
1842 : */
1843 674694 : switch (jointype)
1844 : {
1845 556590 : case JOIN_INNER:
1846 : case JOIN_LEFT:
1847 : case JOIN_SEMI:
1848 : case JOIN_ANTI:
1849 556590 : nestjoinOK = true;
1850 556590 : useallclauses = false;
1851 556590 : break;
1852 103036 : case JOIN_RIGHT:
1853 : case JOIN_RIGHT_ANTI:
1854 : case JOIN_FULL:
1855 103036 : nestjoinOK = false;
1856 103036 : useallclauses = true;
1857 103036 : break;
1858 15068 : case JOIN_UNIQUE_OUTER:
1859 : case JOIN_UNIQUE_INNER:
1860 15068 : jointype = JOIN_INNER;
1861 15068 : nestjoinOK = true;
1862 15068 : useallclauses = false;
1863 15068 : break;
1864 0 : default:
1865 0 : elog(ERROR, "unrecognized join type: %d",
1866 : (int) jointype);
1867 : nestjoinOK = false; /* keep compiler quiet */
1868 : useallclauses = false;
1869 : break;
1870 : }
1871 :
1872 : /*
1873 : * If inner_cheapest_total is parameterized by the outer rel, ignore it;
1874 : * we will consider it below as a member of cheapest_parameterized_paths,
1875 : * but the other possibilities considered in this routine aren't usable.
1876 : */
1877 674694 : if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
1878 12200 : inner_cheapest_total = NULL;
1879 :
1880 : /*
1881 : * If we need to unique-ify the inner path, we will consider only the
1882 : * cheapest-total inner.
1883 : */
1884 674694 : if (save_jointype == JOIN_UNIQUE_INNER)
1885 : {
1886 : /* No way to do this with an inner path parameterized by outer rel */
1887 7534 : if (inner_cheapest_total == NULL)
1888 36 : return;
1889 : inner_cheapest_total = (Path *)
1890 7498 : create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
1891 : Assert(inner_cheapest_total);
1892 : }
1893 667160 : else if (nestjoinOK)
1894 : {
1895 : /*
1896 : * Consider materializing the cheapest inner path, unless
1897 : * enable_material is off or the path in question materializes its
1898 : * output anyway.
1899 : */
1900 564124 : if (enable_material && inner_cheapest_total != NULL &&
1901 551490 : !ExecMaterializesOutput(inner_cheapest_total->pathtype))
1902 : matpath = (Path *)
1903 522430 : create_material_path(innerrel, inner_cheapest_total);
1904 : }
1905 :
1906 2251546 : foreach(lc1, outerrel->pathlist)
1907 : {
1908 1576888 : Path *outerpath = (Path *) lfirst(lc1);
1909 : List *merge_pathkeys;
1910 :
1911 : /*
1912 : * We cannot use an outer path that is parameterized by the inner rel.
1913 : */
1914 1576888 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
1915 259004 : continue;
1916 :
1917 : /*
1918 : * If we need to unique-ify the outer path, it's pointless to consider
1919 : * any but the cheapest outer. (XXX we don't consider parameterized
1920 : * outers, nor inners, for unique-ified cases. Should we?)
1921 : */
1922 1317884 : if (save_jointype == JOIN_UNIQUE_OUTER)
1923 : {
1924 8808 : if (outerpath != outerrel->cheapest_total_path)
1925 1310 : continue;
1926 7498 : outerpath = (Path *) create_unique_path(root, outerrel,
1927 : outerpath, extra->sjinfo);
1928 : Assert(outerpath);
1929 : }
1930 :
1931 : /*
1932 : * The result will have this sort order (even if it is implemented as
1933 : * a nestloop, and even if some of the mergeclauses are implemented by
1934 : * qpquals rather than as true mergeclauses):
1935 : */
1936 1316574 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1937 : outerpath->pathkeys);
1938 :
1939 1316574 : if (save_jointype == JOIN_UNIQUE_INNER)
1940 : {
1941 : /*
1942 : * Consider nestloop join, but only with the unique-ified cheapest
1943 : * inner path
1944 : */
1945 14548 : try_nestloop_path(root,
1946 : joinrel,
1947 : outerpath,
1948 : inner_cheapest_total,
1949 : merge_pathkeys,
1950 : jointype,
1951 : extra);
1952 : }
1953 1302026 : else if (nestjoinOK)
1954 : {
1955 : /*
1956 : * Consider nestloop joins using this outer path and various
1957 : * available paths for the inner relation. We consider the
1958 : * cheapest-total paths for each available parameterization of the
1959 : * inner relation, including the unparameterized case.
1960 : */
1961 : ListCell *lc2;
1962 :
1963 2916130 : foreach(lc2, innerrel->cheapest_parameterized_paths)
1964 : {
1965 1810486 : Path *innerpath = (Path *) lfirst(lc2);
1966 : Path *mpath;
1967 :
1968 1810486 : try_nestloop_path(root,
1969 : joinrel,
1970 : outerpath,
1971 : innerpath,
1972 : merge_pathkeys,
1973 : jointype,
1974 : extra);
1975 :
1976 : /*
1977 : * Try generating a memoize path and see if that makes the
1978 : * nested loop any cheaper.
1979 : */
1980 1810486 : mpath = get_memoize_path(root, innerrel, outerrel,
1981 : innerpath, outerpath, jointype,
1982 : extra);
1983 1810486 : if (mpath != NULL)
1984 314396 : try_nestloop_path(root,
1985 : joinrel,
1986 : outerpath,
1987 : mpath,
1988 : merge_pathkeys,
1989 : jointype,
1990 : extra);
1991 : }
1992 :
1993 : /* Also consider materialized form of the cheapest inner path */
1994 1105644 : if (matpath != NULL)
1995 1032830 : try_nestloop_path(root,
1996 : joinrel,
1997 : outerpath,
1998 : matpath,
1999 : merge_pathkeys,
2000 : jointype,
2001 : extra);
2002 : }
2003 :
2004 : /* Can't do anything else if outer path needs to be unique'd */
2005 1316574 : if (save_jointype == JOIN_UNIQUE_OUTER)
2006 7498 : continue;
2007 :
2008 : /* Can't do anything else if inner rel is parameterized by outer */
2009 1309076 : if (inner_cheapest_total == NULL)
2010 13222 : continue;
2011 :
2012 : /* Generate merge join paths */
2013 1295854 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
2014 : save_jointype, extra, useallclauses,
2015 : inner_cheapest_total, merge_pathkeys,
2016 : false);
2017 : }
2018 :
2019 : /*
2020 : * Consider partial nestloop and mergejoin plan if outerrel has any
2021 : * partial path and the joinrel is parallel-safe. However, we can't
2022 : * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
2023 : * therefore we won't be able to properly guarantee uniqueness. Nor can
2024 : * we handle joins needing lateral rels, since partial paths must not be
2025 : * parameterized. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT and
2026 : * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
2027 : */
2028 674658 : if (joinrel->consider_parallel &&
2029 567394 : save_jointype != JOIN_UNIQUE_OUTER &&
2030 564906 : save_jointype != JOIN_FULL &&
2031 474642 : save_jointype != JOIN_RIGHT &&
2032 472968 : save_jointype != JOIN_RIGHT_ANTI &&
2033 472968 : outerrel->partial_pathlist != NIL &&
2034 11362 : bms_is_empty(joinrel->lateral_relids))
2035 : {
2036 11362 : if (nestjoinOK)
2037 11362 : consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
2038 : save_jointype, extra);
2039 :
2040 : /*
2041 : * If inner_cheapest_total is NULL or non parallel-safe then find the
2042 : * cheapest total parallel safe path. If doing JOIN_UNIQUE_INNER, we
2043 : * can't use any alternative inner path.
2044 : */
2045 11362 : if (inner_cheapest_total == NULL ||
2046 10954 : !inner_cheapest_total->parallel_safe)
2047 : {
2048 780 : if (save_jointype == JOIN_UNIQUE_INNER)
2049 6 : return;
2050 :
2051 774 : inner_cheapest_total = get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
2052 : }
2053 :
2054 11356 : if (inner_cheapest_total)
2055 10942 : consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
2056 : save_jointype, extra,
2057 : inner_cheapest_total);
2058 : }
2059 : }
2060 :
2061 : /*
2062 : * consider_parallel_mergejoin
2063 : * Try to build partial paths for a joinrel by joining a partial path
2064 : * for the outer relation to a complete path for the inner relation.
2065 : *
2066 : * 'joinrel' is the join relation
2067 : * 'outerrel' is the outer join relation
2068 : * 'innerrel' is the inner join relation
2069 : * 'jointype' is the type of join to do
2070 : * 'extra' contains additional input values
2071 : * 'inner_cheapest_total' cheapest total path for innerrel
2072 : */
2073 : static void
2074 10942 : consider_parallel_mergejoin(PlannerInfo *root,
2075 : RelOptInfo *joinrel,
2076 : RelOptInfo *outerrel,
2077 : RelOptInfo *innerrel,
2078 : JoinType jointype,
2079 : JoinPathExtraData *extra,
2080 : Path *inner_cheapest_total)
2081 : {
2082 : ListCell *lc1;
2083 :
2084 : /* generate merge join path for each partial outer path */
2085 25700 : foreach(lc1, outerrel->partial_pathlist)
2086 : {
2087 14758 : Path *outerpath = (Path *) lfirst(lc1);
2088 : List *merge_pathkeys;
2089 :
2090 : /*
2091 : * Figure out what useful ordering any paths we create will have.
2092 : */
2093 14758 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
2094 : outerpath->pathkeys);
2095 :
2096 14758 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
2097 : extra, false, inner_cheapest_total,
2098 : merge_pathkeys, true);
2099 : }
2100 10942 : }
2101 :
2102 : /*
2103 : * consider_parallel_nestloop
2104 : * Try to build partial paths for a joinrel by joining a partial path for the
2105 : * outer relation to a complete path for the inner relation.
2106 : *
2107 : * 'joinrel' is the join relation
2108 : * 'outerrel' is the outer join relation
2109 : * 'innerrel' is the inner join relation
2110 : * 'jointype' is the type of join to do
2111 : * 'extra' contains additional input values
2112 : */
2113 : static void
2114 11362 : consider_parallel_nestloop(PlannerInfo *root,
2115 : RelOptInfo *joinrel,
2116 : RelOptInfo *outerrel,
2117 : RelOptInfo *innerrel,
2118 : JoinType jointype,
2119 : JoinPathExtraData *extra)
2120 : {
2121 11362 : JoinType save_jointype = jointype;
2122 11362 : Path *inner_cheapest_total = innerrel->cheapest_total_path;
2123 11362 : Path *matpath = NULL;
2124 : ListCell *lc1;
2125 :
2126 11362 : if (jointype == JOIN_UNIQUE_INNER)
2127 642 : jointype = JOIN_INNER;
2128 :
2129 : /*
2130 : * Consider materializing the cheapest inner path, unless: 1) we're doing
2131 : * JOIN_UNIQUE_INNER, because in this case we have to unique-ify the
2132 : * cheapest inner path, 2) enable_material is off, 3) the cheapest inner
2133 : * path is not parallel-safe, 4) the cheapest inner path is parameterized
2134 : * by the outer rel, or 5) the cheapest inner path materializes its output
2135 : * anyway.
2136 : */
2137 11362 : if (save_jointype != JOIN_UNIQUE_INNER &&
2138 10512 : enable_material && inner_cheapest_total->parallel_safe &&
2139 10320 : !PATH_PARAM_BY_REL(inner_cheapest_total, outerrel) &&
2140 9930 : !ExecMaterializesOutput(inner_cheapest_total->pathtype))
2141 : {
2142 : matpath = (Path *)
2143 9834 : create_material_path(innerrel, inner_cheapest_total);
2144 : Assert(matpath->parallel_safe);
2145 : }
2146 :
2147 26678 : foreach(lc1, outerrel->partial_pathlist)
2148 : {
2149 15316 : Path *outerpath = (Path *) lfirst(lc1);
2150 : List *pathkeys;
2151 : ListCell *lc2;
2152 :
2153 : /* Figure out what useful ordering any paths we create will have. */
2154 15316 : pathkeys = build_join_pathkeys(root, joinrel, jointype,
2155 : outerpath->pathkeys);
2156 :
2157 : /*
2158 : * Try the cheapest parameterized paths; only those which will produce
2159 : * an unparameterized path when joined to this outerrel will survive
2160 : * try_partial_nestloop_path. The cheapest unparameterized path is
2161 : * also in this list.
2162 : */
2163 40450 : foreach(lc2, innerrel->cheapest_parameterized_paths)
2164 : {
2165 25134 : Path *innerpath = (Path *) lfirst(lc2);
2166 : Path *mpath;
2167 :
2168 : /* Can't join to an inner path that is not parallel-safe */
2169 25134 : if (!innerpath->parallel_safe)
2170 426 : continue;
2171 :
2172 : /*
2173 : * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
2174 : * cheapest_total_path, and we have to unique-ify it. (We might
2175 : * be able to relax this to allow other safe, unparameterized
2176 : * inner paths, but right now create_unique_path is not on board
2177 : * with that.)
2178 : */
2179 24708 : if (save_jointype == JOIN_UNIQUE_INNER)
2180 : {
2181 2130 : if (innerpath != innerrel->cheapest_total_path)
2182 1146 : continue;
2183 984 : innerpath = (Path *) create_unique_path(root, innerrel,
2184 : innerpath,
2185 : extra->sjinfo);
2186 : Assert(innerpath);
2187 : }
2188 :
2189 23562 : try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
2190 : pathkeys, jointype, extra);
2191 :
2192 : /*
2193 : * Try generating a memoize path and see if that makes the nested
2194 : * loop any cheaper.
2195 : */
2196 23562 : mpath = get_memoize_path(root, innerrel, outerrel,
2197 : innerpath, outerpath, jointype,
2198 : extra);
2199 23562 : if (mpath != NULL)
2200 5866 : try_partial_nestloop_path(root, joinrel, outerpath, mpath,
2201 : pathkeys, jointype, extra);
2202 : }
2203 :
2204 : /* Also consider materialized form of the cheapest inner path */
2205 15316 : if (matpath != NULL)
2206 13278 : try_partial_nestloop_path(root, joinrel, outerpath, matpath,
2207 : pathkeys, jointype, extra);
2208 : }
2209 11362 : }
2210 :
2211 : /*
2212 : * hash_inner_and_outer
2213 : * Create hashjoin join paths by explicitly hashing both the outer and
2214 : * inner keys of each available hash clause.
2215 : *
2216 : * 'joinrel' is the join relation
2217 : * 'outerrel' is the outer join relation
2218 : * 'innerrel' is the inner join relation
2219 : * 'jointype' is the type of join to do
2220 : * 'extra' contains additional input values
2221 : */
2222 : static void
2223 686652 : hash_inner_and_outer(PlannerInfo *root,
2224 : RelOptInfo *joinrel,
2225 : RelOptInfo *outerrel,
2226 : RelOptInfo *innerrel,
2227 : JoinType jointype,
2228 : JoinPathExtraData *extra)
2229 : {
2230 686652 : JoinType save_jointype = jointype;
2231 686652 : bool isouterjoin = IS_OUTER_JOIN(jointype);
2232 : List *hashclauses;
2233 : ListCell *l;
2234 :
2235 : /*
2236 : * We need to build only one hashclauses list for any given pair of outer
2237 : * and inner relations; all of the hashable clauses will be used as keys.
2238 : *
2239 : * Scan the join's restrictinfo list to find hashjoinable clauses that are
2240 : * usable with this pair of sub-relations.
2241 : */
2242 686652 : hashclauses = NIL;
2243 1440576 : foreach(l, extra->restrictlist)
2244 : {
2245 753924 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2246 :
2247 : /*
2248 : * If processing an outer join, only use its own join clauses for
2249 : * hashing. For inner joins we need not be so picky.
2250 : */
2251 753924 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
2252 11532 : continue;
2253 :
2254 742392 : if (!restrictinfo->can_join ||
2255 658440 : restrictinfo->hashjoinoperator == InvalidOid)
2256 106276 : continue; /* not hashjoinable */
2257 :
2258 : /*
2259 : * Check if clause has the form "outer op inner" or "inner op outer".
2260 : */
2261 636116 : if (!clause_sides_match_join(restrictinfo, outerrel->relids,
2262 : innerrel->relids))
2263 216 : continue; /* no good for these input relations */
2264 :
2265 : /*
2266 : * If clause has the form "inner op outer", check if its operator has
2267 : * valid commutator. This is necessary because hashclauses in this
2268 : * form will get commuted in createplan.c to put the outer var on the
2269 : * left (see get_switched_clauses). This probably shouldn't ever
2270 : * fail, since hashable operators ought to have commutators, but be
2271 : * paranoid.
2272 : *
2273 : * The clause being hashjoinable indicates that it's an OpExpr.
2274 : */
2275 953850 : if (!restrictinfo->outer_is_left &&
2276 317950 : !OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
2277 6 : continue;
2278 :
2279 635894 : hashclauses = lappend(hashclauses, restrictinfo);
2280 : }
2281 :
2282 : /* If we found any usable hashclauses, make paths */
2283 686652 : if (hashclauses)
2284 : {
2285 : /*
2286 : * We consider both the cheapest-total-cost and cheapest-startup-cost
2287 : * outer paths. There's no need to consider any but the
2288 : * cheapest-total-cost inner path, however.
2289 : */
2290 568810 : Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
2291 568810 : Path *cheapest_total_outer = outerrel->cheapest_total_path;
2292 568810 : Path *cheapest_total_inner = innerrel->cheapest_total_path;
2293 :
2294 : /*
2295 : * If either cheapest-total path is parameterized by the other rel, we
2296 : * can't use a hashjoin. (There's no use looking for alternative
2297 : * input paths, since these should already be the least-parameterized
2298 : * available paths.)
2299 : */
2300 568810 : if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
2301 567906 : PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
2302 1808 : return;
2303 :
2304 : /* Unique-ify if need be; we ignore parameterized possibilities */
2305 567002 : if (jointype == JOIN_UNIQUE_OUTER)
2306 : {
2307 : cheapest_total_outer = (Path *)
2308 5638 : create_unique_path(root, outerrel,
2309 : cheapest_total_outer, extra->sjinfo);
2310 : Assert(cheapest_total_outer);
2311 5638 : jointype = JOIN_INNER;
2312 5638 : try_hashjoin_path(root,
2313 : joinrel,
2314 : cheapest_total_outer,
2315 : cheapest_total_inner,
2316 : hashclauses,
2317 : jointype,
2318 : extra);
2319 : /* no possibility of cheap startup here */
2320 : }
2321 561364 : else if (jointype == JOIN_UNIQUE_INNER)
2322 : {
2323 : cheapest_total_inner = (Path *)
2324 5638 : create_unique_path(root, innerrel,
2325 : cheapest_total_inner, extra->sjinfo);
2326 : Assert(cheapest_total_inner);
2327 5638 : jointype = JOIN_INNER;
2328 5638 : try_hashjoin_path(root,
2329 : joinrel,
2330 : cheapest_total_outer,
2331 : cheapest_total_inner,
2332 : hashclauses,
2333 : jointype,
2334 : extra);
2335 5638 : if (cheapest_startup_outer != NULL &&
2336 : cheapest_startup_outer != cheapest_total_outer)
2337 304 : try_hashjoin_path(root,
2338 : joinrel,
2339 : cheapest_startup_outer,
2340 : cheapest_total_inner,
2341 : hashclauses,
2342 : jointype,
2343 : extra);
2344 : }
2345 : else
2346 : {
2347 : /*
2348 : * For other jointypes, we consider the cheapest startup outer
2349 : * together with the cheapest total inner, and then consider
2350 : * pairings of cheapest-total paths including parameterized ones.
2351 : * There is no use in generating parameterized paths on the basis
2352 : * of possibly cheap startup cost, so this is sufficient.
2353 : */
2354 : ListCell *lc1;
2355 : ListCell *lc2;
2356 :
2357 555726 : if (cheapest_startup_outer != NULL)
2358 555090 : try_hashjoin_path(root,
2359 : joinrel,
2360 : cheapest_startup_outer,
2361 : cheapest_total_inner,
2362 : hashclauses,
2363 : jointype,
2364 : extra);
2365 :
2366 1420522 : foreach(lc1, outerrel->cheapest_parameterized_paths)
2367 : {
2368 864796 : Path *outerpath = (Path *) lfirst(lc1);
2369 :
2370 : /*
2371 : * We cannot use an outer path that is parameterized by the
2372 : * inner rel.
2373 : */
2374 864796 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
2375 251366 : continue;
2376 :
2377 1585606 : foreach(lc2, innerrel->cheapest_parameterized_paths)
2378 : {
2379 972176 : Path *innerpath = (Path *) lfirst(lc2);
2380 :
2381 : /*
2382 : * We cannot use an inner path that is parameterized by
2383 : * the outer rel, either.
2384 : */
2385 972176 : if (PATH_PARAM_BY_REL(innerpath, outerrel))
2386 284926 : continue;
2387 :
2388 687250 : if (outerpath == cheapest_startup_outer &&
2389 : innerpath == cheapest_total_inner)
2390 465928 : continue; /* already tried it */
2391 :
2392 221322 : try_hashjoin_path(root,
2393 : joinrel,
2394 : outerpath,
2395 : innerpath,
2396 : hashclauses,
2397 : jointype,
2398 : extra);
2399 : }
2400 : }
2401 : }
2402 :
2403 : /*
2404 : * If the joinrel is parallel-safe, we may be able to consider a
2405 : * partial hash join. However, we can't handle JOIN_UNIQUE_OUTER,
2406 : * because the outer path will be partial, and therefore we won't be
2407 : * able to properly guarantee uniqueness. Also, the resulting path
2408 : * must not be parameterized.
2409 : */
2410 567002 : if (joinrel->consider_parallel &&
2411 503566 : save_jointype != JOIN_UNIQUE_OUTER &&
2412 503566 : outerrel->partial_pathlist != NIL &&
2413 17132 : bms_is_empty(joinrel->lateral_relids))
2414 : {
2415 : Path *cheapest_partial_outer;
2416 17132 : Path *cheapest_partial_inner = NULL;
2417 17132 : Path *cheapest_safe_inner = NULL;
2418 :
2419 17132 : cheapest_partial_outer =
2420 17132 : (Path *) linitial(outerrel->partial_pathlist);
2421 :
2422 : /*
2423 : * Can we use a partial inner plan too, so that we can build a
2424 : * shared hash table in parallel? We can't handle
2425 : * JOIN_UNIQUE_INNER because we can't guarantee uniqueness.
2426 : */
2427 17132 : if (innerrel->partial_pathlist != NIL &&
2428 12828 : save_jointype != JOIN_UNIQUE_INNER &&
2429 : enable_parallel_hash)
2430 : {
2431 12552 : cheapest_partial_inner =
2432 12552 : (Path *) linitial(innerrel->partial_pathlist);
2433 12552 : try_partial_hashjoin_path(root, joinrel,
2434 : cheapest_partial_outer,
2435 : cheapest_partial_inner,
2436 : hashclauses, jointype, extra,
2437 : true /* parallel_hash */ );
2438 : }
2439 :
2440 : /*
2441 : * Normally, given that the joinrel is parallel-safe, the cheapest
2442 : * total inner path will also be parallel-safe, but if not, we'll
2443 : * have to search for the cheapest safe, unparameterized inner
2444 : * path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
2445 : * inner path. If full, right, right-semi or right-anti join, we
2446 : * can't use parallelism (building the hash table in each backend)
2447 : * because no one process has all the match bits.
2448 : */
2449 17132 : if (save_jointype == JOIN_FULL ||
2450 13316 : save_jointype == JOIN_RIGHT ||
2451 9748 : save_jointype == JOIN_RIGHT_SEMI ||
2452 : save_jointype == JOIN_RIGHT_ANTI)
2453 7714 : cheapest_safe_inner = NULL;
2454 9418 : else if (cheapest_total_inner->parallel_safe)
2455 9190 : cheapest_safe_inner = cheapest_total_inner;
2456 228 : else if (save_jointype != JOIN_UNIQUE_INNER)
2457 : cheapest_safe_inner =
2458 222 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
2459 :
2460 17132 : if (cheapest_safe_inner != NULL)
2461 9406 : try_partial_hashjoin_path(root, joinrel,
2462 : cheapest_partial_outer,
2463 : cheapest_safe_inner,
2464 : hashclauses, jointype, extra,
2465 : false /* parallel_hash */ );
2466 : }
2467 : }
2468 : }
2469 :
2470 : /*
2471 : * select_mergejoin_clauses
2472 : * Select mergejoin clauses that are usable for a particular join.
2473 : * Returns a list of RestrictInfo nodes for those clauses.
2474 : *
2475 : * *mergejoin_allowed is normally set to true, but it is set to false if
2476 : * this is a right-semi join, or this is a right/right-anti/full join and
2477 : * there are nonmergejoinable join clauses. The executor's mergejoin
2478 : * machinery cannot handle such cases, so we have to avoid generating a
2479 : * mergejoin plan. (Note that this flag does NOT consider whether there are
2480 : * actually any mergejoinable clauses. This is correct because in some
2481 : * cases we need to build a clauseless mergejoin. Simply returning NIL is
2482 : * therefore not enough to distinguish safe from unsafe cases.)
2483 : *
2484 : * We also mark each selected RestrictInfo to show which side is currently
2485 : * being considered as outer. These are transient markings that are only
2486 : * good for the duration of the current add_paths_to_joinrel() call!
2487 : *
2488 : * We examine each restrictinfo clause known for the join to see
2489 : * if it is mergejoinable and involves vars from the two sub-relations
2490 : * currently of interest.
2491 : */
2492 : static List *
2493 688388 : select_mergejoin_clauses(PlannerInfo *root,
2494 : RelOptInfo *joinrel,
2495 : RelOptInfo *outerrel,
2496 : RelOptInfo *innerrel,
2497 : List *restrictlist,
2498 : JoinType jointype,
2499 : bool *mergejoin_allowed)
2500 : {
2501 688388 : List *result_list = NIL;
2502 688388 : bool isouterjoin = IS_OUTER_JOIN(jointype);
2503 688388 : bool have_nonmergeable_joinclause = false;
2504 : ListCell *l;
2505 :
2506 : /*
2507 : * For now we do not support RIGHT_SEMI join in mergejoin: the benefit of
2508 : * swapping inputs tends to be small here.
2509 : */
2510 688388 : if (jointype == JOIN_RIGHT_SEMI)
2511 : {
2512 7224 : *mergejoin_allowed = false;
2513 7224 : return NIL;
2514 : }
2515 :
2516 1430844 : foreach(l, restrictlist)
2517 : {
2518 749680 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2519 :
2520 : /*
2521 : * If processing an outer join, only use its own join clauses in the
2522 : * merge. For inner joins we can use pushed-down clauses too. (Note:
2523 : * we don't set have_nonmergeable_joinclause here because pushed-down
2524 : * clauses will become otherquals not joinquals.)
2525 : */
2526 749680 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
2527 11556 : continue;
2528 :
2529 : /* Check that clause is a mergeable operator clause */
2530 738124 : if (!restrictinfo->can_join ||
2531 654190 : restrictinfo->mergeopfamilies == NIL)
2532 : {
2533 : /*
2534 : * The executor can handle extra joinquals that are constants, but
2535 : * not anything else, when doing right/right-anti/full merge join.
2536 : * (The reason to support constants is so we can do FULL JOIN ON
2537 : * FALSE.)
2538 : */
2539 104804 : if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
2540 94384 : have_nonmergeable_joinclause = true;
2541 104804 : continue; /* not mergejoinable */
2542 : }
2543 :
2544 : /*
2545 : * Check if clause has the form "outer op inner" or "inner op outer".
2546 : */
2547 633320 : if (!clause_sides_match_join(restrictinfo, outerrel->relids,
2548 : innerrel->relids))
2549 : {
2550 1008 : have_nonmergeable_joinclause = true;
2551 1008 : continue; /* no good for these input relations */
2552 : }
2553 :
2554 : /*
2555 : * If clause has the form "inner op outer", check if its operator has
2556 : * valid commutator. This is necessary because mergejoin clauses in
2557 : * this form will get commuted in createplan.c to put the outer var on
2558 : * the left (see get_switched_clauses). This probably shouldn't ever
2559 : * fail, since mergejoinable operators ought to have commutators, but
2560 : * be paranoid.
2561 : *
2562 : * The clause being mergejoinable indicates that it's an OpExpr.
2563 : */
2564 945660 : if (!restrictinfo->outer_is_left &&
2565 313348 : !OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
2566 : {
2567 36 : have_nonmergeable_joinclause = true;
2568 36 : continue;
2569 : }
2570 :
2571 : /*
2572 : * Insist that each side have a non-redundant eclass. This
2573 : * restriction is needed because various bits of the planner expect
2574 : * that each clause in a merge be associable with some pathkey in a
2575 : * canonical pathkey list, but redundant eclasses can't appear in
2576 : * canonical sort orderings. (XXX it might be worth relaxing this,
2577 : * but not enough time to address it for 8.3.)
2578 : */
2579 632276 : update_mergeclause_eclasses(root, restrictinfo);
2580 :
2581 632276 : if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
2582 632228 : EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
2583 : {
2584 128 : have_nonmergeable_joinclause = true;
2585 128 : continue; /* can't handle redundant eclasses */
2586 : }
2587 :
2588 632148 : result_list = lappend(result_list, restrictinfo);
2589 : }
2590 :
2591 : /*
2592 : * Report whether mergejoin is allowed (see comment at top of function).
2593 : */
2594 681164 : switch (jointype)
2595 : {
2596 112784 : case JOIN_RIGHT:
2597 : case JOIN_RIGHT_ANTI:
2598 : case JOIN_FULL:
2599 112784 : *mergejoin_allowed = !have_nonmergeable_joinclause;
2600 112784 : break;
2601 568380 : default:
2602 568380 : *mergejoin_allowed = true;
2603 568380 : break;
2604 : }
2605 :
2606 681164 : return result_list;
2607 : }
|