Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * joinpath.c
4 : * Routines to find all possible paths for processing a set of joins
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/path/joinpath.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "executor/executor.h"
18 : #include "foreign/fdwapi.h"
19 : #include "nodes/nodeFuncs.h"
20 : #include "optimizer/cost.h"
21 : #include "optimizer/optimizer.h"
22 : #include "optimizer/pathnode.h"
23 : #include "optimizer/paths.h"
24 : #include "optimizer/placeholder.h"
25 : #include "optimizer/planmain.h"
26 : #include "optimizer/restrictinfo.h"
27 : #include "utils/lsyscache.h"
28 : #include "utils/typcache.h"
29 :
30 : /* Hook for plugins to get control in add_paths_to_joinrel() */
31 : set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
32 :
33 : /*
34 : * Paths parameterized by a parent rel can be considered to be parameterized
35 : * by any of its children, when we are performing partitionwise joins. These
36 : * macros simplify checking for such cases. Beware multiple eval of args.
37 : */
38 : #define PATH_PARAM_BY_PARENT(path, rel) \
39 : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
40 : (rel)->top_parent_relids))
41 : #define PATH_PARAM_BY_REL_SELF(path, rel) \
42 : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
43 :
44 : #define PATH_PARAM_BY_REL(path, rel) \
45 : (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
46 :
47 : static void try_partial_mergejoin_path(PlannerInfo *root,
48 : RelOptInfo *joinrel,
49 : Path *outer_path,
50 : Path *inner_path,
51 : List *pathkeys,
52 : List *mergeclauses,
53 : List *outersortkeys,
54 : List *innersortkeys,
55 : JoinType jointype,
56 : JoinPathExtraData *extra);
57 : static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
58 : RelOptInfo *outerrel, RelOptInfo *innerrel,
59 : JoinType jointype, JoinPathExtraData *extra);
60 : static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
61 : RelOptInfo *outerrel, RelOptInfo *innerrel,
62 : JoinType jointype, JoinPathExtraData *extra);
63 : static void consider_parallel_nestloop(PlannerInfo *root,
64 : RelOptInfo *joinrel,
65 : RelOptInfo *outerrel,
66 : RelOptInfo *innerrel,
67 : JoinType jointype,
68 : JoinPathExtraData *extra);
69 : static void consider_parallel_mergejoin(PlannerInfo *root,
70 : RelOptInfo *joinrel,
71 : RelOptInfo *outerrel,
72 : RelOptInfo *innerrel,
73 : JoinType jointype,
74 : JoinPathExtraData *extra,
75 : Path *inner_cheapest_total);
76 : static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
77 : RelOptInfo *outerrel, RelOptInfo *innerrel,
78 : JoinType jointype, JoinPathExtraData *extra);
79 : static List *select_mergejoin_clauses(PlannerInfo *root,
80 : RelOptInfo *joinrel,
81 : RelOptInfo *outerrel,
82 : RelOptInfo *innerrel,
83 : List *restrictlist,
84 : JoinType jointype,
85 : bool *mergejoin_allowed);
86 : static void generate_mergejoin_paths(PlannerInfo *root,
87 : RelOptInfo *joinrel,
88 : RelOptInfo *innerrel,
89 : Path *outerpath,
90 : JoinType jointype,
91 : JoinPathExtraData *extra,
92 : bool useallclauses,
93 : Path *inner_cheapest_total,
94 : List *merge_pathkeys,
95 : bool is_partial);
96 :
97 :
98 : /*
99 : * add_paths_to_joinrel
100 : * Given a join relation and two component rels from which it can be made,
101 : * consider all possible paths that use the two component rels as outer
102 : * and inner rel respectively. Add these paths to the join rel's pathlist
103 : * if they survive comparison with other paths (and remove any existing
104 : * paths that are dominated by these paths).
105 : *
106 : * Modifies the pathlist field of the joinrel node to contain the best
107 : * paths found so far.
108 : *
109 : * jointype is not necessarily the same as sjinfo->jointype; it might be
110 : * "flipped around" if we are considering joining the rels in the opposite
111 : * direction from what's indicated in sjinfo.
112 : *
113 : * Also, this routine accepts the special JoinTypes JOIN_UNIQUE_OUTER and
114 : * JOIN_UNIQUE_INNER to indicate that the outer or inner relation has been
115 : * unique-ified and a regular inner join should then be applied. These values
116 : * are not allowed to propagate outside this routine, however. Path cost
117 : * estimation code, as well as match_unsorted_outer, may need to recognize that
118 : * it's dealing with such a case --- the combination of nominal jointype INNER
119 : * with sjinfo->jointype == JOIN_SEMI indicates that.
120 : */
121 : void
122 762572 : add_paths_to_joinrel(PlannerInfo *root,
123 : RelOptInfo *joinrel,
124 : RelOptInfo *outerrel,
125 : RelOptInfo *innerrel,
126 : JoinType jointype,
127 : SpecialJoinInfo *sjinfo,
128 : List *restrictlist)
129 : {
130 762572 : JoinType save_jointype = jointype;
131 : JoinPathExtraData extra;
132 762572 : bool mergejoin_allowed = true;
133 : ListCell *lc;
134 : Relids joinrelids;
135 :
136 : /*
137 : * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
138 : * between child relations, even if there is a SpecialJoinInfo node for
139 : * the join between the topmost parents. So, while calculating Relids set
140 : * representing the restriction, consider relids of topmost parent of
141 : * partitions.
142 : */
143 762572 : if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
144 64948 : joinrelids = joinrel->top_parent_relids;
145 : else
146 697624 : joinrelids = joinrel->relids;
147 :
148 762572 : extra.restrictlist = restrictlist;
149 762572 : extra.mergeclause_list = NIL;
150 762572 : extra.sjinfo = sjinfo;
151 762572 : extra.param_source_rels = NULL;
152 :
153 : /*
154 : * See if the inner relation is provably unique for this outer rel.
155 : *
156 : * We have some special cases: for JOIN_SEMI, it doesn't matter since the
157 : * executor can make the equivalent optimization anyway. It also doesn't
158 : * help enable use of Memoize, since a semijoin with a provably unique
159 : * inner side should have been reduced to an inner join in that case.
160 : * Therefore, we need not expend planner cycles on proofs. (For
161 : * JOIN_ANTI, although it doesn't help the executor for the same reason,
162 : * it can benefit Memoize paths.) For JOIN_UNIQUE_INNER, we must be
163 : * considering a semijoin whose inner side is not provably unique (else
164 : * reduce_unique_semijoins would've simplified it), so there's no point in
165 : * calling innerrel_is_unique. However, if the LHS covers all of the
166 : * semijoin's min_lefthand, then it's appropriate to set inner_unique
167 : * because the unique relation produced by create_unique_paths will be
168 : * unique relative to the LHS. (If we have an LHS that's only part of the
169 : * min_lefthand, that is *not* true.) For JOIN_UNIQUE_OUTER, pass
170 : * JOIN_INNER to avoid letting that value escape this module.
171 : */
172 762572 : switch (jointype)
173 : {
174 7652 : case JOIN_SEMI:
175 7652 : extra.inner_unique = false; /* well, unproven */
176 7652 : break;
177 6336 : case JOIN_UNIQUE_INNER:
178 12672 : extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
179 6336 : outerrel->relids);
180 6336 : break;
181 6336 : case JOIN_UNIQUE_OUTER:
182 6336 : extra.inner_unique = innerrel_is_unique(root,
183 : joinrel->relids,
184 : outerrel->relids,
185 : innerrel,
186 : JOIN_INNER,
187 : restrictlist,
188 : false);
189 6336 : break;
190 742248 : default:
191 742248 : extra.inner_unique = innerrel_is_unique(root,
192 : joinrel->relids,
193 : outerrel->relids,
194 : innerrel,
195 : jointype,
196 : restrictlist,
197 : false);
198 742248 : break;
199 : }
200 :
201 : /*
202 : * If the outer or inner relation has been unique-ified, handle as a plain
203 : * inner join.
204 : */
205 762572 : if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
206 12672 : jointype = JOIN_INNER;
207 :
208 : /*
209 : * Find potential mergejoin clauses. We can skip this if we are not
210 : * interested in doing a mergejoin. However, mergejoin may be our only
211 : * way of implementing a full outer join, so override enable_mergejoin if
212 : * it's a full join.
213 : */
214 762572 : if (enable_mergejoin || jointype == JOIN_FULL)
215 758628 : extra.mergeclause_list = select_mergejoin_clauses(root,
216 : joinrel,
217 : outerrel,
218 : innerrel,
219 : restrictlist,
220 : jointype,
221 : &mergejoin_allowed);
222 :
223 : /*
224 : * If it's SEMI, ANTI, or inner_unique join, compute correction factors
225 : * for cost estimation. These will be the same for all paths.
226 : */
227 762572 : if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
228 216758 : compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel,
229 : jointype, sjinfo, restrictlist,
230 : &extra.semifactors);
231 :
232 : /*
233 : * Decide whether it's sensible to generate parameterized paths for this
234 : * joinrel, and if so, which relations such paths should require. There
235 : * is usually no need to create a parameterized result path unless there
236 : * is a join order restriction that prevents joining one of our input rels
237 : * directly to the parameter source rel instead of joining to the other
238 : * input rel. (But see allow_star_schema_join().) This restriction
239 : * reduces the number of parameterized paths we have to deal with at
240 : * higher join levels, without compromising the quality of the resulting
241 : * plan. We express the restriction as a Relids set that must overlap the
242 : * parameterization of any proposed join path. Note: param_source_rels
243 : * should contain only baserels, not OJ relids, so starting from
244 : * all_baserels not all_query_rels is correct.
245 : */
246 1483280 : foreach(lc, root->join_info_list)
247 : {
248 720708 : SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
249 :
250 : /*
251 : * SJ is relevant to this join if we have some part of its RHS
252 : * (possibly not all of it), and haven't yet joined to its LHS. (This
253 : * test is pretty simplistic, but should be sufficient considering the
254 : * join has already been proven legal.) If the SJ is relevant, it
255 : * presents constraints for joining to anything not in its RHS.
256 : */
257 720708 : if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
258 480340 : !bms_overlap(joinrelids, sjinfo2->min_lefthand))
259 21932 : extra.param_source_rels = bms_join(extra.param_source_rels,
260 21932 : bms_difference(root->all_baserels,
261 21932 : sjinfo2->min_righthand));
262 :
263 : /* full joins constrain both sides symmetrically */
264 725624 : if (sjinfo2->jointype == JOIN_FULL &&
265 4916 : bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
266 4860 : !bms_overlap(joinrelids, sjinfo2->min_righthand))
267 672 : extra.param_source_rels = bms_join(extra.param_source_rels,
268 672 : bms_difference(root->all_baserels,
269 672 : sjinfo2->min_lefthand));
270 : }
271 :
272 : /*
273 : * However, when a LATERAL subquery is involved, there will simply not be
274 : * any paths for the joinrel that aren't parameterized by whatever the
275 : * subquery is parameterized by, unless its parameterization is resolved
276 : * within the joinrel. So we might as well allow additional dependencies
277 : * on whatever residual lateral dependencies the joinrel will have.
278 : */
279 1525144 : extra.param_source_rels = bms_add_members(extra.param_source_rels,
280 762572 : joinrel->lateral_relids);
281 :
282 : /*
283 : * 1. Consider mergejoin paths where both relations must be explicitly
284 : * sorted. Skip this if we can't mergejoin.
285 : */
286 762572 : if (mergejoin_allowed)
287 746526 : sort_inner_and_outer(root, joinrel, outerrel, innerrel,
288 : jointype, &extra);
289 :
290 : /*
291 : * 2. Consider paths where the outer relation need not be explicitly
292 : * sorted. This includes both nestloops and mergejoins where the outer
293 : * path is already ordered. Again, skip this if we can't mergejoin.
294 : * (That's okay because we know that nestloop can't handle
295 : * right/right-anti/right-semi/full joins at all, so it wouldn't work in
296 : * the prohibited cases either.)
297 : */
298 762572 : if (mergejoin_allowed)
299 746526 : match_unsorted_outer(root, joinrel, outerrel, innerrel,
300 : jointype, &extra);
301 :
302 : #ifdef NOT_USED
303 :
304 : /*
305 : * 3. Consider paths where the inner relation need not be explicitly
306 : * sorted. This includes mergejoins only (nestloops were already built in
307 : * match_unsorted_outer).
308 : *
309 : * Diked out as redundant 2/13/2000 -- tgl. There isn't any really
310 : * significant difference between the inner and outer side of a mergejoin,
311 : * so match_unsorted_inner creates no paths that aren't equivalent to
312 : * those made by match_unsorted_outer when add_paths_to_joinrel() is
313 : * invoked with the two rels given in the other order.
314 : */
315 : if (mergejoin_allowed)
316 : match_unsorted_inner(root, joinrel, outerrel, innerrel,
317 : jointype, &extra);
318 : #endif
319 :
320 : /*
321 : * 4. Consider paths where both outer and inner relations must be hashed
322 : * before being joined. As above, disregard enable_hashjoin for full
323 : * joins, because there may be no other alternative.
324 : */
325 762572 : if (enable_hashjoin || jointype == JOIN_FULL)
326 757228 : hash_inner_and_outer(root, joinrel, outerrel, innerrel,
327 : jointype, &extra);
328 :
329 : /*
330 : * 5. If inner and outer relations are foreign tables (or joins) belonging
331 : * to the same server and assigned to the same user to check access
332 : * permissions as, give the FDW a chance to push down joins.
333 : */
334 762572 : if (joinrel->fdwroutine &&
335 2724 : joinrel->fdwroutine->GetForeignJoinPaths)
336 2720 : joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
337 : outerrel, innerrel,
338 : save_jointype, &extra);
339 :
340 : /*
341 : * 6. Finally, give extensions a chance to manipulate the path list. They
342 : * could add new paths (such as CustomPaths) by calling add_path(), or
343 : * add_partial_path() if parallel aware. They could also delete or modify
344 : * paths added by the core code.
345 : */
346 762572 : if (set_join_pathlist_hook)
347 0 : set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
348 : save_jointype, &extra);
349 762572 : }
350 :
351 : /*
352 : * We override the param_source_rels heuristic to accept nestloop paths in
353 : * which the outer rel satisfies some but not all of the inner path's
354 : * parameterization. This is necessary to get good plans for star-schema
355 : * scenarios, in which a parameterized path for a large table may require
356 : * parameters from multiple small tables that will not get joined directly to
357 : * each other. We can handle that by stacking nestloops that have the small
358 : * tables on the outside; but this breaks the rule the param_source_rels
359 : * heuristic is based on, namely that parameters should not be passed down
360 : * across joins unless there's a join-order-constraint-based reason to do so.
361 : * So we ignore the param_source_rels restriction when this case applies.
362 : *
363 : * allow_star_schema_join() returns true if the param_source_rels restriction
364 : * should be overridden, ie, it's okay to perform this join.
365 : */
366 : static inline bool
367 315490 : allow_star_schema_join(PlannerInfo *root,
368 : Relids outerrelids,
369 : Relids inner_paramrels)
370 : {
371 : /*
372 : * It's a star-schema case if the outer rel provides some but not all of
373 : * the inner rel's parameterization.
374 : */
375 370086 : return (bms_overlap(inner_paramrels, outerrelids) &&
376 54596 : bms_nonempty_difference(inner_paramrels, outerrelids));
377 : }
378 :
379 : /*
380 : * If the parameterization is only partly satisfied by the outer rel,
381 : * the unsatisfied part can't include any outer-join relids that could
382 : * null rels of the satisfied part. That would imply that we're trying
383 : * to use a clause involving a Var with nonempty varnullingrels at
384 : * a join level where that value isn't yet computable.
385 : *
386 : * In practice, this test never finds a problem because earlier join order
387 : * restrictions prevent us from attempting a join that would cause a problem.
388 : * (That's unsurprising, because the code worked before we ever added
389 : * outer-join relids to expression relids.) It still seems worth checking
390 : * as a backstop, but we only do so in assert-enabled builds.
391 : */
392 : #ifdef USE_ASSERT_CHECKING
393 : static inline bool
394 : have_unsafe_outer_join_ref(PlannerInfo *root,
395 : Relids outerrelids,
396 : Relids inner_paramrels)
397 : {
398 : bool result = false;
399 : Relids unsatisfied = bms_difference(inner_paramrels, outerrelids);
400 : Relids satisfied = bms_intersect(inner_paramrels, outerrelids);
401 :
402 : if (bms_overlap(unsatisfied, root->outer_join_rels))
403 : {
404 : ListCell *lc;
405 :
406 : foreach(lc, root->join_info_list)
407 : {
408 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
409 :
410 : if (!bms_is_member(sjinfo->ojrelid, unsatisfied))
411 : continue; /* not relevant */
412 : if (bms_overlap(satisfied, sjinfo->min_righthand) ||
413 : (sjinfo->jointype == JOIN_FULL &&
414 : bms_overlap(satisfied, sjinfo->min_lefthand)))
415 : {
416 : result = true; /* doesn't work */
417 : break;
418 : }
419 : }
420 : }
421 :
422 : /* Waste no memory when we reject a path here */
423 : bms_free(unsatisfied);
424 : bms_free(satisfied);
425 :
426 : return result;
427 : }
428 : #endif /* USE_ASSERT_CHECKING */
429 :
430 : /*
431 : * paraminfo_get_equal_hashops
432 : * Determine if the clauses in param_info and innerrel's lateral vars
433 : * can be hashed.
434 : * Returns true if hashing is possible, otherwise false.
435 : *
436 : * Additionally, on success we collect the outer expressions and the
437 : * appropriate equality operators for each hashable parameter to innerrel.
438 : * These are returned in parallel lists in *param_exprs and *operators.
439 : * We also set *binary_mode to indicate whether strict binary matching is
440 : * required.
441 : */
442 : static bool
443 396422 : paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
444 : RelOptInfo *outerrel, RelOptInfo *innerrel,
445 : List *ph_lateral_vars, List **param_exprs,
446 : List **operators, bool *binary_mode)
447 :
448 : {
449 : List *lateral_vars;
450 : ListCell *lc;
451 :
452 396422 : *param_exprs = NIL;
453 396422 : *operators = NIL;
454 396422 : *binary_mode = false;
455 :
456 : /* Add join clauses from param_info to the hash key */
457 396422 : if (param_info != NULL)
458 : {
459 396422 : List *clauses = param_info->ppi_clauses;
460 :
461 748874 : foreach(lc, clauses)
462 : {
463 430732 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
464 : OpExpr *opexpr;
465 : Node *expr;
466 : Oid hasheqoperator;
467 :
468 430732 : opexpr = (OpExpr *) rinfo->clause;
469 :
470 : /*
471 : * Bail if the rinfo is not compatible. We need a join OpExpr
472 : * with 2 args.
473 : */
474 430732 : if (!IsA(opexpr, OpExpr) || list_length(opexpr->args) != 2 ||
475 412956 : !clause_sides_match_join(rinfo, outerrel->relids,
476 : innerrel->relids))
477 : {
478 78124 : list_free(*operators);
479 78124 : list_free(*param_exprs);
480 78280 : return false;
481 : }
482 :
483 352608 : if (rinfo->outer_is_left)
484 : {
485 196988 : expr = (Node *) linitial(opexpr->args);
486 196988 : hasheqoperator = rinfo->left_hasheqoperator;
487 : }
488 : else
489 : {
490 155620 : expr = (Node *) lsecond(opexpr->args);
491 155620 : hasheqoperator = rinfo->right_hasheqoperator;
492 : }
493 :
494 : /* can't do memoize if we can't hash the outer type */
495 352608 : if (!OidIsValid(hasheqoperator))
496 : {
497 156 : list_free(*operators);
498 156 : list_free(*param_exprs);
499 156 : return false;
500 : }
501 :
502 : /*
503 : * 'expr' may already exist as a parameter from a previous item in
504 : * ppi_clauses. No need to include it again, however we'd better
505 : * ensure we do switch into binary mode if required. See below.
506 : */
507 352452 : if (!list_member(*param_exprs, expr))
508 : {
509 352446 : *operators = lappend_oid(*operators, hasheqoperator);
510 352446 : *param_exprs = lappend(*param_exprs, expr);
511 : }
512 :
513 : /*
514 : * When the join operator is not hashable then it's possible that
515 : * the operator will be able to distinguish something that the
516 : * hash equality operator could not. For example with floating
517 : * point types -0.0 and +0.0 are classed as equal by the hash
518 : * function and equality function, but some other operator may be
519 : * able to tell those values apart. This means that we must put
520 : * memoize into binary comparison mode so that it does bit-by-bit
521 : * comparisons rather than a "logical" comparison as it would
522 : * using the hash equality operator.
523 : */
524 352452 : if (!OidIsValid(rinfo->hashjoinoperator))
525 1270 : *binary_mode = true;
526 : }
527 : }
528 :
529 : /* Now add any lateral vars to the cache key too */
530 318142 : lateral_vars = list_concat(ph_lateral_vars, innerrel->lateral_vars);
531 325922 : foreach(lc, lateral_vars)
532 : {
533 7978 : Node *expr = (Node *) lfirst(lc);
534 : TypeCacheEntry *typentry;
535 :
536 : /* Reject if there are any volatile functions in lateral vars */
537 7978 : if (contain_volatile_functions(expr))
538 : {
539 0 : list_free(*operators);
540 0 : list_free(*param_exprs);
541 198 : return false;
542 : }
543 :
544 7978 : typentry = lookup_type_cache(exprType(expr),
545 : TYPECACHE_HASH_PROC | TYPECACHE_EQ_OPR);
546 :
547 : /* can't use memoize without a valid hash proc and equals operator */
548 7978 : if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr))
549 : {
550 198 : list_free(*operators);
551 198 : list_free(*param_exprs);
552 198 : return false;
553 : }
554 :
555 : /*
556 : * 'expr' may already exist as a parameter from the ppi_clauses. No
557 : * need to include it again, however we'd better ensure we do switch
558 : * into binary mode.
559 : */
560 7780 : if (!list_member(*param_exprs, expr))
561 : {
562 7282 : *operators = lappend_oid(*operators, typentry->eq_opr);
563 7282 : *param_exprs = lappend(*param_exprs, expr);
564 : }
565 :
566 : /*
567 : * We must go into binary mode as we don't have too much of an idea of
568 : * how these lateral Vars are being used. See comment above when we
569 : * set *binary_mode for the non-lateral Var case. This could be
570 : * relaxed a bit if we had the RestrictInfos and knew the operators
571 : * being used, however for cases like Vars that are arguments to
572 : * functions we must operate in binary mode as we don't have
573 : * visibility into what the function is doing with the Vars.
574 : */
575 7780 : *binary_mode = true;
576 : }
577 :
578 : /* We're okay to use memoize */
579 317944 : return true;
580 : }
581 :
582 : /*
583 : * extract_lateral_vars_from_PHVs
584 : * Extract lateral references within PlaceHolderVars that are due to be
585 : * evaluated at 'innerrelids'.
586 : */
587 : static List *
588 1476518 : extract_lateral_vars_from_PHVs(PlannerInfo *root, Relids innerrelids)
589 : {
590 1476518 : List *ph_lateral_vars = NIL;
591 : ListCell *lc;
592 :
593 : /* Nothing would be found if the query contains no LATERAL RTEs */
594 1476518 : if (!root->hasLateralRTEs)
595 1431820 : return NIL;
596 :
597 : /*
598 : * No need to consider PHVs that are due to be evaluated at joinrels,
599 : * since we do not add Memoize nodes on top of joinrel paths.
600 : */
601 44698 : if (bms_membership(innerrelids) == BMS_MULTIPLE)
602 15722 : return NIL;
603 :
604 31840 : foreach(lc, root->placeholder_list)
605 : {
606 2864 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
607 : List *vars;
608 : ListCell *cell;
609 :
610 : /* PHV is uninteresting if no lateral refs */
611 2864 : if (phinfo->ph_lateral == NULL)
612 1638 : continue;
613 :
614 : /* PHV is uninteresting if not due to be evaluated at innerrelids */
615 1226 : if (!bms_equal(phinfo->ph_eval_at, innerrelids))
616 982 : continue;
617 :
618 : /*
619 : * If the PHV does not reference any rels in innerrelids, use its
620 : * contained expression as a cache key rather than extracting the
621 : * Vars/PHVs from it and using those. This can be beneficial in cases
622 : * where the expression results in fewer distinct values to cache
623 : * tuples for.
624 : */
625 244 : if (!bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
626 : innerrelids))
627 : {
628 232 : ph_lateral_vars = lappend(ph_lateral_vars, phinfo->ph_var->phexpr);
629 232 : continue;
630 : }
631 :
632 : /* Fetch Vars and PHVs of lateral references within PlaceHolderVars */
633 12 : vars = pull_vars_of_level((Node *) phinfo->ph_var->phexpr, 0);
634 36 : foreach(cell, vars)
635 : {
636 24 : Node *node = (Node *) lfirst(cell);
637 :
638 24 : if (IsA(node, Var))
639 : {
640 12 : Var *var = (Var *) node;
641 :
642 : Assert(var->varlevelsup == 0);
643 :
644 12 : if (bms_is_member(var->varno, phinfo->ph_lateral))
645 0 : ph_lateral_vars = lappend(ph_lateral_vars, node);
646 : }
647 12 : else if (IsA(node, PlaceHolderVar))
648 : {
649 12 : PlaceHolderVar *phv = (PlaceHolderVar *) node;
650 :
651 : Assert(phv->phlevelsup == 0);
652 :
653 12 : if (bms_is_subset(find_placeholder_info(root, phv)->ph_eval_at,
654 12 : phinfo->ph_lateral))
655 12 : ph_lateral_vars = lappend(ph_lateral_vars, node);
656 : }
657 : else
658 : Assert(false);
659 : }
660 :
661 12 : list_free(vars);
662 : }
663 :
664 28976 : return ph_lateral_vars;
665 : }
666 :
667 : /*
668 : * get_memoize_path
669 : * If possible, make and return a Memoize path atop of 'inner_path'.
670 : * Otherwise return NULL.
671 : *
672 : * Note that currently we do not add Memoize nodes on top of join relation
673 : * paths. This is because the ParamPathInfos for join relation paths do not
674 : * maintain ppi_clauses, as the set of relevant clauses varies depending on how
675 : * the join is formed. In addition, joinrels do not maintain lateral_vars. So
676 : * we do not have a way to extract cache keys from joinrels.
677 : */
678 : static Path *
679 2041958 : get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
680 : RelOptInfo *outerrel, Path *inner_path,
681 : Path *outer_path, JoinType jointype,
682 : JoinPathExtraData *extra)
683 : {
684 : List *param_exprs;
685 : List *hash_operators;
686 : ListCell *lc;
687 : bool binary_mode;
688 : List *ph_lateral_vars;
689 :
690 : /* Obviously not if it's disabled */
691 2041958 : if (!enable_memoize)
692 412 : return NULL;
693 :
694 : /*
695 : * We can safely not bother with all this unless we expect to perform more
696 : * than one inner scan. The first scan is always going to be a cache
697 : * miss. This would likely fail later anyway based on costs, so this is
698 : * really just to save some wasted effort.
699 : */
700 2041546 : if (outer_path->parent->rows < 2)
701 565028 : return NULL;
702 :
703 : /*
704 : * Extract lateral Vars/PHVs within PlaceHolderVars that are due to be
705 : * evaluated at innerrel. These lateral Vars/PHVs could be used as
706 : * memoize cache keys.
707 : */
708 1476518 : ph_lateral_vars = extract_lateral_vars_from_PHVs(root, innerrel->relids);
709 :
710 : /*
711 : * We can only have a memoize node when there's some kind of cache key,
712 : * either parameterized path clauses or lateral Vars. No cache key sounds
713 : * more like something a Materialize node might be more useful for.
714 : */
715 1476518 : if ((inner_path->param_info == NULL ||
716 536432 : inner_path->param_info->ppi_clauses == NIL) &&
717 972802 : innerrel->lateral_vars == NIL &&
718 : ph_lateral_vars == NIL)
719 966572 : return NULL;
720 :
721 : /*
722 : * Currently we don't do this for SEMI and ANTI joins, because nested loop
723 : * SEMI/ANTI joins don't scan the inner node to completion, which means
724 : * memoize cannot mark the cache entry as complete. Nor can we mark the
725 : * cache entry as complete after fetching the first inner tuple, because
726 : * if that tuple and the current outer tuple don't satisfy the join
727 : * clauses, a second inner tuple that satisfies the parameters would find
728 : * the cache entry already marked as complete. The only exception is when
729 : * the inner relation is provably unique, as in that case, there won't be
730 : * a second matching tuple and we can safely mark the cache entry as
731 : * complete after fetching the first inner tuple. Note that in such
732 : * cases, the SEMI join should have been reduced to an inner join by
733 : * reduce_unique_semijoins.
734 : */
735 509946 : if ((jointype == JOIN_SEMI || jointype == JOIN_ANTI) &&
736 10176 : !extra->inner_unique)
737 6414 : return NULL;
738 :
739 : /*
740 : * Memoize normally marks cache entries as complete when it runs out of
741 : * tuples to read from its subplan. However, with unique joins, Nested
742 : * Loop will skip to the next outer tuple after finding the first matching
743 : * inner tuple. This means that we may not read the inner side of the
744 : * join to completion which leaves no opportunity to mark the cache entry
745 : * as complete. To work around that, when the join is unique we
746 : * automatically mark cache entries as complete after fetching the first
747 : * tuple. This works when the entire join condition is parameterized.
748 : * Otherwise, when the parameterization is only a subset of the join
749 : * condition, we can't be sure which part of it causes the join to be
750 : * unique. This means there are no guarantees that only 1 tuple will be
751 : * read. We cannot mark the cache entry as complete after reading the
752 : * first tuple without that guarantee. This means the scope of Memoize
753 : * node's usefulness is limited to only outer rows that have no join
754 : * partner as this is the only case where Nested Loop would exhaust the
755 : * inner scan of a unique join. Since the scope is limited to that, we
756 : * just don't bother making a memoize path in this case.
757 : *
758 : * Lateral vars needn't be considered here as they're not considered when
759 : * determining if the join is unique.
760 : */
761 503532 : if (extra->inner_unique)
762 : {
763 : Bitmapset *ppi_serials;
764 :
765 299374 : if (inner_path->param_info == NULL)
766 0 : return NULL;
767 :
768 299374 : ppi_serials = inner_path->param_info->ppi_serials;
769 :
770 721864 : foreach_node(RestrictInfo, rinfo, extra->restrictlist)
771 : {
772 337020 : if (!bms_is_member(rinfo->rinfo_serial, ppi_serials))
773 106952 : return NULL;
774 : }
775 : }
776 :
777 : /*
778 : * We can't use a memoize node if there are volatile functions in the
779 : * inner rel's target list or restrict list. A cache hit could reduce the
780 : * number of calls to these functions.
781 : */
782 396580 : if (contain_volatile_functions((Node *) innerrel->reltarget))
783 0 : return NULL;
784 :
785 639136 : foreach(lc, innerrel->baserestrictinfo)
786 : {
787 242712 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
788 :
789 242712 : if (contain_volatile_functions((Node *) rinfo))
790 156 : return NULL;
791 : }
792 :
793 : /*
794 : * Also check the parameterized path restrictinfos for volatile functions.
795 : * Indexed functions must be immutable so shouldn't have any volatile
796 : * functions, however, with a lateral join the inner scan may not be an
797 : * index scan.
798 : */
799 396424 : if (inner_path->param_info != NULL)
800 : {
801 857696 : foreach(lc, inner_path->param_info->ppi_clauses)
802 : {
803 461274 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
804 :
805 461274 : if (contain_volatile_functions((Node *) rinfo))
806 2 : return NULL;
807 : }
808 : }
809 :
810 : /* Check if we have hash ops for each parameter to the path */
811 396422 : if (paraminfo_get_equal_hashops(root,
812 : inner_path->param_info,
813 396422 : outerrel->top_parent ?
814 : outerrel->top_parent : outerrel,
815 : innerrel,
816 : ph_lateral_vars,
817 : ¶m_exprs,
818 : &hash_operators,
819 : &binary_mode))
820 : {
821 317944 : return (Path *) create_memoize_path(root,
822 : innerrel,
823 : inner_path,
824 : param_exprs,
825 : hash_operators,
826 317944 : extra->inner_unique,
827 : binary_mode,
828 : outer_path->rows);
829 : }
830 :
831 78478 : return NULL;
832 : }
833 :
834 : /*
835 : * try_nestloop_path
836 : * Consider a nestloop join path; if it appears useful, push it into
837 : * the joinrel's pathlist via add_path().
838 : */
839 : static void
840 3410962 : try_nestloop_path(PlannerInfo *root,
841 : RelOptInfo *joinrel,
842 : Path *outer_path,
843 : Path *inner_path,
844 : List *pathkeys,
845 : JoinType jointype,
846 : JoinPathExtraData *extra)
847 : {
848 : Relids required_outer;
849 : JoinCostWorkspace workspace;
850 3410962 : RelOptInfo *innerrel = inner_path->parent;
851 3410962 : RelOptInfo *outerrel = outer_path->parent;
852 : Relids innerrelids;
853 : Relids outerrelids;
854 3410962 : Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
855 3410962 : Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
856 :
857 : /*
858 : * If we are forming an outer join at this join, it's nonsensical to use
859 : * an input path that uses the outer join as part of its parameterization.
860 : * (This can happen despite our join order restrictions, since those apply
861 : * to what is in an input relation not what its parameters are.)
862 : */
863 4097058 : if (extra->sjinfo->ojrelid != 0 &&
864 1372192 : (bms_is_member(extra->sjinfo->ojrelid, inner_paramrels) ||
865 686096 : bms_is_member(extra->sjinfo->ojrelid, outer_paramrels)))
866 314298 : return;
867 :
868 : /*
869 : * Any parameterization of the input paths refers to topmost parents of
870 : * the relevant relations, because reparameterize_path_by_child() hasn't
871 : * been called yet. So we must consider topmost parents of the relations
872 : * being joined, too, while determining parameterization of the result and
873 : * checking for disallowed parameterization cases.
874 : */
875 3410842 : if (innerrel->top_parent_relids)
876 173908 : innerrelids = innerrel->top_parent_relids;
877 : else
878 3236934 : innerrelids = innerrel->relids;
879 :
880 3410842 : if (outerrel->top_parent_relids)
881 173908 : outerrelids = outerrel->top_parent_relids;
882 : else
883 3236934 : outerrelids = outerrel->relids;
884 :
885 : /*
886 : * Check to see if proposed path is still parameterized, and reject if the
887 : * parameterization wouldn't be sensible --- unless allow_star_schema_join
888 : * says to allow it anyway.
889 : */
890 3410842 : required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
891 : innerrelids, inner_paramrels);
892 3410842 : if (required_outer &&
893 366186 : !bms_overlap(required_outer, extra->param_source_rels) &&
894 315490 : !allow_star_schema_join(root, outerrelids, inner_paramrels))
895 : {
896 : /* Waste no memory when we reject a path here */
897 314178 : bms_free(required_outer);
898 314178 : return;
899 : }
900 :
901 : /* If we got past that, we shouldn't have any unsafe outer-join refs */
902 : Assert(!have_unsafe_outer_join_ref(root, outerrelids, inner_paramrels));
903 :
904 : /*
905 : * If the inner path is parameterized, it is parameterized by the topmost
906 : * parent of the outer rel, not the outer rel itself. We will need to
907 : * translate the parameterization, if this path is chosen, during
908 : * create_plan(). Here we just check whether we will be able to perform
909 : * the translation, and if not avoid creating a nestloop path.
910 : */
911 3096664 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
912 13336 : !path_is_reparameterizable_by_child(inner_path, outer_path->parent))
913 : {
914 0 : bms_free(required_outer);
915 0 : return;
916 : }
917 :
918 : /*
919 : * Do a precheck to quickly eliminate obviously-inferior paths. We
920 : * calculate a cheap lower bound on the path's cost and then use
921 : * add_path_precheck() to see if the path is clearly going to be dominated
922 : * by some existing path for the joinrel. If not, do the full pushup with
923 : * creating a fully valid path structure and submitting it to add_path().
924 : * The latter two steps are expensive enough to make this two-phase
925 : * methodology worthwhile.
926 : */
927 3096664 : initial_cost_nestloop(root, &workspace, jointype,
928 : outer_path, inner_path, extra);
929 :
930 3096664 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
931 : workspace.startup_cost, workspace.total_cost,
932 : pathkeys, required_outer))
933 : {
934 1438976 : add_path(joinrel, (Path *)
935 1438976 : create_nestloop_path(root,
936 : joinrel,
937 : jointype,
938 : &workspace,
939 : extra,
940 : outer_path,
941 : inner_path,
942 : extra->restrictlist,
943 : pathkeys,
944 : required_outer));
945 : }
946 : else
947 : {
948 : /* Waste no memory when we reject a path here */
949 1657688 : bms_free(required_outer);
950 : }
951 : }
952 :
953 : /*
954 : * try_partial_nestloop_path
955 : * Consider a partial nestloop join path; if it appears useful, push it into
956 : * the joinrel's partial_pathlist via add_partial_path().
957 : */
958 : static void
959 202772 : try_partial_nestloop_path(PlannerInfo *root,
960 : RelOptInfo *joinrel,
961 : Path *outer_path,
962 : Path *inner_path,
963 : List *pathkeys,
964 : JoinType jointype,
965 : JoinPathExtraData *extra)
966 : {
967 : JoinCostWorkspace workspace;
968 :
969 : /*
970 : * If the inner path is parameterized, the parameterization must be fully
971 : * satisfied by the proposed outer path. Parameterized partial paths are
972 : * not supported. The caller should already have verified that no lateral
973 : * rels are required here.
974 : */
975 : Assert(bms_is_empty(joinrel->lateral_relids));
976 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
977 202772 : if (inner_path->param_info != NULL)
978 : {
979 15692 : Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
980 15692 : RelOptInfo *outerrel = outer_path->parent;
981 : Relids outerrelids;
982 :
983 : /*
984 : * The inner and outer paths are parameterized, if at all, by the top
985 : * level parents, not the child relations, so we must use those relids
986 : * for our parameterization tests.
987 : */
988 15692 : if (outerrel->top_parent_relids)
989 11592 : outerrelids = outerrel->top_parent_relids;
990 : else
991 4100 : outerrelids = outerrel->relids;
992 :
993 15692 : if (!bms_is_subset(inner_paramrels, outerrelids))
994 158612 : return;
995 : }
996 :
997 : /*
998 : * If the inner path is parameterized, it is parameterized by the topmost
999 : * parent of the outer rel, not the outer rel itself. We will need to
1000 : * translate the parameterization, if this path is chosen, during
1001 : * create_plan(). Here we just check whether we will be able to perform
1002 : * the translation, and if not avoid creating a nestloop path.
1003 : */
1004 201414 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
1005 10620 : !path_is_reparameterizable_by_child(inner_path, outer_path->parent))
1006 0 : return;
1007 :
1008 : /*
1009 : * Before creating a path, get a quick lower bound on what it is likely to
1010 : * cost. Bail out right away if it looks terrible.
1011 : */
1012 201414 : initial_cost_nestloop(root, &workspace, jointype,
1013 : outer_path, inner_path, extra);
1014 201414 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1015 : workspace.total_cost, pathkeys))
1016 157254 : return;
1017 :
1018 : /* Might be good enough to be worth trying, so let's try it. */
1019 44160 : add_partial_path(joinrel, (Path *)
1020 44160 : create_nestloop_path(root,
1021 : joinrel,
1022 : jointype,
1023 : &workspace,
1024 : extra,
1025 : outer_path,
1026 : inner_path,
1027 : extra->restrictlist,
1028 : pathkeys,
1029 : NULL));
1030 : }
1031 :
1032 : /*
1033 : * try_mergejoin_path
1034 : * Consider a merge join path; if it appears useful, push it into
1035 : * the joinrel's pathlist via add_path().
1036 : */
1037 : static void
1038 1457618 : try_mergejoin_path(PlannerInfo *root,
1039 : RelOptInfo *joinrel,
1040 : Path *outer_path,
1041 : Path *inner_path,
1042 : List *pathkeys,
1043 : List *mergeclauses,
1044 : List *outersortkeys,
1045 : List *innersortkeys,
1046 : JoinType jointype,
1047 : JoinPathExtraData *extra,
1048 : bool is_partial)
1049 : {
1050 : Relids required_outer;
1051 1457618 : int outer_presorted_keys = 0;
1052 : JoinCostWorkspace workspace;
1053 :
1054 1457618 : if (is_partial)
1055 : {
1056 23306 : try_partial_mergejoin_path(root,
1057 : joinrel,
1058 : outer_path,
1059 : inner_path,
1060 : pathkeys,
1061 : mergeclauses,
1062 : outersortkeys,
1063 : innersortkeys,
1064 : jointype,
1065 : extra);
1066 64592 : return;
1067 : }
1068 :
1069 : /*
1070 : * If we are forming an outer join at this join, it's nonsensical to use
1071 : * an input path that uses the outer join as part of its parameterization.
1072 : * (This can happen despite our join order restrictions, since those apply
1073 : * to what is in an input relation not what its parameters are.)
1074 : */
1075 1819764 : if (extra->sjinfo->ojrelid != 0 &&
1076 770904 : (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
1077 385452 : bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
1078 12 : return;
1079 :
1080 : /*
1081 : * Check to see if proposed path is still parameterized, and reject if the
1082 : * parameterization wouldn't be sensible.
1083 : */
1084 1434300 : required_outer = calc_non_nestloop_required_outer(outer_path,
1085 : inner_path);
1086 1434300 : if (required_outer &&
1087 47656 : !bms_overlap(required_outer, extra->param_source_rels))
1088 : {
1089 : /* Waste no memory when we reject a path here */
1090 41274 : bms_free(required_outer);
1091 41274 : return;
1092 : }
1093 :
1094 : /*
1095 : * If the given paths are already well enough ordered, we can skip doing
1096 : * an explicit sort.
1097 : *
1098 : * We need to determine the number of presorted keys of the outer path to
1099 : * decide whether explicit incremental sort can be applied when
1100 : * outersortkeys is not NIL. We do not need to do the same for the inner
1101 : * path though, as incremental sort currently does not support
1102 : * mark/restore.
1103 : */
1104 2096180 : if (outersortkeys &&
1105 703154 : pathkeys_count_contained_in(outersortkeys, outer_path->pathkeys,
1106 : &outer_presorted_keys))
1107 17920 : outersortkeys = NIL;
1108 2528322 : if (innersortkeys &&
1109 1135296 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1110 31304 : innersortkeys = NIL;
1111 :
1112 : /*
1113 : * See comments in try_nestloop_path().
1114 : */
1115 1393026 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
1116 : outer_path, inner_path,
1117 : outersortkeys, innersortkeys,
1118 : outer_presorted_keys,
1119 : extra);
1120 :
1121 1393026 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
1122 : workspace.startup_cost, workspace.total_cost,
1123 : pathkeys, required_outer))
1124 : {
1125 400078 : add_path(joinrel, (Path *)
1126 400078 : create_mergejoin_path(root,
1127 : joinrel,
1128 : jointype,
1129 : &workspace,
1130 : extra,
1131 : outer_path,
1132 : inner_path,
1133 : extra->restrictlist,
1134 : pathkeys,
1135 : required_outer,
1136 : mergeclauses,
1137 : outersortkeys,
1138 : innersortkeys,
1139 : outer_presorted_keys));
1140 : }
1141 : else
1142 : {
1143 : /* Waste no memory when we reject a path here */
1144 992948 : bms_free(required_outer);
1145 : }
1146 : }
1147 :
1148 : /*
1149 : * try_partial_mergejoin_path
1150 : * Consider a partial merge join path; if it appears useful, push it into
1151 : * the joinrel's pathlist via add_partial_path().
1152 : */
1153 : static void
1154 99838 : try_partial_mergejoin_path(PlannerInfo *root,
1155 : RelOptInfo *joinrel,
1156 : Path *outer_path,
1157 : Path *inner_path,
1158 : List *pathkeys,
1159 : List *mergeclauses,
1160 : List *outersortkeys,
1161 : List *innersortkeys,
1162 : JoinType jointype,
1163 : JoinPathExtraData *extra)
1164 : {
1165 99838 : int outer_presorted_keys = 0;
1166 : JoinCostWorkspace workspace;
1167 :
1168 : /*
1169 : * See comments in try_partial_hashjoin_path().
1170 : */
1171 : Assert(bms_is_empty(joinrel->lateral_relids));
1172 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
1173 99838 : if (!bms_is_empty(PATH_REQ_OUTER(inner_path)))
1174 34004 : return;
1175 :
1176 : /*
1177 : * If the given paths are already well enough ordered, we can skip doing
1178 : * an explicit sort.
1179 : *
1180 : * We need to determine the number of presorted keys of the outer path to
1181 : * decide whether explicit incremental sort can be applied when
1182 : * outersortkeys is not NIL. We do not need to do the same for the inner
1183 : * path though, as incremental sort currently does not support
1184 : * mark/restore.
1185 : */
1186 176370 : if (outersortkeys &&
1187 76532 : pathkeys_count_contained_in(outersortkeys, outer_path->pathkeys,
1188 : &outer_presorted_keys))
1189 228 : outersortkeys = NIL;
1190 196450 : if (innersortkeys &&
1191 96612 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1192 642 : innersortkeys = NIL;
1193 :
1194 : /*
1195 : * See comments in try_partial_nestloop_path().
1196 : */
1197 99838 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
1198 : outer_path, inner_path,
1199 : outersortkeys, innersortkeys,
1200 : outer_presorted_keys,
1201 : extra);
1202 :
1203 99838 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1204 : workspace.total_cost, pathkeys))
1205 34004 : return;
1206 :
1207 : /* Might be good enough to be worth trying, so let's try it. */
1208 65834 : add_partial_path(joinrel, (Path *)
1209 65834 : create_mergejoin_path(root,
1210 : joinrel,
1211 : jointype,
1212 : &workspace,
1213 : extra,
1214 : outer_path,
1215 : inner_path,
1216 : extra->restrictlist,
1217 : pathkeys,
1218 : NULL,
1219 : mergeclauses,
1220 : outersortkeys,
1221 : innersortkeys,
1222 : outer_presorted_keys));
1223 : }
1224 :
1225 : /*
1226 : * try_hashjoin_path
1227 : * Consider a hash join path; if it appears useful, push it into
1228 : * the joinrel's pathlist via add_path().
1229 : */
1230 : static void
1231 859452 : try_hashjoin_path(PlannerInfo *root,
1232 : RelOptInfo *joinrel,
1233 : Path *outer_path,
1234 : Path *inner_path,
1235 : List *hashclauses,
1236 : JoinType jointype,
1237 : JoinPathExtraData *extra)
1238 : {
1239 : Relids required_outer;
1240 : JoinCostWorkspace workspace;
1241 :
1242 : /*
1243 : * If we are forming an outer join at this join, it's nonsensical to use
1244 : * an input path that uses the outer join as part of its parameterization.
1245 : * (This can happen despite our join order restrictions, since those apply
1246 : * to what is in an input relation not what its parameters are.)
1247 : */
1248 1113052 : if (extra->sjinfo->ojrelid != 0 &&
1249 507170 : (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
1250 253570 : bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
1251 125376 : return;
1252 :
1253 : /*
1254 : * Check to see if proposed path is still parameterized, and reject if the
1255 : * parameterization wouldn't be sensible.
1256 : */
1257 859392 : required_outer = calc_non_nestloop_required_outer(outer_path,
1258 : inner_path);
1259 859392 : if (required_outer &&
1260 143264 : !bms_overlap(required_outer, extra->param_source_rels))
1261 : {
1262 : /* Waste no memory when we reject a path here */
1263 125316 : bms_free(required_outer);
1264 125316 : return;
1265 : }
1266 :
1267 : /*
1268 : * See comments in try_nestloop_path(). Also note that hashjoin paths
1269 : * never have any output pathkeys, per comments in create_hashjoin_path.
1270 : */
1271 734076 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1272 : outer_path, inner_path, extra, false);
1273 :
1274 734076 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
1275 : workspace.startup_cost, workspace.total_cost,
1276 : NIL, required_outer))
1277 : {
1278 351342 : add_path(joinrel, (Path *)
1279 351342 : create_hashjoin_path(root,
1280 : joinrel,
1281 : jointype,
1282 : &workspace,
1283 : extra,
1284 : outer_path,
1285 : inner_path,
1286 : false, /* parallel_hash */
1287 : extra->restrictlist,
1288 : required_outer,
1289 : hashclauses));
1290 : }
1291 : else
1292 : {
1293 : /* Waste no memory when we reject a path here */
1294 382734 : bms_free(required_outer);
1295 : }
1296 : }
1297 :
1298 : /*
1299 : * try_partial_hashjoin_path
1300 : * Consider a partial hashjoin join path; if it appears useful, push it into
1301 : * the joinrel's partial_pathlist via add_partial_path().
1302 : * The outer side is partial. If parallel_hash is true, then the inner path
1303 : * must be partial and will be run in parallel to create one or more shared
1304 : * hash tables; otherwise the inner path must be complete and a copy of it
1305 : * is run in every process to create separate identical private hash tables.
1306 : */
1307 : static void
1308 147832 : try_partial_hashjoin_path(PlannerInfo *root,
1309 : RelOptInfo *joinrel,
1310 : Path *outer_path,
1311 : Path *inner_path,
1312 : List *hashclauses,
1313 : JoinType jointype,
1314 : JoinPathExtraData *extra,
1315 : bool parallel_hash)
1316 : {
1317 : JoinCostWorkspace workspace;
1318 :
1319 : /*
1320 : * If the inner path is parameterized, we can't use a partial hashjoin.
1321 : * Parameterized partial paths are not supported. The caller should
1322 : * already have verified that no lateral rels are required here.
1323 : */
1324 : Assert(bms_is_empty(joinrel->lateral_relids));
1325 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
1326 147832 : if (!bms_is_empty(PATH_REQ_OUTER(inner_path)))
1327 40670 : return;
1328 :
1329 : /*
1330 : * Before creating a path, get a quick lower bound on what it is likely to
1331 : * cost. Bail out right away if it looks terrible.
1332 : */
1333 147832 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1334 : outer_path, inner_path, extra, parallel_hash);
1335 147832 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1336 : workspace.total_cost, NIL))
1337 40670 : return;
1338 :
1339 : /* Might be good enough to be worth trying, so let's try it. */
1340 107162 : add_partial_path(joinrel, (Path *)
1341 107162 : create_hashjoin_path(root,
1342 : joinrel,
1343 : jointype,
1344 : &workspace,
1345 : extra,
1346 : outer_path,
1347 : inner_path,
1348 : parallel_hash,
1349 : extra->restrictlist,
1350 : NULL,
1351 : hashclauses));
1352 : }
1353 :
1354 : /*
1355 : * sort_inner_and_outer
1356 : * Create mergejoin join paths by explicitly sorting both the outer and
1357 : * inner join relations on each available merge ordering.
1358 : *
1359 : * 'joinrel' is the join relation
1360 : * 'outerrel' is the outer join relation
1361 : * 'innerrel' is the inner join relation
1362 : * 'jointype' is the type of join to do
1363 : * 'extra' contains additional input values
1364 : */
1365 : static void
1366 746526 : sort_inner_and_outer(PlannerInfo *root,
1367 : RelOptInfo *joinrel,
1368 : RelOptInfo *outerrel,
1369 : RelOptInfo *innerrel,
1370 : JoinType jointype,
1371 : JoinPathExtraData *extra)
1372 : {
1373 : Path *outer_path;
1374 : Path *inner_path;
1375 746526 : Path *cheapest_partial_outer = NULL;
1376 746526 : Path *cheapest_safe_inner = NULL;
1377 : List *all_pathkeys;
1378 : ListCell *l;
1379 :
1380 : /* Nothing to do if there are no available mergejoin clauses */
1381 746526 : if (extra->mergeclause_list == NIL)
1382 119810 : return;
1383 :
1384 : /*
1385 : * We only consider the cheapest-total-cost input paths, since we are
1386 : * assuming here that a sort is required. We will consider
1387 : * cheapest-startup-cost input paths later, and only if they don't need a
1388 : * sort.
1389 : *
1390 : * This function intentionally does not consider parameterized input
1391 : * paths, except when the cheapest-total is parameterized. If we did so,
1392 : * we'd have a combinatorial explosion of mergejoin paths of dubious
1393 : * value. This interacts with decisions elsewhere that also discriminate
1394 : * against mergejoins with parameterized inputs; see comments in
1395 : * src/backend/optimizer/README.
1396 : */
1397 626716 : outer_path = outerrel->cheapest_total_path;
1398 626716 : inner_path = innerrel->cheapest_total_path;
1399 :
1400 : /*
1401 : * If either cheapest-total path is parameterized by the other rel, we
1402 : * can't use a mergejoin. (There's no use looking for alternative input
1403 : * paths, since these should already be the least-parameterized available
1404 : * paths.)
1405 : */
1406 626716 : if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
1407 625824 : PATH_PARAM_BY_REL(inner_path, outerrel))
1408 1820 : return;
1409 :
1410 : /*
1411 : * If the joinrel is parallel-safe, we may be able to consider a partial
1412 : * merge join. However, we can't handle JOIN_FULL, JOIN_RIGHT and
1413 : * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
1414 : * Also, the resulting path must not be parameterized.
1415 : */
1416 624896 : if (joinrel->consider_parallel &&
1417 562484 : jointype != JOIN_FULL &&
1418 479778 : jointype != JOIN_RIGHT &&
1419 478298 : jointype != JOIN_RIGHT_ANTI &&
1420 478298 : outerrel->partial_pathlist != NIL &&
1421 72874 : bms_is_empty(joinrel->lateral_relids))
1422 : {
1423 72874 : cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
1424 :
1425 72874 : if (inner_path->parallel_safe)
1426 72694 : cheapest_safe_inner = inner_path;
1427 : else
1428 : cheapest_safe_inner =
1429 180 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1430 : }
1431 :
1432 : /*
1433 : * Each possible ordering of the available mergejoin clauses will generate
1434 : * a differently-sorted result path at essentially the same cost. We have
1435 : * no basis for choosing one over another at this level of joining, but
1436 : * some sort orders may be more useful than others for higher-level
1437 : * mergejoins, so it's worth considering multiple orderings.
1438 : *
1439 : * Actually, it's not quite true that every mergeclause ordering will
1440 : * generate a different path order, because some of the clauses may be
1441 : * partially redundant (refer to the same EquivalenceClasses). Therefore,
1442 : * what we do is convert the mergeclause list to a list of canonical
1443 : * pathkeys, and then consider different orderings of the pathkeys.
1444 : *
1445 : * Generating a path for *every* permutation of the pathkeys doesn't seem
1446 : * like a winning strategy; the cost in planning time is too high. For
1447 : * now, we generate one path for each pathkey, listing that pathkey first
1448 : * and the rest in random order. This should allow at least a one-clause
1449 : * mergejoin without re-sorting against any other possible mergejoin
1450 : * partner path. But if we've not guessed the right ordering of secondary
1451 : * keys, we may end up evaluating clauses as qpquals when they could have
1452 : * been done as mergeclauses. (In practice, it's rare that there's more
1453 : * than two or three mergeclauses, so expending a huge amount of thought
1454 : * on that is probably not worth it.)
1455 : *
1456 : * The pathkey order returned by select_outer_pathkeys_for_merge() has
1457 : * some heuristics behind it (see that function), so be sure to try it
1458 : * exactly as-is as well as making variants.
1459 : */
1460 624896 : all_pathkeys = select_outer_pathkeys_for_merge(root,
1461 : extra->mergeclause_list,
1462 : joinrel);
1463 :
1464 1328050 : foreach(l, all_pathkeys)
1465 : {
1466 703154 : PathKey *front_pathkey = (PathKey *) lfirst(l);
1467 : List *cur_mergeclauses;
1468 : List *outerkeys;
1469 : List *innerkeys;
1470 : List *merge_pathkeys;
1471 :
1472 : /* Make a pathkey list with this guy first */
1473 703154 : if (l != list_head(all_pathkeys))
1474 78258 : outerkeys = lcons(front_pathkey,
1475 : list_delete_nth_cell(list_copy(all_pathkeys),
1476 : foreach_current_index(l)));
1477 : else
1478 624896 : outerkeys = all_pathkeys; /* no work at first one... */
1479 :
1480 : /* Sort the mergeclauses into the corresponding ordering */
1481 : cur_mergeclauses =
1482 703154 : find_mergeclauses_for_outer_pathkeys(root,
1483 : outerkeys,
1484 : extra->mergeclause_list);
1485 :
1486 : /* Should have used them all... */
1487 : Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
1488 :
1489 : /* Build sort pathkeys for the inner side */
1490 703154 : innerkeys = make_inner_pathkeys_for_merge(root,
1491 : cur_mergeclauses,
1492 : outerkeys);
1493 :
1494 : /* Build pathkeys representing output sort order */
1495 703154 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1496 : outerkeys);
1497 :
1498 : /*
1499 : * And now we can make the path.
1500 : *
1501 : * Note: it's possible that the cheapest paths will already be sorted
1502 : * properly. try_mergejoin_path will detect that case and suppress an
1503 : * explicit sort step, so we needn't do so here.
1504 : */
1505 703154 : try_mergejoin_path(root,
1506 : joinrel,
1507 : outer_path,
1508 : inner_path,
1509 : merge_pathkeys,
1510 : cur_mergeclauses,
1511 : outerkeys,
1512 : innerkeys,
1513 : jointype,
1514 : extra,
1515 : false);
1516 :
1517 : /*
1518 : * If we have partial outer and parallel safe inner path then try
1519 : * partial mergejoin path.
1520 : */
1521 703154 : if (cheapest_partial_outer && cheapest_safe_inner)
1522 76532 : try_partial_mergejoin_path(root,
1523 : joinrel,
1524 : cheapest_partial_outer,
1525 : cheapest_safe_inner,
1526 : merge_pathkeys,
1527 : cur_mergeclauses,
1528 : outerkeys,
1529 : innerkeys,
1530 : jointype,
1531 : extra);
1532 : }
1533 : }
1534 :
1535 : /*
1536 : * generate_mergejoin_paths
1537 : * Creates possible mergejoin paths for input outerpath.
1538 : *
1539 : * We generate mergejoins if mergejoin clauses are available. We have
1540 : * two ways to generate the inner path for a mergejoin: sort the cheapest
1541 : * inner path, or use an inner path that is already suitably ordered for the
1542 : * merge. If we have several mergeclauses, it could be that there is no inner
1543 : * path (or only a very expensive one) for the full list of mergeclauses, but
1544 : * better paths exist if we truncate the mergeclause list (thereby discarding
1545 : * some sort key requirements). So, we consider truncations of the
1546 : * mergeclause list as well as the full list. (Ideally we'd consider all
1547 : * subsets of the mergeclause list, but that seems way too expensive.)
1548 : */
1549 : static void
1550 1493310 : generate_mergejoin_paths(PlannerInfo *root,
1551 : RelOptInfo *joinrel,
1552 : RelOptInfo *innerrel,
1553 : Path *outerpath,
1554 : JoinType jointype,
1555 : JoinPathExtraData *extra,
1556 : bool useallclauses,
1557 : Path *inner_cheapest_total,
1558 : List *merge_pathkeys,
1559 : bool is_partial)
1560 : {
1561 : List *mergeclauses;
1562 : List *innersortkeys;
1563 : List *trialsortkeys;
1564 : Path *cheapest_startup_inner;
1565 : Path *cheapest_total_inner;
1566 : int num_sortkeys;
1567 : int sortkeycnt;
1568 :
1569 : /* Look for useful mergeclauses (if any) */
1570 : mergeclauses =
1571 1493310 : find_mergeclauses_for_outer_pathkeys(root,
1572 : outerpath->pathkeys,
1573 : extra->mergeclause_list);
1574 :
1575 : /*
1576 : * Done with this outer path if no chance for a mergejoin.
1577 : *
1578 : * Special corner case: for "x FULL JOIN y ON true", there will be no join
1579 : * clauses at all. Ordinarily we'd generate a clauseless nestloop path,
1580 : * but since mergejoin is our only join type that supports FULL JOIN
1581 : * without any join clauses, it's necessary to generate a clauseless
1582 : * mergejoin path instead.
1583 : */
1584 1493310 : if (mergeclauses == NIL)
1585 : {
1586 1007428 : if (jointype == JOIN_FULL)
1587 : /* okay to try for mergejoin */ ;
1588 : else
1589 1004032 : return;
1590 : }
1591 578142 : if (useallclauses &&
1592 88864 : list_length(mergeclauses) != list_length(extra->mergeclause_list))
1593 13684 : return;
1594 :
1595 : /* Compute the required ordering of the inner path */
1596 475594 : innersortkeys = make_inner_pathkeys_for_merge(root,
1597 : mergeclauses,
1598 : outerpath->pathkeys);
1599 :
1600 : /*
1601 : * Generate a mergejoin on the basis of sorting the cheapest inner. Since
1602 : * a sort will be needed, only cheapest total cost matters. (But
1603 : * try_mergejoin_path will do the right thing if inner_cheapest_total is
1604 : * already correctly sorted.)
1605 : */
1606 475594 : try_mergejoin_path(root,
1607 : joinrel,
1608 : outerpath,
1609 : inner_cheapest_total,
1610 : merge_pathkeys,
1611 : mergeclauses,
1612 : NIL,
1613 : innersortkeys,
1614 : jointype,
1615 : extra,
1616 : is_partial);
1617 :
1618 : /*
1619 : * Look for presorted inner paths that satisfy the innersortkey list ---
1620 : * or any truncation thereof, if we are allowed to build a mergejoin using
1621 : * a subset of the merge clauses. Here, we consider both cheap startup
1622 : * cost and cheap total cost.
1623 : *
1624 : * Currently we do not consider parameterized inner paths here. This
1625 : * interacts with decisions elsewhere that also discriminate against
1626 : * mergejoins with parameterized inputs; see comments in
1627 : * src/backend/optimizer/README.
1628 : *
1629 : * As we shorten the sortkey list, we should consider only paths that are
1630 : * strictly cheaper than (in particular, not the same as) any path found
1631 : * in an earlier iteration. Otherwise we'd be intentionally using fewer
1632 : * merge keys than a given path allows (treating the rest as plain
1633 : * joinquals), which is unlikely to be a good idea. Also, eliminating
1634 : * paths here on the basis of compare_path_costs is a lot cheaper than
1635 : * building the mergejoin path only to throw it away.
1636 : *
1637 : * If inner_cheapest_total is well enough sorted to have not required a
1638 : * sort in the path made above, we shouldn't make a duplicate path with
1639 : * it, either. We handle that case with the same logic that handles the
1640 : * previous consideration, by initializing the variables that track
1641 : * cheapest-so-far properly. Note that we do NOT reject
1642 : * inner_cheapest_total if we find it matches some shorter set of
1643 : * pathkeys. That case corresponds to using fewer mergekeys to avoid
1644 : * sorting inner_cheapest_total, whereas we did sort it above, so the
1645 : * plans being considered are different.
1646 : */
1647 475594 : if (pathkeys_contained_in(innersortkeys,
1648 : inner_cheapest_total->pathkeys))
1649 : {
1650 : /* inner_cheapest_total didn't require a sort */
1651 15144 : cheapest_startup_inner = inner_cheapest_total;
1652 15144 : cheapest_total_inner = inner_cheapest_total;
1653 : }
1654 : else
1655 : {
1656 : /* it did require a sort, at least for the full set of keys */
1657 460450 : cheapest_startup_inner = NULL;
1658 460450 : cheapest_total_inner = NULL;
1659 : }
1660 475594 : num_sortkeys = list_length(innersortkeys);
1661 475594 : if (num_sortkeys > 1 && !useallclauses)
1662 25570 : trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
1663 : else
1664 450024 : trialsortkeys = innersortkeys; /* won't really truncate */
1665 :
1666 902458 : for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
1667 : {
1668 : Path *innerpath;
1669 501900 : List *newclauses = NIL;
1670 :
1671 : /*
1672 : * Look for an inner path ordered well enough for the first
1673 : * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
1674 : * destructively, which is why we made a copy...
1675 : */
1676 501900 : trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
1677 501900 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1678 : trialsortkeys,
1679 : NULL,
1680 : TOTAL_COST,
1681 : is_partial);
1682 501900 : if (innerpath != NULL &&
1683 25880 : (cheapest_total_inner == NULL ||
1684 25880 : compare_path_costs(innerpath, cheapest_total_inner,
1685 : TOTAL_COST) < 0))
1686 : {
1687 : /* Found a cheap (or even-cheaper) sorted path */
1688 : /* Select the right mergeclauses, if we didn't already */
1689 276030 : if (sortkeycnt < num_sortkeys)
1690 : {
1691 : newclauses =
1692 15418 : trim_mergeclauses_for_inner_pathkeys(root,
1693 : mergeclauses,
1694 : trialsortkeys);
1695 : Assert(newclauses != NIL);
1696 : }
1697 : else
1698 260612 : newclauses = mergeclauses;
1699 276030 : try_mergejoin_path(root,
1700 : joinrel,
1701 : outerpath,
1702 : innerpath,
1703 : merge_pathkeys,
1704 : newclauses,
1705 : NIL,
1706 : NIL,
1707 : jointype,
1708 : extra,
1709 : is_partial);
1710 276030 : cheapest_total_inner = innerpath;
1711 : }
1712 : /* Same on the basis of cheapest startup cost ... */
1713 501900 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1714 : trialsortkeys,
1715 : NULL,
1716 : STARTUP_COST,
1717 : is_partial);
1718 501900 : if (innerpath != NULL &&
1719 25880 : (cheapest_startup_inner == NULL ||
1720 25880 : compare_path_costs(innerpath, cheapest_startup_inner,
1721 : STARTUP_COST) < 0))
1722 : {
1723 : /* Found a cheap (or even-cheaper) sorted path */
1724 273424 : if (innerpath != cheapest_total_inner)
1725 : {
1726 : /*
1727 : * Avoid rebuilding clause list if we already made one; saves
1728 : * memory in big join trees...
1729 : */
1730 2840 : if (newclauses == NIL)
1731 : {
1732 82 : if (sortkeycnt < num_sortkeys)
1733 : {
1734 : newclauses =
1735 0 : trim_mergeclauses_for_inner_pathkeys(root,
1736 : mergeclauses,
1737 : trialsortkeys);
1738 : Assert(newclauses != NIL);
1739 : }
1740 : else
1741 82 : newclauses = mergeclauses;
1742 : }
1743 2840 : try_mergejoin_path(root,
1744 : joinrel,
1745 : outerpath,
1746 : innerpath,
1747 : merge_pathkeys,
1748 : newclauses,
1749 : NIL,
1750 : NIL,
1751 : jointype,
1752 : extra,
1753 : is_partial);
1754 : }
1755 273424 : cheapest_startup_inner = innerpath;
1756 : }
1757 :
1758 : /*
1759 : * Don't consider truncated sortkeys if we need all clauses.
1760 : */
1761 501900 : if (useallclauses)
1762 75036 : break;
1763 : }
1764 : }
1765 :
1766 : /*
1767 : * match_unsorted_outer
1768 : * Creates possible join paths for processing a single join relation
1769 : * 'joinrel' by employing either iterative substitution or
1770 : * mergejoining on each of its possible outer paths (considering
1771 : * only outer paths that are already ordered well enough for merging).
1772 : *
1773 : * We always generate a nestloop path for each available outer path.
1774 : * In fact we may generate as many as five: one on the cheapest-total-cost
1775 : * inner path, one on the same with materialization, one on the
1776 : * cheapest-startup-cost inner path (if different), one on the
1777 : * cheapest-total inner-indexscan path (if any), and one on the
1778 : * cheapest-startup inner-indexscan path (if different).
1779 : *
1780 : * We also consider mergejoins if mergejoin clauses are available. See
1781 : * detailed comments in generate_mergejoin_paths.
1782 : *
1783 : * 'joinrel' is the join relation
1784 : * 'outerrel' is the outer join relation
1785 : * 'innerrel' is the inner join relation
1786 : * 'jointype' is the type of join to do
1787 : * 'extra' contains additional input values
1788 : */
1789 : static void
1790 746526 : match_unsorted_outer(PlannerInfo *root,
1791 : RelOptInfo *joinrel,
1792 : RelOptInfo *outerrel,
1793 : RelOptInfo *innerrel,
1794 : JoinType jointype,
1795 : JoinPathExtraData *extra)
1796 : {
1797 : bool nestjoinOK;
1798 : bool useallclauses;
1799 746526 : Path *inner_cheapest_total = innerrel->cheapest_total_path;
1800 746526 : Path *matpath = NULL;
1801 : ListCell *lc1;
1802 :
1803 : /*
1804 : * For now we do not support RIGHT_SEMI join in mergejoin or nestloop
1805 : * join.
1806 : */
1807 746526 : if (jointype == JOIN_RIGHT_SEMI)
1808 102 : return;
1809 :
1810 : /*
1811 : * Nestloop only supports inner, left, semi, and anti joins. Also, if we
1812 : * are doing a right, right-anti or full mergejoin, we must use *all* the
1813 : * mergeclauses as join clauses, else we will not have a valid plan.
1814 : * (Although these two flags are currently inverses, keep them separate
1815 : * for clarity and possible future changes.)
1816 : */
1817 746424 : switch (jointype)
1818 : {
1819 649604 : case JOIN_INNER:
1820 : case JOIN_LEFT:
1821 : case JOIN_SEMI:
1822 : case JOIN_ANTI:
1823 649604 : nestjoinOK = true;
1824 649604 : useallclauses = false;
1825 649604 : break;
1826 96820 : case JOIN_RIGHT:
1827 : case JOIN_RIGHT_ANTI:
1828 : case JOIN_FULL:
1829 96820 : nestjoinOK = false;
1830 96820 : useallclauses = true;
1831 96820 : break;
1832 0 : default:
1833 0 : elog(ERROR, "unrecognized join type: %d",
1834 : (int) jointype);
1835 : nestjoinOK = false; /* keep compiler quiet */
1836 : useallclauses = false;
1837 : break;
1838 : }
1839 :
1840 : /*
1841 : * If inner_cheapest_total is parameterized by the outer rel, ignore it;
1842 : * we will consider it below as a member of cheapest_parameterized_paths,
1843 : * but the other possibilities considered in this routine aren't usable.
1844 : *
1845 : * Furthermore, if the inner side is a unique-ified relation, we cannot
1846 : * generate any valid paths here, because the inner rel's dependency on
1847 : * the outer rel makes unique-ification meaningless.
1848 : */
1849 746424 : if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
1850 : {
1851 13444 : inner_cheapest_total = NULL;
1852 :
1853 13444 : if (RELATION_WAS_MADE_UNIQUE(innerrel, extra->sjinfo, jointype))
1854 36 : return;
1855 : }
1856 :
1857 746388 : if (nestjoinOK)
1858 : {
1859 : /*
1860 : * Consider materializing the cheapest inner path, unless
1861 : * enable_material is off or the path in question materializes its
1862 : * output anyway.
1863 : */
1864 649568 : if (enable_material && inner_cheapest_total != NULL &&
1865 635678 : !ExecMaterializesOutput(inner_cheapest_total->pathtype))
1866 : matpath = (Path *)
1867 607960 : create_material_path(innerrel, inner_cheapest_total);
1868 : }
1869 :
1870 2423726 : foreach(lc1, outerrel->pathlist)
1871 : {
1872 1677338 : Path *outerpath = (Path *) lfirst(lc1);
1873 : List *merge_pathkeys;
1874 :
1875 : /*
1876 : * We cannot use an outer path that is parameterized by the inner rel.
1877 : */
1878 1677338 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
1879 263610 : continue;
1880 :
1881 : /*
1882 : * The result will have this sort order (even if it is implemented as
1883 : * a nestloop, and even if some of the mergeclauses are implemented by
1884 : * qpquals rather than as true mergeclauses):
1885 : */
1886 1413728 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1887 : outerpath->pathkeys);
1888 :
1889 1413728 : if (nestjoinOK)
1890 : {
1891 : /*
1892 : * Consider nestloop joins using this outer path and various
1893 : * available paths for the inner relation. We consider the
1894 : * cheapest-total paths for each available parameterization of the
1895 : * inner relation, including the unparameterized case.
1896 : */
1897 : ListCell *lc2;
1898 :
1899 3168886 : foreach(lc2, innerrel->cheapest_parameterized_paths)
1900 : {
1901 1938792 : Path *innerpath = (Path *) lfirst(lc2);
1902 : Path *mpath;
1903 :
1904 1938792 : try_nestloop_path(root,
1905 : joinrel,
1906 : outerpath,
1907 : innerpath,
1908 : merge_pathkeys,
1909 : jointype,
1910 : extra);
1911 :
1912 : /*
1913 : * Try generating a memoize path and see if that makes the
1914 : * nested loop any cheaper.
1915 : */
1916 1938792 : mpath = get_memoize_path(root, innerrel, outerrel,
1917 : innerpath, outerpath, jointype,
1918 : extra);
1919 1938792 : if (mpath != NULL)
1920 311822 : try_nestloop_path(root,
1921 : joinrel,
1922 : outerpath,
1923 : mpath,
1924 : merge_pathkeys,
1925 : jointype,
1926 : extra);
1927 : }
1928 :
1929 : /* Also consider materialized form of the cheapest inner path */
1930 1230094 : if (matpath != NULL)
1931 1160348 : try_nestloop_path(root,
1932 : joinrel,
1933 : outerpath,
1934 : matpath,
1935 : merge_pathkeys,
1936 : jointype,
1937 : extra);
1938 : }
1939 :
1940 : /* Can't do anything else if inner rel is parameterized by outer */
1941 1413728 : if (inner_cheapest_total == NULL)
1942 14464 : continue;
1943 :
1944 : /* Generate merge join paths */
1945 1399264 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
1946 : jointype, extra, useallclauses,
1947 : inner_cheapest_total, merge_pathkeys,
1948 : false);
1949 : }
1950 :
1951 : /*
1952 : * Consider partial nestloop and mergejoin plan if outerrel has any
1953 : * partial path and the joinrel is parallel-safe. However, we can't
1954 : * handle joins needing lateral rels, since partial paths must not be
1955 : * parameterized. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT and
1956 : * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
1957 : */
1958 746388 : if (joinrel->consider_parallel &&
1959 640734 : jointype != JOIN_FULL &&
1960 556358 : jointype != JOIN_RIGHT &&
1961 554860 : jointype != JOIN_RIGHT_ANTI &&
1962 554860 : outerrel->partial_pathlist != NIL &&
1963 74612 : bms_is_empty(joinrel->lateral_relids))
1964 : {
1965 74612 : if (nestjoinOK)
1966 74612 : consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
1967 : jointype, extra);
1968 :
1969 : /*
1970 : * If inner_cheapest_total is NULL or non parallel-safe then find the
1971 : * cheapest total parallel safe path.
1972 : */
1973 74612 : if (inner_cheapest_total == NULL ||
1974 74192 : !inner_cheapest_total->parallel_safe)
1975 : {
1976 : inner_cheapest_total =
1977 876 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1978 : }
1979 :
1980 74612 : if (inner_cheapest_total)
1981 74150 : consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
1982 : jointype, extra,
1983 : inner_cheapest_total);
1984 : }
1985 : }
1986 :
1987 : /*
1988 : * consider_parallel_mergejoin
1989 : * Try to build partial paths for a joinrel by joining a partial path
1990 : * for the outer relation to a complete path for the inner relation.
1991 : *
1992 : * 'joinrel' is the join relation
1993 : * 'outerrel' is the outer join relation
1994 : * 'innerrel' is the inner join relation
1995 : * 'jointype' is the type of join to do
1996 : * 'extra' contains additional input values
1997 : * 'inner_cheapest_total' cheapest total path for innerrel
1998 : */
1999 : static void
2000 74150 : consider_parallel_mergejoin(PlannerInfo *root,
2001 : RelOptInfo *joinrel,
2002 : RelOptInfo *outerrel,
2003 : RelOptInfo *innerrel,
2004 : JoinType jointype,
2005 : JoinPathExtraData *extra,
2006 : Path *inner_cheapest_total)
2007 : {
2008 : ListCell *lc1;
2009 :
2010 : /* generate merge join path for each partial outer path */
2011 168196 : foreach(lc1, outerrel->partial_pathlist)
2012 : {
2013 94046 : Path *outerpath = (Path *) lfirst(lc1);
2014 : List *merge_pathkeys;
2015 :
2016 : /*
2017 : * Figure out what useful ordering any paths we create will have.
2018 : */
2019 94046 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
2020 : outerpath->pathkeys);
2021 :
2022 94046 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
2023 : extra, false, inner_cheapest_total,
2024 : merge_pathkeys, true);
2025 : }
2026 74150 : }
2027 :
2028 : /*
2029 : * consider_parallel_nestloop
2030 : * Try to build partial paths for a joinrel by joining a partial path for the
2031 : * outer relation to a complete path for the inner relation.
2032 : *
2033 : * 'joinrel' is the join relation
2034 : * 'outerrel' is the outer join relation
2035 : * 'innerrel' is the inner join relation
2036 : * 'jointype' is the type of join to do
2037 : * 'extra' contains additional input values
2038 : */
2039 : static void
2040 74612 : consider_parallel_nestloop(PlannerInfo *root,
2041 : RelOptInfo *joinrel,
2042 : RelOptInfo *outerrel,
2043 : RelOptInfo *innerrel,
2044 : JoinType jointype,
2045 : JoinPathExtraData *extra)
2046 : {
2047 74612 : Path *inner_cheapest_total = innerrel->cheapest_total_path;
2048 74612 : Path *matpath = NULL;
2049 : ListCell *lc1;
2050 :
2051 : /*
2052 : * Consider materializing the cheapest inner path, unless: 1)
2053 : * enable_material is off, 2) the cheapest inner path is not
2054 : * parallel-safe, 3) the cheapest inner path is parameterized by the outer
2055 : * rel, or 4) the cheapest inner path materializes its output anyway.
2056 : */
2057 74612 : if (enable_material && inner_cheapest_total->parallel_safe &&
2058 74110 : !PATH_PARAM_BY_REL(inner_cheapest_total, outerrel) &&
2059 73720 : !ExecMaterializesOutput(inner_cheapest_total->pathtype))
2060 : {
2061 : matpath = (Path *)
2062 73624 : create_material_path(innerrel, inner_cheapest_total);
2063 : Assert(matpath->parallel_safe);
2064 : }
2065 :
2066 169258 : foreach(lc1, outerrel->partial_pathlist)
2067 : {
2068 94646 : Path *outerpath = (Path *) lfirst(lc1);
2069 : List *pathkeys;
2070 : ListCell *lc2;
2071 :
2072 : /* Figure out what useful ordering any paths we create will have. */
2073 94646 : pathkeys = build_join_pathkeys(root, joinrel, jointype,
2074 : outerpath->pathkeys);
2075 :
2076 : /*
2077 : * Try the cheapest parameterized paths; only those which will produce
2078 : * an unparameterized path when joined to this outerrel will survive
2079 : * try_partial_nestloop_path. The cheapest unparameterized path is
2080 : * also in this list.
2081 : */
2082 198346 : foreach(lc2, innerrel->cheapest_parameterized_paths)
2083 : {
2084 103700 : Path *innerpath = (Path *) lfirst(lc2);
2085 : Path *mpath;
2086 :
2087 : /* Can't join to an inner path that is not parallel-safe */
2088 103700 : if (!innerpath->parallel_safe)
2089 534 : continue;
2090 :
2091 103166 : try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
2092 : pathkeys, jointype, extra);
2093 :
2094 : /*
2095 : * Try generating a memoize path and see if that makes the nested
2096 : * loop any cheaper.
2097 : */
2098 103166 : mpath = get_memoize_path(root, innerrel, outerrel,
2099 : innerpath, outerpath, jointype,
2100 : extra);
2101 103166 : if (mpath != NULL)
2102 6122 : try_partial_nestloop_path(root, joinrel, outerpath, mpath,
2103 : pathkeys, jointype, extra);
2104 : }
2105 :
2106 : /* Also consider materialized form of the cheapest inner path */
2107 94646 : if (matpath != NULL)
2108 93484 : try_partial_nestloop_path(root, joinrel, outerpath, matpath,
2109 : pathkeys, jointype, extra);
2110 : }
2111 74612 : }
2112 :
2113 : /*
2114 : * hash_inner_and_outer
2115 : * Create hashjoin join paths by explicitly hashing both the outer and
2116 : * inner keys of each available hash clause.
2117 : *
2118 : * 'joinrel' is the join relation
2119 : * 'outerrel' is the outer join relation
2120 : * 'innerrel' is the inner join relation
2121 : * 'jointype' is the type of join to do
2122 : * 'extra' contains additional input values
2123 : */
2124 : static void
2125 757228 : hash_inner_and_outer(PlannerInfo *root,
2126 : RelOptInfo *joinrel,
2127 : RelOptInfo *outerrel,
2128 : RelOptInfo *innerrel,
2129 : JoinType jointype,
2130 : JoinPathExtraData *extra)
2131 : {
2132 757228 : bool isouterjoin = IS_OUTER_JOIN(jointype);
2133 : List *hashclauses;
2134 : ListCell *l;
2135 :
2136 : /*
2137 : * We need to build only one hashclauses list for any given pair of outer
2138 : * and inner relations; all of the hashable clauses will be used as keys.
2139 : *
2140 : * Scan the join's restrictinfo list to find hashjoinable clauses that are
2141 : * usable with this pair of sub-relations.
2142 : */
2143 757228 : hashclauses = NIL;
2144 1580504 : foreach(l, extra->restrictlist)
2145 : {
2146 823276 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2147 :
2148 : /*
2149 : * If processing an outer join, only use its own join clauses for
2150 : * hashing. For inner joins we need not be so picky.
2151 : */
2152 823276 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
2153 10316 : continue;
2154 :
2155 812960 : if (!restrictinfo->can_join ||
2156 736176 : restrictinfo->hashjoinoperator == InvalidOid)
2157 95000 : continue; /* not hashjoinable */
2158 :
2159 : /*
2160 : * Check if clause has the form "outer op inner" or "inner op outer".
2161 : */
2162 717960 : if (!clause_sides_match_join(restrictinfo, outerrel->relids,
2163 : innerrel->relids))
2164 216 : continue; /* no good for these input relations */
2165 :
2166 : /*
2167 : * If clause has the form "inner op outer", check if its operator has
2168 : * valid commutator. This is necessary because hashclauses in this
2169 : * form will get commuted in createplan.c to put the outer var on the
2170 : * left (see get_switched_clauses). This probably shouldn't ever
2171 : * fail, since hashable operators ought to have commutators, but be
2172 : * paranoid.
2173 : *
2174 : * The clause being hashjoinable indicates that it's an OpExpr.
2175 : */
2176 1076616 : if (!restrictinfo->outer_is_left &&
2177 358872 : !OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
2178 6 : continue;
2179 :
2180 717738 : hashclauses = lappend(hashclauses, restrictinfo);
2181 : }
2182 :
2183 : /* If we found any usable hashclauses, make paths */
2184 757228 : if (hashclauses)
2185 : {
2186 : /*
2187 : * We consider both the cheapest-total-cost and cheapest-startup-cost
2188 : * outer paths. There's no need to consider any but the
2189 : * cheapest-total-cost inner path, however.
2190 : */
2191 638662 : Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
2192 638662 : Path *cheapest_total_outer = outerrel->cheapest_total_path;
2193 638662 : Path *cheapest_total_inner = innerrel->cheapest_total_path;
2194 : ListCell *lc1;
2195 : ListCell *lc2;
2196 :
2197 : /*
2198 : * If either cheapest-total path is parameterized by the other rel, we
2199 : * can't use a hashjoin. (There's no use looking for alternative
2200 : * input paths, since these should already be the least-parameterized
2201 : * available paths.)
2202 : */
2203 638662 : if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
2204 637758 : PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
2205 1808 : return;
2206 :
2207 : /*
2208 : * Consider the cheapest startup outer together with the cheapest
2209 : * total inner, and then consider pairings of cheapest-total paths
2210 : * including parameterized ones. There is no use in generating
2211 : * parameterized paths on the basis of possibly cheap startup cost, so
2212 : * this is sufficient.
2213 : */
2214 636854 : if (cheapest_startup_outer != NULL)
2215 634966 : try_hashjoin_path(root,
2216 : joinrel,
2217 : cheapest_startup_outer,
2218 : cheapest_total_inner,
2219 : hashclauses,
2220 : jointype,
2221 : extra);
2222 :
2223 1590382 : foreach(lc1, outerrel->cheapest_parameterized_paths)
2224 : {
2225 953528 : Path *outerpath = (Path *) lfirst(lc1);
2226 :
2227 : /*
2228 : * We cannot use an outer path that is parameterized by the inner
2229 : * rel.
2230 : */
2231 953528 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
2232 255022 : continue;
2233 :
2234 1765080 : foreach(lc2, innerrel->cheapest_parameterized_paths)
2235 : {
2236 1066574 : Path *innerpath = (Path *) lfirst(lc2);
2237 :
2238 : /*
2239 : * We cannot use an inner path that is parameterized by the
2240 : * outer rel, either.
2241 : */
2242 1066574 : if (PATH_PARAM_BY_REL(innerpath, outerrel))
2243 290360 : continue;
2244 :
2245 776214 : if (outerpath == cheapest_startup_outer &&
2246 : innerpath == cheapest_total_inner)
2247 551728 : continue; /* already tried it */
2248 :
2249 224486 : try_hashjoin_path(root,
2250 : joinrel,
2251 : outerpath,
2252 : innerpath,
2253 : hashclauses,
2254 : jointype,
2255 : extra);
2256 : }
2257 : }
2258 :
2259 : /*
2260 : * If the joinrel is parallel-safe, we may be able to consider a
2261 : * partial hash join.
2262 : *
2263 : * However, we can't handle JOIN_RIGHT_SEMI, because the hash table is
2264 : * either a shared hash table or a private hash table per backend. In
2265 : * the shared case, there is no concurrency protection for the match
2266 : * flags, so multiple workers could inspect and set the flags
2267 : * concurrently, potentially producing incorrect results. In the
2268 : * private case, each worker has its own copy of the hash table, so no
2269 : * single process has all the match flags.
2270 : *
2271 : * Also, the resulting path must not be parameterized.
2272 : */
2273 636854 : if (joinrel->consider_parallel &&
2274 571606 : jointype != JOIN_RIGHT_SEMI &&
2275 571606 : outerrel->partial_pathlist != NIL &&
2276 76834 : bms_is_empty(joinrel->lateral_relids))
2277 : {
2278 : Path *cheapest_partial_outer;
2279 76834 : Path *cheapest_partial_inner = NULL;
2280 76834 : Path *cheapest_safe_inner = NULL;
2281 :
2282 76834 : cheapest_partial_outer =
2283 76834 : (Path *) linitial(outerrel->partial_pathlist);
2284 :
2285 : /*
2286 : * Can we use a partial inner plan too, so that we can build a
2287 : * shared hash table in parallel?
2288 : */
2289 76834 : if (innerrel->partial_pathlist != NIL &&
2290 : enable_parallel_hash)
2291 : {
2292 75186 : cheapest_partial_inner =
2293 75186 : (Path *) linitial(innerrel->partial_pathlist);
2294 75186 : try_partial_hashjoin_path(root, joinrel,
2295 : cheapest_partial_outer,
2296 : cheapest_partial_inner,
2297 : hashclauses, jointype, extra,
2298 : true /* parallel_hash */ );
2299 : }
2300 :
2301 : /*
2302 : * Normally, given that the joinrel is parallel-safe, the cheapest
2303 : * total inner path will also be parallel-safe, but if not, we'll
2304 : * have to search for the cheapest safe, unparameterized inner
2305 : * path. If full, right, or right-anti join, we can't use
2306 : * parallelism (building the hash table in each backend) because
2307 : * no one process has all the match bits.
2308 : */
2309 76834 : if (jointype == JOIN_FULL ||
2310 73018 : jointype == JOIN_RIGHT ||
2311 : jointype == JOIN_RIGHT_ANTI)
2312 4146 : cheapest_safe_inner = NULL;
2313 72688 : else if (cheapest_total_inner->parallel_safe)
2314 72376 : cheapest_safe_inner = cheapest_total_inner;
2315 : else
2316 : cheapest_safe_inner =
2317 312 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
2318 :
2319 76834 : if (cheapest_safe_inner != NULL)
2320 72646 : try_partial_hashjoin_path(root, joinrel,
2321 : cheapest_partial_outer,
2322 : cheapest_safe_inner,
2323 : hashclauses, jointype, extra,
2324 : false /* parallel_hash */ );
2325 : }
2326 : }
2327 : }
2328 :
2329 : /*
2330 : * select_mergejoin_clauses
2331 : * Select mergejoin clauses that are usable for a particular join.
2332 : * Returns a list of RestrictInfo nodes for those clauses.
2333 : *
2334 : * *mergejoin_allowed is normally set to true, but it is set to false if
2335 : * this is a right-semi join, or this is a right/right-anti/full join and
2336 : * there are nonmergejoinable join clauses. The executor's mergejoin
2337 : * machinery cannot handle such cases, so we have to avoid generating a
2338 : * mergejoin plan. (Note that this flag does NOT consider whether there are
2339 : * actually any mergejoinable clauses. This is correct because in some
2340 : * cases we need to build a clauseless mergejoin. Simply returning NIL is
2341 : * therefore not enough to distinguish safe from unsafe cases.)
2342 : *
2343 : * We also mark each selected RestrictInfo to show which side is currently
2344 : * being considered as outer. These are transient markings that are only
2345 : * good for the duration of the current add_paths_to_joinrel() call!
2346 : *
2347 : * We examine each restrictinfo clause known for the join to see
2348 : * if it is mergejoinable and involves vars from the two sub-relations
2349 : * currently of interest.
2350 : */
2351 : static List *
2352 758628 : select_mergejoin_clauses(PlannerInfo *root,
2353 : RelOptInfo *joinrel,
2354 : RelOptInfo *outerrel,
2355 : RelOptInfo *innerrel,
2356 : List *restrictlist,
2357 : JoinType jointype,
2358 : bool *mergejoin_allowed)
2359 : {
2360 758628 : List *result_list = NIL;
2361 758628 : bool isouterjoin = IS_OUTER_JOIN(jointype);
2362 758628 : bool have_nonmergeable_joinclause = false;
2363 : ListCell *l;
2364 :
2365 : /*
2366 : * For now we do not support RIGHT_SEMI join in mergejoin: the benefit of
2367 : * swapping inputs tends to be small here.
2368 : */
2369 758628 : if (jointype == JOIN_RIGHT_SEMI)
2370 : {
2371 7550 : *mergejoin_allowed = false;
2372 7550 : return NIL;
2373 : }
2374 :
2375 1569484 : foreach(l, restrictlist)
2376 : {
2377 818406 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2378 :
2379 : /*
2380 : * If processing an outer join, only use its own join clauses in the
2381 : * merge. For inner joins we can use pushed-down clauses too. (Note:
2382 : * we don't set have_nonmergeable_joinclause here because pushed-down
2383 : * clauses will become otherquals not joinquals.)
2384 : */
2385 818406 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
2386 10340 : continue;
2387 :
2388 : /* Check that clause is a mergeable operator clause */
2389 808066 : if (!restrictinfo->can_join ||
2390 731300 : restrictinfo->mergeopfamilies == NIL)
2391 : {
2392 : /*
2393 : * The executor can handle extra joinquals that are constants, but
2394 : * not anything else, when doing right/right-anti/full merge join.
2395 : * (The reason to support constants is so we can do FULL JOIN ON
2396 : * FALSE.)
2397 : */
2398 93516 : if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
2399 83192 : have_nonmergeable_joinclause = true;
2400 93516 : continue; /* not mergejoinable */
2401 : }
2402 :
2403 : /*
2404 : * Check if clause has the form "outer op inner" or "inner op outer".
2405 : */
2406 714550 : if (!clause_sides_match_join(restrictinfo, outerrel->relids,
2407 : innerrel->relids))
2408 : {
2409 1008 : have_nonmergeable_joinclause = true;
2410 1008 : continue; /* no good for these input relations */
2411 : }
2412 :
2413 : /*
2414 : * If clause has the form "inner op outer", check if its operator has
2415 : * valid commutator. This is necessary because mergejoin clauses in
2416 : * this form will get commuted in createplan.c to put the outer var on
2417 : * the left (see get_switched_clauses). This probably shouldn't ever
2418 : * fail, since mergejoinable operators ought to have commutators, but
2419 : * be paranoid.
2420 : *
2421 : * The clause being mergejoinable indicates that it's an OpExpr.
2422 : */
2423 1067366 : if (!restrictinfo->outer_is_left &&
2424 353824 : !OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
2425 : {
2426 36 : have_nonmergeable_joinclause = true;
2427 36 : continue;
2428 : }
2429 :
2430 : /*
2431 : * Insist that each side have a non-redundant eclass. This
2432 : * restriction is needed because various bits of the planner expect
2433 : * that each clause in a merge be associable with some pathkey in a
2434 : * canonical pathkey list, but redundant eclasses can't appear in
2435 : * canonical sort orderings. (XXX it might be worth relaxing this,
2436 : * but not enough time to address it for 8.3.)
2437 : */
2438 713506 : update_mergeclause_eclasses(root, restrictinfo);
2439 :
2440 713506 : if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
2441 713458 : EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
2442 : {
2443 128 : have_nonmergeable_joinclause = true;
2444 128 : continue; /* can't handle redundant eclasses */
2445 : }
2446 :
2447 713378 : result_list = lappend(result_list, restrictinfo);
2448 : }
2449 :
2450 : /*
2451 : * Report whether mergejoin is allowed (see comment at top of function).
2452 : */
2453 751078 : switch (jointype)
2454 : {
2455 105046 : case JOIN_RIGHT:
2456 : case JOIN_RIGHT_ANTI:
2457 : case JOIN_FULL:
2458 105046 : *mergejoin_allowed = !have_nonmergeable_joinclause;
2459 105046 : break;
2460 646032 : default:
2461 646032 : *mergejoin_allowed = true;
2462 646032 : break;
2463 : }
2464 :
2465 751078 : return result_list;
2466 : }
|