Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * joinpath.c
4 : * Routines to find all possible paths for processing a set of joins
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/path/joinpath.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <math.h>
18 :
19 : #include "executor/executor.h"
20 : #include "foreign/fdwapi.h"
21 : #include "nodes/nodeFuncs.h"
22 : #include "optimizer/cost.h"
23 : #include "optimizer/optimizer.h"
24 : #include "optimizer/pathnode.h"
25 : #include "optimizer/paths.h"
26 : #include "optimizer/placeholder.h"
27 : #include "optimizer/planmain.h"
28 : #include "optimizer/restrictinfo.h"
29 : #include "utils/lsyscache.h"
30 : #include "utils/typcache.h"
31 :
32 : /* Hook for plugins to get control in add_paths_to_joinrel() */
33 : set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
34 :
35 : /*
36 : * Paths parameterized by a parent rel can be considered to be parameterized
37 : * by any of its children, when we are performing partitionwise joins. These
38 : * macros simplify checking for such cases. Beware multiple eval of args.
39 : */
40 : #define PATH_PARAM_BY_PARENT(path, rel) \
41 : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
42 : (rel)->top_parent_relids))
43 : #define PATH_PARAM_BY_REL_SELF(path, rel) \
44 : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
45 :
46 : #define PATH_PARAM_BY_REL(path, rel) \
47 : (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
48 :
49 : static void try_partial_mergejoin_path(PlannerInfo *root,
50 : RelOptInfo *joinrel,
51 : Path *outer_path,
52 : Path *inner_path,
53 : List *pathkeys,
54 : List *mergeclauses,
55 : List *outersortkeys,
56 : List *innersortkeys,
57 : JoinType jointype,
58 : JoinPathExtraData *extra);
59 : static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
60 : RelOptInfo *outerrel, RelOptInfo *innerrel,
61 : JoinType jointype, JoinPathExtraData *extra);
62 : static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
63 : RelOptInfo *outerrel, RelOptInfo *innerrel,
64 : JoinType jointype, JoinPathExtraData *extra);
65 : static void consider_parallel_nestloop(PlannerInfo *root,
66 : RelOptInfo *joinrel,
67 : RelOptInfo *outerrel,
68 : RelOptInfo *innerrel,
69 : JoinType jointype,
70 : JoinPathExtraData *extra);
71 : static void consider_parallel_mergejoin(PlannerInfo *root,
72 : RelOptInfo *joinrel,
73 : RelOptInfo *outerrel,
74 : RelOptInfo *innerrel,
75 : JoinType jointype,
76 : JoinPathExtraData *extra,
77 : Path *inner_cheapest_total);
78 : static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
79 : RelOptInfo *outerrel, RelOptInfo *innerrel,
80 : JoinType jointype, JoinPathExtraData *extra);
81 : static List *select_mergejoin_clauses(PlannerInfo *root,
82 : RelOptInfo *joinrel,
83 : RelOptInfo *outerrel,
84 : RelOptInfo *innerrel,
85 : List *restrictlist,
86 : JoinType jointype,
87 : bool *mergejoin_allowed);
88 : static void generate_mergejoin_paths(PlannerInfo *root,
89 : RelOptInfo *joinrel,
90 : RelOptInfo *innerrel,
91 : Path *outerpath,
92 : JoinType jointype,
93 : JoinPathExtraData *extra,
94 : bool useallclauses,
95 : Path *inner_cheapest_total,
96 : List *merge_pathkeys,
97 : bool is_partial);
98 :
99 :
100 : /*
101 : * add_paths_to_joinrel
102 : * Given a join relation and two component rels from which it can be made,
103 : * consider all possible paths that use the two component rels as outer
104 : * and inner rel respectively. Add these paths to the join rel's pathlist
105 : * if they survive comparison with other paths (and remove any existing
106 : * paths that are dominated by these paths).
107 : *
108 : * Modifies the pathlist field of the joinrel node to contain the best
109 : * paths found so far.
110 : *
111 : * jointype is not necessarily the same as sjinfo->jointype; it might be
112 : * "flipped around" if we are considering joining the rels in the opposite
113 : * direction from what's indicated in sjinfo.
114 : *
115 : * Also, this routine and others in this module accept the special JoinTypes
116 : * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
117 : * unique-ify the outer or inner relation and then apply a regular inner
118 : * join. These values are not allowed to propagate outside this module,
119 : * however. Path cost estimation code may need to recognize that it's
120 : * dealing with such a case --- the combination of nominal jointype INNER
121 : * with sjinfo->jointype == JOIN_SEMI indicates that.
122 : */
123 : void
124 691240 : add_paths_to_joinrel(PlannerInfo *root,
125 : RelOptInfo *joinrel,
126 : RelOptInfo *outerrel,
127 : RelOptInfo *innerrel,
128 : JoinType jointype,
129 : SpecialJoinInfo *sjinfo,
130 : List *restrictlist)
131 : {
132 : JoinPathExtraData extra;
133 691240 : bool mergejoin_allowed = true;
134 : ListCell *lc;
135 : Relids joinrelids;
136 :
137 : /*
138 : * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
139 : * between child relations, even if there is a SpecialJoinInfo node for
140 : * the join between the topmost parents. So, while calculating Relids set
141 : * representing the restriction, consider relids of topmost parent of
142 : * partitions.
143 : */
144 691240 : if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
145 11728 : joinrelids = joinrel->top_parent_relids;
146 : else
147 679512 : joinrelids = joinrel->relids;
148 :
149 691240 : extra.restrictlist = restrictlist;
150 691240 : extra.mergeclause_list = NIL;
151 691240 : extra.sjinfo = sjinfo;
152 691240 : extra.param_source_rels = NULL;
153 :
154 : /*
155 : * See if the inner relation is provably unique for this outer rel.
156 : *
157 : * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
158 : * matter since the executor can make the equivalent optimization anyway;
159 : * we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we
160 : * must be considering a semijoin whose inner side is not provably unique
161 : * (else reduce_unique_semijoins would've simplified it), so there's no
162 : * point in calling innerrel_is_unique. However, if the LHS covers all of
163 : * the semijoin's min_lefthand, then it's appropriate to set inner_unique
164 : * because the path produced by create_unique_path will be unique relative
165 : * to the LHS. (If we have an LHS that's only part of the min_lefthand,
166 : * that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
167 : * letting that value escape this module.
168 : */
169 691240 : switch (jointype)
170 : {
171 12790 : case JOIN_SEMI:
172 : case JOIN_ANTI:
173 :
174 : /*
175 : * XXX it may be worth proving this to allow a Memoize to be
176 : * considered for Nested Loop Semi/Anti Joins.
177 : */
178 12790 : extra.inner_unique = false; /* well, unproven */
179 12790 : break;
180 7522 : case JOIN_UNIQUE_INNER:
181 15044 : extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
182 7522 : outerrel->relids);
183 7522 : break;
184 7522 : case JOIN_UNIQUE_OUTER:
185 7522 : extra.inner_unique = innerrel_is_unique(root,
186 : joinrel->relids,
187 : outerrel->relids,
188 : innerrel,
189 : JOIN_INNER,
190 : restrictlist,
191 : false);
192 7522 : break;
193 663406 : default:
194 663406 : extra.inner_unique = innerrel_is_unique(root,
195 : joinrel->relids,
196 : outerrel->relids,
197 : innerrel,
198 : jointype,
199 : restrictlist,
200 : false);
201 663406 : break;
202 : }
203 :
204 : /*
205 : * Find potential mergejoin clauses. We can skip this if we are not
206 : * interested in doing a mergejoin. However, mergejoin may be our only
207 : * way of implementing a full outer join, so override enable_mergejoin if
208 : * it's a full join.
209 : */
210 691240 : if (enable_mergejoin || jointype == JOIN_FULL)
211 687644 : extra.mergeclause_list = select_mergejoin_clauses(root,
212 : joinrel,
213 : outerrel,
214 : innerrel,
215 : restrictlist,
216 : jointype,
217 : &mergejoin_allowed);
218 :
219 : /*
220 : * If it's SEMI, ANTI, or inner_unique join, compute correction factors
221 : * for cost estimation. These will be the same for all paths.
222 : */
223 691240 : if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
224 213528 : compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel,
225 : jointype, sjinfo, restrictlist,
226 : &extra.semifactors);
227 :
228 : /*
229 : * Decide whether it's sensible to generate parameterized paths for this
230 : * joinrel, and if so, which relations such paths should require. There
231 : * is usually no need to create a parameterized result path unless there
232 : * is a join order restriction that prevents joining one of our input rels
233 : * directly to the parameter source rel instead of joining to the other
234 : * input rel. (But see allow_star_schema_join().) This restriction
235 : * reduces the number of parameterized paths we have to deal with at
236 : * higher join levels, without compromising the quality of the resulting
237 : * plan. We express the restriction as a Relids set that must overlap the
238 : * parameterization of any proposed join path. Note: param_source_rels
239 : * should contain only baserels, not OJ relids, so starting from
240 : * all_baserels not all_query_rels is correct.
241 : */
242 1484468 : foreach(lc, root->join_info_list)
243 : {
244 793228 : SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
245 :
246 : /*
247 : * SJ is relevant to this join if we have some part of its RHS
248 : * (possibly not all of it), and haven't yet joined to its LHS. (This
249 : * test is pretty simplistic, but should be sufficient considering the
250 : * join has already been proven legal.) If the SJ is relevant, it
251 : * presents constraints for joining to anything not in its RHS.
252 : */
253 793228 : if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
254 525360 : !bms_overlap(joinrelids, sjinfo2->min_lefthand))
255 22080 : extra.param_source_rels = bms_join(extra.param_source_rels,
256 22080 : bms_difference(root->all_baserels,
257 22080 : sjinfo2->min_righthand));
258 :
259 : /* full joins constrain both sides symmetrically */
260 798132 : if (sjinfo2->jointype == JOIN_FULL &&
261 4904 : bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
262 4848 : !bms_overlap(joinrelids, sjinfo2->min_righthand))
263 672 : extra.param_source_rels = bms_join(extra.param_source_rels,
264 672 : bms_difference(root->all_baserels,
265 672 : sjinfo2->min_lefthand));
266 : }
267 :
268 : /*
269 : * However, when a LATERAL subquery is involved, there will simply not be
270 : * any paths for the joinrel that aren't parameterized by whatever the
271 : * subquery is parameterized by, unless its parameterization is resolved
272 : * within the joinrel. So we might as well allow additional dependencies
273 : * on whatever residual lateral dependencies the joinrel will have.
274 : */
275 1382480 : extra.param_source_rels = bms_add_members(extra.param_source_rels,
276 691240 : joinrel->lateral_relids);
277 :
278 : /*
279 : * 1. Consider mergejoin paths where both relations must be explicitly
280 : * sorted. Skip this if we can't mergejoin.
281 : */
282 691240 : if (mergejoin_allowed)
283 674096 : sort_inner_and_outer(root, joinrel, outerrel, innerrel,
284 : jointype, &extra);
285 :
286 : /*
287 : * 2. Consider paths where the outer relation need not be explicitly
288 : * sorted. This includes both nestloops and mergejoins where the outer
289 : * path is already ordered. Again, skip this if we can't mergejoin.
290 : * (That's okay because we know that nestloop can't handle
291 : * right/right-anti/right-semi/full joins at all, so it wouldn't work in
292 : * the prohibited cases either.)
293 : */
294 691240 : if (mergejoin_allowed)
295 674096 : match_unsorted_outer(root, joinrel, outerrel, innerrel,
296 : jointype, &extra);
297 :
298 : #ifdef NOT_USED
299 :
300 : /*
301 : * 3. Consider paths where the inner relation need not be explicitly
302 : * sorted. This includes mergejoins only (nestloops were already built in
303 : * match_unsorted_outer).
304 : *
305 : * Diked out as redundant 2/13/2000 -- tgl. There isn't any really
306 : * significant difference between the inner and outer side of a mergejoin,
307 : * so match_unsorted_inner creates no paths that aren't equivalent to
308 : * those made by match_unsorted_outer when add_paths_to_joinrel() is
309 : * invoked with the two rels given in the other order.
310 : */
311 : if (mergejoin_allowed)
312 : match_unsorted_inner(root, joinrel, outerrel, innerrel,
313 : jointype, &extra);
314 : #endif
315 :
316 : /*
317 : * 4. Consider paths where both outer and inner relations must be hashed
318 : * before being joined. As above, disregard enable_hashjoin for full
319 : * joins, because there may be no other alternative.
320 : */
321 691240 : if (enable_hashjoin || jointype == JOIN_FULL)
322 685908 : hash_inner_and_outer(root, joinrel, outerrel, innerrel,
323 : jointype, &extra);
324 :
325 : /*
326 : * 5. If inner and outer relations are foreign tables (or joins) belonging
327 : * to the same server and assigned to the same user to check access
328 : * permissions as, give the FDW a chance to push down joins.
329 : */
330 691240 : if (joinrel->fdwroutine &&
331 2708 : joinrel->fdwroutine->GetForeignJoinPaths)
332 2704 : joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
333 : outerrel, innerrel,
334 : jointype, &extra);
335 :
336 : /*
337 : * 6. Finally, give extensions a chance to manipulate the path list. They
338 : * could add new paths (such as CustomPaths) by calling add_path(), or
339 : * add_partial_path() if parallel aware. They could also delete or modify
340 : * paths added by the core code.
341 : */
342 691240 : if (set_join_pathlist_hook)
343 0 : set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
344 : jointype, &extra);
345 691240 : }
346 :
347 : /*
348 : * We override the param_source_rels heuristic to accept nestloop paths in
349 : * which the outer rel satisfies some but not all of the inner path's
350 : * parameterization. This is necessary to get good plans for star-schema
351 : * scenarios, in which a parameterized path for a large table may require
352 : * parameters from multiple small tables that will not get joined directly to
353 : * each other. We can handle that by stacking nestloops that have the small
354 : * tables on the outside; but this breaks the rule the param_source_rels
355 : * heuristic is based on, namely that parameters should not be passed down
356 : * across joins unless there's a join-order-constraint-based reason to do so.
357 : * So we ignore the param_source_rels restriction when this case applies.
358 : *
359 : * allow_star_schema_join() returns true if the param_source_rels restriction
360 : * should be overridden, ie, it's okay to perform this join.
361 : */
362 : static inline bool
363 303558 : allow_star_schema_join(PlannerInfo *root,
364 : Relids outerrelids,
365 : Relids inner_paramrels)
366 : {
367 : /*
368 : * It's a star-schema case if the outer rel provides some but not all of
369 : * the inner rel's parameterization.
370 : */
371 353458 : return (bms_overlap(inner_paramrels, outerrelids) &&
372 49900 : bms_nonempty_difference(inner_paramrels, outerrelids));
373 : }
374 :
375 : /*
376 : * If the parameterization is only partly satisfied by the outer rel,
377 : * the unsatisfied part can't include any outer-join relids that could
378 : * null rels of the satisfied part. That would imply that we're trying
379 : * to use a clause involving a Var with nonempty varnullingrels at
380 : * a join level where that value isn't yet computable.
381 : *
382 : * In practice, this test never finds a problem because earlier join order
383 : * restrictions prevent us from attempting a join that would cause a problem.
384 : * (That's unsurprising, because the code worked before we ever added
385 : * outer-join relids to expression relids.) It still seems worth checking
386 : * as a backstop, but we only do so in assert-enabled builds.
387 : */
388 : #ifdef USE_ASSERT_CHECKING
389 : static inline bool
390 : have_unsafe_outer_join_ref(PlannerInfo *root,
391 : Relids outerrelids,
392 : Relids inner_paramrels)
393 : {
394 : bool result = false;
395 : Relids unsatisfied = bms_difference(inner_paramrels, outerrelids);
396 : Relids satisfied = bms_intersect(inner_paramrels, outerrelids);
397 :
398 : if (bms_overlap(unsatisfied, root->outer_join_rels))
399 : {
400 : ListCell *lc;
401 :
402 : foreach(lc, root->join_info_list)
403 : {
404 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
405 :
406 : if (!bms_is_member(sjinfo->ojrelid, unsatisfied))
407 : continue; /* not relevant */
408 : if (bms_overlap(satisfied, sjinfo->min_righthand) ||
409 : (sjinfo->jointype == JOIN_FULL &&
410 : bms_overlap(satisfied, sjinfo->min_lefthand)))
411 : {
412 : result = true; /* doesn't work */
413 : break;
414 : }
415 : }
416 : }
417 :
418 : /* Waste no memory when we reject a path here */
419 : bms_free(unsatisfied);
420 : bms_free(satisfied);
421 :
422 : return result;
423 : }
424 : #endif /* USE_ASSERT_CHECKING */
425 :
426 : /*
427 : * paraminfo_get_equal_hashops
428 : * Determine if the clauses in param_info and innerrel's lateral vars
429 : * can be hashed.
430 : * Returns true if hashing is possible, otherwise false.
431 : *
432 : * Additionally, on success we collect the outer expressions and the
433 : * appropriate equality operators for each hashable parameter to innerrel.
434 : * These are returned in parallel lists in *param_exprs and *operators.
435 : * We also set *binary_mode to indicate whether strict binary matching is
436 : * required.
437 : */
438 : static bool
439 396772 : paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
440 : RelOptInfo *outerrel, RelOptInfo *innerrel,
441 : List *ph_lateral_vars, List **param_exprs,
442 : List **operators, bool *binary_mode)
443 :
444 : {
445 : List *lateral_vars;
446 : ListCell *lc;
447 :
448 396772 : *param_exprs = NIL;
449 396772 : *operators = NIL;
450 396772 : *binary_mode = false;
451 :
452 : /* Add join clauses from param_info to the hash key */
453 396772 : if (param_info != NULL)
454 : {
455 396772 : List *clauses = param_info->ppi_clauses;
456 :
457 756360 : foreach(lc, clauses)
458 : {
459 438378 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
460 : OpExpr *opexpr;
461 : Node *expr;
462 : Oid hasheqoperator;
463 :
464 438378 : opexpr = (OpExpr *) rinfo->clause;
465 :
466 : /*
467 : * Bail if the rinfo is not compatible. We need a join OpExpr
468 : * with 2 args.
469 : */
470 438378 : if (!IsA(opexpr, OpExpr) || list_length(opexpr->args) != 2 ||
471 417486 : !clause_sides_match_join(rinfo, outerrel->relids,
472 : innerrel->relids))
473 : {
474 78634 : list_free(*operators);
475 78634 : list_free(*param_exprs);
476 78790 : return false;
477 : }
478 :
479 359744 : if (rinfo->outer_is_left)
480 : {
481 206012 : expr = (Node *) linitial(opexpr->args);
482 206012 : hasheqoperator = rinfo->left_hasheqoperator;
483 : }
484 : else
485 : {
486 153732 : expr = (Node *) lsecond(opexpr->args);
487 153732 : hasheqoperator = rinfo->right_hasheqoperator;
488 : }
489 :
490 : /* can't do memoize if we can't hash the outer type */
491 359744 : if (!OidIsValid(hasheqoperator))
492 : {
493 156 : list_free(*operators);
494 156 : list_free(*param_exprs);
495 156 : return false;
496 : }
497 :
498 : /*
499 : * 'expr' may already exist as a parameter from a previous item in
500 : * ppi_clauses. No need to include it again, however we'd better
501 : * ensure we do switch into binary mode if required. See below.
502 : */
503 359588 : if (!list_member(*param_exprs, expr))
504 : {
505 359582 : *operators = lappend_oid(*operators, hasheqoperator);
506 359582 : *param_exprs = lappend(*param_exprs, expr);
507 : }
508 :
509 : /*
510 : * When the join operator is not hashable then it's possible that
511 : * the operator will be able to distinguish something that the
512 : * hash equality operator could not. For example with floating
513 : * point types -0.0 and +0.0 are classed as equal by the hash
514 : * function and equality function, but some other operator may be
515 : * able to tell those values apart. This means that we must put
516 : * memoize into binary comparison mode so that it does bit-by-bit
517 : * comparisons rather than a "logical" comparison as it would
518 : * using the hash equality operator.
519 : */
520 359588 : if (!OidIsValid(rinfo->hashjoinoperator))
521 1194 : *binary_mode = true;
522 : }
523 : }
524 :
525 : /* Now add any lateral vars to the cache key too */
526 317982 : lateral_vars = list_concat(ph_lateral_vars, innerrel->lateral_vars);
527 322690 : foreach(lc, lateral_vars)
528 : {
529 4906 : Node *expr = (Node *) lfirst(lc);
530 : TypeCacheEntry *typentry;
531 :
532 : /* Reject if there are any volatile functions in lateral vars */
533 4906 : if (contain_volatile_functions(expr))
534 : {
535 0 : list_free(*operators);
536 0 : list_free(*param_exprs);
537 198 : return false;
538 : }
539 :
540 4906 : typentry = lookup_type_cache(exprType(expr),
541 : TYPECACHE_HASH_PROC | TYPECACHE_EQ_OPR);
542 :
543 : /* can't use memoize without a valid hash proc and equals operator */
544 4906 : if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr))
545 : {
546 198 : list_free(*operators);
547 198 : list_free(*param_exprs);
548 198 : return false;
549 : }
550 :
551 : /*
552 : * 'expr' may already exist as a parameter from the ppi_clauses. No
553 : * need to include it again, however we'd better ensure we do switch
554 : * into binary mode.
555 : */
556 4708 : if (!list_member(*param_exprs, expr))
557 : {
558 4258 : *operators = lappend_oid(*operators, typentry->eq_opr);
559 4258 : *param_exprs = lappend(*param_exprs, expr);
560 : }
561 :
562 : /*
563 : * We must go into binary mode as we don't have too much of an idea of
564 : * how these lateral Vars are being used. See comment above when we
565 : * set *binary_mode for the non-lateral Var case. This could be
566 : * relaxed a bit if we had the RestrictInfos and knew the operators
567 : * being used, however for cases like Vars that are arguments to
568 : * functions we must operate in binary mode as we don't have
569 : * visibility into what the function is doing with the Vars.
570 : */
571 4708 : *binary_mode = true;
572 : }
573 :
574 : /* We're okay to use memoize */
575 317784 : return true;
576 : }
577 :
578 : /*
579 : * extract_lateral_vars_from_PHVs
580 : * Extract lateral references within PlaceHolderVars that are due to be
581 : * evaluated at 'innerrelids'.
582 : */
583 : static List *
584 1314184 : extract_lateral_vars_from_PHVs(PlannerInfo *root, Relids innerrelids)
585 : {
586 1314184 : List *ph_lateral_vars = NIL;
587 : ListCell *lc;
588 :
589 : /* Nothing would be found if the query contains no LATERAL RTEs */
590 1314184 : if (!root->hasLateralRTEs)
591 1294630 : return NIL;
592 :
593 : /*
594 : * No need to consider PHVs that are due to be evaluated at joinrels,
595 : * since we do not add Memoize nodes on top of joinrel paths.
596 : */
597 19554 : if (bms_membership(innerrelids) == BMS_MULTIPLE)
598 6136 : return NIL;
599 :
600 16246 : foreach(lc, root->placeholder_list)
601 : {
602 2828 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
603 : List *vars;
604 : ListCell *cell;
605 :
606 : /* PHV is uninteresting if no lateral refs */
607 2828 : if (phinfo->ph_lateral == NULL)
608 1602 : continue;
609 :
610 : /* PHV is uninteresting if not due to be evaluated at innerrelids */
611 1226 : if (!bms_equal(phinfo->ph_eval_at, innerrelids))
612 982 : continue;
613 :
614 : /*
615 : * If the PHV does not reference any rels in innerrelids, use its
616 : * contained expression as a cache key rather than extracting the
617 : * Vars/PHVs from it and using those. This can be beneficial in cases
618 : * where the expression results in fewer distinct values to cache
619 : * tuples for.
620 : */
621 244 : if (!bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
622 : innerrelids))
623 : {
624 232 : ph_lateral_vars = lappend(ph_lateral_vars, phinfo->ph_var->phexpr);
625 232 : continue;
626 : }
627 :
628 : /* Fetch Vars and PHVs of lateral references within PlaceHolderVars */
629 12 : vars = pull_vars_of_level((Node *) phinfo->ph_var->phexpr, 0);
630 36 : foreach(cell, vars)
631 : {
632 24 : Node *node = (Node *) lfirst(cell);
633 :
634 24 : if (IsA(node, Var))
635 : {
636 12 : Var *var = (Var *) node;
637 :
638 : Assert(var->varlevelsup == 0);
639 :
640 12 : if (bms_is_member(var->varno, phinfo->ph_lateral))
641 0 : ph_lateral_vars = lappend(ph_lateral_vars, node);
642 : }
643 12 : else if (IsA(node, PlaceHolderVar))
644 : {
645 12 : PlaceHolderVar *phv = (PlaceHolderVar *) node;
646 :
647 : Assert(phv->phlevelsup == 0);
648 :
649 12 : if (bms_is_subset(find_placeholder_info(root, phv)->ph_eval_at,
650 12 : phinfo->ph_lateral))
651 12 : ph_lateral_vars = lappend(ph_lateral_vars, node);
652 : }
653 : else
654 : Assert(false);
655 : }
656 :
657 12 : list_free(vars);
658 : }
659 :
660 13418 : return ph_lateral_vars;
661 : }
662 :
663 : /*
664 : * get_memoize_path
665 : * If possible, make and return a Memoize path atop of 'inner_path'.
666 : * Otherwise return NULL.
667 : *
668 : * Note that currently we do not add Memoize nodes on top of join relation
669 : * paths. This is because the ParamPathInfos for join relation paths do not
670 : * maintain ppi_clauses, as the set of relevant clauses varies depending on how
671 : * the join is formed. In addition, joinrels do not maintain lateral_vars. So
672 : * we do not have a way to extract cache keys from joinrels.
673 : */
674 : static Path *
675 1839538 : get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
676 : RelOptInfo *outerrel, Path *inner_path,
677 : Path *outer_path, JoinType jointype,
678 : JoinPathExtraData *extra)
679 : {
680 : List *param_exprs;
681 : List *hash_operators;
682 : ListCell *lc;
683 : bool binary_mode;
684 : List *ph_lateral_vars;
685 :
686 : /* Obviously not if it's disabled */
687 1839538 : if (!enable_memoize)
688 388 : return NULL;
689 :
690 : /*
691 : * We can safely not bother with all this unless we expect to perform more
692 : * than one inner scan. The first scan is always going to be a cache
693 : * miss. This would likely fail later anyway based on costs, so this is
694 : * really just to save some wasted effort.
695 : */
696 1839150 : if (outer_path->parent->rows < 2)
697 524688 : return NULL;
698 :
699 : /*
700 : * Extract lateral Vars/PHVs within PlaceHolderVars that are due to be
701 : * evaluated at innerrel. These lateral Vars/PHVs could be used as
702 : * memoize cache keys.
703 : */
704 1314462 : ph_lateral_vars = extract_lateral_vars_from_PHVs(root, innerrel->relids);
705 :
706 : /*
707 : * We can only have a memoize node when there's some kind of cache key,
708 : * either parameterized path clauses or lateral Vars. No cache key sounds
709 : * more like something a Materialize node might be more useful for.
710 : */
711 1314462 : if ((inner_path->param_info == NULL ||
712 550392 : inner_path->param_info->ppi_clauses == NIL) &&
713 790360 : innerrel->lateral_vars == NIL &&
714 : ph_lateral_vars == NIL)
715 787118 : return NULL;
716 :
717 : /*
718 : * Currently we don't do this for SEMI and ANTI joins unless they're
719 : * marked as inner_unique. This is because nested loop SEMI/ANTI joins
720 : * don't scan the inner node to completion, which will mean memoize cannot
721 : * mark the cache entry as complete.
722 : *
723 : * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique
724 : * = true. Should we? See add_paths_to_joinrel()
725 : */
726 527344 : if (!extra->inner_unique && (jointype == JOIN_SEMI ||
727 : jointype == JOIN_ANTI))
728 10518 : return NULL;
729 :
730 : /*
731 : * Memoize normally marks cache entries as complete when it runs out of
732 : * tuples to read from its subplan. However, with unique joins, Nested
733 : * Loop will skip to the next outer tuple after finding the first matching
734 : * inner tuple. This means that we may not read the inner side of the
735 : * join to completion which leaves no opportunity to mark the cache entry
736 : * as complete. To work around that, when the join is unique we
737 : * automatically mark cache entries as complete after fetching the first
738 : * tuple. This works when the entire join condition is parameterized.
739 : * Otherwise, when the parameterization is only a subset of the join
740 : * condition, we can't be sure which part of it causes the join to be
741 : * unique. This means there are no guarantees that only 1 tuple will be
742 : * read. We cannot mark the cache entry as complete after reading the
743 : * first tuple without that guarantee. This means the scope of Memoize
744 : * node's usefulness is limited to only outer rows that have no join
745 : * partner as this is the only case where Nested Loop would exhaust the
746 : * inner scan of a unique join. Since the scope is limited to that, we
747 : * just don't bother making a memoize path in this case.
748 : *
749 : * Lateral vars needn't be considered here as they're not considered when
750 : * determining if the join is unique.
751 : */
752 516826 : if (extra->inner_unique)
753 : {
754 : Bitmapset *ppi_serials;
755 :
756 313942 : if (inner_path->param_info == NULL)
757 0 : return NULL;
758 :
759 313942 : ppi_serials = inner_path->param_info->ppi_serials;
760 :
761 744254 : foreach_node(RestrictInfo, rinfo, extra->restrictlist)
762 : {
763 356322 : if (!bms_is_member(rinfo->rinfo_serial, ppi_serials))
764 119976 : return NULL;
765 : }
766 : }
767 :
768 : /*
769 : * We can't use a memoize node if there are volatile functions in the
770 : * inner rel's target list or restrict list. A cache hit could reduce the
771 : * number of calls to these functions.
772 : */
773 396850 : if (contain_volatile_functions((Node *) innerrel->reltarget))
774 0 : return NULL;
775 :
776 663110 : foreach(lc, innerrel->baserestrictinfo)
777 : {
778 266272 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
779 :
780 266272 : if (contain_volatile_functions((Node *) rinfo))
781 12 : return NULL;
782 : }
783 :
784 : /*
785 : * Also check the parameterized path restrictinfos for volatile functions.
786 : * Indexed functions must be immutable so shouldn't have any volatile
787 : * functions, however, with a lateral join the inner scan may not be an
788 : * index scan.
789 : */
790 396838 : if (inner_path->param_info != NULL)
791 : {
792 863256 : foreach(lc, inner_path->param_info->ppi_clauses)
793 : {
794 466420 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
795 :
796 466420 : if (contain_volatile_functions((Node *) rinfo))
797 2 : return NULL;
798 : }
799 : }
800 :
801 : /* Check if we have hash ops for each parameter to the path */
802 396836 : if (paraminfo_get_equal_hashops(root,
803 : inner_path->param_info,
804 396836 : outerrel->top_parent ?
805 : outerrel->top_parent : outerrel,
806 : innerrel,
807 : ph_lateral_vars,
808 : ¶m_exprs,
809 : &hash_operators,
810 : &binary_mode))
811 : {
812 317844 : return (Path *) create_memoize_path(root,
813 : innerrel,
814 : inner_path,
815 : param_exprs,
816 : hash_operators,
817 317844 : extra->inner_unique,
818 : binary_mode,
819 : outer_path->rows);
820 : }
821 :
822 78992 : return NULL;
823 : }
824 :
825 : /*
826 : * try_nestloop_path
827 : * Consider a nestloop join path; if it appears useful, push it into
828 : * the joinrel's pathlist via add_path().
829 : */
830 : static void
831 3176376 : try_nestloop_path(PlannerInfo *root,
832 : RelOptInfo *joinrel,
833 : Path *outer_path,
834 : Path *inner_path,
835 : List *pathkeys,
836 : JoinType jointype,
837 : JoinPathExtraData *extra)
838 : {
839 : Relids required_outer;
840 : JoinCostWorkspace workspace;
841 3176376 : RelOptInfo *innerrel = inner_path->parent;
842 3176376 : RelOptInfo *outerrel = outer_path->parent;
843 : Relids innerrelids;
844 : Relids outerrelids;
845 3176376 : Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
846 3176376 : Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
847 :
848 : /*
849 : * If we are forming an outer join at this join, it's nonsensical to use
850 : * an input path that uses the outer join as part of its parameterization.
851 : * (This can happen despite our join order restrictions, since those apply
852 : * to what is in an input relation not what its parameters are.)
853 : */
854 3935402 : if (extra->sjinfo->ojrelid != 0 &&
855 1518052 : (bms_is_member(extra->sjinfo->ojrelid, inner_paramrels) ||
856 759026 : bms_is_member(extra->sjinfo->ojrelid, outer_paramrels)))
857 303362 : return;
858 :
859 : /*
860 : * Any parameterization of the input paths refers to topmost parents of
861 : * the relevant relations, because reparameterize_path_by_child() hasn't
862 : * been called yet. So we must consider topmost parents of the relations
863 : * being joined, too, while determining parameterization of the result and
864 : * checking for disallowed parameterization cases.
865 : */
866 3176256 : if (innerrel->top_parent_relids)
867 37132 : innerrelids = innerrel->top_parent_relids;
868 : else
869 3139124 : innerrelids = innerrel->relids;
870 :
871 3176256 : if (outerrel->top_parent_relids)
872 37132 : outerrelids = outerrel->top_parent_relids;
873 : else
874 3139124 : outerrelids = outerrel->relids;
875 :
876 : /*
877 : * Check to see if proposed path is still parameterized, and reject if the
878 : * parameterization wouldn't be sensible --- unless allow_star_schema_join
879 : * says to allow it anyway.
880 : */
881 3176256 : required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
882 : innerrelids, inner_paramrels);
883 3176256 : if (required_outer &&
884 339312 : !bms_overlap(required_outer, extra->param_source_rels) &&
885 303658 : !allow_star_schema_join(root, outerrelids, inner_paramrels))
886 : {
887 : /* Waste no memory when we reject a path here */
888 303242 : bms_free(required_outer);
889 303242 : return;
890 : }
891 :
892 : /* If we got past that, we shouldn't have any unsafe outer-join refs */
893 : Assert(!have_unsafe_outer_join_ref(root, outerrelids, inner_paramrels));
894 :
895 : /*
896 : * If the inner path is parameterized, it is parameterized by the topmost
897 : * parent of the outer rel, not the outer rel itself. We will need to
898 : * translate the parameterization, if this path is chosen, during
899 : * create_plan(). Here we just check whether we will be able to perform
900 : * the translation, and if not avoid creating a nestloop path.
901 : */
902 2873014 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
903 12160 : !path_is_reparameterizable_by_child(inner_path, outer_path->parent))
904 : {
905 0 : bms_free(required_outer);
906 0 : return;
907 : }
908 :
909 : /*
910 : * Do a precheck to quickly eliminate obviously-inferior paths. We
911 : * calculate a cheap lower bound on the path's cost and then use
912 : * add_path_precheck() to see if the path is clearly going to be dominated
913 : * by some existing path for the joinrel. If not, do the full pushup with
914 : * creating a fully valid path structure and submitting it to add_path().
915 : * The latter two steps are expensive enough to make this two-phase
916 : * methodology worthwhile.
917 : */
918 2873014 : initial_cost_nestloop(root, &workspace, jointype,
919 : outer_path, inner_path, extra);
920 :
921 2873014 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
922 : workspace.startup_cost, workspace.total_cost,
923 : pathkeys, required_outer))
924 : {
925 1391516 : add_path(joinrel, (Path *)
926 1391516 : create_nestloop_path(root,
927 : joinrel,
928 : jointype,
929 : &workspace,
930 : extra,
931 : outer_path,
932 : inner_path,
933 : extra->restrictlist,
934 : pathkeys,
935 : required_outer));
936 : }
937 : else
938 : {
939 : /* Waste no memory when we reject a path here */
940 1481498 : bms_free(required_outer);
941 : }
942 : }
943 :
944 : /*
945 : * try_partial_nestloop_path
946 : * Consider a partial nestloop join path; if it appears useful, push it into
947 : * the joinrel's partial_pathlist via add_partial_path().
948 : */
949 : static void
950 42690 : try_partial_nestloop_path(PlannerInfo *root,
951 : RelOptInfo *joinrel,
952 : Path *outer_path,
953 : Path *inner_path,
954 : List *pathkeys,
955 : JoinType jointype,
956 : JoinPathExtraData *extra)
957 : {
958 : JoinCostWorkspace workspace;
959 :
960 : /*
961 : * If the inner path is parameterized, the parameterization must be fully
962 : * satisfied by the proposed outer path. Parameterized partial paths are
963 : * not supported. The caller should already have verified that no lateral
964 : * rels are required here.
965 : */
966 : Assert(bms_is_empty(joinrel->lateral_relids));
967 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
968 42690 : if (inner_path->param_info != NULL)
969 : {
970 15038 : Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
971 15038 : RelOptInfo *outerrel = outer_path->parent;
972 : Relids outerrelids;
973 :
974 : /*
975 : * The inner and outer paths are parameterized, if at all, by the top
976 : * level parents, not the child relations, so we must use those relids
977 : * for our parameterization tests.
978 : */
979 15038 : if (outerrel->top_parent_relids)
980 11016 : outerrelids = outerrel->top_parent_relids;
981 : else
982 4022 : outerrelids = outerrel->relids;
983 :
984 15038 : if (!bms_is_subset(inner_paramrels, outerrelids))
985 29898 : return;
986 : }
987 :
988 : /*
989 : * If the inner path is parameterized, it is parameterized by the topmost
990 : * parent of the outer rel, not the outer rel itself. We will need to
991 : * translate the parameterization, if this path is chosen, during
992 : * create_plan(). Here we just check whether we will be able to perform
993 : * the translation, and if not avoid creating a nestloop path.
994 : */
995 41382 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
996 10116 : !path_is_reparameterizable_by_child(inner_path, outer_path->parent))
997 0 : return;
998 :
999 : /*
1000 : * Before creating a path, get a quick lower bound on what it is likely to
1001 : * cost. Bail out right away if it looks terrible.
1002 : */
1003 41382 : initial_cost_nestloop(root, &workspace, jointype,
1004 : outer_path, inner_path, extra);
1005 41382 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1006 : workspace.total_cost, pathkeys))
1007 28590 : return;
1008 :
1009 : /* Might be good enough to be worth trying, so let's try it. */
1010 12792 : add_partial_path(joinrel, (Path *)
1011 12792 : create_nestloop_path(root,
1012 : joinrel,
1013 : jointype,
1014 : &workspace,
1015 : extra,
1016 : outer_path,
1017 : inner_path,
1018 : extra->restrictlist,
1019 : pathkeys,
1020 : NULL));
1021 : }
1022 :
1023 : /*
1024 : * try_mergejoin_path
1025 : * Consider a merge join path; if it appears useful, push it into
1026 : * the joinrel's pathlist via add_path().
1027 : */
1028 : static void
1029 1314364 : try_mergejoin_path(PlannerInfo *root,
1030 : RelOptInfo *joinrel,
1031 : Path *outer_path,
1032 : Path *inner_path,
1033 : List *pathkeys,
1034 : List *mergeclauses,
1035 : List *outersortkeys,
1036 : List *innersortkeys,
1037 : JoinType jointype,
1038 : JoinPathExtraData *extra,
1039 : bool is_partial)
1040 : {
1041 : Relids required_outer;
1042 1314364 : int outer_presorted_keys = 0;
1043 : JoinCostWorkspace workspace;
1044 :
1045 1314364 : if (is_partial)
1046 : {
1047 6952 : try_partial_mergejoin_path(root,
1048 : joinrel,
1049 : outer_path,
1050 : inner_path,
1051 : pathkeys,
1052 : mergeclauses,
1053 : outersortkeys,
1054 : innersortkeys,
1055 : jointype,
1056 : extra);
1057 49130 : return;
1058 : }
1059 :
1060 : /*
1061 : * If we are forming an outer join at this join, it's nonsensical to use
1062 : * an input path that uses the outer join as part of its parameterization.
1063 : * (This can happen despite our join order restrictions, since those apply
1064 : * to what is in an input relation not what its parameters are.)
1065 : */
1066 1729856 : if (extra->sjinfo->ojrelid != 0 &&
1067 844888 : (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
1068 422444 : bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
1069 12 : return;
1070 :
1071 : /*
1072 : * Check to see if proposed path is still parameterized, and reject if the
1073 : * parameterization wouldn't be sensible.
1074 : */
1075 1307400 : required_outer = calc_non_nestloop_required_outer(outer_path,
1076 : inner_path);
1077 1307400 : if (required_outer &&
1078 44670 : !bms_overlap(required_outer, extra->param_source_rels))
1079 : {
1080 : /* Waste no memory when we reject a path here */
1081 42166 : bms_free(required_outer);
1082 42166 : return;
1083 : }
1084 :
1085 : /*
1086 : * If the given paths are already well enough ordered, we can skip doing
1087 : * an explicit sort.
1088 : *
1089 : * We need to determine the number of presorted keys of the outer path to
1090 : * decide whether explicit incremental sort can be applied when
1091 : * outersortkeys is not NIL. We do not need to do the same for the inner
1092 : * path though, as incremental sort currently does not support
1093 : * mark/restore.
1094 : */
1095 1885634 : if (outersortkeys &&
1096 620400 : pathkeys_count_contained_in(outersortkeys, outer_path->pathkeys,
1097 : &outer_presorted_keys))
1098 16740 : outersortkeys = NIL;
1099 2284882 : if (innersortkeys &&
1100 1019648 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1101 29178 : innersortkeys = NIL;
1102 :
1103 : /*
1104 : * See comments in try_nestloop_path().
1105 : */
1106 1265234 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
1107 : outer_path, inner_path,
1108 : outersortkeys, innersortkeys,
1109 : outer_presorted_keys,
1110 : extra);
1111 :
1112 1265234 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
1113 : workspace.startup_cost, workspace.total_cost,
1114 : pathkeys, required_outer))
1115 : {
1116 318144 : add_path(joinrel, (Path *)
1117 318144 : create_mergejoin_path(root,
1118 : joinrel,
1119 : jointype,
1120 : &workspace,
1121 : extra,
1122 : outer_path,
1123 : inner_path,
1124 : extra->restrictlist,
1125 : pathkeys,
1126 : required_outer,
1127 : mergeclauses,
1128 : outersortkeys,
1129 : innersortkeys,
1130 : outer_presorted_keys));
1131 : }
1132 : else
1133 : {
1134 : /* Waste no memory when we reject a path here */
1135 947090 : bms_free(required_outer);
1136 : }
1137 : }
1138 :
1139 : /*
1140 : * try_partial_mergejoin_path
1141 : * Consider a partial merge join path; if it appears useful, push it into
1142 : * the joinrel's pathlist via add_partial_path().
1143 : */
1144 : static void
1145 20236 : try_partial_mergejoin_path(PlannerInfo *root,
1146 : RelOptInfo *joinrel,
1147 : Path *outer_path,
1148 : Path *inner_path,
1149 : List *pathkeys,
1150 : List *mergeclauses,
1151 : List *outersortkeys,
1152 : List *innersortkeys,
1153 : JoinType jointype,
1154 : JoinPathExtraData *extra)
1155 : {
1156 20236 : int outer_presorted_keys = 0;
1157 : JoinCostWorkspace workspace;
1158 :
1159 : /*
1160 : * See comments in try_partial_hashjoin_path().
1161 : */
1162 : Assert(bms_is_empty(joinrel->lateral_relids));
1163 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
1164 20236 : if (!bms_is_empty(PATH_REQ_OUTER(inner_path)))
1165 10772 : return;
1166 :
1167 : /*
1168 : * If the given paths are already well enough ordered, we can skip doing
1169 : * an explicit sort.
1170 : *
1171 : * We need to determine the number of presorted keys of the outer path to
1172 : * decide whether explicit incremental sort can be applied when
1173 : * outersortkeys is not NIL. We do not need to do the same for the inner
1174 : * path though, as incremental sort currently does not support
1175 : * mark/restore.
1176 : */
1177 33520 : if (outersortkeys &&
1178 13284 : pathkeys_count_contained_in(outersortkeys, outer_path->pathkeys,
1179 : &outer_presorted_keys))
1180 228 : outersortkeys = NIL;
1181 37608 : if (innersortkeys &&
1182 17372 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1183 300 : innersortkeys = NIL;
1184 :
1185 : /*
1186 : * See comments in try_partial_nestloop_path().
1187 : */
1188 20236 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
1189 : outer_path, inner_path,
1190 : outersortkeys, innersortkeys,
1191 : outer_presorted_keys,
1192 : extra);
1193 :
1194 20236 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1195 : workspace.total_cost, pathkeys))
1196 10772 : return;
1197 :
1198 : /* Might be good enough to be worth trying, so let's try it. */
1199 9464 : add_partial_path(joinrel, (Path *)
1200 9464 : create_mergejoin_path(root,
1201 : joinrel,
1202 : jointype,
1203 : &workspace,
1204 : extra,
1205 : outer_path,
1206 : inner_path,
1207 : extra->restrictlist,
1208 : pathkeys,
1209 : NULL,
1210 : mergeclauses,
1211 : outersortkeys,
1212 : innersortkeys,
1213 : outer_presorted_keys));
1214 : }
1215 :
1216 : /*
1217 : * try_hashjoin_path
1218 : * Consider a hash join path; if it appears useful, push it into
1219 : * the joinrel's pathlist via add_path().
1220 : */
1221 : static void
1222 789462 : try_hashjoin_path(PlannerInfo *root,
1223 : RelOptInfo *joinrel,
1224 : Path *outer_path,
1225 : Path *inner_path,
1226 : List *hashclauses,
1227 : JoinType jointype,
1228 : JoinPathExtraData *extra)
1229 : {
1230 : Relids required_outer;
1231 : JoinCostWorkspace workspace;
1232 :
1233 : /*
1234 : * If we are forming an outer join at this join, it's nonsensical to use
1235 : * an input path that uses the outer join as part of its parameterization.
1236 : * (This can happen despite our join order restrictions, since those apply
1237 : * to what is in an input relation not what its parameters are.)
1238 : */
1239 1067184 : if (extra->sjinfo->ojrelid != 0 &&
1240 555414 : (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
1241 277692 : bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
1242 119584 : return;
1243 :
1244 : /*
1245 : * Check to see if proposed path is still parameterized, and reject if the
1246 : * parameterization wouldn't be sensible.
1247 : */
1248 789402 : required_outer = calc_non_nestloop_required_outer(outer_path,
1249 : inner_path);
1250 789402 : if (required_outer &&
1251 133390 : !bms_overlap(required_outer, extra->param_source_rels))
1252 : {
1253 : /* Waste no memory when we reject a path here */
1254 119524 : bms_free(required_outer);
1255 119524 : return;
1256 : }
1257 :
1258 : /*
1259 : * See comments in try_nestloop_path(). Also note that hashjoin paths
1260 : * never have any output pathkeys, per comments in create_hashjoin_path.
1261 : */
1262 669878 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1263 : outer_path, inner_path, extra, false);
1264 :
1265 669878 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
1266 : workspace.startup_cost, workspace.total_cost,
1267 : NIL, required_outer))
1268 : {
1269 284872 : add_path(joinrel, (Path *)
1270 284872 : create_hashjoin_path(root,
1271 : joinrel,
1272 : jointype,
1273 : &workspace,
1274 : extra,
1275 : outer_path,
1276 : inner_path,
1277 : false, /* parallel_hash */
1278 : extra->restrictlist,
1279 : required_outer,
1280 : hashclauses));
1281 : }
1282 : else
1283 : {
1284 : /* Waste no memory when we reject a path here */
1285 385006 : bms_free(required_outer);
1286 : }
1287 : }
1288 :
1289 : /*
1290 : * try_partial_hashjoin_path
1291 : * Consider a partial hashjoin join path; if it appears useful, push it into
1292 : * the joinrel's partial_pathlist via add_partial_path().
1293 : * The outer side is partial. If parallel_hash is true, then the inner path
1294 : * must be partial and will be run in parallel to create one or more shared
1295 : * hash tables; otherwise the inner path must be complete and a copy of it
1296 : * is run in every process to create separate identical private hash tables.
1297 : */
1298 : static void
1299 21958 : try_partial_hashjoin_path(PlannerInfo *root,
1300 : RelOptInfo *joinrel,
1301 : Path *outer_path,
1302 : Path *inner_path,
1303 : List *hashclauses,
1304 : JoinType jointype,
1305 : JoinPathExtraData *extra,
1306 : bool parallel_hash)
1307 : {
1308 : JoinCostWorkspace workspace;
1309 :
1310 : /*
1311 : * If the inner path is parameterized, we can't use a partial hashjoin.
1312 : * Parameterized partial paths are not supported. The caller should
1313 : * already have verified that no lateral rels are required here.
1314 : */
1315 : Assert(bms_is_empty(joinrel->lateral_relids));
1316 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
1317 21958 : if (!bms_is_empty(PATH_REQ_OUTER(inner_path)))
1318 10616 : return;
1319 :
1320 : /*
1321 : * Before creating a path, get a quick lower bound on what it is likely to
1322 : * cost. Bail out right away if it looks terrible.
1323 : */
1324 21958 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1325 : outer_path, inner_path, extra, parallel_hash);
1326 21958 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1327 : workspace.total_cost, NIL))
1328 10616 : return;
1329 :
1330 : /* Might be good enough to be worth trying, so let's try it. */
1331 11342 : add_partial_path(joinrel, (Path *)
1332 11342 : create_hashjoin_path(root,
1333 : joinrel,
1334 : jointype,
1335 : &workspace,
1336 : extra,
1337 : outer_path,
1338 : inner_path,
1339 : parallel_hash,
1340 : extra->restrictlist,
1341 : NULL,
1342 : hashclauses));
1343 : }
1344 :
1345 : /*
1346 : * sort_inner_and_outer
1347 : * Create mergejoin join paths by explicitly sorting both the outer and
1348 : * inner join relations on each available merge ordering.
1349 : *
1350 : * 'joinrel' is the join relation
1351 : * 'outerrel' is the outer join relation
1352 : * 'innerrel' is the inner join relation
1353 : * 'jointype' is the type of join to do
1354 : * 'extra' contains additional input values
1355 : */
1356 : static void
1357 674218 : sort_inner_and_outer(PlannerInfo *root,
1358 : RelOptInfo *joinrel,
1359 : RelOptInfo *outerrel,
1360 : RelOptInfo *innerrel,
1361 : JoinType jointype,
1362 : JoinPathExtraData *extra)
1363 : {
1364 674218 : JoinType save_jointype = jointype;
1365 : Path *outer_path;
1366 : Path *inner_path;
1367 674218 : Path *cheapest_partial_outer = NULL;
1368 674218 : Path *cheapest_safe_inner = NULL;
1369 : List *all_pathkeys;
1370 : ListCell *l;
1371 :
1372 : /* Nothing to do if there are no available mergejoin clauses */
1373 674218 : if (extra->mergeclause_list == NIL)
1374 118344 : return;
1375 :
1376 : /*
1377 : * We only consider the cheapest-total-cost input paths, since we are
1378 : * assuming here that a sort is required. We will consider
1379 : * cheapest-startup-cost input paths later, and only if they don't need a
1380 : * sort.
1381 : *
1382 : * This function intentionally does not consider parameterized input
1383 : * paths, except when the cheapest-total is parameterized. If we did so,
1384 : * we'd have a combinatorial explosion of mergejoin paths of dubious
1385 : * value. This interacts with decisions elsewhere that also discriminate
1386 : * against mergejoins with parameterized inputs; see comments in
1387 : * src/backend/optimizer/README.
1388 : */
1389 555874 : outer_path = outerrel->cheapest_total_path;
1390 555874 : inner_path = innerrel->cheapest_total_path;
1391 :
1392 : /*
1393 : * If either cheapest-total path is parameterized by the other rel, we
1394 : * can't use a mergejoin. (There's no use looking for alternative input
1395 : * paths, since these should already be the least-parameterized available
1396 : * paths.)
1397 : */
1398 555874 : if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
1399 555018 : PATH_PARAM_BY_REL(inner_path, outerrel))
1400 1736 : return;
1401 :
1402 : /*
1403 : * If unique-ification is requested, do it and then handle as a plain
1404 : * inner join.
1405 : */
1406 554138 : if (jointype == JOIN_UNIQUE_OUTER)
1407 : {
1408 5740 : outer_path = (Path *) create_unique_path(root, outerrel,
1409 : outer_path, extra->sjinfo);
1410 : Assert(outer_path);
1411 5740 : jointype = JOIN_INNER;
1412 : }
1413 548398 : else if (jointype == JOIN_UNIQUE_INNER)
1414 : {
1415 5740 : inner_path = (Path *) create_unique_path(root, innerrel,
1416 : inner_path, extra->sjinfo);
1417 : Assert(inner_path);
1418 5740 : jointype = JOIN_INNER;
1419 : }
1420 :
1421 : /*
1422 : * If the joinrel is parallel-safe, we may be able to consider a partial
1423 : * merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the
1424 : * outer path will be partial, and therefore we won't be able to properly
1425 : * guarantee uniqueness. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT
1426 : * and JOIN_RIGHT_ANTI, because they can produce false null extended rows.
1427 : * Also, the resulting path must not be parameterized.
1428 : */
1429 554138 : if (joinrel->consider_parallel &&
1430 491038 : save_jointype != JOIN_UNIQUE_OUTER &&
1431 488670 : save_jointype != JOIN_FULL &&
1432 400054 : save_jointype != JOIN_RIGHT &&
1433 398398 : save_jointype != JOIN_RIGHT_ANTI &&
1434 398398 : outerrel->partial_pathlist != NIL &&
1435 9700 : bms_is_empty(joinrel->lateral_relids))
1436 : {
1437 9700 : cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
1438 :
1439 9700 : if (inner_path->parallel_safe)
1440 9604 : cheapest_safe_inner = inner_path;
1441 96 : else if (save_jointype != JOIN_UNIQUE_INNER)
1442 : cheapest_safe_inner =
1443 90 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1444 : }
1445 :
1446 : /*
1447 : * Each possible ordering of the available mergejoin clauses will generate
1448 : * a differently-sorted result path at essentially the same cost. We have
1449 : * no basis for choosing one over another at this level of joining, but
1450 : * some sort orders may be more useful than others for higher-level
1451 : * mergejoins, so it's worth considering multiple orderings.
1452 : *
1453 : * Actually, it's not quite true that every mergeclause ordering will
1454 : * generate a different path order, because some of the clauses may be
1455 : * partially redundant (refer to the same EquivalenceClasses). Therefore,
1456 : * what we do is convert the mergeclause list to a list of canonical
1457 : * pathkeys, and then consider different orderings of the pathkeys.
1458 : *
1459 : * Generating a path for *every* permutation of the pathkeys doesn't seem
1460 : * like a winning strategy; the cost in planning time is too high. For
1461 : * now, we generate one path for each pathkey, listing that pathkey first
1462 : * and the rest in random order. This should allow at least a one-clause
1463 : * mergejoin without re-sorting against any other possible mergejoin
1464 : * partner path. But if we've not guessed the right ordering of secondary
1465 : * keys, we may end up evaluating clauses as qpquals when they could have
1466 : * been done as mergeclauses. (In practice, it's rare that there's more
1467 : * than two or three mergeclauses, so expending a huge amount of thought
1468 : * on that is probably not worth it.)
1469 : *
1470 : * The pathkey order returned by select_outer_pathkeys_for_merge() has
1471 : * some heuristics behind it (see that function), so be sure to try it
1472 : * exactly as-is as well as making variants.
1473 : */
1474 554138 : all_pathkeys = select_outer_pathkeys_for_merge(root,
1475 : extra->mergeclause_list,
1476 : joinrel);
1477 :
1478 1174538 : foreach(l, all_pathkeys)
1479 : {
1480 620400 : PathKey *front_pathkey = (PathKey *) lfirst(l);
1481 : List *cur_mergeclauses;
1482 : List *outerkeys;
1483 : List *innerkeys;
1484 : List *merge_pathkeys;
1485 :
1486 : /* Make a pathkey list with this guy first */
1487 620400 : if (l != list_head(all_pathkeys))
1488 66262 : outerkeys = lcons(front_pathkey,
1489 : list_delete_nth_cell(list_copy(all_pathkeys),
1490 : foreach_current_index(l)));
1491 : else
1492 554138 : outerkeys = all_pathkeys; /* no work at first one... */
1493 :
1494 : /* Sort the mergeclauses into the corresponding ordering */
1495 : cur_mergeclauses =
1496 620400 : find_mergeclauses_for_outer_pathkeys(root,
1497 : outerkeys,
1498 : extra->mergeclause_list);
1499 :
1500 : /* Should have used them all... */
1501 : Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
1502 :
1503 : /* Build sort pathkeys for the inner side */
1504 620400 : innerkeys = make_inner_pathkeys_for_merge(root,
1505 : cur_mergeclauses,
1506 : outerkeys);
1507 :
1508 : /* Build pathkeys representing output sort order */
1509 620400 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1510 : outerkeys);
1511 :
1512 : /*
1513 : * And now we can make the path.
1514 : *
1515 : * Note: it's possible that the cheapest paths will already be sorted
1516 : * properly. try_mergejoin_path will detect that case and suppress an
1517 : * explicit sort step, so we needn't do so here.
1518 : */
1519 620400 : try_mergejoin_path(root,
1520 : joinrel,
1521 : outer_path,
1522 : inner_path,
1523 : merge_pathkeys,
1524 : cur_mergeclauses,
1525 : outerkeys,
1526 : innerkeys,
1527 : jointype,
1528 : extra,
1529 : false);
1530 :
1531 : /*
1532 : * If we have partial outer and parallel safe inner path then try
1533 : * partial mergejoin path.
1534 : */
1535 620400 : if (cheapest_partial_outer && cheapest_safe_inner)
1536 13284 : try_partial_mergejoin_path(root,
1537 : joinrel,
1538 : cheapest_partial_outer,
1539 : cheapest_safe_inner,
1540 : merge_pathkeys,
1541 : cur_mergeclauses,
1542 : outerkeys,
1543 : innerkeys,
1544 : jointype,
1545 : extra);
1546 : }
1547 : }
1548 :
1549 : /*
1550 : * generate_mergejoin_paths
1551 : * Creates possible mergejoin paths for input outerpath.
1552 : *
1553 : * We generate mergejoins if mergejoin clauses are available. We have
1554 : * two ways to generate the inner path for a mergejoin: sort the cheapest
1555 : * inner path, or use an inner path that is already suitably ordered for the
1556 : * merge. If we have several mergeclauses, it could be that there is no inner
1557 : * path (or only a very expensive one) for the full list of mergeclauses, but
1558 : * better paths exist if we truncate the mergeclause list (thereby discarding
1559 : * some sort key requirements). So, we consider truncations of the
1560 : * mergeclause list as well as the full list. (Ideally we'd consider all
1561 : * subsets of the mergeclause list, but that seems way too expensive.)
1562 : */
1563 : static void
1564 1311878 : generate_mergejoin_paths(PlannerInfo *root,
1565 : RelOptInfo *joinrel,
1566 : RelOptInfo *innerrel,
1567 : Path *outerpath,
1568 : JoinType jointype,
1569 : JoinPathExtraData *extra,
1570 : bool useallclauses,
1571 : Path *inner_cheapest_total,
1572 : List *merge_pathkeys,
1573 : bool is_partial)
1574 : {
1575 : List *mergeclauses;
1576 : List *innersortkeys;
1577 : List *trialsortkeys;
1578 : Path *cheapest_startup_inner;
1579 : Path *cheapest_total_inner;
1580 1311878 : JoinType save_jointype = jointype;
1581 : int num_sortkeys;
1582 : int sortkeycnt;
1583 :
1584 1311878 : if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
1585 15532 : jointype = JOIN_INNER;
1586 :
1587 : /* Look for useful mergeclauses (if any) */
1588 : mergeclauses =
1589 1311878 : find_mergeclauses_for_outer_pathkeys(root,
1590 : outerpath->pathkeys,
1591 : extra->mergeclause_list);
1592 :
1593 : /*
1594 : * Done with this outer path if no chance for a mergejoin.
1595 : *
1596 : * Special corner case: for "x FULL JOIN y ON true", there will be no join
1597 : * clauses at all. Ordinarily we'd generate a clauseless nestloop path,
1598 : * but since mergejoin is our only join type that supports FULL JOIN
1599 : * without any join clauses, it's necessary to generate a clauseless
1600 : * mergejoin path instead.
1601 : */
1602 1311878 : if (mergeclauses == NIL)
1603 : {
1604 871878 : if (jointype == JOIN_FULL)
1605 : /* okay to try for mergejoin */ ;
1606 : else
1607 868494 : return;
1608 : }
1609 538680 : if (useallclauses &&
1610 95296 : list_length(mergeclauses) != list_length(extra->mergeclause_list))
1611 16064 : return;
1612 :
1613 : /* Compute the required ordering of the inner path */
1614 427320 : innersortkeys = make_inner_pathkeys_for_merge(root,
1615 : mergeclauses,
1616 : outerpath->pathkeys);
1617 :
1618 : /*
1619 : * Generate a mergejoin on the basis of sorting the cheapest inner. Since
1620 : * a sort will be needed, only cheapest total cost matters. (But
1621 : * try_mergejoin_path will do the right thing if inner_cheapest_total is
1622 : * already correctly sorted.)
1623 : */
1624 427320 : try_mergejoin_path(root,
1625 : joinrel,
1626 : outerpath,
1627 : inner_cheapest_total,
1628 : merge_pathkeys,
1629 : mergeclauses,
1630 : NIL,
1631 : innersortkeys,
1632 : jointype,
1633 : extra,
1634 : is_partial);
1635 :
1636 : /* Can't do anything else if inner path needs to be unique'd */
1637 427320 : if (save_jointype == JOIN_UNIQUE_INNER)
1638 2140 : return;
1639 :
1640 : /*
1641 : * Look for presorted inner paths that satisfy the innersortkey list ---
1642 : * or any truncation thereof, if we are allowed to build a mergejoin using
1643 : * a subset of the merge clauses. Here, we consider both cheap startup
1644 : * cost and cheap total cost.
1645 : *
1646 : * Currently we do not consider parameterized inner paths here. This
1647 : * interacts with decisions elsewhere that also discriminate against
1648 : * mergejoins with parameterized inputs; see comments in
1649 : * src/backend/optimizer/README.
1650 : *
1651 : * As we shorten the sortkey list, we should consider only paths that are
1652 : * strictly cheaper than (in particular, not the same as) any path found
1653 : * in an earlier iteration. Otherwise we'd be intentionally using fewer
1654 : * merge keys than a given path allows (treating the rest as plain
1655 : * joinquals), which is unlikely to be a good idea. Also, eliminating
1656 : * paths here on the basis of compare_path_costs is a lot cheaper than
1657 : * building the mergejoin path only to throw it away.
1658 : *
1659 : * If inner_cheapest_total is well enough sorted to have not required a
1660 : * sort in the path made above, we shouldn't make a duplicate path with
1661 : * it, either. We handle that case with the same logic that handles the
1662 : * previous consideration, by initializing the variables that track
1663 : * cheapest-so-far properly. Note that we do NOT reject
1664 : * inner_cheapest_total if we find it matches some shorter set of
1665 : * pathkeys. That case corresponds to using fewer mergekeys to avoid
1666 : * sorting inner_cheapest_total, whereas we did sort it above, so the
1667 : * plans being considered are different.
1668 : */
1669 425180 : if (pathkeys_contained_in(innersortkeys,
1670 : inner_cheapest_total->pathkeys))
1671 : {
1672 : /* inner_cheapest_total didn't require a sort */
1673 14206 : cheapest_startup_inner = inner_cheapest_total;
1674 14206 : cheapest_total_inner = inner_cheapest_total;
1675 : }
1676 : else
1677 : {
1678 : /* it did require a sort, at least for the full set of keys */
1679 410974 : cheapest_startup_inner = NULL;
1680 410974 : cheapest_total_inner = NULL;
1681 : }
1682 425180 : num_sortkeys = list_length(innersortkeys);
1683 425180 : if (num_sortkeys > 1 && !useallclauses)
1684 15382 : trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
1685 : else
1686 409798 : trialsortkeys = innersortkeys; /* won't really truncate */
1687 :
1688 786646 : for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
1689 : {
1690 : Path *innerpath;
1691 440554 : List *newclauses = NIL;
1692 :
1693 : /*
1694 : * Look for an inner path ordered well enough for the first
1695 : * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
1696 : * destructively, which is why we made a copy...
1697 : */
1698 440554 : trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
1699 440554 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1700 : trialsortkeys,
1701 : NULL,
1702 : TOTAL_COST,
1703 : is_partial);
1704 440554 : if (innerpath != NULL &&
1705 24696 : (cheapest_total_inner == NULL ||
1706 24696 : compare_path_costs(innerpath, cheapest_total_inner,
1707 : TOTAL_COST) < 0))
1708 : {
1709 : /* Found a cheap (or even-cheaper) sorted path */
1710 : /* Select the right mergeclauses, if we didn't already */
1711 264238 : if (sortkeycnt < num_sortkeys)
1712 : {
1713 : newclauses =
1714 5594 : trim_mergeclauses_for_inner_pathkeys(root,
1715 : mergeclauses,
1716 : trialsortkeys);
1717 : Assert(newclauses != NIL);
1718 : }
1719 : else
1720 258644 : newclauses = mergeclauses;
1721 264238 : try_mergejoin_path(root,
1722 : joinrel,
1723 : outerpath,
1724 : innerpath,
1725 : merge_pathkeys,
1726 : newclauses,
1727 : NIL,
1728 : NIL,
1729 : jointype,
1730 : extra,
1731 : is_partial);
1732 264238 : cheapest_total_inner = innerpath;
1733 : }
1734 : /* Same on the basis of cheapest startup cost ... */
1735 440554 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1736 : trialsortkeys,
1737 : NULL,
1738 : STARTUP_COST,
1739 : is_partial);
1740 440554 : if (innerpath != NULL &&
1741 24696 : (cheapest_startup_inner == NULL ||
1742 24696 : compare_path_costs(innerpath, cheapest_startup_inner,
1743 : STARTUP_COST) < 0))
1744 : {
1745 : /* Found a cheap (or even-cheaper) sorted path */
1746 261704 : if (innerpath != cheapest_total_inner)
1747 : {
1748 : /*
1749 : * Avoid rebuilding clause list if we already made one; saves
1750 : * memory in big join trees...
1751 : */
1752 2406 : if (newclauses == NIL)
1753 : {
1754 68 : if (sortkeycnt < num_sortkeys)
1755 : {
1756 : newclauses =
1757 0 : trim_mergeclauses_for_inner_pathkeys(root,
1758 : mergeclauses,
1759 : trialsortkeys);
1760 : Assert(newclauses != NIL);
1761 : }
1762 : else
1763 68 : newclauses = mergeclauses;
1764 : }
1765 2406 : try_mergejoin_path(root,
1766 : joinrel,
1767 : outerpath,
1768 : innerpath,
1769 : merge_pathkeys,
1770 : newclauses,
1771 : NIL,
1772 : NIL,
1773 : jointype,
1774 : extra,
1775 : is_partial);
1776 : }
1777 261704 : cheapest_startup_inner = innerpath;
1778 : }
1779 :
1780 : /*
1781 : * Don't consider truncated sortkeys if we need all clauses.
1782 : */
1783 440554 : if (useallclauses)
1784 79088 : break;
1785 : }
1786 : }
1787 :
1788 : /*
1789 : * match_unsorted_outer
1790 : * Creates possible join paths for processing a single join relation
1791 : * 'joinrel' by employing either iterative substitution or
1792 : * mergejoining on each of its possible outer paths (considering
1793 : * only outer paths that are already ordered well enough for merging).
1794 : *
1795 : * We always generate a nestloop path for each available outer path.
1796 : * In fact we may generate as many as five: one on the cheapest-total-cost
1797 : * inner path, one on the same with materialization, one on the
1798 : * cheapest-startup-cost inner path (if different), one on the
1799 : * cheapest-total inner-indexscan path (if any), and one on the
1800 : * cheapest-startup inner-indexscan path (if different).
1801 : *
1802 : * We also consider mergejoins if mergejoin clauses are available. See
1803 : * detailed comments in generate_mergejoin_paths.
1804 : *
1805 : * 'joinrel' is the join relation
1806 : * 'outerrel' is the outer join relation
1807 : * 'innerrel' is the inner join relation
1808 : * 'jointype' is the type of join to do
1809 : * 'extra' contains additional input values
1810 : */
1811 : static void
1812 674218 : match_unsorted_outer(PlannerInfo *root,
1813 : RelOptInfo *joinrel,
1814 : RelOptInfo *outerrel,
1815 : RelOptInfo *innerrel,
1816 : JoinType jointype,
1817 : JoinPathExtraData *extra)
1818 : {
1819 674218 : JoinType save_jointype = jointype;
1820 : bool nestjoinOK;
1821 : bool useallclauses;
1822 674218 : Path *inner_cheapest_total = innerrel->cheapest_total_path;
1823 674218 : Path *matpath = NULL;
1824 : ListCell *lc1;
1825 :
1826 : /*
1827 : * For now we do not support RIGHT_SEMI join in mergejoin or nestloop
1828 : * join.
1829 : */
1830 674218 : if (jointype == JOIN_RIGHT_SEMI)
1831 102 : return;
1832 :
1833 : /*
1834 : * Nestloop only supports inner, left, semi, and anti joins. Also, if we
1835 : * are doing a right, right-anti or full mergejoin, we must use *all* the
1836 : * mergeclauses as join clauses, else we will not have a valid plan.
1837 : * (Although these two flags are currently inverses, keep them separate
1838 : * for clarity and possible future changes.)
1839 : */
1840 674116 : switch (jointype)
1841 : {
1842 556094 : case JOIN_INNER:
1843 : case JOIN_LEFT:
1844 : case JOIN_SEMI:
1845 : case JOIN_ANTI:
1846 556094 : nestjoinOK = true;
1847 556094 : useallclauses = false;
1848 556094 : break;
1849 102978 : case JOIN_RIGHT:
1850 : case JOIN_RIGHT_ANTI:
1851 : case JOIN_FULL:
1852 102978 : nestjoinOK = false;
1853 102978 : useallclauses = true;
1854 102978 : break;
1855 15044 : case JOIN_UNIQUE_OUTER:
1856 : case JOIN_UNIQUE_INNER:
1857 15044 : jointype = JOIN_INNER;
1858 15044 : nestjoinOK = true;
1859 15044 : useallclauses = false;
1860 15044 : break;
1861 0 : default:
1862 0 : elog(ERROR, "unrecognized join type: %d",
1863 : (int) jointype);
1864 : nestjoinOK = false; /* keep compiler quiet */
1865 : useallclauses = false;
1866 : break;
1867 : }
1868 :
1869 : /*
1870 : * If inner_cheapest_total is parameterized by the outer rel, ignore it;
1871 : * we will consider it below as a member of cheapest_parameterized_paths,
1872 : * but the other possibilities considered in this routine aren't usable.
1873 : */
1874 674116 : if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
1875 11968 : inner_cheapest_total = NULL;
1876 :
1877 : /*
1878 : * If we need to unique-ify the inner path, we will consider only the
1879 : * cheapest-total inner.
1880 : */
1881 674116 : if (save_jointype == JOIN_UNIQUE_INNER)
1882 : {
1883 : /* No way to do this with an inner path parameterized by outer rel */
1884 7522 : if (inner_cheapest_total == NULL)
1885 24 : return;
1886 : inner_cheapest_total = (Path *)
1887 7498 : create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
1888 : Assert(inner_cheapest_total);
1889 : }
1890 666594 : else if (nestjoinOK)
1891 : {
1892 : /*
1893 : * Consider materializing the cheapest inner path, unless
1894 : * enable_material is off or the path in question materializes its
1895 : * output anyway.
1896 : */
1897 563616 : if (enable_material && inner_cheapest_total != NULL &&
1898 551202 : !ExecMaterializesOutput(inner_cheapest_total->pathtype))
1899 : matpath = (Path *)
1900 522138 : create_material_path(innerrel, inner_cheapest_total);
1901 : }
1902 :
1903 2252986 : foreach(lc1, outerrel->pathlist)
1904 : {
1905 1578894 : Path *outerpath = (Path *) lfirst(lc1);
1906 : List *merge_pathkeys;
1907 :
1908 : /*
1909 : * We cannot use an outer path that is parameterized by the inner rel.
1910 : */
1911 1578894 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
1912 259982 : continue;
1913 :
1914 : /*
1915 : * If we need to unique-ify the outer path, it's pointless to consider
1916 : * any but the cheapest outer. (XXX we don't consider parameterized
1917 : * outers, nor inners, for unique-ified cases. Should we?)
1918 : */
1919 1318912 : if (save_jointype == JOIN_UNIQUE_OUTER)
1920 : {
1921 8808 : if (outerpath != outerrel->cheapest_total_path)
1922 1310 : continue;
1923 7498 : outerpath = (Path *) create_unique_path(root, outerrel,
1924 : outerpath, extra->sjinfo);
1925 : Assert(outerpath);
1926 : }
1927 :
1928 : /*
1929 : * The result will have this sort order (even if it is implemented as
1930 : * a nestloop, and even if some of the mergeclauses are implemented by
1931 : * qpquals rather than as true mergeclauses):
1932 : */
1933 1317602 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1934 : outerpath->pathkeys);
1935 :
1936 1317602 : if (save_jointype == JOIN_UNIQUE_INNER)
1937 : {
1938 : /*
1939 : * Consider nestloop join, but only with the unique-ified cheapest
1940 : * inner path
1941 : */
1942 14548 : try_nestloop_path(root,
1943 : joinrel,
1944 : outerpath,
1945 : inner_cheapest_total,
1946 : merge_pathkeys,
1947 : jointype,
1948 : extra);
1949 : }
1950 1303054 : else if (nestjoinOK)
1951 : {
1952 : /*
1953 : * Consider nestloop joins using this outer path and various
1954 : * available paths for the inner relation. We consider the
1955 : * cheapest-total paths for each available parameterization of the
1956 : * inner relation, including the unparameterized case.
1957 : */
1958 : ListCell *lc2;
1959 :
1960 2922460 : foreach(lc2, innerrel->cheapest_parameterized_paths)
1961 : {
1962 1815982 : Path *innerpath = (Path *) lfirst(lc2);
1963 : Path *mpath;
1964 :
1965 1815982 : try_nestloop_path(root,
1966 : joinrel,
1967 : outerpath,
1968 : innerpath,
1969 : merge_pathkeys,
1970 : jointype,
1971 : extra);
1972 :
1973 : /*
1974 : * Try generating a memoize path and see if that makes the
1975 : * nested loop any cheaper.
1976 : */
1977 1815982 : mpath = get_memoize_path(root, innerrel, outerrel,
1978 : innerpath, outerpath, jointype,
1979 : extra);
1980 1815982 : if (mpath != NULL)
1981 311988 : try_nestloop_path(root,
1982 : joinrel,
1983 : outerpath,
1984 : mpath,
1985 : merge_pathkeys,
1986 : jointype,
1987 : extra);
1988 : }
1989 :
1990 : /* Also consider materialized form of the cheapest inner path */
1991 1106478 : if (matpath != NULL)
1992 1033858 : try_nestloop_path(root,
1993 : joinrel,
1994 : outerpath,
1995 : matpath,
1996 : merge_pathkeys,
1997 : jointype,
1998 : extra);
1999 : }
2000 :
2001 : /* Can't do anything else if outer path needs to be unique'd */
2002 1317602 : if (save_jointype == JOIN_UNIQUE_OUTER)
2003 7498 : continue;
2004 :
2005 : /* Can't do anything else if inner rel is parameterized by outer */
2006 1310104 : if (inner_cheapest_total == NULL)
2007 12984 : continue;
2008 :
2009 : /* Generate merge join paths */
2010 1297120 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
2011 : save_jointype, extra, useallclauses,
2012 : inner_cheapest_total, merge_pathkeys,
2013 : false);
2014 : }
2015 :
2016 : /*
2017 : * Consider partial nestloop and mergejoin plan if outerrel has any
2018 : * partial path and the joinrel is parallel-safe. However, we can't
2019 : * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
2020 : * therefore we won't be able to properly guarantee uniqueness. Nor can
2021 : * we handle joins needing lateral rels, since partial paths must not be
2022 : * parameterized. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT and
2023 : * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
2024 : */
2025 674092 : if (joinrel->consider_parallel &&
2026 567418 : save_jointype != JOIN_UNIQUE_OUTER &&
2027 564930 : save_jointype != JOIN_FULL &&
2028 474668 : save_jointype != JOIN_RIGHT &&
2029 473006 : save_jointype != JOIN_RIGHT_ANTI &&
2030 473006 : outerrel->partial_pathlist != NIL &&
2031 11362 : bms_is_empty(joinrel->lateral_relids))
2032 : {
2033 11362 : if (nestjoinOK)
2034 11362 : consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
2035 : save_jointype, extra);
2036 :
2037 : /*
2038 : * If inner_cheapest_total is NULL or non parallel-safe then find the
2039 : * cheapest total parallel safe path. If doing JOIN_UNIQUE_INNER, we
2040 : * can't use any alternative inner path.
2041 : */
2042 11362 : if (inner_cheapest_total == NULL ||
2043 10954 : !inner_cheapest_total->parallel_safe)
2044 : {
2045 780 : if (save_jointype == JOIN_UNIQUE_INNER)
2046 6 : return;
2047 :
2048 774 : inner_cheapest_total = get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
2049 : }
2050 :
2051 11356 : if (inner_cheapest_total)
2052 10942 : consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
2053 : save_jointype, extra,
2054 : inner_cheapest_total);
2055 : }
2056 : }
2057 :
2058 : /*
2059 : * consider_parallel_mergejoin
2060 : * Try to build partial paths for a joinrel by joining a partial path
2061 : * for the outer relation to a complete path for the inner relation.
2062 : *
2063 : * 'joinrel' is the join relation
2064 : * 'outerrel' is the outer join relation
2065 : * 'innerrel' is the inner join relation
2066 : * 'jointype' is the type of join to do
2067 : * 'extra' contains additional input values
2068 : * 'inner_cheapest_total' cheapest total path for innerrel
2069 : */
2070 : static void
2071 10942 : consider_parallel_mergejoin(PlannerInfo *root,
2072 : RelOptInfo *joinrel,
2073 : RelOptInfo *outerrel,
2074 : RelOptInfo *innerrel,
2075 : JoinType jointype,
2076 : JoinPathExtraData *extra,
2077 : Path *inner_cheapest_total)
2078 : {
2079 : ListCell *lc1;
2080 :
2081 : /* generate merge join path for each partial outer path */
2082 25700 : foreach(lc1, outerrel->partial_pathlist)
2083 : {
2084 14758 : Path *outerpath = (Path *) lfirst(lc1);
2085 : List *merge_pathkeys;
2086 :
2087 : /*
2088 : * Figure out what useful ordering any paths we create will have.
2089 : */
2090 14758 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
2091 : outerpath->pathkeys);
2092 :
2093 14758 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
2094 : extra, false, inner_cheapest_total,
2095 : merge_pathkeys, true);
2096 : }
2097 10942 : }
2098 :
2099 : /*
2100 : * consider_parallel_nestloop
2101 : * Try to build partial paths for a joinrel by joining a partial path for the
2102 : * outer relation to a complete path for the inner relation.
2103 : *
2104 : * 'joinrel' is the join relation
2105 : * 'outerrel' is the outer join relation
2106 : * 'innerrel' is the inner join relation
2107 : * 'jointype' is the type of join to do
2108 : * 'extra' contains additional input values
2109 : */
2110 : static void
2111 11362 : consider_parallel_nestloop(PlannerInfo *root,
2112 : RelOptInfo *joinrel,
2113 : RelOptInfo *outerrel,
2114 : RelOptInfo *innerrel,
2115 : JoinType jointype,
2116 : JoinPathExtraData *extra)
2117 : {
2118 11362 : JoinType save_jointype = jointype;
2119 11362 : Path *inner_cheapest_total = innerrel->cheapest_total_path;
2120 11362 : Path *matpath = NULL;
2121 : ListCell *lc1;
2122 :
2123 11362 : if (jointype == JOIN_UNIQUE_INNER)
2124 642 : jointype = JOIN_INNER;
2125 :
2126 : /*
2127 : * Consider materializing the cheapest inner path, unless: 1) we're doing
2128 : * JOIN_UNIQUE_INNER, because in this case we have to unique-ify the
2129 : * cheapest inner path, 2) enable_material is off, 3) the cheapest inner
2130 : * path is not parallel-safe, 4) the cheapest inner path is parameterized
2131 : * by the outer rel, or 5) the cheapest inner path materializes its output
2132 : * anyway.
2133 : */
2134 11362 : if (save_jointype != JOIN_UNIQUE_INNER &&
2135 10512 : enable_material && inner_cheapest_total->parallel_safe &&
2136 10320 : !PATH_PARAM_BY_REL(inner_cheapest_total, outerrel) &&
2137 9930 : !ExecMaterializesOutput(inner_cheapest_total->pathtype))
2138 : {
2139 : matpath = (Path *)
2140 9834 : create_material_path(innerrel, inner_cheapest_total);
2141 : Assert(matpath->parallel_safe);
2142 : }
2143 :
2144 26678 : foreach(lc1, outerrel->partial_pathlist)
2145 : {
2146 15316 : Path *outerpath = (Path *) lfirst(lc1);
2147 : List *pathkeys;
2148 : ListCell *lc2;
2149 :
2150 : /* Figure out what useful ordering any paths we create will have. */
2151 15316 : pathkeys = build_join_pathkeys(root, joinrel, jointype,
2152 : outerpath->pathkeys);
2153 :
2154 : /*
2155 : * Try the cheapest parameterized paths; only those which will produce
2156 : * an unparameterized path when joined to this outerrel will survive
2157 : * try_partial_nestloop_path. The cheapest unparameterized path is
2158 : * also in this list.
2159 : */
2160 40444 : foreach(lc2, innerrel->cheapest_parameterized_paths)
2161 : {
2162 25128 : Path *innerpath = (Path *) lfirst(lc2);
2163 : Path *mpath;
2164 :
2165 : /* Can't join to an inner path that is not parallel-safe */
2166 25128 : if (!innerpath->parallel_safe)
2167 426 : continue;
2168 :
2169 : /*
2170 : * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
2171 : * cheapest_total_path, and we have to unique-ify it. (We might
2172 : * be able to relax this to allow other safe, unparameterized
2173 : * inner paths, but right now create_unique_path is not on board
2174 : * with that.)
2175 : */
2176 24702 : if (save_jointype == JOIN_UNIQUE_INNER)
2177 : {
2178 2130 : if (innerpath != innerrel->cheapest_total_path)
2179 1146 : continue;
2180 984 : innerpath = (Path *) create_unique_path(root, innerrel,
2181 : innerpath,
2182 : extra->sjinfo);
2183 : Assert(innerpath);
2184 : }
2185 :
2186 23556 : try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
2187 : pathkeys, jointype, extra);
2188 :
2189 : /*
2190 : * Try generating a memoize path and see if that makes the nested
2191 : * loop any cheaper.
2192 : */
2193 23556 : mpath = get_memoize_path(root, innerrel, outerrel,
2194 : innerpath, outerpath, jointype,
2195 : extra);
2196 23556 : if (mpath != NULL)
2197 5856 : try_partial_nestloop_path(root, joinrel, outerpath, mpath,
2198 : pathkeys, jointype, extra);
2199 : }
2200 :
2201 : /* Also consider materialized form of the cheapest inner path */
2202 15316 : if (matpath != NULL)
2203 13278 : try_partial_nestloop_path(root, joinrel, outerpath, matpath,
2204 : pathkeys, jointype, extra);
2205 : }
2206 11362 : }
2207 :
2208 : /*
2209 : * hash_inner_and_outer
2210 : * Create hashjoin join paths by explicitly hashing both the outer and
2211 : * inner keys of each available hash clause.
2212 : *
2213 : * 'joinrel' is the join relation
2214 : * 'outerrel' is the outer join relation
2215 : * 'innerrel' is the inner join relation
2216 : * 'jointype' is the type of join to do
2217 : * 'extra' contains additional input values
2218 : */
2219 : static void
2220 686044 : hash_inner_and_outer(PlannerInfo *root,
2221 : RelOptInfo *joinrel,
2222 : RelOptInfo *outerrel,
2223 : RelOptInfo *innerrel,
2224 : JoinType jointype,
2225 : JoinPathExtraData *extra)
2226 : {
2227 686044 : JoinType save_jointype = jointype;
2228 686044 : bool isouterjoin = IS_OUTER_JOIN(jointype);
2229 : List *hashclauses;
2230 : ListCell *l;
2231 :
2232 : /*
2233 : * We need to build only one hashclauses list for any given pair of outer
2234 : * and inner relations; all of the hashable clauses will be used as keys.
2235 : *
2236 : * Scan the join's restrictinfo list to find hashjoinable clauses that are
2237 : * usable with this pair of sub-relations.
2238 : */
2239 686044 : hashclauses = NIL;
2240 1439760 : foreach(l, extra->restrictlist)
2241 : {
2242 753716 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2243 :
2244 : /*
2245 : * If processing an outer join, only use its own join clauses for
2246 : * hashing. For inner joins we need not be so picky.
2247 : */
2248 753716 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
2249 11532 : continue;
2250 :
2251 742184 : if (!restrictinfo->can_join ||
2252 658256 : restrictinfo->hashjoinoperator == InvalidOid)
2253 106216 : continue; /* not hashjoinable */
2254 :
2255 : /*
2256 : * Check if clause has the form "outer op inner" or "inner op outer".
2257 : */
2258 635968 : if (!clause_sides_match_join(restrictinfo, outerrel->relids,
2259 : innerrel->relids))
2260 216 : continue; /* no good for these input relations */
2261 :
2262 : /*
2263 : * If clause has the form "inner op outer", check if its operator has
2264 : * valid commutator. This is necessary because hashclauses in this
2265 : * form will get commuted in createplan.c to put the outer var on the
2266 : * left (see get_switched_clauses). This probably shouldn't ever
2267 : * fail, since hashable operators ought to have commutators, but be
2268 : * paranoid.
2269 : *
2270 : * The clause being hashjoinable indicates that it's an OpExpr.
2271 : */
2272 953628 : if (!restrictinfo->outer_is_left &&
2273 317876 : !OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
2274 6 : continue;
2275 :
2276 635746 : hashclauses = lappend(hashclauses, restrictinfo);
2277 : }
2278 :
2279 : /* If we found any usable hashclauses, make paths */
2280 686044 : if (hashclauses)
2281 : {
2282 : /*
2283 : * We consider both the cheapest-total-cost and cheapest-startup-cost
2284 : * outer paths. There's no need to consider any but the
2285 : * cheapest-total-cost inner path, however.
2286 : */
2287 568666 : Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
2288 568666 : Path *cheapest_total_outer = outerrel->cheapest_total_path;
2289 568666 : Path *cheapest_total_inner = innerrel->cheapest_total_path;
2290 :
2291 : /*
2292 : * If either cheapest-total path is parameterized by the other rel, we
2293 : * can't use a hashjoin. (There's no use looking for alternative
2294 : * input paths, since these should already be the least-parameterized
2295 : * available paths.)
2296 : */
2297 568666 : if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
2298 567810 : PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
2299 1712 : return;
2300 :
2301 : /* Unique-ify if need be; we ignore parameterized possibilities */
2302 566954 : if (jointype == JOIN_UNIQUE_OUTER)
2303 : {
2304 : cheapest_total_outer = (Path *)
2305 5638 : create_unique_path(root, outerrel,
2306 : cheapest_total_outer, extra->sjinfo);
2307 : Assert(cheapest_total_outer);
2308 5638 : jointype = JOIN_INNER;
2309 5638 : try_hashjoin_path(root,
2310 : joinrel,
2311 : cheapest_total_outer,
2312 : cheapest_total_inner,
2313 : hashclauses,
2314 : jointype,
2315 : extra);
2316 : /* no possibility of cheap startup here */
2317 : }
2318 561316 : else if (jointype == JOIN_UNIQUE_INNER)
2319 : {
2320 : cheapest_total_inner = (Path *)
2321 5638 : create_unique_path(root, innerrel,
2322 : cheapest_total_inner, extra->sjinfo);
2323 : Assert(cheapest_total_inner);
2324 5638 : jointype = JOIN_INNER;
2325 5638 : try_hashjoin_path(root,
2326 : joinrel,
2327 : cheapest_total_outer,
2328 : cheapest_total_inner,
2329 : hashclauses,
2330 : jointype,
2331 : extra);
2332 5638 : if (cheapest_startup_outer != NULL &&
2333 : cheapest_startup_outer != cheapest_total_outer)
2334 304 : try_hashjoin_path(root,
2335 : joinrel,
2336 : cheapest_startup_outer,
2337 : cheapest_total_inner,
2338 : hashclauses,
2339 : jointype,
2340 : extra);
2341 : }
2342 : else
2343 : {
2344 : /*
2345 : * For other jointypes, we consider the cheapest startup outer
2346 : * together with the cheapest total inner, and then consider
2347 : * pairings of cheapest-total paths including parameterized ones.
2348 : * There is no use in generating parameterized paths on the basis
2349 : * of possibly cheap startup cost, so this is sufficient.
2350 : */
2351 : ListCell *lc1;
2352 : ListCell *lc2;
2353 :
2354 555678 : if (cheapest_startup_outer != NULL)
2355 555042 : try_hashjoin_path(root,
2356 : joinrel,
2357 : cheapest_startup_outer,
2358 : cheapest_total_inner,
2359 : hashclauses,
2360 : jointype,
2361 : extra);
2362 :
2363 1421994 : foreach(lc1, outerrel->cheapest_parameterized_paths)
2364 : {
2365 866316 : Path *outerpath = (Path *) lfirst(lc1);
2366 :
2367 : /*
2368 : * We cannot use an outer path that is parameterized by the
2369 : * inner rel.
2370 : */
2371 866316 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
2372 252570 : continue;
2373 :
2374 1588118 : foreach(lc2, innerrel->cheapest_parameterized_paths)
2375 : {
2376 974372 : Path *innerpath = (Path *) lfirst(lc2);
2377 :
2378 : /*
2379 : * We cannot use an inner path that is parameterized by
2380 : * the outer rel, either.
2381 : */
2382 974372 : if (PATH_PARAM_BY_REL(innerpath, outerrel))
2383 286362 : continue;
2384 :
2385 688010 : if (outerpath == cheapest_startup_outer &&
2386 : innerpath == cheapest_total_inner)
2387 465170 : continue; /* already tried it */
2388 :
2389 222840 : try_hashjoin_path(root,
2390 : joinrel,
2391 : outerpath,
2392 : innerpath,
2393 : hashclauses,
2394 : jointype,
2395 : extra);
2396 : }
2397 : }
2398 : }
2399 :
2400 : /*
2401 : * If the joinrel is parallel-safe, we may be able to consider a
2402 : * partial hash join. However, we can't handle JOIN_UNIQUE_OUTER,
2403 : * because the outer path will be partial, and therefore we won't be
2404 : * able to properly guarantee uniqueness. Also, the resulting path
2405 : * must not be parameterized.
2406 : */
2407 566954 : if (joinrel->consider_parallel &&
2408 503614 : save_jointype != JOIN_UNIQUE_OUTER &&
2409 503614 : outerrel->partial_pathlist != NIL &&
2410 17132 : bms_is_empty(joinrel->lateral_relids))
2411 : {
2412 : Path *cheapest_partial_outer;
2413 17132 : Path *cheapest_partial_inner = NULL;
2414 17132 : Path *cheapest_safe_inner = NULL;
2415 :
2416 17132 : cheapest_partial_outer =
2417 17132 : (Path *) linitial(outerrel->partial_pathlist);
2418 :
2419 : /*
2420 : * Can we use a partial inner plan too, so that we can build a
2421 : * shared hash table in parallel? We can't handle
2422 : * JOIN_UNIQUE_INNER because we can't guarantee uniqueness.
2423 : */
2424 17132 : if (innerrel->partial_pathlist != NIL &&
2425 12828 : save_jointype != JOIN_UNIQUE_INNER &&
2426 : enable_parallel_hash)
2427 : {
2428 12552 : cheapest_partial_inner =
2429 12552 : (Path *) linitial(innerrel->partial_pathlist);
2430 12552 : try_partial_hashjoin_path(root, joinrel,
2431 : cheapest_partial_outer,
2432 : cheapest_partial_inner,
2433 : hashclauses, jointype, extra,
2434 : true /* parallel_hash */ );
2435 : }
2436 :
2437 : /*
2438 : * Normally, given that the joinrel is parallel-safe, the cheapest
2439 : * total inner path will also be parallel-safe, but if not, we'll
2440 : * have to search for the cheapest safe, unparameterized inner
2441 : * path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
2442 : * inner path. If full, right, right-semi or right-anti join, we
2443 : * can't use parallelism (building the hash table in each backend)
2444 : * because no one process has all the match bits.
2445 : */
2446 17132 : if (save_jointype == JOIN_FULL ||
2447 13316 : save_jointype == JOIN_RIGHT ||
2448 9748 : save_jointype == JOIN_RIGHT_SEMI ||
2449 : save_jointype == JOIN_RIGHT_ANTI)
2450 7714 : cheapest_safe_inner = NULL;
2451 9418 : else if (cheapest_total_inner->parallel_safe)
2452 9190 : cheapest_safe_inner = cheapest_total_inner;
2453 228 : else if (save_jointype != JOIN_UNIQUE_INNER)
2454 : cheapest_safe_inner =
2455 222 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
2456 :
2457 17132 : if (cheapest_safe_inner != NULL)
2458 9406 : try_partial_hashjoin_path(root, joinrel,
2459 : cheapest_partial_outer,
2460 : cheapest_safe_inner,
2461 : hashclauses, jointype, extra,
2462 : false /* parallel_hash */ );
2463 : }
2464 : }
2465 : }
2466 :
2467 : /*
2468 : * select_mergejoin_clauses
2469 : * Select mergejoin clauses that are usable for a particular join.
2470 : * Returns a list of RestrictInfo nodes for those clauses.
2471 : *
2472 : * *mergejoin_allowed is normally set to true, but it is set to false if
2473 : * this is a right-semi join, or this is a right/right-anti/full join and
2474 : * there are nonmergejoinable join clauses. The executor's mergejoin
2475 : * machinery cannot handle such cases, so we have to avoid generating a
2476 : * mergejoin plan. (Note that this flag does NOT consider whether there are
2477 : * actually any mergejoinable clauses. This is correct because in some
2478 : * cases we need to build a clauseless mergejoin. Simply returning NIL is
2479 : * therefore not enough to distinguish safe from unsafe cases.)
2480 : *
2481 : * We also mark each selected RestrictInfo to show which side is currently
2482 : * being considered as outer. These are transient markings that are only
2483 : * good for the duration of the current add_paths_to_joinrel() call!
2484 : *
2485 : * We examine each restrictinfo clause known for the join to see
2486 : * if it is mergejoinable and involves vars from the two sub-relations
2487 : * currently of interest.
2488 : */
2489 : static List *
2490 687780 : select_mergejoin_clauses(PlannerInfo *root,
2491 : RelOptInfo *joinrel,
2492 : RelOptInfo *outerrel,
2493 : RelOptInfo *innerrel,
2494 : List *restrictlist,
2495 : JoinType jointype,
2496 : bool *mergejoin_allowed)
2497 : {
2498 687780 : List *result_list = NIL;
2499 687780 : bool isouterjoin = IS_OUTER_JOIN(jointype);
2500 687780 : bool have_nonmergeable_joinclause = false;
2501 : ListCell *l;
2502 :
2503 : /*
2504 : * For now we do not support RIGHT_SEMI join in mergejoin: the benefit of
2505 : * swapping inputs tends to be small here.
2506 : */
2507 687780 : if (jointype == JOIN_RIGHT_SEMI)
2508 : {
2509 7212 : *mergejoin_allowed = false;
2510 7212 : return NIL;
2511 : }
2512 :
2513 1430052 : foreach(l, restrictlist)
2514 : {
2515 749484 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2516 :
2517 : /*
2518 : * If processing an outer join, only use its own join clauses in the
2519 : * merge. For inner joins we can use pushed-down clauses too. (Note:
2520 : * we don't set have_nonmergeable_joinclause here because pushed-down
2521 : * clauses will become otherquals not joinquals.)
2522 : */
2523 749484 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
2524 11556 : continue;
2525 :
2526 : /* Check that clause is a mergeable operator clause */
2527 737928 : if (!restrictinfo->can_join ||
2528 654018 : restrictinfo->mergeopfamilies == NIL)
2529 : {
2530 : /*
2531 : * The executor can handle extra joinquals that are constants, but
2532 : * not anything else, when doing right/right-anti/full merge join.
2533 : * (The reason to support constants is so we can do FULL JOIN ON
2534 : * FALSE.)
2535 : */
2536 104744 : if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
2537 94348 : have_nonmergeable_joinclause = true;
2538 104744 : continue; /* not mergejoinable */
2539 : }
2540 :
2541 : /*
2542 : * Check if clause has the form "outer op inner" or "inner op outer".
2543 : */
2544 633184 : if (!clause_sides_match_join(restrictinfo, outerrel->relids,
2545 : innerrel->relids))
2546 : {
2547 1008 : have_nonmergeable_joinclause = true;
2548 1008 : continue; /* no good for these input relations */
2549 : }
2550 :
2551 : /*
2552 : * If clause has the form "inner op outer", check if its operator has
2553 : * valid commutator. This is necessary because mergejoin clauses in
2554 : * this form will get commuted in createplan.c to put the outer var on
2555 : * the left (see get_switched_clauses). This probably shouldn't ever
2556 : * fail, since mergejoinable operators ought to have commutators, but
2557 : * be paranoid.
2558 : *
2559 : * The clause being mergejoinable indicates that it's an OpExpr.
2560 : */
2561 945462 : if (!restrictinfo->outer_is_left &&
2562 313286 : !OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
2563 : {
2564 36 : have_nonmergeable_joinclause = true;
2565 36 : continue;
2566 : }
2567 :
2568 : /*
2569 : * Insist that each side have a non-redundant eclass. This
2570 : * restriction is needed because various bits of the planner expect
2571 : * that each clause in a merge be associable with some pathkey in a
2572 : * canonical pathkey list, but redundant eclasses can't appear in
2573 : * canonical sort orderings. (XXX it might be worth relaxing this,
2574 : * but not enough time to address it for 8.3.)
2575 : */
2576 632140 : update_mergeclause_eclasses(root, restrictinfo);
2577 :
2578 632140 : if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
2579 632092 : EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
2580 : {
2581 128 : have_nonmergeable_joinclause = true;
2582 128 : continue; /* can't handle redundant eclasses */
2583 : }
2584 :
2585 632012 : result_list = lappend(result_list, restrictinfo);
2586 : }
2587 :
2588 : /*
2589 : * Report whether mergejoin is allowed (see comment at top of function).
2590 : */
2591 680568 : switch (jointype)
2592 : {
2593 112708 : case JOIN_RIGHT:
2594 : case JOIN_RIGHT_ANTI:
2595 : case JOIN_FULL:
2596 112708 : *mergejoin_allowed = !have_nonmergeable_joinclause;
2597 112708 : break;
2598 567860 : default:
2599 567860 : *mergejoin_allowed = true;
2600 567860 : break;
2601 : }
2602 :
2603 680568 : return result_list;
2604 : }
|