Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * joinpath.c
4 : * Routines to find all possible paths for processing a set of joins
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/path/joinpath.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <math.h>
18 :
19 : #include "executor/executor.h"
20 : #include "foreign/fdwapi.h"
21 : #include "nodes/nodeFuncs.h"
22 : #include "optimizer/cost.h"
23 : #include "optimizer/optimizer.h"
24 : #include "optimizer/pathnode.h"
25 : #include "optimizer/paths.h"
26 : #include "optimizer/planmain.h"
27 : #include "utils/typcache.h"
28 :
29 : /* Hook for plugins to get control in add_paths_to_joinrel() */
30 : set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
31 :
32 : /*
33 : * Paths parameterized by the parent can be considered to be parameterized by
34 : * any of its child.
35 : */
36 : #define PATH_PARAM_BY_PARENT(path, rel) \
37 : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
38 : (rel)->top_parent_relids))
39 : #define PATH_PARAM_BY_REL_SELF(path, rel) \
40 : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
41 :
42 : #define PATH_PARAM_BY_REL(path, rel) \
43 : (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
44 :
45 : static void try_partial_mergejoin_path(PlannerInfo *root,
46 : RelOptInfo *joinrel,
47 : Path *outer_path,
48 : Path *inner_path,
49 : List *pathkeys,
50 : List *mergeclauses,
51 : List *outersortkeys,
52 : List *innersortkeys,
53 : JoinType jointype,
54 : JoinPathExtraData *extra);
55 : static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
56 : RelOptInfo *outerrel, RelOptInfo *innerrel,
57 : JoinType jointype, JoinPathExtraData *extra);
58 : static inline bool clause_sides_match_join(RestrictInfo *rinfo,
59 : RelOptInfo *outerrel,
60 : RelOptInfo *innerrel);
61 : static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
62 : RelOptInfo *outerrel, RelOptInfo *innerrel,
63 : JoinType jointype, JoinPathExtraData *extra);
64 : static void consider_parallel_nestloop(PlannerInfo *root,
65 : RelOptInfo *joinrel,
66 : RelOptInfo *outerrel,
67 : RelOptInfo *innerrel,
68 : JoinType jointype,
69 : JoinPathExtraData *extra);
70 : static void consider_parallel_mergejoin(PlannerInfo *root,
71 : RelOptInfo *joinrel,
72 : RelOptInfo *outerrel,
73 : RelOptInfo *innerrel,
74 : JoinType jointype,
75 : JoinPathExtraData *extra,
76 : Path *inner_cheapest_total);
77 : static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
78 : RelOptInfo *outerrel, RelOptInfo *innerrel,
79 : JoinType jointype, JoinPathExtraData *extra);
80 : static List *select_mergejoin_clauses(PlannerInfo *root,
81 : RelOptInfo *joinrel,
82 : RelOptInfo *outerrel,
83 : RelOptInfo *innerrel,
84 : List *restrictlist,
85 : JoinType jointype,
86 : bool *mergejoin_allowed);
87 : static void generate_mergejoin_paths(PlannerInfo *root,
88 : RelOptInfo *joinrel,
89 : RelOptInfo *innerrel,
90 : Path *outerpath,
91 : JoinType jointype,
92 : JoinPathExtraData *extra,
93 : bool useallclauses,
94 : Path *inner_cheapest_total,
95 : List *merge_pathkeys,
96 : bool is_partial);
97 :
98 :
99 : /*
100 : * add_paths_to_joinrel
101 : * Given a join relation and two component rels from which it can be made,
102 : * consider all possible paths that use the two component rels as outer
103 : * and inner rel respectively. Add these paths to the join rel's pathlist
104 : * if they survive comparison with other paths (and remove any existing
105 : * paths that are dominated by these paths).
106 : *
107 : * Modifies the pathlist field of the joinrel node to contain the best
108 : * paths found so far.
109 : *
110 : * jointype is not necessarily the same as sjinfo->jointype; it might be
111 : * "flipped around" if we are considering joining the rels in the opposite
112 : * direction from what's indicated in sjinfo.
113 : *
114 : * Also, this routine and others in this module accept the special JoinTypes
115 : * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
116 : * unique-ify the outer or inner relation and then apply a regular inner
117 : * join. These values are not allowed to propagate outside this module,
118 : * however. Path cost estimation code may need to recognize that it's
119 : * dealing with such a case --- the combination of nominal jointype INNER
120 : * with sjinfo->jointype == JOIN_SEMI indicates that.
121 : */
122 : void
123 498796 : add_paths_to_joinrel(PlannerInfo *root,
124 : RelOptInfo *joinrel,
125 : RelOptInfo *outerrel,
126 : RelOptInfo *innerrel,
127 : JoinType jointype,
128 : SpecialJoinInfo *sjinfo,
129 : List *restrictlist)
130 : {
131 : JoinPathExtraData extra;
132 498796 : bool mergejoin_allowed = true;
133 : ListCell *lc;
134 : Relids joinrelids;
135 :
136 : /*
137 : * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
138 : * between child relations, even if there is a SpecialJoinInfo node for
139 : * the join between the topmost parents. So, while calculating Relids set
140 : * representing the restriction, consider relids of topmost parent of
141 : * partitions.
142 : */
143 498796 : if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
144 10156 : joinrelids = joinrel->top_parent_relids;
145 : else
146 488640 : joinrelids = joinrel->relids;
147 :
148 498796 : extra.restrictlist = restrictlist;
149 498796 : extra.mergeclause_list = NIL;
150 498796 : extra.sjinfo = sjinfo;
151 498796 : extra.param_source_rels = NULL;
152 :
153 : /*
154 : * See if the inner relation is provably unique for this outer rel.
155 : *
156 : * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
157 : * matter since the executor can make the equivalent optimization anyway;
158 : * we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we
159 : * must be considering a semijoin whose inner side is not provably unique
160 : * (else reduce_unique_semijoins would've simplified it), so there's no
161 : * point in calling innerrel_is_unique. However, if the LHS covers all of
162 : * the semijoin's min_lefthand, then it's appropriate to set inner_unique
163 : * because the path produced by create_unique_path will be unique relative
164 : * to the LHS. (If we have an LHS that's only part of the min_lefthand,
165 : * that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
166 : * letting that value escape this module.
167 : */
168 498796 : switch (jointype)
169 : {
170 7728 : case JOIN_SEMI:
171 : case JOIN_ANTI:
172 :
173 : /*
174 : * XXX it may be worth proving this to allow a Memoize to be
175 : * considered for Nested Loop Semi/Anti Joins.
176 : */
177 7728 : extra.inner_unique = false; /* well, unproven */
178 7728 : break;
179 3586 : case JOIN_UNIQUE_INNER:
180 7172 : extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
181 3586 : outerrel->relids);
182 3586 : break;
183 3586 : case JOIN_UNIQUE_OUTER:
184 3586 : extra.inner_unique = innerrel_is_unique(root,
185 : joinrel->relids,
186 : outerrel->relids,
187 : innerrel,
188 : JOIN_INNER,
189 : restrictlist,
190 : false);
191 3586 : break;
192 483896 : default:
193 483896 : extra.inner_unique = innerrel_is_unique(root,
194 : joinrel->relids,
195 : outerrel->relids,
196 : innerrel,
197 : jointype,
198 : restrictlist,
199 : false);
200 483896 : break;
201 : }
202 :
203 : /*
204 : * Find potential mergejoin clauses. We can skip this if we are not
205 : * interested in doing a mergejoin. However, mergejoin may be our only
206 : * way of implementing a full outer join, so override enable_mergejoin if
207 : * it's a full join.
208 : */
209 498796 : if (enable_mergejoin || jointype == JOIN_FULL)
210 495614 : extra.mergeclause_list = select_mergejoin_clauses(root,
211 : joinrel,
212 : outerrel,
213 : innerrel,
214 : restrictlist,
215 : jointype,
216 : &mergejoin_allowed);
217 :
218 : /*
219 : * If it's SEMI, ANTI, or inner_unique join, compute correction factors
220 : * for cost estimation. These will be the same for all paths.
221 : */
222 498796 : if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
223 159396 : compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel,
224 : jointype, sjinfo, restrictlist,
225 : &extra.semifactors);
226 :
227 : /*
228 : * Decide whether it's sensible to generate parameterized paths for this
229 : * joinrel, and if so, which relations such paths should require. There
230 : * is usually no need to create a parameterized result path unless there
231 : * is a join order restriction that prevents joining one of our input rels
232 : * directly to the parameter source rel instead of joining to the other
233 : * input rel. (But see allow_star_schema_join().) This restriction
234 : * reduces the number of parameterized paths we have to deal with at
235 : * higher join levels, without compromising the quality of the resulting
236 : * plan. We express the restriction as a Relids set that must overlap the
237 : * parameterization of any proposed join path. Note: param_source_rels
238 : * should contain only baserels, not OJ relids, so starting from
239 : * all_baserels not all_query_rels is correct.
240 : */
241 1081568 : foreach(lc, root->join_info_list)
242 : {
243 582772 : SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
244 :
245 : /*
246 : * SJ is relevant to this join if we have some part of its RHS
247 : * (possibly not all of it), and haven't yet joined to its LHS. (This
248 : * test is pretty simplistic, but should be sufficient considering the
249 : * join has already been proven legal.) If the SJ is relevant, it
250 : * presents constraints for joining to anything not in its RHS.
251 : */
252 582772 : if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
253 383666 : !bms_overlap(joinrelids, sjinfo2->min_lefthand))
254 18962 : extra.param_source_rels = bms_join(extra.param_source_rels,
255 18962 : bms_difference(root->all_baserels,
256 18962 : sjinfo2->min_righthand));
257 :
258 : /* full joins constrain both sides symmetrically */
259 587412 : if (sjinfo2->jointype == JOIN_FULL &&
260 4640 : bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
261 4584 : !bms_overlap(joinrelids, sjinfo2->min_righthand))
262 672 : extra.param_source_rels = bms_join(extra.param_source_rels,
263 672 : bms_difference(root->all_baserels,
264 672 : sjinfo2->min_lefthand));
265 : }
266 :
267 : /*
268 : * However, when a LATERAL subquery is involved, there will simply not be
269 : * any paths for the joinrel that aren't parameterized by whatever the
270 : * subquery is parameterized by, unless its parameterization is resolved
271 : * within the joinrel. So we might as well allow additional dependencies
272 : * on whatever residual lateral dependencies the joinrel will have.
273 : */
274 997592 : extra.param_source_rels = bms_add_members(extra.param_source_rels,
275 498796 : joinrel->lateral_relids);
276 :
277 : /*
278 : * 1. Consider mergejoin paths where both relations must be explicitly
279 : * sorted. Skip this if we can't mergejoin.
280 : */
281 498796 : if (mergejoin_allowed)
282 491772 : sort_inner_and_outer(root, joinrel, outerrel, innerrel,
283 : jointype, &extra);
284 :
285 : /*
286 : * 2. Consider paths where the outer relation need not be explicitly
287 : * sorted. This includes both nestloops and mergejoins where the outer
288 : * path is already ordered. Again, skip this if we can't mergejoin.
289 : * (That's okay because we know that nestloop can't handle
290 : * right/right-anti/full joins at all, so it wouldn't work in the
291 : * prohibited cases either.)
292 : */
293 498796 : if (mergejoin_allowed)
294 491772 : match_unsorted_outer(root, joinrel, outerrel, innerrel,
295 : jointype, &extra);
296 :
297 : #ifdef NOT_USED
298 :
299 : /*
300 : * 3. Consider paths where the inner relation need not be explicitly
301 : * sorted. This includes mergejoins only (nestloops were already built in
302 : * match_unsorted_outer).
303 : *
304 : * Diked out as redundant 2/13/2000 -- tgl. There isn't any really
305 : * significant difference between the inner and outer side of a mergejoin,
306 : * so match_unsorted_inner creates no paths that aren't equivalent to
307 : * those made by match_unsorted_outer when add_paths_to_joinrel() is
308 : * invoked with the two rels given in the other order.
309 : */
310 : if (mergejoin_allowed)
311 : match_unsorted_inner(root, joinrel, outerrel, innerrel,
312 : jointype, &extra);
313 : #endif
314 :
315 : /*
316 : * 4. Consider paths where both outer and inner relations must be hashed
317 : * before being joined. As above, disregard enable_hashjoin for full
318 : * joins, because there may be no other alternative.
319 : */
320 498796 : if (enable_hashjoin || jointype == JOIN_FULL)
321 494232 : hash_inner_and_outer(root, joinrel, outerrel, innerrel,
322 : jointype, &extra);
323 :
324 : /*
325 : * 5. If inner and outer relations are foreign tables (or joins) belonging
326 : * to the same server and assigned to the same user to check access
327 : * permissions as, give the FDW a chance to push down joins.
328 : */
329 498796 : if (joinrel->fdwroutine &&
330 1222 : joinrel->fdwroutine->GetForeignJoinPaths)
331 1218 : joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
332 : outerrel, innerrel,
333 : jointype, &extra);
334 :
335 : /*
336 : * 6. Finally, give extensions a chance to manipulate the path list. They
337 : * could add new paths (such as CustomPaths) by calling add_path(), or
338 : * add_partial_path() if parallel aware. They could also delete or modify
339 : * paths added by the core code.
340 : */
341 498796 : if (set_join_pathlist_hook)
342 0 : set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
343 : jointype, &extra);
344 498796 : }
345 :
346 : /*
347 : * We override the param_source_rels heuristic to accept nestloop paths in
348 : * which the outer rel satisfies some but not all of the inner path's
349 : * parameterization. This is necessary to get good plans for star-schema
350 : * scenarios, in which a parameterized path for a large table may require
351 : * parameters from multiple small tables that will not get joined directly to
352 : * each other. We can handle that by stacking nestloops that have the small
353 : * tables on the outside; but this breaks the rule the param_source_rels
354 : * heuristic is based on, namely that parameters should not be passed down
355 : * across joins unless there's a join-order-constraint-based reason to do so.
356 : * So we ignore the param_source_rels restriction when this case applies.
357 : *
358 : * allow_star_schema_join() returns true if the param_source_rels restriction
359 : * should be overridden, ie, it's okay to perform this join.
360 : */
361 : static inline bool
362 180658 : allow_star_schema_join(PlannerInfo *root,
363 : Relids outerrelids,
364 : Relids inner_paramrels)
365 : {
366 : /*
367 : * It's a star-schema case if the outer rel provides some but not all of
368 : * the inner rel's parameterization.
369 : */
370 210194 : return (bms_overlap(inner_paramrels, outerrelids) &&
371 29536 : bms_nonempty_difference(inner_paramrels, outerrelids));
372 : }
373 :
374 : /*
375 : * If the parameterization is only partly satisfied by the outer rel,
376 : * the unsatisfied part can't include any outer-join relids that could
377 : * null rels of the satisfied part. That would imply that we're trying
378 : * to use a clause involving a Var with nonempty varnullingrels at
379 : * a join level where that value isn't yet computable.
380 : *
381 : * In practice, this test never finds a problem because earlier join order
382 : * restrictions prevent us from attempting a join that would cause a problem.
383 : * (That's unsurprising, because the code worked before we ever added
384 : * outer-join relids to expression relids.) It still seems worth checking
385 : * as a backstop, but we only do so in assert-enabled builds.
386 : */
387 : #ifdef USE_ASSERT_CHECKING
388 : static inline bool
389 : have_unsafe_outer_join_ref(PlannerInfo *root,
390 : Relids outerrelids,
391 : Relids inner_paramrels)
392 : {
393 : bool result = false;
394 : Relids unsatisfied = bms_difference(inner_paramrels, outerrelids);
395 : Relids satisfied = bms_intersect(inner_paramrels, outerrelids);
396 :
397 : if (bms_overlap(unsatisfied, root->outer_join_rels))
398 : {
399 : ListCell *lc;
400 :
401 : foreach(lc, root->join_info_list)
402 : {
403 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
404 :
405 : if (!bms_is_member(sjinfo->ojrelid, unsatisfied))
406 : continue; /* not relevant */
407 : if (bms_overlap(satisfied, sjinfo->min_righthand) ||
408 : (sjinfo->jointype == JOIN_FULL &&
409 : bms_overlap(satisfied, sjinfo->min_lefthand)))
410 : {
411 : result = true; /* doesn't work */
412 : break;
413 : }
414 : }
415 : }
416 :
417 : /* Waste no memory when we reject a path here */
418 : bms_free(unsatisfied);
419 : bms_free(satisfied);
420 :
421 : return result;
422 : }
423 : #endif /* USE_ASSERT_CHECKING */
424 :
425 : /*
426 : * paraminfo_get_equal_hashops
427 : * Determine if the clauses in param_info and innerrel's lateral_vars
428 : * can be hashed.
429 : * Returns true if hashing is possible, otherwise false.
430 : *
431 : * Additionally, on success we collect the outer expressions and the
432 : * appropriate equality operators for each hashable parameter to innerrel.
433 : * These are returned in parallel lists in *param_exprs and *operators.
434 : * We also set *binary_mode to indicate whether strict binary matching is
435 : * required.
436 : */
437 : static bool
438 271498 : paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
439 : RelOptInfo *outerrel, RelOptInfo *innerrel,
440 : List **param_exprs, List **operators,
441 : bool *binary_mode)
442 :
443 : {
444 : ListCell *lc;
445 :
446 271498 : *param_exprs = NIL;
447 271498 : *operators = NIL;
448 271498 : *binary_mode = false;
449 :
450 : /* Add join clauses from param_info to the hash key */
451 271498 : if (param_info != NULL)
452 : {
453 271498 : List *clauses = param_info->ppi_clauses;
454 :
455 493202 : foreach(lc, clauses)
456 : {
457 290660 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
458 : OpExpr *opexpr;
459 : Node *expr;
460 : Oid hasheqoperator;
461 :
462 290660 : opexpr = (OpExpr *) rinfo->clause;
463 :
464 : /*
465 : * Bail if the rinfo is not compatible. We need a join OpExpr
466 : * with 2 args.
467 : */
468 290660 : if (!IsA(opexpr, OpExpr) || list_length(opexpr->args) != 2 ||
469 271806 : !clause_sides_match_join(rinfo, outerrel, innerrel))
470 : {
471 68812 : list_free(*operators);
472 68812 : list_free(*param_exprs);
473 68956 : return false;
474 : }
475 :
476 221848 : if (rinfo->outer_is_left)
477 : {
478 130896 : expr = (Node *) linitial(opexpr->args);
479 130896 : hasheqoperator = rinfo->left_hasheqoperator;
480 : }
481 : else
482 : {
483 90952 : expr = (Node *) lsecond(opexpr->args);
484 90952 : hasheqoperator = rinfo->right_hasheqoperator;
485 : }
486 :
487 : /* can't do memoize if we can't hash the outer type */
488 221848 : if (!OidIsValid(hasheqoperator))
489 : {
490 144 : list_free(*operators);
491 144 : list_free(*param_exprs);
492 144 : return false;
493 : }
494 :
495 221704 : *operators = lappend_oid(*operators, hasheqoperator);
496 221704 : *param_exprs = lappend(*param_exprs, expr);
497 :
498 : /*
499 : * When the join operator is not hashable then it's possible that
500 : * the operator will be able to distinguish something that the
501 : * hash equality operator could not. For example with floating
502 : * point types -0.0 and +0.0 are classed as equal by the hash
503 : * function and equality function, but some other operator may be
504 : * able to tell those values apart. This means that we must put
505 : * memoize into binary comparison mode so that it does bit-by-bit
506 : * comparisons rather than a "logical" comparison as it would
507 : * using the hash equality operator.
508 : */
509 221704 : if (!OidIsValid(rinfo->hashjoinoperator))
510 598 : *binary_mode = true;
511 : }
512 : }
513 :
514 : /* Now add any lateral vars to the cache key too */
515 205656 : foreach(lc, innerrel->lateral_vars)
516 : {
517 3258 : Node *expr = (Node *) lfirst(lc);
518 : TypeCacheEntry *typentry;
519 :
520 : /* Reject if there are any volatile functions in lateral vars */
521 3258 : if (contain_volatile_functions(expr))
522 : {
523 0 : list_free(*operators);
524 0 : list_free(*param_exprs);
525 144 : return false;
526 : }
527 :
528 3258 : typentry = lookup_type_cache(exprType(expr),
529 : TYPECACHE_HASH_PROC | TYPECACHE_EQ_OPR);
530 :
531 : /* can't use memoize without a valid hash proc and equals operator */
532 3258 : if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr))
533 : {
534 144 : list_free(*operators);
535 144 : list_free(*param_exprs);
536 144 : return false;
537 : }
538 :
539 3114 : *operators = lappend_oid(*operators, typentry->eq_opr);
540 3114 : *param_exprs = lappend(*param_exprs, expr);
541 :
542 : /*
543 : * We must go into binary mode as we don't have too much of an idea of
544 : * how these lateral Vars are being used. See comment above when we
545 : * set *binary_mode for the non-lateral Var case. This could be
546 : * relaxed a bit if we had the RestrictInfos and knew the operators
547 : * being used, however for cases like Vars that are arguments to
548 : * functions we must operate in binary mode as we don't have
549 : * visibility into what the function is doing with the Vars.
550 : */
551 3114 : *binary_mode = true;
552 : }
553 :
554 : /* We're okay to use memoize */
555 202398 : return true;
556 : }
557 :
558 : /*
559 : * get_memoize_path
560 : * If possible, make and return a Memoize path atop of 'inner_path'.
561 : * Otherwise return NULL.
562 : */
563 : static Path *
564 1208692 : get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
565 : RelOptInfo *outerrel, Path *inner_path,
566 : Path *outer_path, JoinType jointype,
567 : JoinPathExtraData *extra)
568 : {
569 : List *param_exprs;
570 : List *hash_operators;
571 : ListCell *lc;
572 : bool binary_mode;
573 :
574 : /* Obviously not if it's disabled */
575 1208692 : if (!enable_memoize)
576 388 : return NULL;
577 :
578 : /*
579 : * We can safely not bother with all this unless we expect to perform more
580 : * than one inner scan. The first scan is always going to be a cache
581 : * miss. This would likely fail later anyway based on costs, so this is
582 : * really just to save some wasted effort.
583 : */
584 1208304 : if (outer_path->parent->rows < 2)
585 406724 : return NULL;
586 :
587 : /*
588 : * We can only have a memoize node when there's some kind of cache key,
589 : * either parameterized path clauses or lateral Vars. No cache key sounds
590 : * more like something a Materialize node might be more useful for.
591 : */
592 801580 : if ((inner_path->param_info == NULL ||
593 331848 : inner_path->param_info->ppi_clauses == NIL) &&
594 485176 : innerrel->lateral_vars == NIL)
595 482822 : return NULL;
596 :
597 : /*
598 : * Currently we don't do this for SEMI and ANTI joins unless they're
599 : * marked as inner_unique. This is because nested loop SEMI/ANTI joins
600 : * don't scan the inner node to completion, which will mean memoize cannot
601 : * mark the cache entry as complete.
602 : *
603 : * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique
604 : * = true. Should we? See add_paths_to_joinrel()
605 : */
606 318758 : if (!extra->inner_unique && (jointype == JOIN_SEMI ||
607 : jointype == JOIN_ANTI))
608 7600 : return NULL;
609 :
610 : /*
611 : * Memoize normally marks cache entries as complete when it runs out of
612 : * tuples to read from its subplan. However, with unique joins, Nested
613 : * Loop will skip to the next outer tuple after finding the first matching
614 : * inner tuple. This means that we may not read the inner side of the
615 : * join to completion which leaves no opportunity to mark the cache entry
616 : * as complete. To work around that, when the join is unique we
617 : * automatically mark cache entries as complete after fetching the first
618 : * tuple. This works when the entire join condition is parameterized.
619 : * Otherwise, when the parameterization is only a subset of the join
620 : * condition, we can't be sure which part of it causes the join to be
621 : * unique. This means there are no guarantees that only 1 tuple will be
622 : * read. We cannot mark the cache entry as complete after reading the
623 : * first tuple without that guarantee. This means the scope of Memoize
624 : * node's usefulness is limited to only outer rows that have no join
625 : * partner as this is the only case where Nested Loop would exhaust the
626 : * inner scan of a unique join. Since the scope is limited to that, we
627 : * just don't bother making a memoize path in this case.
628 : *
629 : * Lateral vars needn't be considered here as they're not considered when
630 : * determining if the join is unique.
631 : *
632 : * XXX this could be enabled if the remaining join quals were made part of
633 : * the inner scan's filter instead of the join filter. Maybe it's worth
634 : * considering doing that?
635 : */
636 311158 : if (extra->inner_unique &&
637 369948 : (inner_path->param_info == NULL ||
638 184974 : list_length(inner_path->param_info->ppi_clauses) <
639 184974 : list_length(extra->restrictlist)))
640 39658 : return NULL;
641 :
642 : /*
643 : * We can't use a memoize node if there are volatile functions in the
644 : * inner rel's target list or restrict list. A cache hit could reduce the
645 : * number of calls to these functions.
646 : */
647 271500 : if (contain_volatile_functions((Node *) innerrel->reltarget))
648 0 : return NULL;
649 :
650 448240 : foreach(lc, innerrel->baserestrictinfo)
651 : {
652 176740 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
653 :
654 176740 : if (contain_volatile_functions((Node *) rinfo))
655 0 : return NULL;
656 : }
657 :
658 : /*
659 : * Also check the parameterized path restrictinfos for volatile functions.
660 : * Indexed functions must be immutable so shouldn't have any volatile
661 : * functions, however, with a lateral join the inner scan may not be an
662 : * index scan.
663 : */
664 271500 : if (inner_path->param_info != NULL)
665 : {
666 585004 : foreach(lc, inner_path->param_info->ppi_clauses)
667 : {
668 313506 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
669 :
670 313506 : if (contain_volatile_functions((Node *) rinfo))
671 2 : return NULL;
672 : }
673 : }
674 :
675 : /* Check if we have hash ops for each parameter to the path */
676 271498 : if (paraminfo_get_equal_hashops(root,
677 : inner_path->param_info,
678 271498 : outerrel->top_parent ?
679 : outerrel->top_parent : outerrel,
680 : innerrel,
681 : ¶m_exprs,
682 : &hash_operators,
683 : &binary_mode))
684 : {
685 202398 : return (Path *) create_memoize_path(root,
686 : innerrel,
687 : inner_path,
688 : param_exprs,
689 : hash_operators,
690 202398 : extra->inner_unique,
691 : binary_mode,
692 : outer_path->rows);
693 : }
694 :
695 69100 : return NULL;
696 : }
697 :
698 : /*
699 : * try_nestloop_path
700 : * Consider a nestloop join path; if it appears useful, push it into
701 : * the joinrel's pathlist via add_path().
702 : */
703 : static void
704 2074656 : try_nestloop_path(PlannerInfo *root,
705 : RelOptInfo *joinrel,
706 : Path *outer_path,
707 : Path *inner_path,
708 : List *pathkeys,
709 : JoinType jointype,
710 : JoinPathExtraData *extra)
711 : {
712 : Relids required_outer;
713 : JoinCostWorkspace workspace;
714 2074656 : RelOptInfo *innerrel = inner_path->parent;
715 2074656 : RelOptInfo *outerrel = outer_path->parent;
716 : Relids innerrelids;
717 : Relids outerrelids;
718 2074656 : Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
719 2074656 : Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
720 :
721 : /*
722 : * If we are forming an outer join at this join, it's nonsensical to use
723 : * an input path that uses the outer join as part of its parameterization.
724 : * (This can happen despite our join order restrictions, since those apply
725 : * to what is in an input relation not what its parameters are.)
726 : */
727 2619700 : if (extra->sjinfo->ojrelid != 0 &&
728 1090088 : (bms_is_member(extra->sjinfo->ojrelid, inner_paramrels) ||
729 545044 : bms_is_member(extra->sjinfo->ojrelid, outer_paramrels)))
730 180446 : return;
731 :
732 : /*
733 : * Paths are parameterized by top-level parents, so run parameterization
734 : * tests on the parent relids.
735 : */
736 2074536 : if (innerrel->top_parent_relids)
737 32500 : innerrelids = innerrel->top_parent_relids;
738 : else
739 2042036 : innerrelids = innerrel->relids;
740 :
741 2074536 : if (outerrel->top_parent_relids)
742 32500 : outerrelids = outerrel->top_parent_relids;
743 : else
744 2042036 : outerrelids = outerrel->relids;
745 :
746 : /*
747 : * Check to see if proposed path is still parameterized, and reject if the
748 : * parameterization wouldn't be sensible --- unless allow_star_schema_join
749 : * says to allow it anyway. Also, we must reject if have_dangerous_phv
750 : * doesn't like the look of it, which could only happen if the nestloop is
751 : * still parameterized.
752 : */
753 2074536 : required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
754 : innerrelids, inner_paramrels);
755 2074536 : if (required_outer &&
756 211084 : ((!bms_overlap(required_outer, extra->param_source_rels) &&
757 211512 : !allow_star_schema_join(root, outerrelids, inner_paramrels)) ||
758 30854 : have_dangerous_phv(root, outerrelids, inner_paramrels)))
759 : {
760 : /* Waste no memory when we reject a path here */
761 180326 : bms_free(required_outer);
762 180326 : return;
763 : }
764 :
765 : /* If we got past that, we shouldn't have any unsafe outer-join refs */
766 : Assert(!have_unsafe_outer_join_ref(root, outerrelids, inner_paramrels));
767 :
768 : /*
769 : * Do a precheck to quickly eliminate obviously-inferior paths. We
770 : * calculate a cheap lower bound on the path's cost and then use
771 : * add_path_precheck() to see if the path is clearly going to be dominated
772 : * by some existing path for the joinrel. If not, do the full pushup with
773 : * creating a fully valid path structure and submitting it to add_path().
774 : * The latter two steps are expensive enough to make this two-phase
775 : * methodology worthwhile.
776 : */
777 1894210 : initial_cost_nestloop(root, &workspace, jointype,
778 : outer_path, inner_path, extra);
779 :
780 1894210 : if (add_path_precheck(joinrel,
781 : workspace.startup_cost, workspace.total_cost,
782 : pathkeys, required_outer))
783 : {
784 : /*
785 : * If the inner path is parameterized, it is parameterized by the
786 : * topmost parent of the outer rel, not the outer rel itself. Fix
787 : * that.
788 : */
789 945562 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
790 : {
791 2638 : inner_path = reparameterize_path_by_child(root, inner_path,
792 : outer_path->parent);
793 :
794 : /*
795 : * If we could not translate the path, we can't create nest loop
796 : * path.
797 : */
798 2638 : if (!inner_path)
799 : {
800 0 : bms_free(required_outer);
801 0 : return;
802 : }
803 : }
804 :
805 945562 : add_path(joinrel, (Path *)
806 945562 : create_nestloop_path(root,
807 : joinrel,
808 : jointype,
809 : &workspace,
810 : extra,
811 : outer_path,
812 : inner_path,
813 : extra->restrictlist,
814 : pathkeys,
815 : required_outer));
816 : }
817 : else
818 : {
819 : /* Waste no memory when we reject a path here */
820 948648 : bms_free(required_outer);
821 : }
822 : }
823 :
824 : /*
825 : * try_partial_nestloop_path
826 : * Consider a partial nestloop join path; if it appears useful, push it into
827 : * the joinrel's partial_pathlist via add_partial_path().
828 : */
829 : static void
830 25432 : try_partial_nestloop_path(PlannerInfo *root,
831 : RelOptInfo *joinrel,
832 : Path *outer_path,
833 : Path *inner_path,
834 : List *pathkeys,
835 : JoinType jointype,
836 : JoinPathExtraData *extra)
837 : {
838 : JoinCostWorkspace workspace;
839 :
840 : /*
841 : * If the inner path is parameterized, the parameterization must be fully
842 : * satisfied by the proposed outer path. Parameterized partial paths are
843 : * not supported. The caller should already have verified that no lateral
844 : * rels are required here.
845 : */
846 : Assert(bms_is_empty(joinrel->lateral_relids));
847 25432 : if (inner_path->param_info != NULL)
848 : {
849 12598 : Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
850 12598 : RelOptInfo *outerrel = outer_path->parent;
851 : Relids outerrelids;
852 :
853 : /*
854 : * The inner and outer paths are parameterized, if at all, by the top
855 : * level parents, not the child relations, so we must use those relids
856 : * for our parameterization tests.
857 : */
858 12598 : if (outerrel->top_parent_relids)
859 9396 : outerrelids = outerrel->top_parent_relids;
860 : else
861 3202 : outerrelids = outerrel->relids;
862 :
863 12598 : if (!bms_is_subset(inner_paramrels, outerrelids))
864 18086 : return;
865 : }
866 :
867 : /*
868 : * Before creating a path, get a quick lower bound on what it is likely to
869 : * cost. Bail out right away if it looks terrible.
870 : */
871 24388 : initial_cost_nestloop(root, &workspace, jointype,
872 : outer_path, inner_path, extra);
873 24388 : if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
874 17042 : return;
875 :
876 : /*
877 : * If the inner path is parameterized, it is parameterized by the topmost
878 : * parent of the outer rel, not the outer rel itself. Fix that.
879 : */
880 7346 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
881 : {
882 2526 : inner_path = reparameterize_path_by_child(root, inner_path,
883 : outer_path->parent);
884 :
885 : /*
886 : * If we could not translate the path, we can't create nest loop path.
887 : */
888 2526 : if (!inner_path)
889 0 : return;
890 : }
891 :
892 : /* Might be good enough to be worth trying, so let's try it. */
893 7346 : add_partial_path(joinrel, (Path *)
894 7346 : create_nestloop_path(root,
895 : joinrel,
896 : jointype,
897 : &workspace,
898 : extra,
899 : outer_path,
900 : inner_path,
901 : extra->restrictlist,
902 : pathkeys,
903 : NULL));
904 : }
905 :
906 : /*
907 : * try_mergejoin_path
908 : * Consider a merge join path; if it appears useful, push it into
909 : * the joinrel's pathlist via add_path().
910 : */
911 : static void
912 895284 : try_mergejoin_path(PlannerInfo *root,
913 : RelOptInfo *joinrel,
914 : Path *outer_path,
915 : Path *inner_path,
916 : List *pathkeys,
917 : List *mergeclauses,
918 : List *outersortkeys,
919 : List *innersortkeys,
920 : JoinType jointype,
921 : JoinPathExtraData *extra,
922 : bool is_partial)
923 : {
924 : Relids required_outer;
925 : JoinCostWorkspace workspace;
926 :
927 895284 : if (is_partial)
928 : {
929 6018 : try_partial_mergejoin_path(root,
930 : joinrel,
931 : outer_path,
932 : inner_path,
933 : pathkeys,
934 : mergeclauses,
935 : outersortkeys,
936 : innersortkeys,
937 : jointype,
938 : extra);
939 27860 : return;
940 : }
941 :
942 : /*
943 : * If we are forming an outer join at this join, it's nonsensical to use
944 : * an input path that uses the outer join as part of its parameterization.
945 : * (This can happen despite our join order restrictions, since those apply
946 : * to what is in an input relation not what its parameters are.)
947 : */
948 1230488 : if (extra->sjinfo->ojrelid != 0 &&
949 682444 : (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
950 341222 : bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
951 12 : return;
952 :
953 : /*
954 : * Check to see if proposed path is still parameterized, and reject if the
955 : * parameterization wouldn't be sensible.
956 : */
957 889254 : required_outer = calc_non_nestloop_required_outer(outer_path,
958 : inner_path);
959 889254 : if (required_outer &&
960 24130 : !bms_overlap(required_outer, extra->param_source_rels))
961 : {
962 : /* Waste no memory when we reject a path here */
963 21830 : bms_free(required_outer);
964 21830 : return;
965 : }
966 :
967 : /*
968 : * If the given paths are already well enough ordered, we can skip doing
969 : * an explicit sort.
970 : */
971 1294018 : if (outersortkeys &&
972 426594 : pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
973 12100 : outersortkeys = NIL;
974 1570406 : if (innersortkeys &&
975 702982 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
976 20004 : innersortkeys = NIL;
977 :
978 : /*
979 : * See comments in try_nestloop_path().
980 : */
981 867424 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
982 : outer_path, inner_path,
983 : outersortkeys, innersortkeys,
984 : extra);
985 :
986 867424 : if (add_path_precheck(joinrel,
987 : workspace.startup_cost, workspace.total_cost,
988 : pathkeys, required_outer))
989 : {
990 215406 : add_path(joinrel, (Path *)
991 215406 : create_mergejoin_path(root,
992 : joinrel,
993 : jointype,
994 : &workspace,
995 : extra,
996 : outer_path,
997 : inner_path,
998 : extra->restrictlist,
999 : pathkeys,
1000 : required_outer,
1001 : mergeclauses,
1002 : outersortkeys,
1003 : innersortkeys));
1004 : }
1005 : else
1006 : {
1007 : /* Waste no memory when we reject a path here */
1008 652018 : bms_free(required_outer);
1009 : }
1010 : }
1011 :
1012 : /*
1013 : * try_partial_mergejoin_path
1014 : * Consider a partial merge join path; if it appears useful, push it into
1015 : * the joinrel's pathlist via add_partial_path().
1016 : */
1017 : static void
1018 18132 : try_partial_mergejoin_path(PlannerInfo *root,
1019 : RelOptInfo *joinrel,
1020 : Path *outer_path,
1021 : Path *inner_path,
1022 : List *pathkeys,
1023 : List *mergeclauses,
1024 : List *outersortkeys,
1025 : List *innersortkeys,
1026 : JoinType jointype,
1027 : JoinPathExtraData *extra)
1028 : {
1029 : JoinCostWorkspace workspace;
1030 :
1031 : /*
1032 : * See comments in try_partial_hashjoin_path().
1033 : */
1034 : Assert(bms_is_empty(joinrel->lateral_relids));
1035 18132 : if (inner_path->param_info != NULL)
1036 : {
1037 0 : Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
1038 :
1039 0 : if (!bms_is_empty(inner_paramrels))
1040 9092 : return;
1041 : }
1042 :
1043 : /*
1044 : * If the given paths are already well enough ordered, we can skip doing
1045 : * an explicit sort.
1046 : */
1047 30246 : if (outersortkeys &&
1048 12114 : pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
1049 102 : outersortkeys = NIL;
1050 33822 : if (innersortkeys &&
1051 15690 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1052 252 : innersortkeys = NIL;
1053 :
1054 : /*
1055 : * See comments in try_partial_nestloop_path().
1056 : */
1057 18132 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
1058 : outer_path, inner_path,
1059 : outersortkeys, innersortkeys,
1060 : extra);
1061 :
1062 18132 : if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
1063 9092 : return;
1064 :
1065 : /* Might be good enough to be worth trying, so let's try it. */
1066 9040 : add_partial_path(joinrel, (Path *)
1067 9040 : create_mergejoin_path(root,
1068 : joinrel,
1069 : jointype,
1070 : &workspace,
1071 : extra,
1072 : outer_path,
1073 : inner_path,
1074 : extra->restrictlist,
1075 : pathkeys,
1076 : NULL,
1077 : mergeclauses,
1078 : outersortkeys,
1079 : innersortkeys));
1080 : }
1081 :
1082 : /*
1083 : * try_hashjoin_path
1084 : * Consider a hash join path; if it appears useful, push it into
1085 : * the joinrel's pathlist via add_path().
1086 : */
1087 : static void
1088 524270 : try_hashjoin_path(PlannerInfo *root,
1089 : RelOptInfo *joinrel,
1090 : Path *outer_path,
1091 : Path *inner_path,
1092 : List *hashclauses,
1093 : JoinType jointype,
1094 : JoinPathExtraData *extra)
1095 : {
1096 : Relids required_outer;
1097 : JoinCostWorkspace workspace;
1098 :
1099 : /*
1100 : * If we are forming an outer join at this join, it's nonsensical to use
1101 : * an input path that uses the outer join as part of its parameterization.
1102 : * (This can happen despite our join order restrictions, since those apply
1103 : * to what is in an input relation not what its parameters are.)
1104 : */
1105 732202 : if (extra->sjinfo->ojrelid != 0 &&
1106 415834 : (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
1107 207902 : bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
1108 68174 : return;
1109 :
1110 : /*
1111 : * Check to see if proposed path is still parameterized, and reject if the
1112 : * parameterization wouldn't be sensible.
1113 : */
1114 524210 : required_outer = calc_non_nestloop_required_outer(outer_path,
1115 : inner_path);
1116 524210 : if (required_outer &&
1117 79912 : !bms_overlap(required_outer, extra->param_source_rels))
1118 : {
1119 : /* Waste no memory when we reject a path here */
1120 68114 : bms_free(required_outer);
1121 68114 : return;
1122 : }
1123 :
1124 : /*
1125 : * See comments in try_nestloop_path(). Also note that hashjoin paths
1126 : * never have any output pathkeys, per comments in create_hashjoin_path.
1127 : */
1128 456096 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1129 : outer_path, inner_path, extra, false);
1130 :
1131 456096 : if (add_path_precheck(joinrel,
1132 : workspace.startup_cost, workspace.total_cost,
1133 : NIL, required_outer))
1134 : {
1135 186942 : add_path(joinrel, (Path *)
1136 186942 : create_hashjoin_path(root,
1137 : joinrel,
1138 : jointype,
1139 : &workspace,
1140 : extra,
1141 : outer_path,
1142 : inner_path,
1143 : false, /* parallel_hash */
1144 : extra->restrictlist,
1145 : required_outer,
1146 : hashclauses));
1147 : }
1148 : else
1149 : {
1150 : /* Waste no memory when we reject a path here */
1151 269154 : bms_free(required_outer);
1152 : }
1153 : }
1154 :
1155 : /*
1156 : * try_partial_hashjoin_path
1157 : * Consider a partial hashjoin join path; if it appears useful, push it into
1158 : * the joinrel's partial_pathlist via add_partial_path().
1159 : * The outer side is partial. If parallel_hash is true, then the inner path
1160 : * must be partial and will be run in parallel to create one or more shared
1161 : * hash tables; otherwise the inner path must be complete and a copy of it
1162 : * is run in every process to create separate identical private hash tables.
1163 : */
1164 : static void
1165 20082 : try_partial_hashjoin_path(PlannerInfo *root,
1166 : RelOptInfo *joinrel,
1167 : Path *outer_path,
1168 : Path *inner_path,
1169 : List *hashclauses,
1170 : JoinType jointype,
1171 : JoinPathExtraData *extra,
1172 : bool parallel_hash)
1173 : {
1174 : JoinCostWorkspace workspace;
1175 :
1176 : /*
1177 : * If the inner path is parameterized, the parameterization must be fully
1178 : * satisfied by the proposed outer path. Parameterized partial paths are
1179 : * not supported. The caller should already have verified that no lateral
1180 : * rels are required here.
1181 : */
1182 : Assert(bms_is_empty(joinrel->lateral_relids));
1183 20082 : if (inner_path->param_info != NULL)
1184 : {
1185 0 : Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
1186 :
1187 0 : if (!bms_is_empty(inner_paramrels))
1188 9508 : return;
1189 : }
1190 :
1191 : /*
1192 : * Before creating a path, get a quick lower bound on what it is likely to
1193 : * cost. Bail out right away if it looks terrible.
1194 : */
1195 20082 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1196 : outer_path, inner_path, extra, parallel_hash);
1197 20082 : if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL))
1198 9508 : return;
1199 :
1200 : /* Might be good enough to be worth trying, so let's try it. */
1201 10574 : add_partial_path(joinrel, (Path *)
1202 10574 : create_hashjoin_path(root,
1203 : joinrel,
1204 : jointype,
1205 : &workspace,
1206 : extra,
1207 : outer_path,
1208 : inner_path,
1209 : parallel_hash,
1210 : extra->restrictlist,
1211 : NULL,
1212 : hashclauses));
1213 : }
1214 :
1215 : /*
1216 : * clause_sides_match_join
1217 : * Determine whether a join clause is of the right form to use in this join.
1218 : *
1219 : * We already know that the clause is a binary opclause referencing only the
1220 : * rels in the current join. The point here is to check whether it has the
1221 : * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
1222 : * rather than mixing outer and inner vars on either side. If it matches,
1223 : * we set the transient flag outer_is_left to identify which side is which.
1224 : */
1225 : static inline bool
1226 1139070 : clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
1227 : RelOptInfo *innerrel)
1228 : {
1229 1705348 : if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
1230 566278 : bms_is_subset(rinfo->right_relids, innerrel->relids))
1231 : {
1232 : /* lefthand side is outer */
1233 565930 : rinfo->outer_is_left = true;
1234 565930 : return true;
1235 : }
1236 1131208 : else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
1237 558068 : bms_is_subset(rinfo->right_relids, outerrel->relids))
1238 : {
1239 : /* righthand side is outer */
1240 522270 : rinfo->outer_is_left = false;
1241 522270 : return true;
1242 : }
1243 50870 : return false; /* no good for these input relations */
1244 : }
1245 :
1246 : /*
1247 : * sort_inner_and_outer
1248 : * Create mergejoin join paths by explicitly sorting both the outer and
1249 : * inner join relations on each available merge ordering.
1250 : *
1251 : * 'joinrel' is the join relation
1252 : * 'outerrel' is the outer join relation
1253 : * 'innerrel' is the inner join relation
1254 : * 'jointype' is the type of join to do
1255 : * 'extra' contains additional input values
1256 : */
1257 : static void
1258 491772 : sort_inner_and_outer(PlannerInfo *root,
1259 : RelOptInfo *joinrel,
1260 : RelOptInfo *outerrel,
1261 : RelOptInfo *innerrel,
1262 : JoinType jointype,
1263 : JoinPathExtraData *extra)
1264 : {
1265 491772 : JoinType save_jointype = jointype;
1266 : Path *outer_path;
1267 : Path *inner_path;
1268 491772 : Path *cheapest_partial_outer = NULL;
1269 491772 : Path *cheapest_safe_inner = NULL;
1270 : List *all_pathkeys;
1271 : ListCell *l;
1272 :
1273 : /*
1274 : * We only consider the cheapest-total-cost input paths, since we are
1275 : * assuming here that a sort is required. We will consider
1276 : * cheapest-startup-cost input paths later, and only if they don't need a
1277 : * sort.
1278 : *
1279 : * This function intentionally does not consider parameterized input
1280 : * paths, except when the cheapest-total is parameterized. If we did so,
1281 : * we'd have a combinatorial explosion of mergejoin paths of dubious
1282 : * value. This interacts with decisions elsewhere that also discriminate
1283 : * against mergejoins with parameterized inputs; see comments in
1284 : * src/backend/optimizer/README.
1285 : */
1286 491772 : outer_path = outerrel->cheapest_total_path;
1287 491772 : inner_path = innerrel->cheapest_total_path;
1288 :
1289 : /*
1290 : * If either cheapest-total path is parameterized by the other rel, we
1291 : * can't use a mergejoin. (There's no use looking for alternative input
1292 : * paths, since these should already be the least-parameterized available
1293 : * paths.)
1294 : */
1295 491772 : if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
1296 481446 : PATH_PARAM_BY_REL(inner_path, outerrel))
1297 20688 : return;
1298 :
1299 : /*
1300 : * If unique-ification is requested, do it and then handle as a plain
1301 : * inner join.
1302 : */
1303 471084 : if (jointype == JOIN_UNIQUE_OUTER)
1304 : {
1305 3586 : outer_path = (Path *) create_unique_path(root, outerrel,
1306 : outer_path, extra->sjinfo);
1307 : Assert(outer_path);
1308 3586 : jointype = JOIN_INNER;
1309 : }
1310 467498 : else if (jointype == JOIN_UNIQUE_INNER)
1311 : {
1312 3586 : inner_path = (Path *) create_unique_path(root, innerrel,
1313 : inner_path, extra->sjinfo);
1314 : Assert(inner_path);
1315 3586 : jointype = JOIN_INNER;
1316 : }
1317 :
1318 : /*
1319 : * If the joinrel is parallel-safe, we may be able to consider a partial
1320 : * merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the
1321 : * outer path will be partial, and therefore we won't be able to properly
1322 : * guarantee uniqueness. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT
1323 : * and JOIN_RIGHT_ANTI, because they can produce false null extended rows.
1324 : * Also, the resulting path must not be parameterized.
1325 : */
1326 471084 : if (joinrel->consider_parallel &&
1327 409094 : save_jointype != JOIN_UNIQUE_OUTER &&
1328 406630 : save_jointype != JOIN_FULL &&
1329 334018 : save_jointype != JOIN_RIGHT &&
1330 332876 : save_jointype != JOIN_RIGHT_ANTI &&
1331 332876 : outerrel->partial_pathlist != NIL &&
1332 9720 : bms_is_empty(joinrel->lateral_relids))
1333 : {
1334 9720 : cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
1335 :
1336 9720 : if (inner_path->parallel_safe)
1337 9372 : cheapest_safe_inner = inner_path;
1338 348 : else if (save_jointype != JOIN_UNIQUE_INNER)
1339 : cheapest_safe_inner =
1340 342 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1341 : }
1342 :
1343 : /*
1344 : * Each possible ordering of the available mergejoin clauses will generate
1345 : * a differently-sorted result path at essentially the same cost. We have
1346 : * no basis for choosing one over another at this level of joining, but
1347 : * some sort orders may be more useful than others for higher-level
1348 : * mergejoins, so it's worth considering multiple orderings.
1349 : *
1350 : * Actually, it's not quite true that every mergeclause ordering will
1351 : * generate a different path order, because some of the clauses may be
1352 : * partially redundant (refer to the same EquivalenceClasses). Therefore,
1353 : * what we do is convert the mergeclause list to a list of canonical
1354 : * pathkeys, and then consider different orderings of the pathkeys.
1355 : *
1356 : * Generating a path for *every* permutation of the pathkeys doesn't seem
1357 : * like a winning strategy; the cost in planning time is too high. For
1358 : * now, we generate one path for each pathkey, listing that pathkey first
1359 : * and the rest in random order. This should allow at least a one-clause
1360 : * mergejoin without re-sorting against any other possible mergejoin
1361 : * partner path. But if we've not guessed the right ordering of secondary
1362 : * keys, we may end up evaluating clauses as qpquals when they could have
1363 : * been done as mergeclauses. (In practice, it's rare that there's more
1364 : * than two or three mergeclauses, so expending a huge amount of thought
1365 : * on that is probably not worth it.)
1366 : *
1367 : * The pathkey order returned by select_outer_pathkeys_for_merge() has
1368 : * some heuristics behind it (see that function), so be sure to try it
1369 : * exactly as-is as well as making variants.
1370 : */
1371 471084 : all_pathkeys = select_outer_pathkeys_for_merge(root,
1372 : extra->mergeclause_list,
1373 : joinrel);
1374 :
1375 897678 : foreach(l, all_pathkeys)
1376 : {
1377 426594 : PathKey *front_pathkey = (PathKey *) lfirst(l);
1378 : List *cur_mergeclauses;
1379 : List *outerkeys;
1380 : List *innerkeys;
1381 : List *merge_pathkeys;
1382 :
1383 : /* Make a pathkey list with this guy first */
1384 426594 : if (l != list_head(all_pathkeys))
1385 38118 : outerkeys = lcons(front_pathkey,
1386 : list_delete_nth_cell(list_copy(all_pathkeys),
1387 : foreach_current_index(l)));
1388 : else
1389 388476 : outerkeys = all_pathkeys; /* no work at first one... */
1390 :
1391 : /* Sort the mergeclauses into the corresponding ordering */
1392 : cur_mergeclauses =
1393 426594 : find_mergeclauses_for_outer_pathkeys(root,
1394 : outerkeys,
1395 : extra->mergeclause_list);
1396 :
1397 : /* Should have used them all... */
1398 : Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
1399 :
1400 : /* Build sort pathkeys for the inner side */
1401 426594 : innerkeys = make_inner_pathkeys_for_merge(root,
1402 : cur_mergeclauses,
1403 : outerkeys);
1404 :
1405 : /* Build pathkeys representing output sort order */
1406 426594 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1407 : outerkeys);
1408 :
1409 : /*
1410 : * And now we can make the path.
1411 : *
1412 : * Note: it's possible that the cheapest paths will already be sorted
1413 : * properly. try_mergejoin_path will detect that case and suppress an
1414 : * explicit sort step, so we needn't do so here.
1415 : */
1416 426594 : try_mergejoin_path(root,
1417 : joinrel,
1418 : outer_path,
1419 : inner_path,
1420 : merge_pathkeys,
1421 : cur_mergeclauses,
1422 : outerkeys,
1423 : innerkeys,
1424 : jointype,
1425 : extra,
1426 : false);
1427 :
1428 : /*
1429 : * If we have partial outer and parallel safe inner path then try
1430 : * partial mergejoin path.
1431 : */
1432 426594 : if (cheapest_partial_outer && cheapest_safe_inner)
1433 12114 : try_partial_mergejoin_path(root,
1434 : joinrel,
1435 : cheapest_partial_outer,
1436 : cheapest_safe_inner,
1437 : merge_pathkeys,
1438 : cur_mergeclauses,
1439 : outerkeys,
1440 : innerkeys,
1441 : jointype,
1442 : extra);
1443 : }
1444 : }
1445 :
1446 : /*
1447 : * generate_mergejoin_paths
1448 : * Creates possible mergejoin paths for input outerpath.
1449 : *
1450 : * We generate mergejoins if mergejoin clauses are available. We have
1451 : * two ways to generate the inner path for a mergejoin: sort the cheapest
1452 : * inner path, or use an inner path that is already suitably ordered for the
1453 : * merge. If we have several mergeclauses, it could be that there is no inner
1454 : * path (or only a very expensive one) for the full list of mergeclauses, but
1455 : * better paths exist if we truncate the mergeclause list (thereby discarding
1456 : * some sort key requirements). So, we consider truncations of the
1457 : * mergeclause list as well as the full list. (Ideally we'd consider all
1458 : * subsets of the mergeclause list, but that seems way too expensive.)
1459 : */
1460 : static void
1461 893206 : generate_mergejoin_paths(PlannerInfo *root,
1462 : RelOptInfo *joinrel,
1463 : RelOptInfo *innerrel,
1464 : Path *outerpath,
1465 : JoinType jointype,
1466 : JoinPathExtraData *extra,
1467 : bool useallclauses,
1468 : Path *inner_cheapest_total,
1469 : List *merge_pathkeys,
1470 : bool is_partial)
1471 : {
1472 : List *mergeclauses;
1473 : List *innersortkeys;
1474 : List *trialsortkeys;
1475 : Path *cheapest_startup_inner;
1476 : Path *cheapest_total_inner;
1477 893206 : JoinType save_jointype = jointype;
1478 : int num_sortkeys;
1479 : int sortkeycnt;
1480 :
1481 893206 : if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
1482 6320 : jointype = JOIN_INNER;
1483 :
1484 : /* Look for useful mergeclauses (if any) */
1485 : mergeclauses =
1486 893206 : find_mergeclauses_for_outer_pathkeys(root,
1487 : outerpath->pathkeys,
1488 : extra->mergeclause_list);
1489 :
1490 : /*
1491 : * Done with this outer path if no chance for a mergejoin.
1492 : *
1493 : * Special corner case: for "x FULL JOIN y ON true", there will be no join
1494 : * clauses at all. Ordinarily we'd generate a clauseless nestloop path,
1495 : * but since mergejoin is our only join type that supports FULL JOIN
1496 : * without any join clauses, it's necessary to generate a clauseless
1497 : * mergejoin path instead.
1498 : */
1499 893206 : if (mergeclauses == NIL)
1500 : {
1501 594098 : if (jointype == JOIN_FULL)
1502 : /* okay to try for mergejoin */ ;
1503 : else
1504 590918 : return;
1505 : }
1506 379010 : if (useallclauses &&
1507 76722 : list_length(mergeclauses) != list_length(extra->mergeclause_list))
1508 9592 : return;
1509 :
1510 : /* Compute the required ordering of the inner path */
1511 292696 : innersortkeys = make_inner_pathkeys_for_merge(root,
1512 : mergeclauses,
1513 : outerpath->pathkeys);
1514 :
1515 : /*
1516 : * Generate a mergejoin on the basis of sorting the cheapest inner. Since
1517 : * a sort will be needed, only cheapest total cost matters. (But
1518 : * try_mergejoin_path will do the right thing if inner_cheapest_total is
1519 : * already correctly sorted.)
1520 : */
1521 292696 : try_mergejoin_path(root,
1522 : joinrel,
1523 : outerpath,
1524 : inner_cheapest_total,
1525 : merge_pathkeys,
1526 : mergeclauses,
1527 : NIL,
1528 : innersortkeys,
1529 : jointype,
1530 : extra,
1531 : is_partial);
1532 :
1533 : /* Can't do anything else if inner path needs to be unique'd */
1534 292696 : if (save_jointype == JOIN_UNIQUE_INNER)
1535 1218 : return;
1536 :
1537 : /*
1538 : * Look for presorted inner paths that satisfy the innersortkey list ---
1539 : * or any truncation thereof, if we are allowed to build a mergejoin using
1540 : * a subset of the merge clauses. Here, we consider both cheap startup
1541 : * cost and cheap total cost.
1542 : *
1543 : * Currently we do not consider parameterized inner paths here. This
1544 : * interacts with decisions elsewhere that also discriminate against
1545 : * mergejoins with parameterized inputs; see comments in
1546 : * src/backend/optimizer/README.
1547 : *
1548 : * As we shorten the sortkey list, we should consider only paths that are
1549 : * strictly cheaper than (in particular, not the same as) any path found
1550 : * in an earlier iteration. Otherwise we'd be intentionally using fewer
1551 : * merge keys than a given path allows (treating the rest as plain
1552 : * joinquals), which is unlikely to be a good idea. Also, eliminating
1553 : * paths here on the basis of compare_path_costs is a lot cheaper than
1554 : * building the mergejoin path only to throw it away.
1555 : *
1556 : * If inner_cheapest_total is well enough sorted to have not required a
1557 : * sort in the path made above, we shouldn't make a duplicate path with
1558 : * it, either. We handle that case with the same logic that handles the
1559 : * previous consideration, by initializing the variables that track
1560 : * cheapest-so-far properly. Note that we do NOT reject
1561 : * inner_cheapest_total if we find it matches some shorter set of
1562 : * pathkeys. That case corresponds to using fewer mergekeys to avoid
1563 : * sorting inner_cheapest_total, whereas we did sort it above, so the
1564 : * plans being considered are different.
1565 : */
1566 291478 : if (pathkeys_contained_in(innersortkeys,
1567 : inner_cheapest_total->pathkeys))
1568 : {
1569 : /* inner_cheapest_total didn't require a sort */
1570 8586 : cheapest_startup_inner = inner_cheapest_total;
1571 8586 : cheapest_total_inner = inner_cheapest_total;
1572 : }
1573 : else
1574 : {
1575 : /* it did require a sort, at least for the full set of keys */
1576 282892 : cheapest_startup_inner = NULL;
1577 282892 : cheapest_total_inner = NULL;
1578 : }
1579 291478 : num_sortkeys = list_length(innersortkeys);
1580 291478 : if (num_sortkeys > 1 && !useallclauses)
1581 8636 : trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
1582 : else
1583 282842 : trialsortkeys = innersortkeys; /* won't really truncate */
1584 :
1585 524598 : for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
1586 : {
1587 : Path *innerpath;
1588 300106 : List *newclauses = NIL;
1589 :
1590 : /*
1591 : * Look for an inner path ordered well enough for the first
1592 : * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
1593 : * destructively, which is why we made a copy...
1594 : */
1595 300106 : trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
1596 300106 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1597 : trialsortkeys,
1598 : NULL,
1599 : TOTAL_COST,
1600 : is_partial);
1601 300106 : if (innerpath != NULL &&
1602 13220 : (cheapest_total_inner == NULL ||
1603 13220 : compare_path_costs(innerpath, cheapest_total_inner,
1604 : TOTAL_COST) < 0))
1605 : {
1606 : /* Found a cheap (or even-cheaper) sorted path */
1607 : /* Select the right mergeclauses, if we didn't already */
1608 175196 : if (sortkeycnt < num_sortkeys)
1609 : {
1610 : newclauses =
1611 2292 : trim_mergeclauses_for_inner_pathkeys(root,
1612 : mergeclauses,
1613 : trialsortkeys);
1614 : Assert(newclauses != NIL);
1615 : }
1616 : else
1617 172904 : newclauses = mergeclauses;
1618 175196 : try_mergejoin_path(root,
1619 : joinrel,
1620 : outerpath,
1621 : innerpath,
1622 : merge_pathkeys,
1623 : newclauses,
1624 : NIL,
1625 : NIL,
1626 : jointype,
1627 : extra,
1628 : is_partial);
1629 175196 : cheapest_total_inner = innerpath;
1630 : }
1631 : /* Same on the basis of cheapest startup cost ... */
1632 300106 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1633 : trialsortkeys,
1634 : NULL,
1635 : STARTUP_COST,
1636 : is_partial);
1637 300106 : if (innerpath != NULL &&
1638 13220 : (cheapest_startup_inner == NULL ||
1639 13220 : compare_path_costs(innerpath, cheapest_startup_inner,
1640 : STARTUP_COST) < 0))
1641 : {
1642 : /* Found a cheap (or even-cheaper) sorted path */
1643 174020 : if (innerpath != cheapest_total_inner)
1644 : {
1645 : /*
1646 : * Avoid rebuilding clause list if we already made one; saves
1647 : * memory in big join trees...
1648 : */
1649 798 : if (newclauses == NIL)
1650 : {
1651 2 : if (sortkeycnt < num_sortkeys)
1652 : {
1653 : newclauses =
1654 0 : trim_mergeclauses_for_inner_pathkeys(root,
1655 : mergeclauses,
1656 : trialsortkeys);
1657 : Assert(newclauses != NIL);
1658 : }
1659 : else
1660 2 : newclauses = mergeclauses;
1661 : }
1662 798 : try_mergejoin_path(root,
1663 : joinrel,
1664 : outerpath,
1665 : innerpath,
1666 : merge_pathkeys,
1667 : newclauses,
1668 : NIL,
1669 : NIL,
1670 : jointype,
1671 : extra,
1672 : is_partial);
1673 : }
1674 174020 : cheapest_startup_inner = innerpath;
1675 : }
1676 :
1677 : /*
1678 : * Don't consider truncated sortkeys if we need all clauses.
1679 : */
1680 300106 : if (useallclauses)
1681 66986 : break;
1682 : }
1683 : }
1684 :
1685 : /*
1686 : * match_unsorted_outer
1687 : * Creates possible join paths for processing a single join relation
1688 : * 'joinrel' by employing either iterative substitution or
1689 : * mergejoining on each of its possible outer paths (considering
1690 : * only outer paths that are already ordered well enough for merging).
1691 : *
1692 : * We always generate a nestloop path for each available outer path.
1693 : * In fact we may generate as many as five: one on the cheapest-total-cost
1694 : * inner path, one on the same with materialization, one on the
1695 : * cheapest-startup-cost inner path (if different), one on the
1696 : * cheapest-total inner-indexscan path (if any), and one on the
1697 : * cheapest-startup inner-indexscan path (if different).
1698 : *
1699 : * We also consider mergejoins if mergejoin clauses are available. See
1700 : * detailed comments in generate_mergejoin_paths.
1701 : *
1702 : * 'joinrel' is the join relation
1703 : * 'outerrel' is the outer join relation
1704 : * 'innerrel' is the inner join relation
1705 : * 'jointype' is the type of join to do
1706 : * 'extra' contains additional input values
1707 : */
1708 : static void
1709 491772 : match_unsorted_outer(PlannerInfo *root,
1710 : RelOptInfo *joinrel,
1711 : RelOptInfo *outerrel,
1712 : RelOptInfo *innerrel,
1713 : JoinType jointype,
1714 : JoinPathExtraData *extra)
1715 : {
1716 491772 : JoinType save_jointype = jointype;
1717 : bool nestjoinOK;
1718 : bool useallclauses;
1719 491772 : Path *inner_cheapest_total = innerrel->cheapest_total_path;
1720 491772 : Path *matpath = NULL;
1721 : ListCell *lc1;
1722 :
1723 : /*
1724 : * Nestloop only supports inner, left, semi, and anti joins. Also, if we
1725 : * are doing a right, right-anti or full mergejoin, we must use *all* the
1726 : * mergeclauses as join clauses, else we will not have a valid plan.
1727 : * (Although these two flags are currently inverses, keep them separate
1728 : * for clarity and possible future changes.)
1729 : */
1730 491772 : switch (jointype)
1731 : {
1732 401632 : case JOIN_INNER:
1733 : case JOIN_LEFT:
1734 : case JOIN_SEMI:
1735 : case JOIN_ANTI:
1736 401632 : nestjoinOK = true;
1737 401632 : useallclauses = false;
1738 401632 : break;
1739 82968 : case JOIN_RIGHT:
1740 : case JOIN_RIGHT_ANTI:
1741 : case JOIN_FULL:
1742 82968 : nestjoinOK = false;
1743 82968 : useallclauses = true;
1744 82968 : break;
1745 7172 : case JOIN_UNIQUE_OUTER:
1746 : case JOIN_UNIQUE_INNER:
1747 7172 : jointype = JOIN_INNER;
1748 7172 : nestjoinOK = true;
1749 7172 : useallclauses = false;
1750 7172 : break;
1751 0 : default:
1752 0 : elog(ERROR, "unrecognized join type: %d",
1753 : (int) jointype);
1754 : nestjoinOK = false; /* keep compiler quiet */
1755 : useallclauses = false;
1756 : break;
1757 : }
1758 :
1759 : /*
1760 : * If inner_cheapest_total is parameterized by the outer rel, ignore it;
1761 : * we will consider it below as a member of cheapest_parameterized_paths,
1762 : * but the other possibilities considered in this routine aren't usable.
1763 : */
1764 491772 : if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
1765 10362 : inner_cheapest_total = NULL;
1766 :
1767 : /*
1768 : * If we need to unique-ify the inner path, we will consider only the
1769 : * cheapest-total inner.
1770 : */
1771 491772 : if (save_jointype == JOIN_UNIQUE_INNER)
1772 : {
1773 : /* No way to do this with an inner path parameterized by outer rel */
1774 3586 : if (inner_cheapest_total == NULL)
1775 0 : return;
1776 : inner_cheapest_total = (Path *)
1777 3586 : create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
1778 : Assert(inner_cheapest_total);
1779 : }
1780 488186 : else if (nestjoinOK)
1781 : {
1782 : /*
1783 : * Consider materializing the cheapest inner path, unless
1784 : * enable_material is off or the path in question materializes its
1785 : * output anyway.
1786 : */
1787 405218 : if (enable_material && inner_cheapest_total != NULL &&
1788 394430 : !ExecMaterializesOutput(inner_cheapest_total->pathtype))
1789 : matpath = (Path *)
1790 376530 : create_material_path(innerrel, inner_cheapest_total);
1791 : }
1792 :
1793 1574650 : foreach(lc1, outerrel->pathlist)
1794 : {
1795 1082878 : Path *outerpath = (Path *) lfirst(lc1);
1796 : List *merge_pathkeys;
1797 :
1798 : /*
1799 : * We cannot use an outer path that is parameterized by the inner rel.
1800 : */
1801 1082878 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
1802 187564 : continue;
1803 :
1804 : /*
1805 : * If we need to unique-ify the outer path, it's pointless to consider
1806 : * any but the cheapest outer. (XXX we don't consider parameterized
1807 : * outers, nor inners, for unique-ified cases. Should we?)
1808 : */
1809 895314 : if (save_jointype == JOIN_UNIQUE_OUTER)
1810 : {
1811 4618 : if (outerpath != outerrel->cheapest_total_path)
1812 1032 : continue;
1813 3586 : outerpath = (Path *) create_unique_path(root, outerrel,
1814 : outerpath, extra->sjinfo);
1815 : Assert(outerpath);
1816 : }
1817 :
1818 : /*
1819 : * The result will have this sort order (even if it is implemented as
1820 : * a nestloop, and even if some of the mergeclauses are implemented by
1821 : * qpquals rather than as true mergeclauses):
1822 : */
1823 894282 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1824 : outerpath->pathkeys);
1825 :
1826 894282 : if (save_jointype == JOIN_UNIQUE_INNER)
1827 : {
1828 : /*
1829 : * Consider nestloop join, but only with the unique-ified cheapest
1830 : * inner path
1831 : */
1832 5474 : try_nestloop_path(root,
1833 : joinrel,
1834 : outerpath,
1835 : inner_cheapest_total,
1836 : merge_pathkeys,
1837 : jointype,
1838 : extra);
1839 : }
1840 888808 : else if (nestjoinOK)
1841 : {
1842 : /*
1843 : * Consider nestloop joins using this outer path and various
1844 : * available paths for the inner relation. We consider the
1845 : * cheapest-total paths for each available parameterization of the
1846 : * inner relation, including the unparameterized case.
1847 : */
1848 : ListCell *lc2;
1849 :
1850 1916456 : foreach(lc2, innerrel->cheapest_parameterized_paths)
1851 : {
1852 1188220 : Path *innerpath = (Path *) lfirst(lc2);
1853 : Path *mpath;
1854 :
1855 1188220 : try_nestloop_path(root,
1856 : joinrel,
1857 : outerpath,
1858 : innerpath,
1859 : merge_pathkeys,
1860 : jointype,
1861 : extra);
1862 :
1863 : /*
1864 : * Try generating a memoize path and see if that makes the
1865 : * nested loop any cheaper.
1866 : */
1867 1188220 : mpath = get_memoize_path(root, innerrel, outerrel,
1868 : innerpath, outerpath, jointype,
1869 : extra);
1870 1188220 : if (mpath != NULL)
1871 197438 : try_nestloop_path(root,
1872 : joinrel,
1873 : outerpath,
1874 : mpath,
1875 : merge_pathkeys,
1876 : jointype,
1877 : extra);
1878 : }
1879 :
1880 : /* Also consider materialized form of the cheapest inner path */
1881 728236 : if (matpath != NULL)
1882 683524 : try_nestloop_path(root,
1883 : joinrel,
1884 : outerpath,
1885 : matpath,
1886 : merge_pathkeys,
1887 : jointype,
1888 : extra);
1889 : }
1890 :
1891 : /* Can't do anything else if outer path needs to be unique'd */
1892 894282 : if (save_jointype == JOIN_UNIQUE_OUTER)
1893 3586 : continue;
1894 :
1895 : /* Can't do anything else if inner rel is parameterized by outer */
1896 890696 : if (inner_cheapest_total == NULL)
1897 10684 : continue;
1898 :
1899 : /* Generate merge join paths */
1900 880012 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
1901 : save_jointype, extra, useallclauses,
1902 : inner_cheapest_total, merge_pathkeys,
1903 : false);
1904 : }
1905 :
1906 : /*
1907 : * Consider partial nestloop and mergejoin plan if outerrel has any
1908 : * partial path and the joinrel is parallel-safe. However, we can't
1909 : * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
1910 : * therefore we won't be able to properly guarantee uniqueness. Nor can
1911 : * we handle joins needing lateral rels, since partial paths must not be
1912 : * parameterized. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT and
1913 : * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
1914 : */
1915 491772 : if (joinrel->consider_parallel &&
1916 413748 : save_jointype != JOIN_UNIQUE_OUTER &&
1917 411284 : save_jointype != JOIN_FULL &&
1918 338372 : save_jointype != JOIN_RIGHT &&
1919 337230 : save_jointype != JOIN_RIGHT_ANTI &&
1920 337230 : outerrel->partial_pathlist != NIL &&
1921 9858 : bms_is_empty(joinrel->lateral_relids))
1922 : {
1923 9858 : if (nestjoinOK)
1924 9858 : consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
1925 : save_jointype, extra);
1926 :
1927 : /*
1928 : * If inner_cheapest_total is NULL or non parallel-safe then find the
1929 : * cheapest total parallel safe path. If doing JOIN_UNIQUE_INNER, we
1930 : * can't use any alternative inner path.
1931 : */
1932 9858 : if (inner_cheapest_total == NULL ||
1933 9720 : !inner_cheapest_total->parallel_safe)
1934 : {
1935 486 : if (save_jointype == JOIN_UNIQUE_INNER)
1936 6 : return;
1937 :
1938 480 : inner_cheapest_total = get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1939 : }
1940 :
1941 9852 : if (inner_cheapest_total)
1942 9708 : consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
1943 : save_jointype, extra,
1944 : inner_cheapest_total);
1945 : }
1946 : }
1947 :
1948 : /*
1949 : * consider_parallel_mergejoin
1950 : * Try to build partial paths for a joinrel by joining a partial path
1951 : * for the outer relation to a complete path for the inner relation.
1952 : *
1953 : * 'joinrel' is the join relation
1954 : * 'outerrel' is the outer join relation
1955 : * 'innerrel' is the inner join relation
1956 : * 'jointype' is the type of join to do
1957 : * 'extra' contains additional input values
1958 : * 'inner_cheapest_total' cheapest total path for innerrel
1959 : */
1960 : static void
1961 9708 : consider_parallel_mergejoin(PlannerInfo *root,
1962 : RelOptInfo *joinrel,
1963 : RelOptInfo *outerrel,
1964 : RelOptInfo *innerrel,
1965 : JoinType jointype,
1966 : JoinPathExtraData *extra,
1967 : Path *inner_cheapest_total)
1968 : {
1969 : ListCell *lc1;
1970 :
1971 : /* generate merge join path for each partial outer path */
1972 22902 : foreach(lc1, outerrel->partial_pathlist)
1973 : {
1974 13194 : Path *outerpath = (Path *) lfirst(lc1);
1975 : List *merge_pathkeys;
1976 :
1977 : /*
1978 : * Figure out what useful ordering any paths we create will have.
1979 : */
1980 13194 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1981 : outerpath->pathkeys);
1982 :
1983 13194 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
1984 : extra, false, inner_cheapest_total,
1985 : merge_pathkeys, true);
1986 : }
1987 9708 : }
1988 :
1989 : /*
1990 : * consider_parallel_nestloop
1991 : * Try to build partial paths for a joinrel by joining a partial path for the
1992 : * outer relation to a complete path for the inner relation.
1993 : *
1994 : * 'joinrel' is the join relation
1995 : * 'outerrel' is the outer join relation
1996 : * 'innerrel' is the inner join relation
1997 : * 'jointype' is the type of join to do
1998 : * 'extra' contains additional input values
1999 : */
2000 : static void
2001 9858 : consider_parallel_nestloop(PlannerInfo *root,
2002 : RelOptInfo *joinrel,
2003 : RelOptInfo *outerrel,
2004 : RelOptInfo *innerrel,
2005 : JoinType jointype,
2006 : JoinPathExtraData *extra)
2007 : {
2008 9858 : JoinType save_jointype = jointype;
2009 : ListCell *lc1;
2010 :
2011 9858 : if (jointype == JOIN_UNIQUE_INNER)
2012 564 : jointype = JOIN_INNER;
2013 :
2014 23250 : foreach(lc1, outerrel->partial_pathlist)
2015 : {
2016 13392 : Path *outerpath = (Path *) lfirst(lc1);
2017 : List *pathkeys;
2018 : ListCell *lc2;
2019 :
2020 : /* Figure out what useful ordering any paths we create will have. */
2021 13392 : pathkeys = build_join_pathkeys(root, joinrel, jointype,
2022 : outerpath->pathkeys);
2023 :
2024 : /*
2025 : * Try the cheapest parameterized paths; only those which will produce
2026 : * an unparameterized path when joined to this outerrel will survive
2027 : * try_partial_nestloop_path. The cheapest unparameterized path is
2028 : * also in this list.
2029 : */
2030 35136 : foreach(lc2, innerrel->cheapest_parameterized_paths)
2031 : {
2032 21744 : Path *innerpath = (Path *) lfirst(lc2);
2033 : Path *mpath;
2034 :
2035 : /* Can't join to an inner path that is not parallel-safe */
2036 21744 : if (!innerpath->parallel_safe)
2037 396 : continue;
2038 :
2039 : /*
2040 : * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
2041 : * cheapest_total_path, and we have to unique-ify it. (We might
2042 : * be able to relax this to allow other safe, unparameterized
2043 : * inner paths, but right now create_unique_path is not on board
2044 : * with that.)
2045 : */
2046 21348 : if (save_jointype == JOIN_UNIQUE_INNER)
2047 : {
2048 1722 : if (innerpath != innerrel->cheapest_total_path)
2049 876 : continue;
2050 846 : innerpath = (Path *) create_unique_path(root, innerrel,
2051 : innerpath,
2052 : extra->sjinfo);
2053 : Assert(innerpath);
2054 : }
2055 :
2056 20472 : try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
2057 : pathkeys, jointype, extra);
2058 :
2059 : /*
2060 : * Try generating a memoize path and see if that makes the nested
2061 : * loop any cheaper.
2062 : */
2063 20472 : mpath = get_memoize_path(root, innerrel, outerrel,
2064 : innerpath, outerpath, jointype,
2065 : extra);
2066 20472 : if (mpath != NULL)
2067 4960 : try_partial_nestloop_path(root, joinrel, outerpath, mpath,
2068 : pathkeys, jointype, extra);
2069 : }
2070 : }
2071 9858 : }
2072 :
2073 : /*
2074 : * hash_inner_and_outer
2075 : * Create hashjoin join paths by explicitly hashing both the outer and
2076 : * inner keys of each available hash clause.
2077 : *
2078 : * 'joinrel' is the join relation
2079 : * 'outerrel' is the outer join relation
2080 : * 'innerrel' is the inner join relation
2081 : * 'jointype' is the type of join to do
2082 : * 'extra' contains additional input values
2083 : */
2084 : static void
2085 494232 : hash_inner_and_outer(PlannerInfo *root,
2086 : RelOptInfo *joinrel,
2087 : RelOptInfo *outerrel,
2088 : RelOptInfo *innerrel,
2089 : JoinType jointype,
2090 : JoinPathExtraData *extra)
2091 : {
2092 494232 : JoinType save_jointype = jointype;
2093 494232 : bool isouterjoin = IS_OUTER_JOIN(jointype);
2094 : List *hashclauses;
2095 : ListCell *l;
2096 :
2097 : /*
2098 : * We need to build only one hashclauses list for any given pair of outer
2099 : * and inner relations; all of the hashable clauses will be used as keys.
2100 : *
2101 : * Scan the join's restrictinfo list to find hashjoinable clauses that are
2102 : * usable with this pair of sub-relations.
2103 : */
2104 494232 : hashclauses = NIL;
2105 1003016 : foreach(l, extra->restrictlist)
2106 : {
2107 508784 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2108 :
2109 : /*
2110 : * If processing an outer join, only use its own join clauses for
2111 : * hashing. For inner joins we need not be so picky.
2112 : */
2113 508784 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
2114 7964 : continue;
2115 :
2116 500820 : if (!restrictinfo->can_join ||
2117 447690 : restrictinfo->hashjoinoperator == InvalidOid)
2118 68412 : continue; /* not hashjoinable */
2119 :
2120 : /*
2121 : * Check if clause has the form "outer op inner" or "inner op outer".
2122 : */
2123 432408 : if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
2124 144 : continue; /* no good for these input relations */
2125 :
2126 432264 : hashclauses = lappend(hashclauses, restrictinfo);
2127 : }
2128 :
2129 : /* If we found any usable hashclauses, make paths */
2130 494232 : if (hashclauses)
2131 : {
2132 : /*
2133 : * We consider both the cheapest-total-cost and cheapest-startup-cost
2134 : * outer paths. There's no need to consider any but the
2135 : * cheapest-total-cost inner path, however.
2136 : */
2137 394144 : Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
2138 394144 : Path *cheapest_total_outer = outerrel->cheapest_total_path;
2139 394144 : Path *cheapest_total_inner = innerrel->cheapest_total_path;
2140 :
2141 : /*
2142 : * If either cheapest-total path is parameterized by the other rel, we
2143 : * can't use a hashjoin. (There's no use looking for alternative
2144 : * input paths, since these should already be the least-parameterized
2145 : * available paths.)
2146 : */
2147 394144 : if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
2148 393762 : PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
2149 764 : return;
2150 :
2151 : /* Unique-ify if need be; we ignore parameterized possibilities */
2152 393380 : if (jointype == JOIN_UNIQUE_OUTER)
2153 : {
2154 : cheapest_total_outer = (Path *)
2155 1726 : create_unique_path(root, outerrel,
2156 : cheapest_total_outer, extra->sjinfo);
2157 : Assert(cheapest_total_outer);
2158 1726 : jointype = JOIN_INNER;
2159 1726 : try_hashjoin_path(root,
2160 : joinrel,
2161 : cheapest_total_outer,
2162 : cheapest_total_inner,
2163 : hashclauses,
2164 : jointype,
2165 : extra);
2166 : /* no possibility of cheap startup here */
2167 : }
2168 391654 : else if (jointype == JOIN_UNIQUE_INNER)
2169 : {
2170 : cheapest_total_inner = (Path *)
2171 1726 : create_unique_path(root, innerrel,
2172 : cheapest_total_inner, extra->sjinfo);
2173 : Assert(cheapest_total_inner);
2174 1726 : jointype = JOIN_INNER;
2175 1726 : try_hashjoin_path(root,
2176 : joinrel,
2177 : cheapest_total_outer,
2178 : cheapest_total_inner,
2179 : hashclauses,
2180 : jointype,
2181 : extra);
2182 1726 : if (cheapest_startup_outer != NULL &&
2183 : cheapest_startup_outer != cheapest_total_outer)
2184 56 : try_hashjoin_path(root,
2185 : joinrel,
2186 : cheapest_startup_outer,
2187 : cheapest_total_inner,
2188 : hashclauses,
2189 : jointype,
2190 : extra);
2191 : }
2192 : else
2193 : {
2194 : /*
2195 : * For other jointypes, we consider the cheapest startup outer
2196 : * together with the cheapest total inner, and then consider
2197 : * pairings of cheapest-total paths including parameterized ones.
2198 : * There is no use in generating parameterized paths on the basis
2199 : * of possibly cheap startup cost, so this is sufficient.
2200 : */
2201 : ListCell *lc1;
2202 : ListCell *lc2;
2203 :
2204 389928 : if (cheapest_startup_outer != NULL)
2205 389310 : try_hashjoin_path(root,
2206 : joinrel,
2207 : cheapest_startup_outer,
2208 : cheapest_total_inner,
2209 : hashclauses,
2210 : jointype,
2211 : extra);
2212 :
2213 994450 : foreach(lc1, outerrel->cheapest_parameterized_paths)
2214 : {
2215 604522 : Path *outerpath = (Path *) lfirst(lc1);
2216 :
2217 : /*
2218 : * We cannot use an outer path that is parameterized by the
2219 : * inner rel.
2220 : */
2221 604522 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
2222 179474 : continue;
2223 :
2224 1094116 : foreach(lc2, innerrel->cheapest_parameterized_paths)
2225 : {
2226 669068 : Path *innerpath = (Path *) lfirst(lc2);
2227 :
2228 : /*
2229 : * We cannot use an inner path that is parameterized by
2230 : * the outer rel, either.
2231 : */
2232 669068 : if (PATH_PARAM_BY_REL(innerpath, outerrel))
2233 200242 : continue;
2234 :
2235 468826 : if (outerpath == cheapest_startup_outer &&
2236 : innerpath == cheapest_total_inner)
2237 337374 : continue; /* already tried it */
2238 :
2239 131452 : try_hashjoin_path(root,
2240 : joinrel,
2241 : outerpath,
2242 : innerpath,
2243 : hashclauses,
2244 : jointype,
2245 : extra);
2246 : }
2247 : }
2248 : }
2249 :
2250 : /*
2251 : * If the joinrel is parallel-safe, we may be able to consider a
2252 : * partial hash join. However, we can't handle JOIN_UNIQUE_OUTER,
2253 : * because the outer path will be partial, and therefore we won't be
2254 : * able to properly guarantee uniqueness. Also, the resulting path
2255 : * must not be parameterized.
2256 : */
2257 393380 : if (joinrel->consider_parallel &&
2258 352556 : save_jointype != JOIN_UNIQUE_OUTER &&
2259 352556 : outerrel->partial_pathlist != NIL &&
2260 12666 : bms_is_empty(joinrel->lateral_relids))
2261 : {
2262 : Path *cheapest_partial_outer;
2263 12666 : Path *cheapest_partial_inner = NULL;
2264 12666 : Path *cheapest_safe_inner = NULL;
2265 :
2266 12666 : cheapest_partial_outer =
2267 12666 : (Path *) linitial(outerrel->partial_pathlist);
2268 :
2269 : /*
2270 : * Can we use a partial inner plan too, so that we can build a
2271 : * shared hash table in parallel? We can't handle
2272 : * JOIN_UNIQUE_INNER because we can't guarantee uniqueness.
2273 : */
2274 12666 : if (innerrel->partial_pathlist != NIL &&
2275 11850 : save_jointype != JOIN_UNIQUE_INNER &&
2276 : enable_parallel_hash)
2277 : {
2278 11574 : cheapest_partial_inner =
2279 11574 : (Path *) linitial(innerrel->partial_pathlist);
2280 11574 : try_partial_hashjoin_path(root, joinrel,
2281 : cheapest_partial_outer,
2282 : cheapest_partial_inner,
2283 : hashclauses, jointype, extra,
2284 : true /* parallel_hash */ );
2285 : }
2286 :
2287 : /*
2288 : * Normally, given that the joinrel is parallel-safe, the cheapest
2289 : * total inner path will also be parallel-safe, but if not, we'll
2290 : * have to search for the cheapest safe, unparameterized inner
2291 : * path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
2292 : * inner path. If full, right, or right-anti join, we can't use
2293 : * parallelism (building the hash table in each backend) because
2294 : * no one process has all the match bits.
2295 : */
2296 12666 : if (save_jointype == JOIN_FULL ||
2297 8850 : save_jointype == JOIN_RIGHT ||
2298 : save_jointype == JOIN_RIGHT_ANTI)
2299 4146 : cheapest_safe_inner = NULL;
2300 8520 : else if (cheapest_total_inner->parallel_safe)
2301 8292 : cheapest_safe_inner = cheapest_total_inner;
2302 228 : else if (save_jointype != JOIN_UNIQUE_INNER)
2303 : cheapest_safe_inner =
2304 222 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
2305 :
2306 12666 : if (cheapest_safe_inner != NULL)
2307 8508 : try_partial_hashjoin_path(root, joinrel,
2308 : cheapest_partial_outer,
2309 : cheapest_safe_inner,
2310 : hashclauses, jointype, extra,
2311 : false /* parallel_hash */ );
2312 : }
2313 : }
2314 : }
2315 :
2316 : /*
2317 : * select_mergejoin_clauses
2318 : * Select mergejoin clauses that are usable for a particular join.
2319 : * Returns a list of RestrictInfo nodes for those clauses.
2320 : *
2321 : * *mergejoin_allowed is normally set to true, but it is set to false if
2322 : * this is a right/right-anti/full join and there are nonmergejoinable join
2323 : * clauses. The executor's mergejoin machinery cannot handle such cases, so
2324 : * we have to avoid generating a mergejoin plan. (Note that this flag does
2325 : * NOT consider whether there are actually any mergejoinable clauses. This is
2326 : * correct because in some cases we need to build a clauseless mergejoin.
2327 : * Simply returning NIL is therefore not enough to distinguish safe from
2328 : * unsafe cases.)
2329 : *
2330 : * We also mark each selected RestrictInfo to show which side is currently
2331 : * being considered as outer. These are transient markings that are only
2332 : * good for the duration of the current add_paths_to_joinrel() call!
2333 : *
2334 : * We examine each restrictinfo clause known for the join to see
2335 : * if it is mergejoinable and involves vars from the two sub-relations
2336 : * currently of interest.
2337 : */
2338 : static List *
2339 495614 : select_mergejoin_clauses(PlannerInfo *root,
2340 : RelOptInfo *joinrel,
2341 : RelOptInfo *outerrel,
2342 : RelOptInfo *innerrel,
2343 : List *restrictlist,
2344 : JoinType jointype,
2345 : bool *mergejoin_allowed)
2346 : {
2347 495614 : List *result_list = NIL;
2348 495614 : bool isouterjoin = IS_OUTER_JOIN(jointype);
2349 495614 : bool have_nonmergeable_joinclause = false;
2350 : ListCell *l;
2351 :
2352 1005764 : foreach(l, restrictlist)
2353 : {
2354 510150 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2355 :
2356 : /*
2357 : * If processing an outer join, only use its own join clauses in the
2358 : * merge. For inner joins we can use pushed-down clauses too. (Note:
2359 : * we don't set have_nonmergeable_joinclause here because pushed-down
2360 : * clauses will become otherquals not joinquals.)
2361 : */
2362 510150 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
2363 7988 : continue;
2364 :
2365 : /* Check that clause is a mergeable operator clause */
2366 502162 : if (!restrictinfo->can_join ||
2367 449032 : restrictinfo->mergeopfamilies == NIL)
2368 : {
2369 : /*
2370 : * The executor can handle extra joinquals that are constants, but
2371 : * not anything else, when doing right/right-anti/full merge join.
2372 : * (The reason to support constants is so we can do FULL JOIN ON
2373 : * FALSE.)
2374 : */
2375 67306 : if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
2376 59398 : have_nonmergeable_joinclause = true;
2377 67306 : continue; /* not mergejoinable */
2378 : }
2379 :
2380 : /*
2381 : * Check if clause has the form "outer op inner" or "inner op outer".
2382 : */
2383 434856 : if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
2384 : {
2385 768 : have_nonmergeable_joinclause = true;
2386 768 : continue; /* no good for these input relations */
2387 : }
2388 :
2389 : /*
2390 : * Insist that each side have a non-redundant eclass. This
2391 : * restriction is needed because various bits of the planner expect
2392 : * that each clause in a merge be associable with some pathkey in a
2393 : * canonical pathkey list, but redundant eclasses can't appear in
2394 : * canonical sort orderings. (XXX it might be worth relaxing this,
2395 : * but not enough time to address it for 8.3.)
2396 : */
2397 434088 : update_mergeclause_eclasses(root, restrictinfo);
2398 :
2399 434088 : if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
2400 434028 : EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
2401 : {
2402 140 : have_nonmergeable_joinclause = true;
2403 140 : continue; /* can't handle redundant eclasses */
2404 : }
2405 :
2406 433948 : result_list = lappend(result_list, restrictinfo);
2407 : }
2408 :
2409 : /*
2410 : * Report whether mergejoin is allowed (see comment at top of function).
2411 : */
2412 495614 : switch (jointype)
2413 : {
2414 89830 : case JOIN_RIGHT:
2415 : case JOIN_RIGHT_ANTI:
2416 : case JOIN_FULL:
2417 89830 : *mergejoin_allowed = !have_nonmergeable_joinclause;
2418 89830 : break;
2419 405784 : default:
2420 405784 : *mergejoin_allowed = true;
2421 405784 : break;
2422 : }
2423 :
2424 495614 : return result_list;
2425 : }
|