Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * analyzejoins.c
4 : * Routines for simplifying joins after initial query analysis
5 : *
6 : * While we do a great deal of join simplification in prep/prepjointree.c,
7 : * certain optimizations cannot be performed at that stage for lack of
8 : * detailed information about the query. The routines here are invoked
9 : * after initsplan.c has done its work, and can do additional join removal
10 : * and simplification steps based on the information extracted. The penalty
11 : * is that we have to work harder to clean up after ourselves when we modify
12 : * the query, since the derived data structures have to be updated too.
13 : *
14 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
15 : * Portions Copyright (c) 1994, Regents of the University of California
16 : *
17 : *
18 : * IDENTIFICATION
19 : * src/backend/optimizer/plan/analyzejoins.c
20 : *
21 : *-------------------------------------------------------------------------
22 : */
23 : #include "postgres.h"
24 :
25 : #include "nodes/nodeFuncs.h"
26 : #include "optimizer/joininfo.h"
27 : #include "optimizer/optimizer.h"
28 : #include "optimizer/pathnode.h"
29 : #include "optimizer/paths.h"
30 : #include "optimizer/placeholder.h"
31 : #include "optimizer/planmain.h"
32 : #include "optimizer/restrictinfo.h"
33 : #include "utils/lsyscache.h"
34 :
35 : /* local functions */
36 : static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
37 : static void remove_rel_from_query(PlannerInfo *root, int relid,
38 : SpecialJoinInfo *sjinfo);
39 : static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
40 : int relid, int ojrelid);
41 : static void remove_rel_from_eclass(EquivalenceClass *ec,
42 : int relid, int ojrelid);
43 : static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
44 : static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
45 : static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
46 : List *clause_list);
47 : static Oid distinct_col_search(int colno, List *colnos, List *opids);
48 : static bool is_innerrel_unique_for(PlannerInfo *root,
49 : Relids joinrelids,
50 : Relids outerrelids,
51 : RelOptInfo *innerrel,
52 : JoinType jointype,
53 : List *restrictlist);
54 :
55 :
56 : /*
57 : * remove_useless_joins
58 : * Check for relations that don't actually need to be joined at all,
59 : * and remove them from the query.
60 : *
61 : * We are passed the current joinlist and return the updated list. Other
62 : * data structures that have to be updated are accessible via "root".
63 : */
64 : List *
65 300288 : remove_useless_joins(PlannerInfo *root, List *joinlist)
66 : {
67 : ListCell *lc;
68 :
69 : /*
70 : * We are only interested in relations that are left-joined to, so we can
71 : * scan the join_info_list to find them easily.
72 : */
73 300288 : restart:
74 336676 : foreach(lc, root->join_info_list)
75 : {
76 45956 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
77 : int innerrelid;
78 : int nremoved;
79 :
80 : /* Skip if not removable */
81 45956 : if (!join_is_removable(root, sjinfo))
82 36388 : continue;
83 :
84 : /*
85 : * Currently, join_is_removable can only succeed when the sjinfo's
86 : * righthand is a single baserel. Remove that rel from the query and
87 : * joinlist.
88 : */
89 9568 : innerrelid = bms_singleton_member(sjinfo->min_righthand);
90 :
91 9568 : remove_rel_from_query(root, innerrelid, sjinfo);
92 :
93 : /* We verify that exactly one reference gets removed from joinlist */
94 9568 : nremoved = 0;
95 9568 : joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
96 9568 : if (nremoved != 1)
97 0 : elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
98 :
99 : /*
100 : * We can delete this SpecialJoinInfo from the list too, since it's no
101 : * longer of interest. (Since we'll restart the foreach loop
102 : * immediately, we don't bother with foreach_delete_current.)
103 : */
104 9568 : root->join_info_list = list_delete_cell(root->join_info_list, lc);
105 :
106 : /*
107 : * Restart the scan. This is necessary to ensure we find all
108 : * removable joins independently of ordering of the join_info_list
109 : * (note that removal of attr_needed bits may make a join appear
110 : * removable that did not before).
111 : */
112 9568 : goto restart;
113 : }
114 :
115 290720 : return joinlist;
116 : }
117 :
118 : /*
119 : * join_is_removable
120 : * Check whether we need not perform this special join at all, because
121 : * it will just duplicate its left input.
122 : *
123 : * This is true for a left join for which the join condition cannot match
124 : * more than one inner-side row. (There are other possibly interesting
125 : * cases, but we don't have the infrastructure to prove them.) We also
126 : * have to check that the inner side doesn't generate any variables needed
127 : * above the join.
128 : */
129 : static bool
130 45956 : join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
131 : {
132 : int innerrelid;
133 : RelOptInfo *innerrel;
134 : Relids inputrelids;
135 : Relids joinrelids;
136 45956 : List *clause_list = NIL;
137 : ListCell *l;
138 : int attroff;
139 :
140 : /*
141 : * Must be a left join to a single baserel, else we aren't going to be
142 : * able to do anything with it.
143 : */
144 45956 : if (sjinfo->jointype != JOIN_LEFT)
145 6442 : return false;
146 :
147 39514 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
148 1212 : return false;
149 :
150 : /*
151 : * Never try to eliminate a left join to the query result rel. Although
152 : * the case is syntactically impossible in standard SQL, MERGE will build
153 : * a join tree that looks exactly like that.
154 : */
155 38302 : if (innerrelid == root->parse->resultRelation)
156 710 : return false;
157 :
158 37592 : innerrel = find_base_rel(root, innerrelid);
159 :
160 : /*
161 : * Before we go to the effort of checking whether any innerrel variables
162 : * are needed above the join, make a quick check to eliminate cases in
163 : * which we will surely be unable to prove uniqueness of the innerrel.
164 : */
165 37592 : if (!rel_supports_distinctness(root, innerrel))
166 2846 : return false;
167 :
168 : /* Compute the relid set for the join we are considering */
169 34746 : inputrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
170 : Assert(sjinfo->ojrelid != 0);
171 34746 : joinrelids = bms_copy(inputrelids);
172 34746 : joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
173 :
174 : /*
175 : * We can't remove the join if any inner-rel attributes are used above the
176 : * join. Here, "above" the join includes pushed-down conditions, so we
177 : * should reject if attr_needed includes the OJ's own relid; therefore,
178 : * compare to inputrelids not joinrelids.
179 : *
180 : * As a micro-optimization, it seems better to start with max_attr and
181 : * count down rather than starting with min_attr and counting up, on the
182 : * theory that the system attributes are somewhat less likely to be wanted
183 : * and should be tested last.
184 : */
185 307878 : for (attroff = innerrel->max_attr - innerrel->min_attr;
186 : attroff >= 0;
187 273132 : attroff--)
188 : {
189 298142 : if (!bms_is_subset(innerrel->attr_needed[attroff], inputrelids))
190 25010 : return false;
191 : }
192 :
193 : /*
194 : * Similarly check that the inner rel isn't needed by any PlaceHolderVars
195 : * that will be used above the join. The PHV case is a little bit more
196 : * complicated, because PHVs may have been assigned a ph_eval_at location
197 : * that includes the innerrel, yet their contained expression might not
198 : * actually reference the innerrel (it could be just a constant, for
199 : * instance). If such a PHV is due to be evaluated above the join then it
200 : * needn't prevent join removal.
201 : */
202 9880 : foreach(l, root->placeholder_list)
203 : {
204 174 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
205 :
206 174 : if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
207 30 : return false; /* it references innerrel laterally */
208 174 : if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
209 48 : continue; /* it definitely doesn't reference innerrel */
210 126 : if (bms_is_subset(phinfo->ph_needed, inputrelids))
211 6 : continue; /* PHV is not used above the join */
212 120 : if (!bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
213 30 : return false; /* it has to be evaluated below the join */
214 :
215 : /*
216 : * We need to be sure there will still be a place to evaluate the PHV
217 : * if we remove the join, ie that ph_eval_at wouldn't become empty.
218 : */
219 90 : if (!bms_overlap(sjinfo->min_lefthand, phinfo->ph_eval_at))
220 0 : return false; /* there isn't any other place to eval PHV */
221 : /* Check contained expression last, since this is a bit expensive */
222 90 : if (bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
223 90 : innerrel->relids))
224 0 : return false; /* contained expression references innerrel */
225 : }
226 :
227 : /*
228 : * Search for mergejoinable clauses that constrain the inner rel against
229 : * either the outer rel or a pseudoconstant. If an operator is
230 : * mergejoinable then it behaves like equality for some btree opclass, so
231 : * it's what we want. The mergejoinability test also eliminates clauses
232 : * containing volatile functions, which we couldn't depend on.
233 : */
234 19752 : foreach(l, innerrel->joininfo)
235 : {
236 10046 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
237 :
238 : /*
239 : * If the current join commutes with some other outer join(s) via
240 : * outer join identity 3, there will be multiple clones of its join
241 : * clauses in the joininfo list. We want to consider only the
242 : * has_clone form of such clauses. Processing more than one form
243 : * would be wasteful, and also some of the others would confuse the
244 : * RINFO_IS_PUSHED_DOWN test below.
245 : */
246 10046 : if (restrictinfo->is_clone)
247 136 : continue; /* ignore it */
248 :
249 : /*
250 : * If it's not a join clause for this outer join, we can't use it.
251 : * Note that if the clause is pushed-down, then it is logically from
252 : * above the outer join, even if it references no other rels (it might
253 : * be from WHERE, for example).
254 : */
255 9910 : if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
256 114 : continue; /* ignore; not useful here */
257 :
258 : /* Ignore if it's not a mergejoinable clause */
259 9796 : if (!restrictinfo->can_join ||
260 9720 : restrictinfo->mergeopfamilies == NIL)
261 76 : continue; /* not mergejoinable */
262 :
263 : /*
264 : * Check if the clause has the form "outer op inner" or "inner op
265 : * outer", and if so mark which side is inner.
266 : */
267 9720 : if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
268 : innerrel->relids))
269 6 : continue; /* no good for these input relations */
270 :
271 : /* OK, add to list */
272 9714 : clause_list = lappend(clause_list, restrictinfo);
273 : }
274 :
275 : /*
276 : * Now that we have the relevant equality join clauses, try to prove the
277 : * innerrel distinct.
278 : */
279 9706 : if (rel_is_distinct_for(root, innerrel, clause_list))
280 9568 : return true;
281 :
282 : /*
283 : * Some day it would be nice to check for other methods of establishing
284 : * distinctness.
285 : */
286 138 : return false;
287 : }
288 :
289 :
290 : /*
291 : * Remove the target relid and references to the target join from the
292 : * planner's data structures, having determined that there is no need
293 : * to include them in the query.
294 : *
295 : * We are not terribly thorough here. We only bother to update parts of
296 : * the planner's data structures that will actually be consulted later.
297 : */
298 : static void
299 9568 : remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
300 : {
301 9568 : RelOptInfo *rel = find_base_rel(root, relid);
302 9568 : int ojrelid = sjinfo->ojrelid;
303 : Relids joinrelids;
304 : Relids join_plus_commute;
305 : List *joininfos;
306 : Index rti;
307 : ListCell *l;
308 :
309 : /* Compute the relid set for the join we are considering */
310 9568 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
311 : Assert(ojrelid != 0);
312 9568 : joinrelids = bms_add_member(joinrelids, ojrelid);
313 :
314 : /*
315 : * Update all_baserels and related relid sets.
316 : */
317 9568 : root->all_baserels = bms_del_member(root->all_baserels, relid);
318 9568 : root->outer_join_rels = bms_del_member(root->outer_join_rels, ojrelid);
319 9568 : root->all_query_rels = bms_del_member(root->all_query_rels, relid);
320 9568 : root->all_query_rels = bms_del_member(root->all_query_rels, ojrelid);
321 :
322 : /*
323 : * Likewise remove references from SpecialJoinInfo data structures.
324 : *
325 : * This is relevant in case the outer join we're deleting is nested inside
326 : * other outer joins: the upper joins' relid sets have to be adjusted. The
327 : * RHS of the target outer join will be made empty here, but that's OK
328 : * since caller will delete that SpecialJoinInfo entirely.
329 : */
330 22640 : foreach(l, root->join_info_list)
331 : {
332 13072 : SpecialJoinInfo *sjinf = (SpecialJoinInfo *) lfirst(l);
333 :
334 : /*
335 : * initsplan.c is fairly cavalier about allowing SpecialJoinInfos'
336 : * lefthand/righthand relid sets to be shared with other data
337 : * structures. Ensure that we don't modify the original relid sets.
338 : * (The commute_xxx sets are always per-SpecialJoinInfo though.)
339 : */
340 13072 : sjinf->min_lefthand = bms_copy(sjinf->min_lefthand);
341 13072 : sjinf->min_righthand = bms_copy(sjinf->min_righthand);
342 13072 : sjinf->syn_lefthand = bms_copy(sjinf->syn_lefthand);
343 13072 : sjinf->syn_righthand = bms_copy(sjinf->syn_righthand);
344 : /* Now remove relid and ojrelid bits from the sets: */
345 13072 : sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, relid);
346 13072 : sjinf->min_righthand = bms_del_member(sjinf->min_righthand, relid);
347 13072 : sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, relid);
348 13072 : sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, relid);
349 13072 : sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, ojrelid);
350 13072 : sjinf->min_righthand = bms_del_member(sjinf->min_righthand, ojrelid);
351 13072 : sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, ojrelid);
352 13072 : sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, ojrelid);
353 : /* relid cannot appear in these fields, but ojrelid can: */
354 13072 : sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l, ojrelid);
355 13072 : sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r, ojrelid);
356 13072 : sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l, ojrelid);
357 13072 : sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r, ojrelid);
358 : }
359 :
360 : /*
361 : * Likewise remove references from PlaceHolderVar data structures,
362 : * removing any no-longer-needed placeholders entirely.
363 : *
364 : * Removal is a bit trickier than it might seem: we can remove PHVs that
365 : * are used at the target rel and/or in the join qual, but not those that
366 : * are used at join partner rels or above the join. It's not that easy to
367 : * distinguish PHVs used at partner rels from those used in the join qual,
368 : * since they will both have ph_needed sets that are subsets of
369 : * joinrelids. However, a PHV used at a partner rel could not have the
370 : * target rel in ph_eval_at, so we check that while deciding whether to
371 : * remove or just update the PHV. There is no corresponding test in
372 : * join_is_removable because it doesn't need to distinguish those cases.
373 : */
374 9694 : foreach(l, root->placeholder_list)
375 : {
376 126 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
377 :
378 : Assert(!bms_is_member(relid, phinfo->ph_lateral));
379 156 : if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
380 30 : bms_is_member(relid, phinfo->ph_eval_at) &&
381 12 : !bms_is_member(ojrelid, phinfo->ph_eval_at))
382 : {
383 6 : root->placeholder_list = foreach_delete_current(root->placeholder_list,
384 : l);
385 6 : root->placeholder_array[phinfo->phid] = NULL;
386 : }
387 : else
388 : {
389 120 : PlaceHolderVar *phv = phinfo->ph_var;
390 :
391 120 : phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid);
392 120 : phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, ojrelid);
393 : Assert(!bms_is_empty(phinfo->ph_eval_at)); /* checked previously */
394 : /* Reduce ph_needed to contain only "relation 0"; see below */
395 120 : if (bms_is_member(0, phinfo->ph_needed))
396 48 : phinfo->ph_needed = bms_make_singleton(0);
397 : else
398 72 : phinfo->ph_needed = NULL;
399 120 : phv->phrels = bms_del_member(phv->phrels, relid);
400 120 : phv->phrels = bms_del_member(phv->phrels, ojrelid);
401 : Assert(!bms_is_empty(phv->phrels));
402 : Assert(phv->phnullingrels == NULL); /* no need to adjust */
403 : }
404 : }
405 :
406 : /*
407 : * Remove any joinquals referencing the rel from the joininfo lists.
408 : *
409 : * In some cases, a joinqual has to be put back after deleting its
410 : * reference to the target rel. This can occur for pseudoconstant and
411 : * outerjoin-delayed quals, which can get marked as requiring the rel in
412 : * order to force them to be evaluated at or above the join. We can't
413 : * just discard them, though. Only quals that logically belonged to the
414 : * outer join being discarded should be removed from the query.
415 : *
416 : * We might encounter a qual that is a clone of a deletable qual with some
417 : * outer-join relids added (see deconstruct_distribute_oj_quals). To
418 : * ensure we get rid of such clones as well, add the relids of all OJs
419 : * commutable with this one to the set we test against for
420 : * pushed-down-ness.
421 : */
422 9568 : join_plus_commute = bms_union(joinrelids,
423 9568 : sjinfo->commute_above_r);
424 9568 : join_plus_commute = bms_add_members(join_plus_commute,
425 9568 : sjinfo->commute_below_l);
426 :
427 : /*
428 : * We must make a copy of the rel's old joininfo list before starting the
429 : * loop, because otherwise remove_join_clause_from_rels would destroy the
430 : * list while we're scanning it.
431 : */
432 9568 : joininfos = list_copy(rel->joininfo);
433 19482 : foreach(l, joininfos)
434 : {
435 9914 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
436 :
437 9914 : remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
438 :
439 9914 : if (RINFO_IS_PUSHED_DOWN(rinfo, join_plus_commute))
440 : {
441 : /*
442 : * There might be references to relid or ojrelid in the
443 : * RestrictInfo's relid sets, as a consequence of PHVs having had
444 : * ph_eval_at sets that include those. We already checked above
445 : * that any such PHV is safe (and updated its ph_eval_at), so we
446 : * can just drop those references.
447 : */
448 108 : remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
449 :
450 : /*
451 : * Cross-check that the clause itself does not reference the
452 : * target rel or join.
453 : */
454 : #ifdef USE_ASSERT_CHECKING
455 : {
456 : Relids clause_varnos = pull_varnos(root,
457 : (Node *) rinfo->clause);
458 :
459 : Assert(!bms_is_member(relid, clause_varnos));
460 : Assert(!bms_is_member(ojrelid, clause_varnos));
461 : }
462 : #endif
463 : /* Now throw it back into the joininfo lists */
464 108 : distribute_restrictinfo_to_rels(root, rinfo);
465 : }
466 : }
467 :
468 : /*
469 : * Likewise remove references from EquivalenceClasses.
470 : */
471 52520 : foreach(l, root->eq_classes)
472 : {
473 42952 : EquivalenceClass *ec = (EquivalenceClass *) lfirst(l);
474 :
475 72264 : if (bms_is_member(relid, ec->ec_relids) ||
476 29312 : bms_is_member(ojrelid, ec->ec_relids))
477 13640 : remove_rel_from_eclass(ec, relid, ojrelid);
478 : }
479 :
480 : /*
481 : * There may be references to the rel in root->fkey_list, but if so,
482 : * match_foreign_keys_to_quals() will get rid of them.
483 : */
484 :
485 : /*
486 : * Now remove the rel from the baserel array to prevent it from being
487 : * referenced again. (We can't do this earlier because
488 : * remove_join_clause_from_rels will touch it.)
489 : */
490 9568 : root->simple_rel_array[relid] = NULL;
491 :
492 : /* And nuke the RelOptInfo, just in case there's another access path */
493 9568 : pfree(rel);
494 :
495 : /*
496 : * Finally, we must recompute per-Var attr_needed and per-PlaceHolderVar
497 : * ph_needed relid sets. These have to be known accurately, else we may
498 : * fail to remove other now-removable outer joins. And our removal of the
499 : * join clause(s) for this outer join may mean that Vars that were
500 : * formerly needed no longer are. So we have to do this honestly by
501 : * repeating the construction of those relid sets. We can cheat to one
502 : * small extent: we can avoid re-examining the targetlist and HAVING qual
503 : * by preserving "relation 0" bits from the existing relid sets. This is
504 : * safe because we'd never remove such references.
505 : *
506 : * So, start by removing all other bits from attr_needed sets. (We
507 : * already did this above for ph_needed.)
508 : */
509 60584 : for (rti = 1; rti < root->simple_rel_array_size; rti++)
510 : {
511 51016 : RelOptInfo *otherrel = root->simple_rel_array[rti];
512 : int attroff;
513 :
514 : /* there may be empty slots corresponding to non-baserel RTEs */
515 51016 : if (otherrel == NULL)
516 34458 : continue;
517 :
518 : Assert(otherrel->relid == rti); /* sanity check on array */
519 :
520 388686 : for (attroff = otherrel->max_attr - otherrel->min_attr;
521 : attroff >= 0;
522 372128 : attroff--)
523 : {
524 372128 : if (bms_is_member(0, otherrel->attr_needed[attroff]))
525 39208 : otherrel->attr_needed[attroff] = bms_make_singleton(0);
526 : else
527 332920 : otherrel->attr_needed[attroff] = NULL;
528 : }
529 : }
530 :
531 : /*
532 : * Now repeat construction of attr_needed bits coming from all other
533 : * sources.
534 : */
535 9568 : rebuild_placeholder_attr_needed(root);
536 9568 : rebuild_joinclause_attr_needed(root);
537 9568 : rebuild_eclass_attr_needed(root);
538 9568 : rebuild_lateral_attr_needed(root);
539 9568 : }
540 :
541 : /*
542 : * Remove any references to relid or ojrelid from the RestrictInfo.
543 : *
544 : * We only bother to clean out bits in clause_relids and required_relids,
545 : * not nullingrel bits in contained Vars and PHVs. (This might have to be
546 : * improved sometime.) However, if the RestrictInfo contains an OR clause
547 : * we have to also clean up the sub-clauses.
548 : */
549 : static void
550 4088 : remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid)
551 : {
552 : /*
553 : * initsplan.c is fairly cavalier about allowing RestrictInfos to share
554 : * relid sets with other RestrictInfos, and SpecialJoinInfos too. Make
555 : * sure this RestrictInfo has its own relid sets before we modify them.
556 : * (In present usage, clause_relids is probably not shared, but
557 : * required_relids could be; let's not assume anything.)
558 : */
559 4088 : rinfo->clause_relids = bms_copy(rinfo->clause_relids);
560 4088 : rinfo->clause_relids = bms_del_member(rinfo->clause_relids, relid);
561 4088 : rinfo->clause_relids = bms_del_member(rinfo->clause_relids, ojrelid);
562 : /* Likewise for required_relids */
563 4088 : rinfo->required_relids = bms_copy(rinfo->required_relids);
564 4088 : rinfo->required_relids = bms_del_member(rinfo->required_relids, relid);
565 4088 : rinfo->required_relids = bms_del_member(rinfo->required_relids, ojrelid);
566 :
567 : /* If it's an OR, recurse to clean up sub-clauses */
568 4088 : if (restriction_is_or_clause(rinfo))
569 : {
570 : ListCell *lc;
571 :
572 : Assert(is_orclause(rinfo->orclause));
573 18 : foreach(lc, ((BoolExpr *) rinfo->orclause)->args)
574 : {
575 12 : Node *orarg = (Node *) lfirst(lc);
576 :
577 : /* OR arguments should be ANDs or sub-RestrictInfos */
578 12 : if (is_andclause(orarg))
579 : {
580 0 : List *andargs = ((BoolExpr *) orarg)->args;
581 : ListCell *lc2;
582 :
583 0 : foreach(lc2, andargs)
584 : {
585 0 : RestrictInfo *rinfo2 = lfirst_node(RestrictInfo, lc2);
586 :
587 0 : remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
588 : }
589 : }
590 : else
591 : {
592 12 : RestrictInfo *rinfo2 = castNode(RestrictInfo, orarg);
593 :
594 12 : remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
595 : }
596 : }
597 : }
598 4088 : }
599 :
600 : /*
601 : * Remove any references to relid or ojrelid from the EquivalenceClass.
602 : *
603 : * Like remove_rel_from_restrictinfo, we don't worry about cleaning out
604 : * any nullingrel bits in contained Vars and PHVs. (This might have to be
605 : * improved sometime.) We do need to fix the EC and EM relid sets to ensure
606 : * that implied join equalities will be generated at the appropriate join
607 : * level(s).
608 : */
609 : static void
610 13640 : remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid)
611 : {
612 : ListCell *lc;
613 :
614 : /* Fix up the EC's overall relids */
615 13640 : ec->ec_relids = bms_del_member(ec->ec_relids, relid);
616 13640 : ec->ec_relids = bms_del_member(ec->ec_relids, ojrelid);
617 :
618 : /*
619 : * Fix up the member expressions. Any non-const member that ends with
620 : * empty em_relids must be a Var or PHV of the removed relation. We don't
621 : * need it anymore, so we can drop it.
622 : */
623 31248 : foreach(lc, ec->ec_members)
624 : {
625 17608 : EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
626 :
627 21576 : if (bms_is_member(relid, cur_em->em_relids) ||
628 3968 : bms_is_member(ojrelid, cur_em->em_relids))
629 : {
630 : Assert(!cur_em->em_is_const);
631 13640 : cur_em->em_relids = bms_del_member(cur_em->em_relids, relid);
632 13640 : cur_em->em_relids = bms_del_member(cur_em->em_relids, ojrelid);
633 13640 : if (bms_is_empty(cur_em->em_relids))
634 13628 : ec->ec_members = foreach_delete_current(ec->ec_members, lc);
635 : }
636 : }
637 :
638 : /* Fix up the source clauses, in case we can re-use them later */
639 17608 : foreach(lc, ec->ec_sources)
640 : {
641 3968 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
642 :
643 3968 : remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
644 : }
645 :
646 : /*
647 : * Rather than expend code on fixing up any already-derived clauses, just
648 : * drop them. (At this point, any such clauses would be base restriction
649 : * clauses, which we'd not need anymore anyway.)
650 : */
651 13640 : ec->ec_derives = NIL;
652 13640 : }
653 :
654 : /*
655 : * Remove any occurrences of the target relid from a joinlist structure.
656 : *
657 : * It's easiest to build a whole new list structure, so we handle it that
658 : * way. Efficiency is not a big deal here.
659 : *
660 : * *nremoved is incremented by the number of occurrences removed (there
661 : * should be exactly one, but the caller checks that).
662 : */
663 : static List *
664 9772 : remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
665 : {
666 9772 : List *result = NIL;
667 : ListCell *jl;
668 :
669 36102 : foreach(jl, joinlist)
670 : {
671 26330 : Node *jlnode = (Node *) lfirst(jl);
672 :
673 26330 : if (IsA(jlnode, RangeTblRef))
674 : {
675 26126 : int varno = ((RangeTblRef *) jlnode)->rtindex;
676 :
677 26126 : if (varno == relid)
678 9568 : (*nremoved)++;
679 : else
680 16558 : result = lappend(result, jlnode);
681 : }
682 204 : else if (IsA(jlnode, List))
683 : {
684 : /* Recurse to handle subproblem */
685 : List *sublist;
686 :
687 204 : sublist = remove_rel_from_joinlist((List *) jlnode,
688 : relid, nremoved);
689 : /* Avoid including empty sub-lists in the result */
690 204 : if (sublist)
691 204 : result = lappend(result, sublist);
692 : }
693 : else
694 : {
695 0 : elog(ERROR, "unrecognized joinlist node type: %d",
696 : (int) nodeTag(jlnode));
697 : }
698 : }
699 :
700 9772 : return result;
701 : }
702 :
703 :
704 : /*
705 : * reduce_unique_semijoins
706 : * Check for semijoins that can be simplified to plain inner joins
707 : * because the inner relation is provably unique for the join clauses.
708 : *
709 : * Ideally this would happen during reduce_outer_joins, but we don't have
710 : * enough information at that point.
711 : *
712 : * To perform the strength reduction when applicable, we need only delete
713 : * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't
714 : * bother fixing the join type attributed to it in the query jointree,
715 : * since that won't be consulted again.)
716 : */
717 : void
718 290720 : reduce_unique_semijoins(PlannerInfo *root)
719 : {
720 : ListCell *lc;
721 :
722 : /*
723 : * Scan the join_info_list to find semijoins.
724 : */
725 326882 : foreach(lc, root->join_info_list)
726 : {
727 36162 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
728 : int innerrelid;
729 : RelOptInfo *innerrel;
730 : Relids joinrelids;
731 : List *restrictlist;
732 :
733 : /*
734 : * Must be a semijoin to a single baserel, else we aren't going to be
735 : * able to do anything with it.
736 : */
737 36162 : if (sjinfo->jointype != JOIN_SEMI)
738 35938 : continue;
739 :
740 1924 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
741 146 : continue;
742 :
743 1778 : innerrel = find_base_rel(root, innerrelid);
744 :
745 : /*
746 : * Before we trouble to run generate_join_implied_equalities, make a
747 : * quick check to eliminate cases in which we will surely be unable to
748 : * prove uniqueness of the innerrel.
749 : */
750 1778 : if (!rel_supports_distinctness(root, innerrel))
751 796 : continue;
752 :
753 : /* Compute the relid set for the join we are considering */
754 982 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
755 : Assert(sjinfo->ojrelid == 0); /* SEMI joins don't have RT indexes */
756 :
757 : /*
758 : * Since we're only considering a single-rel RHS, any join clauses it
759 : * has must be clauses linking it to the semijoin's min_lefthand. We
760 : * can also consider EC-derived join clauses.
761 : */
762 : restrictlist =
763 982 : list_concat(generate_join_implied_equalities(root,
764 : joinrelids,
765 : sjinfo->min_lefthand,
766 : innerrel,
767 : NULL),
768 982 : innerrel->joininfo);
769 :
770 : /* Test whether the innerrel is unique for those clauses. */
771 982 : if (!innerrel_is_unique(root,
772 : joinrelids, sjinfo->min_lefthand, innerrel,
773 : JOIN_SEMI, restrictlist, true))
774 758 : continue;
775 :
776 : /* OK, remove the SpecialJoinInfo from the list. */
777 224 : root->join_info_list = foreach_delete_current(root->join_info_list, lc);
778 : }
779 290720 : }
780 :
781 :
782 : /*
783 : * rel_supports_distinctness
784 : * Could the relation possibly be proven distinct on some set of columns?
785 : *
786 : * This is effectively a pre-checking function for rel_is_distinct_for().
787 : * It must return true if rel_is_distinct_for() could possibly return true
788 : * with this rel, but it should not expend a lot of cycles. The idea is
789 : * that callers can avoid doing possibly-expensive processing to compute
790 : * rel_is_distinct_for()'s argument lists if the call could not possibly
791 : * succeed.
792 : */
793 : static bool
794 495874 : rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
795 : {
796 : /* We only know about baserels ... */
797 495874 : if (rel->reloptkind != RELOPT_BASEREL)
798 160412 : return false;
799 335462 : if (rel->rtekind == RTE_RELATION)
800 : {
801 : /*
802 : * For a plain relation, we only know how to prove uniqueness by
803 : * reference to unique indexes. Make sure there's at least one
804 : * suitable unique index. It must be immediately enforced, and not a
805 : * partial index. (Keep these conditions in sync with
806 : * relation_has_unique_index_for!)
807 : */
808 : ListCell *lc;
809 :
810 432350 : foreach(lc, rel->indexlist)
811 : {
812 385448 : IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
813 :
814 385448 : if (ind->unique && ind->immediate && ind->indpred == NIL)
815 264226 : return true;
816 : }
817 : }
818 24334 : else if (rel->rtekind == RTE_SUBQUERY)
819 : {
820 3990 : Query *subquery = root->simple_rte_array[rel->relid]->subquery;
821 :
822 : /* Check if the subquery has any qualities that support distinctness */
823 3990 : if (query_supports_distinctness(subquery))
824 2548 : return true;
825 : }
826 : /* We have no proof rules for any other rtekinds. */
827 68688 : return false;
828 : }
829 :
830 : /*
831 : * rel_is_distinct_for
832 : * Does the relation return only distinct rows according to clause_list?
833 : *
834 : * clause_list is a list of join restriction clauses involving this rel and
835 : * some other one. Return true if no two rows emitted by this rel could
836 : * possibly join to the same row of the other rel.
837 : *
838 : * The caller must have already determined that each condition is a
839 : * mergejoinable equality with an expression in this relation on one side, and
840 : * an expression not involving this relation on the other. The transient
841 : * outer_is_left flag is used to identify which side references this relation:
842 : * left side if outer_is_left is false, right side if it is true.
843 : *
844 : * Note that the passed-in clause_list may be destructively modified! This
845 : * is OK for current uses, because the clause_list is built by the caller for
846 : * the sole purpose of passing to this function.
847 : */
848 : static bool
849 170170 : rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
850 : {
851 : /*
852 : * We could skip a couple of tests here if we assume all callers checked
853 : * rel_supports_distinctness first, but it doesn't seem worth taking any
854 : * risk for.
855 : */
856 170170 : if (rel->reloptkind != RELOPT_BASEREL)
857 0 : return false;
858 170170 : if (rel->rtekind == RTE_RELATION)
859 : {
860 : /*
861 : * Examine the indexes to see if we have a matching unique index.
862 : * relation_has_unique_index_for automatically adds any usable
863 : * restriction clauses for the rel, so we needn't do that here.
864 : */
865 168520 : if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL))
866 102342 : return true;
867 : }
868 1650 : else if (rel->rtekind == RTE_SUBQUERY)
869 : {
870 1650 : Index relid = rel->relid;
871 1650 : Query *subquery = root->simple_rte_array[relid]->subquery;
872 1650 : List *colnos = NIL;
873 1650 : List *opids = NIL;
874 : ListCell *l;
875 :
876 : /*
877 : * Build the argument lists for query_is_distinct_for: a list of
878 : * output column numbers that the query needs to be distinct over, and
879 : * a list of equality operators that the output columns need to be
880 : * distinct according to.
881 : *
882 : * (XXX we are not considering restriction clauses attached to the
883 : * subquery; is that worth doing?)
884 : */
885 3270 : foreach(l, clause_list)
886 : {
887 1620 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
888 : Oid op;
889 : Var *var;
890 :
891 : /*
892 : * Get the equality operator we need uniqueness according to.
893 : * (This might be a cross-type operator and thus not exactly the
894 : * same operator the subquery would consider; that's all right
895 : * since query_is_distinct_for can resolve such cases.) The
896 : * caller's mergejoinability test should have selected only
897 : * OpExprs.
898 : */
899 1620 : op = castNode(OpExpr, rinfo->clause)->opno;
900 :
901 : /* caller identified the inner side for us */
902 1620 : if (rinfo->outer_is_left)
903 1306 : var = (Var *) get_rightop(rinfo->clause);
904 : else
905 314 : var = (Var *) get_leftop(rinfo->clause);
906 :
907 : /*
908 : * We may ignore any RelabelType node above the operand. (There
909 : * won't be more than one, since eval_const_expressions() has been
910 : * applied already.)
911 : */
912 1620 : if (var && IsA(var, RelabelType))
913 626 : var = (Var *) ((RelabelType *) var)->arg;
914 :
915 : /*
916 : * If inner side isn't a Var referencing a subquery output column,
917 : * this clause doesn't help us.
918 : */
919 1620 : if (!var || !IsA(var, Var) ||
920 1608 : var->varno != relid || var->varlevelsup != 0)
921 12 : continue;
922 :
923 1608 : colnos = lappend_int(colnos, var->varattno);
924 1608 : opids = lappend_oid(opids, op);
925 : }
926 :
927 1650 : if (query_is_distinct_for(subquery, colnos, opids))
928 200 : return true;
929 : }
930 67628 : return false;
931 : }
932 :
933 :
934 : /*
935 : * query_supports_distinctness - could the query possibly be proven distinct
936 : * on some set of output columns?
937 : *
938 : * This is effectively a pre-checking function for query_is_distinct_for().
939 : * It must return true if query_is_distinct_for() could possibly return true
940 : * with this query, but it should not expend a lot of cycles. The idea is
941 : * that callers can avoid doing possibly-expensive processing to compute
942 : * query_is_distinct_for()'s argument lists if the call could not possibly
943 : * succeed.
944 : */
945 : bool
946 4638 : query_supports_distinctness(Query *query)
947 : {
948 : /* SRFs break distinctness except with DISTINCT, see below */
949 4638 : if (query->hasTargetSRFs && query->distinctClause == NIL)
950 944 : return false;
951 :
952 : /* check for features we can prove distinctness with */
953 3694 : if (query->distinctClause != NIL ||
954 3550 : query->groupClause != NIL ||
955 3380 : query->groupingSets != NIL ||
956 3380 : query->hasAggs ||
957 3108 : query->havingQual ||
958 3108 : query->setOperations)
959 3160 : return true;
960 :
961 534 : return false;
962 : }
963 :
964 : /*
965 : * query_is_distinct_for - does query never return duplicates of the
966 : * specified columns?
967 : *
968 : * query is a not-yet-planned subquery (in current usage, it's always from
969 : * a subquery RTE, which the planner avoids scribbling on).
970 : *
971 : * colnos is an integer list of output column numbers (resno's). We are
972 : * interested in whether rows consisting of just these columns are certain
973 : * to be distinct. "Distinctness" is defined according to whether the
974 : * corresponding upper-level equality operators listed in opids would think
975 : * the values are distinct. (Note: the opids entries could be cross-type
976 : * operators, and thus not exactly the equality operators that the subquery
977 : * would use itself. We use equality_ops_are_compatible() to check
978 : * compatibility. That looks at btree or hash opfamily membership, and so
979 : * should give trustworthy answers for all operators that we might need
980 : * to deal with here.)
981 : */
982 : bool
983 1760 : query_is_distinct_for(Query *query, List *colnos, List *opids)
984 : {
985 : ListCell *l;
986 : Oid opid;
987 :
988 : Assert(list_length(colnos) == list_length(opids));
989 :
990 : /*
991 : * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
992 : * columns in the DISTINCT clause appear in colnos and operator semantics
993 : * match. This is true even if there are SRFs in the DISTINCT columns or
994 : * elsewhere in the tlist.
995 : */
996 1760 : if (query->distinctClause)
997 : {
998 150 : foreach(l, query->distinctClause)
999 : {
1000 120 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
1001 120 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
1002 : query->targetList);
1003 :
1004 120 : opid = distinct_col_search(tle->resno, colnos, opids);
1005 120 : if (!OidIsValid(opid) ||
1006 48 : !equality_ops_are_compatible(opid, sgc->eqop))
1007 : break; /* exit early if no match */
1008 : }
1009 102 : if (l == NULL) /* had matches for all? */
1010 30 : return true;
1011 : }
1012 :
1013 : /*
1014 : * Otherwise, a set-returning function in the query's targetlist can
1015 : * result in returning duplicate rows, despite any grouping that might
1016 : * occur before tlist evaluation. (If all tlist SRFs are within GROUP BY
1017 : * columns, it would be safe because they'd be expanded before grouping.
1018 : * But it doesn't currently seem worth the effort to check for that.)
1019 : */
1020 1730 : if (query->hasTargetSRFs)
1021 0 : return false;
1022 :
1023 : /*
1024 : * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
1025 : * the grouped columns appear in colnos and operator semantics match.
1026 : */
1027 1730 : if (query->groupClause && !query->groupingSets)
1028 : {
1029 210 : foreach(l, query->groupClause)
1030 : {
1031 152 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
1032 152 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
1033 : query->targetList);
1034 :
1035 152 : opid = distinct_col_search(tle->resno, colnos, opids);
1036 152 : if (!OidIsValid(opid) ||
1037 100 : !equality_ops_are_compatible(opid, sgc->eqop))
1038 : break; /* exit early if no match */
1039 : }
1040 110 : if (l == NULL) /* had matches for all? */
1041 58 : return true;
1042 : }
1043 1620 : else if (query->groupingSets)
1044 : {
1045 : /*
1046 : * If we have grouping sets with expressions, we probably don't have
1047 : * uniqueness and analysis would be hard. Punt.
1048 : */
1049 0 : if (query->groupClause)
1050 0 : return false;
1051 :
1052 : /*
1053 : * If we have no groupClause (therefore no grouping expressions), we
1054 : * might have one or many empty grouping sets. If there's just one,
1055 : * then we're returning only one row and are certainly unique. But
1056 : * otherwise, we know we're certainly not unique.
1057 : */
1058 0 : if (list_length(query->groupingSets) == 1 &&
1059 0 : ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
1060 0 : return true;
1061 : else
1062 0 : return false;
1063 : }
1064 : else
1065 : {
1066 : /*
1067 : * If we have no GROUP BY, but do have aggregates or HAVING, then the
1068 : * result is at most one row so it's surely unique, for any operators.
1069 : */
1070 1620 : if (query->hasAggs || query->havingQual)
1071 100 : return true;
1072 : }
1073 :
1074 : /*
1075 : * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
1076 : * except with ALL.
1077 : */
1078 1572 : if (query->setOperations)
1079 : {
1080 1448 : SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations);
1081 :
1082 : Assert(topop->op != SETOP_NONE);
1083 :
1084 1448 : if (!topop->all)
1085 : {
1086 : ListCell *lg;
1087 :
1088 : /* We're good if all the nonjunk output columns are in colnos */
1089 72 : lg = list_head(topop->groupClauses);
1090 90 : foreach(l, query->targetList)
1091 : {
1092 78 : TargetEntry *tle = (TargetEntry *) lfirst(l);
1093 : SortGroupClause *sgc;
1094 :
1095 78 : if (tle->resjunk)
1096 0 : continue; /* ignore resjunk columns */
1097 :
1098 : /* non-resjunk columns should have grouping clauses */
1099 : Assert(lg != NULL);
1100 78 : sgc = (SortGroupClause *) lfirst(lg);
1101 78 : lg = lnext(topop->groupClauses, lg);
1102 :
1103 78 : opid = distinct_col_search(tle->resno, colnos, opids);
1104 78 : if (!OidIsValid(opid) ||
1105 18 : !equality_ops_are_compatible(opid, sgc->eqop))
1106 : break; /* exit early if no match */
1107 : }
1108 72 : if (l == NULL) /* had matches for all? */
1109 12 : return true;
1110 : }
1111 : }
1112 :
1113 : /*
1114 : * XXX Are there any other cases in which we can easily see the result
1115 : * must be distinct?
1116 : *
1117 : * If you do add more smarts to this function, be sure to update
1118 : * query_supports_distinctness() to match.
1119 : */
1120 :
1121 1560 : return false;
1122 : }
1123 :
1124 : /*
1125 : * distinct_col_search - subroutine for query_is_distinct_for
1126 : *
1127 : * If colno is in colnos, return the corresponding element of opids,
1128 : * else return InvalidOid. (Ordinarily colnos would not contain duplicates,
1129 : * but if it does, we arbitrarily select the first match.)
1130 : */
1131 : static Oid
1132 350 : distinct_col_search(int colno, List *colnos, List *opids)
1133 : {
1134 : ListCell *lc1,
1135 : *lc2;
1136 :
1137 562 : forboth(lc1, colnos, lc2, opids)
1138 : {
1139 378 : if (colno == lfirst_int(lc1))
1140 166 : return lfirst_oid(lc2);
1141 : }
1142 184 : return InvalidOid;
1143 : }
1144 :
1145 :
1146 : /*
1147 : * innerrel_is_unique
1148 : * Check if the innerrel provably contains at most one tuple matching any
1149 : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1150 : *
1151 : * We need an actual RelOptInfo for the innerrel, but it's sufficient to
1152 : * identify the outerrel by its Relids. This asymmetry supports use of this
1153 : * function before joinrels have been built. (The caller is expected to
1154 : * also supply the joinrelids, just to save recalculating that.)
1155 : *
1156 : * The proof must be made based only on clauses that will be "joinquals"
1157 : * rather than "otherquals" at execution. For an inner join there's no
1158 : * difference; but if the join is outer, we must ignore pushed-down quals,
1159 : * as those will become "otherquals". Note that this means the answer might
1160 : * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
1161 : * answer without regard to that, callers must take care not to call this
1162 : * with jointypes that would be classified differently by IS_OUTER_JOIN().
1163 : *
1164 : * The actual proof is undertaken by is_innerrel_unique_for(); this function
1165 : * is a frontend that is mainly concerned with caching the answers.
1166 : * In particular, the force_cache argument allows overriding the internal
1167 : * heuristic about whether to cache negative answers; it should be "true"
1168 : * if making an inquiry that is not part of the normal bottom-up join search
1169 : * sequence.
1170 : */
1171 : bool
1172 556390 : innerrel_is_unique(PlannerInfo *root,
1173 : Relids joinrelids,
1174 : Relids outerrelids,
1175 : RelOptInfo *innerrel,
1176 : JoinType jointype,
1177 : List *restrictlist,
1178 : bool force_cache)
1179 : {
1180 : MemoryContext old_context;
1181 : ListCell *lc;
1182 :
1183 : /* Certainly can't prove uniqueness when there are no joinclauses */
1184 556390 : if (restrictlist == NIL)
1185 99886 : return false;
1186 :
1187 : /*
1188 : * Make a quick check to eliminate cases in which we will surely be unable
1189 : * to prove uniqueness of the innerrel.
1190 : */
1191 456504 : if (!rel_supports_distinctness(root, innerrel))
1192 225458 : return false;
1193 :
1194 : /*
1195 : * Query the cache to see if we've managed to prove that innerrel is
1196 : * unique for any subset of this outerrel. We don't need an exact match,
1197 : * as extra outerrels can't make the innerrel any less unique (or more
1198 : * formally, the restrictlist for a join to a superset outerrel must be a
1199 : * superset of the conditions we successfully used before).
1200 : */
1201 254702 : foreach(lc, innerrel->unique_for_rels)
1202 : {
1203 94238 : Relids unique_for_rels = (Relids) lfirst(lc);
1204 :
1205 94238 : if (bms_is_subset(unique_for_rels, outerrelids))
1206 70582 : return true; /* Success! */
1207 : }
1208 :
1209 : /*
1210 : * Conversely, we may have already determined that this outerrel, or some
1211 : * superset thereof, cannot prove this innerrel to be unique.
1212 : */
1213 160464 : foreach(lc, innerrel->non_unique_for_rels)
1214 : {
1215 0 : Relids unique_for_rels = (Relids) lfirst(lc);
1216 :
1217 0 : if (bms_is_subset(outerrelids, unique_for_rels))
1218 0 : return false;
1219 : }
1220 :
1221 : /* No cached information, so try to make the proof. */
1222 160464 : if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
1223 : jointype, restrictlist))
1224 : {
1225 : /*
1226 : * Cache the positive result for future probes, being sure to keep it
1227 : * in the planner_cxt even if we are working in GEQO.
1228 : *
1229 : * Note: one might consider trying to isolate the minimal subset of
1230 : * the outerrels that proved the innerrel unique. But it's not worth
1231 : * the trouble, because the planner builds up joinrels incrementally
1232 : * and so we'll see the minimally sufficient outerrels before any
1233 : * supersets of them anyway.
1234 : */
1235 92974 : old_context = MemoryContextSwitchTo(root->planner_cxt);
1236 92974 : innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
1237 92974 : bms_copy(outerrelids));
1238 92974 : MemoryContextSwitchTo(old_context);
1239 :
1240 92974 : return true; /* Success! */
1241 : }
1242 : else
1243 : {
1244 : /*
1245 : * None of the join conditions for outerrel proved innerrel unique, so
1246 : * we can safely reject this outerrel or any subset of it in future
1247 : * checks.
1248 : *
1249 : * However, in normal planning mode, caching this knowledge is totally
1250 : * pointless; it won't be queried again, because we build up joinrels
1251 : * from smaller to larger. It is useful in GEQO mode, where the
1252 : * knowledge can be carried across successive planning attempts; and
1253 : * it's likely to be useful when using join-search plugins, too. Hence
1254 : * cache when join_search_private is non-NULL. (Yeah, that's a hack,
1255 : * but it seems reasonable.)
1256 : *
1257 : * Also, allow callers to override that heuristic and force caching;
1258 : * that's useful for reduce_unique_semijoins, which calls here before
1259 : * the normal join search starts.
1260 : */
1261 67490 : if (force_cache || root->join_search_private)
1262 : {
1263 758 : old_context = MemoryContextSwitchTo(root->planner_cxt);
1264 758 : innerrel->non_unique_for_rels =
1265 758 : lappend(innerrel->non_unique_for_rels,
1266 758 : bms_copy(outerrelids));
1267 758 : MemoryContextSwitchTo(old_context);
1268 : }
1269 :
1270 67490 : return false;
1271 : }
1272 : }
1273 :
1274 : /*
1275 : * is_innerrel_unique_for
1276 : * Check if the innerrel provably contains at most one tuple matching any
1277 : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1278 : */
1279 : static bool
1280 160464 : is_innerrel_unique_for(PlannerInfo *root,
1281 : Relids joinrelids,
1282 : Relids outerrelids,
1283 : RelOptInfo *innerrel,
1284 : JoinType jointype,
1285 : List *restrictlist)
1286 : {
1287 160464 : List *clause_list = NIL;
1288 : ListCell *lc;
1289 :
1290 : /*
1291 : * Search for mergejoinable clauses that constrain the inner rel against
1292 : * the outer rel. If an operator is mergejoinable then it behaves like
1293 : * equality for some btree opclass, so it's what we want. The
1294 : * mergejoinability test also eliminates clauses containing volatile
1295 : * functions, which we couldn't depend on.
1296 : */
1297 350348 : foreach(lc, restrictlist)
1298 : {
1299 189884 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
1300 :
1301 : /*
1302 : * As noted above, if it's a pushed-down clause and we're at an outer
1303 : * join, we can't use it.
1304 : */
1305 189884 : if (IS_OUTER_JOIN(jointype) &&
1306 86586 : RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
1307 3406 : continue;
1308 :
1309 : /* Ignore if it's not a mergejoinable clause */
1310 186478 : if (!restrictinfo->can_join ||
1311 172794 : restrictinfo->mergeopfamilies == NIL)
1312 14644 : continue; /* not mergejoinable */
1313 :
1314 : /*
1315 : * Check if clause has the form "outer op inner" or "inner op outer",
1316 : * and if so mark which side is inner.
1317 : */
1318 171834 : if (!clause_sides_match_join(restrictinfo, outerrelids,
1319 : innerrel->relids))
1320 14 : continue; /* no good for these input relations */
1321 :
1322 : /* OK, add to list */
1323 171820 : clause_list = lappend(clause_list, restrictinfo);
1324 : }
1325 :
1326 : /* Let rel_is_distinct_for() do the hard work */
1327 160464 : return rel_is_distinct_for(root, innerrel, clause_list);
1328 : }
|