Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * analyzejoins.c
4 : * Routines for simplifying joins after initial query analysis
5 : *
6 : * While we do a great deal of join simplification in prep/prepjointree.c,
7 : * certain optimizations cannot be performed at that stage for lack of
8 : * detailed information about the query. The routines here are invoked
9 : * after initsplan.c has done its work, and can do additional join removal
10 : * and simplification steps based on the information extracted. The penalty
11 : * is that we have to work harder to clean up after ourselves when we modify
12 : * the query, since the derived data structures have to be updated too.
13 : *
14 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
15 : * Portions Copyright (c) 1994, Regents of the University of California
16 : *
17 : *
18 : * IDENTIFICATION
19 : * src/backend/optimizer/plan/analyzejoins.c
20 : *
21 : *-------------------------------------------------------------------------
22 : */
23 : #include "postgres.h"
24 :
25 : #include "nodes/nodeFuncs.h"
26 : #include "optimizer/joininfo.h"
27 : #include "optimizer/optimizer.h"
28 : #include "optimizer/pathnode.h"
29 : #include "optimizer/paths.h"
30 : #include "optimizer/planmain.h"
31 : #include "optimizer/restrictinfo.h"
32 : #include "utils/lsyscache.h"
33 :
34 : /* local functions */
35 : static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
36 : static void remove_rel_from_query(PlannerInfo *root, int relid,
37 : SpecialJoinInfo *sjinfo);
38 : static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
39 : int relid, int ojrelid);
40 : static void remove_rel_from_eclass(EquivalenceClass *ec,
41 : int relid, int ojrelid);
42 : static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
43 : static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
44 : static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
45 : List *clause_list);
46 : static Oid distinct_col_search(int colno, List *colnos, List *opids);
47 : static bool is_innerrel_unique_for(PlannerInfo *root,
48 : Relids joinrelids,
49 : Relids outerrelids,
50 : RelOptInfo *innerrel,
51 : JoinType jointype,
52 : List *restrictlist);
53 :
54 :
55 : /*
56 : * remove_useless_joins
57 : * Check for relations that don't actually need to be joined at all,
58 : * and remove them from the query.
59 : *
60 : * We are passed the current joinlist and return the updated list. Other
61 : * data structures that have to be updated are accessible via "root".
62 : */
63 : List *
64 291938 : remove_useless_joins(PlannerInfo *root, List *joinlist)
65 : {
66 : ListCell *lc;
67 :
68 : /*
69 : * We are only interested in relations that are left-joined to, so we can
70 : * scan the join_info_list to find them easily.
71 : */
72 291938 : restart:
73 330346 : foreach(lc, root->join_info_list)
74 : {
75 50302 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
76 : int innerrelid;
77 : int nremoved;
78 :
79 : /* Skip if not removable */
80 50302 : if (!join_is_removable(root, sjinfo))
81 38408 : continue;
82 :
83 : /*
84 : * Currently, join_is_removable can only succeed when the sjinfo's
85 : * righthand is a single baserel. Remove that rel from the query and
86 : * joinlist.
87 : */
88 11894 : innerrelid = bms_singleton_member(sjinfo->min_righthand);
89 :
90 11894 : remove_rel_from_query(root, innerrelid, sjinfo);
91 :
92 : /* We verify that exactly one reference gets removed from joinlist */
93 11894 : nremoved = 0;
94 11894 : joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
95 11894 : if (nremoved != 1)
96 0 : elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
97 :
98 : /*
99 : * We can delete this SpecialJoinInfo from the list too, since it's no
100 : * longer of interest. (Since we'll restart the foreach loop
101 : * immediately, we don't bother with foreach_delete_current.)
102 : */
103 11894 : root->join_info_list = list_delete_cell(root->join_info_list, lc);
104 :
105 : /*
106 : * Restart the scan. This is necessary to ensure we find all
107 : * removable joins independently of ordering of the join_info_list
108 : * (note that removal of attr_needed bits may make a join appear
109 : * removable that did not before).
110 : */
111 11894 : goto restart;
112 : }
113 :
114 280044 : return joinlist;
115 : }
116 :
117 : /*
118 : * clause_sides_match_join
119 : * Determine whether a join clause is of the right form to use in this join.
120 : *
121 : * We already know that the clause is a binary opclause referencing only the
122 : * rels in the current join. The point here is to check whether it has the
123 : * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
124 : * rather than mixing outer and inner vars on either side. If it matches,
125 : * we set the transient flag outer_is_left to identify which side is which.
126 : */
127 : static inline bool
128 182288 : clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
129 : Relids innerrelids)
130 : {
131 278058 : if (bms_is_subset(rinfo->left_relids, outerrelids) &&
132 95770 : bms_is_subset(rinfo->right_relids, innerrelids))
133 : {
134 : /* lefthand side is outer */
135 95764 : rinfo->outer_is_left = true;
136 95764 : return true;
137 : }
138 173028 : else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
139 86504 : bms_is_subset(rinfo->right_relids, outerrelids))
140 : {
141 : /* righthand side is outer */
142 86504 : rinfo->outer_is_left = false;
143 86504 : return true;
144 : }
145 20 : return false; /* no good for these input relations */
146 : }
147 :
148 : /*
149 : * join_is_removable
150 : * Check whether we need not perform this special join at all, because
151 : * it will just duplicate its left input.
152 : *
153 : * This is true for a left join for which the join condition cannot match
154 : * more than one inner-side row. (There are other possibly interesting
155 : * cases, but we don't have the infrastructure to prove them.) We also
156 : * have to check that the inner side doesn't generate any variables needed
157 : * above the join.
158 : */
159 : static bool
160 50302 : join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
161 : {
162 : int innerrelid;
163 : RelOptInfo *innerrel;
164 : Relids inputrelids;
165 : Relids joinrelids;
166 50302 : List *clause_list = NIL;
167 : ListCell *l;
168 : int attroff;
169 :
170 : /*
171 : * Must be a left join to a single baserel, else we aren't going to be
172 : * able to do anything with it.
173 : */
174 50302 : if (sjinfo->jointype != JOIN_LEFT)
175 6338 : return false;
176 :
177 43964 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
178 1124 : return false;
179 :
180 : /*
181 : * Never try to eliminate a left join to the query result rel. Although
182 : * the case is syntactically impossible in standard SQL, MERGE will build
183 : * a join tree that looks exactly like that.
184 : */
185 42840 : if (innerrelid == root->parse->resultRelation)
186 694 : return false;
187 :
188 42146 : innerrel = find_base_rel(root, innerrelid);
189 :
190 : /*
191 : * Before we go to the effort of checking whether any innerrel variables
192 : * are needed above the join, make a quick check to eliminate cases in
193 : * which we will surely be unable to prove uniqueness of the innerrel.
194 : */
195 42146 : if (!rel_supports_distinctness(root, innerrel))
196 2724 : return false;
197 :
198 : /* Compute the relid set for the join we are considering */
199 39422 : inputrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
200 : Assert(sjinfo->ojrelid != 0);
201 39422 : joinrelids = bms_copy(inputrelids);
202 39422 : joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
203 :
204 : /*
205 : * We can't remove the join if any inner-rel attributes are used above the
206 : * join. Here, "above" the join includes pushed-down conditions, so we
207 : * should reject if attr_needed includes the OJ's own relid; therefore,
208 : * compare to inputrelids not joinrelids.
209 : *
210 : * As a micro-optimization, it seems better to start with max_attr and
211 : * count down rather than starting with min_attr and counting up, on the
212 : * theory that the system attributes are somewhat less likely to be wanted
213 : * and should be tested last.
214 : */
215 509386 : for (attroff = innerrel->max_attr - innerrel->min_attr;
216 : attroff >= 0;
217 469964 : attroff--)
218 : {
219 497366 : if (!bms_is_subset(innerrel->attr_needed[attroff], inputrelids))
220 27402 : return false;
221 : }
222 :
223 : /*
224 : * Similarly check that the inner rel isn't needed by any PlaceHolderVars
225 : * that will be used above the join. The PHV case is a little bit more
226 : * complicated, because PHVs may have been assigned a ph_eval_at location
227 : * that includes the innerrel, yet their contained expression might not
228 : * actually reference the innerrel (it could be just a constant, for
229 : * instance). If such a PHV is due to be evaluated above the join then it
230 : * needn't prevent join removal.
231 : */
232 12170 : foreach(l, root->placeholder_list)
233 : {
234 180 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
235 :
236 180 : if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
237 30 : return false; /* it references innerrel laterally */
238 180 : if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
239 54 : continue; /* it definitely doesn't reference innerrel */
240 126 : if (bms_is_subset(phinfo->ph_needed, inputrelids))
241 6 : continue; /* PHV is not used above the join */
242 120 : if (!bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
243 30 : return false; /* it has to be evaluated below the join */
244 :
245 : /*
246 : * We need to be sure there will still be a place to evaluate the PHV
247 : * if we remove the join, ie that ph_eval_at wouldn't become empty.
248 : */
249 90 : if (!bms_overlap(sjinfo->min_lefthand, phinfo->ph_eval_at))
250 0 : return false; /* there isn't any other place to eval PHV */
251 : /* Check contained expression last, since this is a bit expensive */
252 90 : if (bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
253 90 : innerrel->relids))
254 0 : return false; /* contained expression references innerrel */
255 : }
256 :
257 : /*
258 : * Search for mergejoinable clauses that constrain the inner rel against
259 : * either the outer rel or a pseudoconstant. If an operator is
260 : * mergejoinable then it behaves like equality for some btree opclass, so
261 : * it's what we want. The mergejoinability test also eliminates clauses
262 : * containing volatile functions, which we couldn't depend on.
263 : */
264 24294 : foreach(l, innerrel->joininfo)
265 : {
266 12304 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
267 :
268 : /*
269 : * If the current join commutes with some other outer join(s) via
270 : * outer join identity 3, there will be multiple clones of its join
271 : * clauses in the joininfo list. We want to consider only the
272 : * has_clone form of such clauses. Processing more than one form
273 : * would be wasteful, and also some of the others would confuse the
274 : * RINFO_IS_PUSHED_DOWN test below.
275 : */
276 12304 : if (restrictinfo->is_clone)
277 122 : continue; /* ignore it */
278 :
279 : /*
280 : * If it's not a join clause for this outer join, we can't use it.
281 : * Note that if the clause is pushed-down, then it is logically from
282 : * above the outer join, even if it references no other rels (it might
283 : * be from WHERE, for example).
284 : */
285 12182 : if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
286 102 : continue; /* ignore; not useful here */
287 :
288 : /* Ignore if it's not a mergejoinable clause */
289 12080 : if (!restrictinfo->can_join ||
290 12026 : restrictinfo->mergeopfamilies == NIL)
291 54 : continue; /* not mergejoinable */
292 :
293 : /*
294 : * Check if the clause has the form "outer op inner" or "inner op
295 : * outer", and if so mark which side is inner.
296 : */
297 12026 : if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
298 : innerrel->relids))
299 6 : continue; /* no good for these input relations */
300 :
301 : /* OK, add to list */
302 12020 : clause_list = lappend(clause_list, restrictinfo);
303 : }
304 :
305 : /*
306 : * Now that we have the relevant equality join clauses, try to prove the
307 : * innerrel distinct.
308 : */
309 11990 : if (rel_is_distinct_for(root, innerrel, clause_list))
310 11894 : return true;
311 :
312 : /*
313 : * Some day it would be nice to check for other methods of establishing
314 : * distinctness.
315 : */
316 96 : return false;
317 : }
318 :
319 :
320 : /*
321 : * Remove the target relid and references to the target join from the
322 : * planner's data structures, having determined that there is no need
323 : * to include them in the query.
324 : *
325 : * We are not terribly thorough here. We only bother to update parts of
326 : * the planner's data structures that will actually be consulted later.
327 : */
328 : static void
329 11894 : remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
330 : {
331 11894 : RelOptInfo *rel = find_base_rel(root, relid);
332 11894 : int ojrelid = sjinfo->ojrelid;
333 : Relids joinrelids;
334 : Relids join_plus_commute;
335 : List *joininfos;
336 : Index rti;
337 : ListCell *l;
338 :
339 : /* Compute the relid set for the join we are considering */
340 11894 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
341 : Assert(ojrelid != 0);
342 11894 : joinrelids = bms_add_member(joinrelids, ojrelid);
343 :
344 : /*
345 : * Remove references to the rel from other baserels' attr_needed arrays.
346 : */
347 77090 : for (rti = 1; rti < root->simple_rel_array_size; rti++)
348 : {
349 65196 : RelOptInfo *otherrel = root->simple_rel_array[rti];
350 : int attroff;
351 :
352 : /* there may be empty slots corresponding to non-baserel RTEs */
353 65196 : if (otherrel == NULL)
354 31926 : continue;
355 :
356 : Assert(otherrel->relid == rti); /* sanity check on array */
357 :
358 : /* no point in processing target rel itself */
359 33270 : if (otherrel == rel)
360 11894 : continue;
361 :
362 502308 : for (attroff = otherrel->max_attr - otherrel->min_attr;
363 : attroff >= 0;
364 480932 : attroff--)
365 : {
366 961864 : otherrel->attr_needed[attroff] =
367 480932 : bms_del_member(otherrel->attr_needed[attroff], relid);
368 480932 : otherrel->attr_needed[attroff] =
369 480932 : bms_del_member(otherrel->attr_needed[attroff], ojrelid);
370 : }
371 : }
372 :
373 : /*
374 : * Update all_baserels and related relid sets.
375 : */
376 11894 : root->all_baserels = bms_del_member(root->all_baserels, relid);
377 11894 : root->outer_join_rels = bms_del_member(root->outer_join_rels, ojrelid);
378 11894 : root->all_query_rels = bms_del_member(root->all_query_rels, relid);
379 11894 : root->all_query_rels = bms_del_member(root->all_query_rels, ojrelid);
380 :
381 : /*
382 : * Likewise remove references from SpecialJoinInfo data structures.
383 : *
384 : * This is relevant in case the outer join we're deleting is nested inside
385 : * other outer joins: the upper joins' relid sets have to be adjusted. The
386 : * RHS of the target outer join will be made empty here, but that's OK
387 : * since caller will delete that SpecialJoinInfo entirely.
388 : */
389 27190 : foreach(l, root->join_info_list)
390 : {
391 15296 : SpecialJoinInfo *sjinf = (SpecialJoinInfo *) lfirst(l);
392 :
393 15296 : sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, relid);
394 15296 : sjinf->min_righthand = bms_del_member(sjinf->min_righthand, relid);
395 15296 : sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, relid);
396 15296 : sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, relid);
397 15296 : sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, ojrelid);
398 15296 : sjinf->min_righthand = bms_del_member(sjinf->min_righthand, ojrelid);
399 15296 : sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, ojrelid);
400 15296 : sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, ojrelid);
401 : /* relid cannot appear in these fields, but ojrelid can: */
402 15296 : sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l, ojrelid);
403 15296 : sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r, ojrelid);
404 15296 : sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l, ojrelid);
405 15296 : sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r, ojrelid);
406 : }
407 :
408 : /*
409 : * Likewise remove references from PlaceHolderVar data structures,
410 : * removing any no-longer-needed placeholders entirely.
411 : *
412 : * Removal is a bit trickier than it might seem: we can remove PHVs that
413 : * are used at the target rel and/or in the join qual, but not those that
414 : * are used at join partner rels or above the join. It's not that easy to
415 : * distinguish PHVs used at partner rels from those used in the join qual,
416 : * since they will both have ph_needed sets that are subsets of
417 : * joinrelids. However, a PHV used at a partner rel could not have the
418 : * target rel in ph_eval_at, so we check that while deciding whether to
419 : * remove or just update the PHV. There is no corresponding test in
420 : * join_is_removable because it doesn't need to distinguish those cases.
421 : */
422 12026 : foreach(l, root->placeholder_list)
423 : {
424 132 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
425 :
426 : Assert(!bms_is_member(relid, phinfo->ph_lateral));
427 162 : if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
428 30 : bms_is_member(relid, phinfo->ph_eval_at) &&
429 12 : !bms_is_member(ojrelid, phinfo->ph_eval_at))
430 : {
431 6 : root->placeholder_list = foreach_delete_current(root->placeholder_list,
432 : l);
433 6 : root->placeholder_array[phinfo->phid] = NULL;
434 : }
435 : else
436 : {
437 126 : PlaceHolderVar *phv = phinfo->ph_var;
438 :
439 126 : phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid);
440 126 : phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, ojrelid);
441 : Assert(!bms_is_empty(phinfo->ph_eval_at)); /* checked previously */
442 126 : phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid);
443 126 : phinfo->ph_needed = bms_del_member(phinfo->ph_needed, ojrelid);
444 : /* ph_needed might or might not become empty */
445 126 : phv->phrels = bms_del_member(phv->phrels, relid);
446 126 : phv->phrels = bms_del_member(phv->phrels, ojrelid);
447 : Assert(!bms_is_empty(phv->phrels));
448 : Assert(phv->phnullingrels == NULL); /* no need to adjust */
449 : }
450 : }
451 :
452 : /*
453 : * Remove any joinquals referencing the rel from the joininfo lists.
454 : *
455 : * In some cases, a joinqual has to be put back after deleting its
456 : * reference to the target rel. This can occur for pseudoconstant and
457 : * outerjoin-delayed quals, which can get marked as requiring the rel in
458 : * order to force them to be evaluated at or above the join. We can't
459 : * just discard them, though. Only quals that logically belonged to the
460 : * outer join being discarded should be removed from the query.
461 : *
462 : * We might encounter a qual that is a clone of a deletable qual with some
463 : * outer-join relids added (see deconstruct_distribute_oj_quals). To
464 : * ensure we get rid of such clones as well, add the relids of all OJs
465 : * commutable with this one to the set we test against for
466 : * pushed-down-ness.
467 : */
468 11894 : join_plus_commute = bms_union(joinrelids,
469 11894 : sjinfo->commute_above_r);
470 11894 : join_plus_commute = bms_add_members(join_plus_commute,
471 11894 : sjinfo->commute_below_l);
472 :
473 : /*
474 : * We must make a copy of the rel's old joininfo list before starting the
475 : * loop, because otherwise remove_join_clause_from_rels would destroy the
476 : * list while we're scanning it.
477 : */
478 11894 : joininfos = list_copy(rel->joininfo);
479 24120 : foreach(l, joininfos)
480 : {
481 12226 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
482 :
483 12226 : remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
484 :
485 12226 : if (RINFO_IS_PUSHED_DOWN(rinfo, join_plus_commute))
486 : {
487 : /*
488 : * There might be references to relid or ojrelid in the
489 : * RestrictInfo's relid sets, as a consequence of PHVs having had
490 : * ph_eval_at sets that include those. We already checked above
491 : * that any such PHV is safe (and updated its ph_eval_at), so we
492 : * can just drop those references.
493 : */
494 102 : remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
495 :
496 : /*
497 : * Cross-check that the clause itself does not reference the
498 : * target rel or join.
499 : */
500 : #ifdef USE_ASSERT_CHECKING
501 : {
502 : Relids clause_varnos = pull_varnos(root,
503 : (Node *) rinfo->clause);
504 :
505 : Assert(!bms_is_member(relid, clause_varnos));
506 : Assert(!bms_is_member(ojrelid, clause_varnos));
507 : }
508 : #endif
509 : /* Now throw it back into the joininfo lists */
510 102 : distribute_restrictinfo_to_rels(root, rinfo);
511 : }
512 : }
513 :
514 : /*
515 : * Likewise remove references from EquivalenceClasses.
516 : */
517 61272 : foreach(l, root->eq_classes)
518 : {
519 49378 : EquivalenceClass *ec = (EquivalenceClass *) lfirst(l);
520 :
521 82920 : if (bms_is_member(relid, ec->ec_relids) ||
522 33542 : bms_is_member(ojrelid, ec->ec_relids))
523 15836 : remove_rel_from_eclass(ec, relid, ojrelid);
524 : }
525 :
526 : /*
527 : * There may be references to the rel in root->fkey_list, but if so,
528 : * match_foreign_keys_to_quals() will get rid of them.
529 : */
530 :
531 : /*
532 : * Finally, remove the rel from the baserel array to prevent it from being
533 : * referenced again. (We can't do this earlier because
534 : * remove_join_clause_from_rels will touch it.)
535 : */
536 11894 : root->simple_rel_array[relid] = NULL;
537 :
538 : /* And nuke the RelOptInfo, just in case there's another access path */
539 11894 : pfree(rel);
540 11894 : }
541 :
542 : /*
543 : * Remove any references to relid or ojrelid from the RestrictInfo.
544 : *
545 : * We only bother to clean out bits in clause_relids and required_relids,
546 : * not nullingrel bits in contained Vars and PHVs. (This might have to be
547 : * improved sometime.) However, if the RestrictInfo contains an OR clause
548 : * we have to also clean up the sub-clauses.
549 : */
550 : static void
551 3954 : remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid)
552 : {
553 : /*
554 : * The clause_relids probably aren't shared with anything else, but let's
555 : * copy them just to be sure.
556 : */
557 3954 : rinfo->clause_relids = bms_copy(rinfo->clause_relids);
558 3954 : rinfo->clause_relids = bms_del_member(rinfo->clause_relids, relid);
559 3954 : rinfo->clause_relids = bms_del_member(rinfo->clause_relids, ojrelid);
560 : /* Likewise for required_relids */
561 3954 : rinfo->required_relids = bms_copy(rinfo->required_relids);
562 3954 : rinfo->required_relids = bms_del_member(rinfo->required_relids, relid);
563 3954 : rinfo->required_relids = bms_del_member(rinfo->required_relids, ojrelid);
564 :
565 : /* If it's an OR, recurse to clean up sub-clauses */
566 3954 : if (restriction_is_or_clause(rinfo))
567 : {
568 : ListCell *lc;
569 :
570 : Assert(is_orclause(rinfo->orclause));
571 18 : foreach(lc, ((BoolExpr *) rinfo->orclause)->args)
572 : {
573 12 : Node *orarg = (Node *) lfirst(lc);
574 :
575 : /* OR arguments should be ANDs or sub-RestrictInfos */
576 12 : if (is_andclause(orarg))
577 : {
578 0 : List *andargs = ((BoolExpr *) orarg)->args;
579 : ListCell *lc2;
580 :
581 0 : foreach(lc2, andargs)
582 : {
583 0 : RestrictInfo *rinfo2 = lfirst_node(RestrictInfo, lc2);
584 :
585 0 : remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
586 : }
587 : }
588 : else
589 : {
590 12 : RestrictInfo *rinfo2 = castNode(RestrictInfo, orarg);
591 :
592 12 : remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
593 : }
594 : }
595 : }
596 3954 : }
597 :
598 : /*
599 : * Remove any references to relid or ojrelid from the EquivalenceClass.
600 : *
601 : * Like remove_rel_from_restrictinfo, we don't worry about cleaning out
602 : * any nullingrel bits in contained Vars and PHVs. (This might have to be
603 : * improved sometime.) We do need to fix the EC and EM relid sets to ensure
604 : * that implied join equalities will be generated at the appropriate join
605 : * level(s).
606 : */
607 : static void
608 15836 : remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid)
609 : {
610 : ListCell *lc;
611 :
612 : /* Fix up the EC's overall relids */
613 15836 : ec->ec_relids = bms_del_member(ec->ec_relids, relid);
614 15836 : ec->ec_relids = bms_del_member(ec->ec_relids, ojrelid);
615 :
616 : /*
617 : * Fix up the member expressions. Any non-const member that ends with
618 : * empty em_relids must be a Var or PHV of the removed relation. We don't
619 : * need it anymore, so we can drop it.
620 : */
621 35512 : foreach(lc, ec->ec_members)
622 : {
623 19676 : EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
624 :
625 23516 : if (bms_is_member(relid, cur_em->em_relids) ||
626 3840 : bms_is_member(ojrelid, cur_em->em_relids))
627 : {
628 : Assert(!cur_em->em_is_const);
629 15836 : cur_em->em_relids = bms_del_member(cur_em->em_relids, relid);
630 15836 : cur_em->em_relids = bms_del_member(cur_em->em_relids, ojrelid);
631 15836 : if (bms_is_empty(cur_em->em_relids))
632 15824 : ec->ec_members = foreach_delete_current(ec->ec_members, lc);
633 : }
634 : }
635 :
636 : /* Fix up the source clauses, in case we can re-use them later */
637 19676 : foreach(lc, ec->ec_sources)
638 : {
639 3840 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
640 :
641 3840 : remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
642 : }
643 :
644 : /*
645 : * Rather than expend code on fixing up any already-derived clauses, just
646 : * drop them. (At this point, any such clauses would be base restriction
647 : * clauses, which we'd not need anymore anyway.)
648 : */
649 15836 : ec->ec_derives = NIL;
650 15836 : }
651 :
652 : /*
653 : * Remove any occurrences of the target relid from a joinlist structure.
654 : *
655 : * It's easiest to build a whole new list structure, so we handle it that
656 : * way. Efficiency is not a big deal here.
657 : *
658 : * *nremoved is incremented by the number of occurrences removed (there
659 : * should be exactly one, but the caller checks that).
660 : */
661 : static List *
662 12098 : remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
663 : {
664 12098 : List *result = NIL;
665 : ListCell *jl;
666 :
667 45572 : foreach(jl, joinlist)
668 : {
669 33474 : Node *jlnode = (Node *) lfirst(jl);
670 :
671 33474 : if (IsA(jlnode, RangeTblRef))
672 : {
673 33270 : int varno = ((RangeTblRef *) jlnode)->rtindex;
674 :
675 33270 : if (varno == relid)
676 11894 : (*nremoved)++;
677 : else
678 21376 : result = lappend(result, jlnode);
679 : }
680 204 : else if (IsA(jlnode, List))
681 : {
682 : /* Recurse to handle subproblem */
683 : List *sublist;
684 :
685 204 : sublist = remove_rel_from_joinlist((List *) jlnode,
686 : relid, nremoved);
687 : /* Avoid including empty sub-lists in the result */
688 204 : if (sublist)
689 204 : result = lappend(result, sublist);
690 : }
691 : else
692 : {
693 0 : elog(ERROR, "unrecognized joinlist node type: %d",
694 : (int) nodeTag(jlnode));
695 : }
696 : }
697 :
698 12098 : return result;
699 : }
700 :
701 :
702 : /*
703 : * reduce_unique_semijoins
704 : * Check for semijoins that can be simplified to plain inner joins
705 : * because the inner relation is provably unique for the join clauses.
706 : *
707 : * Ideally this would happen during reduce_outer_joins, but we don't have
708 : * enough information at that point.
709 : *
710 : * To perform the strength reduction when applicable, we need only delete
711 : * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't
712 : * bother fixing the join type attributed to it in the query jointree,
713 : * since that won't be consulted again.)
714 : */
715 : void
716 280044 : reduce_unique_semijoins(PlannerInfo *root)
717 : {
718 : ListCell *lc;
719 :
720 : /*
721 : * Scan the join_info_list to find semijoins.
722 : */
723 318240 : foreach(lc, root->join_info_list)
724 : {
725 38196 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
726 : int innerrelid;
727 : RelOptInfo *innerrel;
728 : Relids joinrelids;
729 : List *restrictlist;
730 :
731 : /*
732 : * Must be a semijoin to a single baserel, else we aren't going to be
733 : * able to do anything with it.
734 : */
735 38196 : if (sjinfo->jointype != JOIN_SEMI)
736 37972 : continue;
737 :
738 1916 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
739 146 : continue;
740 :
741 1770 : innerrel = find_base_rel(root, innerrelid);
742 :
743 : /*
744 : * Before we trouble to run generate_join_implied_equalities, make a
745 : * quick check to eliminate cases in which we will surely be unable to
746 : * prove uniqueness of the innerrel.
747 : */
748 1770 : if (!rel_supports_distinctness(root, innerrel))
749 778 : continue;
750 :
751 : /* Compute the relid set for the join we are considering */
752 992 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
753 : Assert(sjinfo->ojrelid == 0); /* SEMI joins don't have RT indexes */
754 :
755 : /*
756 : * Since we're only considering a single-rel RHS, any join clauses it
757 : * has must be clauses linking it to the semijoin's min_lefthand. We
758 : * can also consider EC-derived join clauses.
759 : */
760 : restrictlist =
761 992 : list_concat(generate_join_implied_equalities(root,
762 : joinrelids,
763 : sjinfo->min_lefthand,
764 : innerrel,
765 : NULL),
766 992 : innerrel->joininfo);
767 :
768 : /* Test whether the innerrel is unique for those clauses. */
769 992 : if (!innerrel_is_unique(root,
770 : joinrelids, sjinfo->min_lefthand, innerrel,
771 : JOIN_SEMI, restrictlist, true))
772 768 : continue;
773 :
774 : /* OK, remove the SpecialJoinInfo from the list. */
775 224 : root->join_info_list = foreach_delete_current(root->join_info_list, lc);
776 : }
777 280044 : }
778 :
779 :
780 : /*
781 : * rel_supports_distinctness
782 : * Could the relation possibly be proven distinct on some set of columns?
783 : *
784 : * This is effectively a pre-checking function for rel_is_distinct_for().
785 : * It must return true if rel_is_distinct_for() could possibly return true
786 : * with this rel, but it should not expend a lot of cycles. The idea is
787 : * that callers can avoid doing possibly-expensive processing to compute
788 : * rel_is_distinct_for()'s argument lists if the call could not possibly
789 : * succeed.
790 : */
791 : static bool
792 503486 : rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
793 : {
794 : /* We only know about baserels ... */
795 503486 : if (rel->reloptkind != RELOPT_BASEREL)
796 163138 : return false;
797 340348 : if (rel->rtekind == RTE_RELATION)
798 : {
799 : /*
800 : * For a plain relation, we only know how to prove uniqueness by
801 : * reference to unique indexes. Make sure there's at least one
802 : * suitable unique index. It must be immediately enforced, and not a
803 : * partial index. (Keep these conditions in sync with
804 : * relation_has_unique_index_for!)
805 : */
806 : ListCell *lc;
807 :
808 442382 : foreach(lc, rel->indexlist)
809 : {
810 402744 : IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
811 :
812 402744 : if (ind->unique && ind->immediate && ind->indpred == NIL)
813 271436 : return true;
814 : }
815 : }
816 29274 : else if (rel->rtekind == RTE_SUBQUERY)
817 : {
818 4034 : Query *subquery = root->simple_rte_array[rel->relid]->subquery;
819 :
820 : /* Check if the subquery has any qualities that support distinctness */
821 4034 : if (query_supports_distinctness(subquery))
822 2646 : return true;
823 : }
824 : /* We have no proof rules for any other rtekinds. */
825 66266 : return false;
826 : }
827 :
828 : /*
829 : * rel_is_distinct_for
830 : * Does the relation return only distinct rows according to clause_list?
831 : *
832 : * clause_list is a list of join restriction clauses involving this rel and
833 : * some other one. Return true if no two rows emitted by this rel could
834 : * possibly join to the same row of the other rel.
835 : *
836 : * The caller must have already determined that each condition is a
837 : * mergejoinable equality with an expression in this relation on one side, and
838 : * an expression not involving this relation on the other. The transient
839 : * outer_is_left flag is used to identify which side references this relation:
840 : * left side if outer_is_left is false, right side if it is true.
841 : *
842 : * Note that the passed-in clause_list may be destructively modified! This
843 : * is OK for current uses, because the clause_list is built by the caller for
844 : * the sole purpose of passing to this function.
845 : */
846 : static bool
847 171096 : rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
848 : {
849 : /*
850 : * We could skip a couple of tests here if we assume all callers checked
851 : * rel_supports_distinctness first, but it doesn't seem worth taking any
852 : * risk for.
853 : */
854 171096 : if (rel->reloptkind != RELOPT_BASEREL)
855 0 : return false;
856 171096 : if (rel->rtekind == RTE_RELATION)
857 : {
858 : /*
859 : * Examine the indexes to see if we have a matching unique index.
860 : * relation_has_unique_index_for automatically adds any usable
861 : * restriction clauses for the rel, so we needn't do that here.
862 : */
863 169370 : if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL))
864 104710 : return true;
865 : }
866 1726 : else if (rel->rtekind == RTE_SUBQUERY)
867 : {
868 1726 : Index relid = rel->relid;
869 1726 : Query *subquery = root->simple_rte_array[relid]->subquery;
870 1726 : List *colnos = NIL;
871 1726 : List *opids = NIL;
872 : ListCell *l;
873 :
874 : /*
875 : * Build the argument lists for query_is_distinct_for: a list of
876 : * output column numbers that the query needs to be distinct over, and
877 : * a list of equality operators that the output columns need to be
878 : * distinct according to.
879 : *
880 : * (XXX we are not considering restriction clauses attached to the
881 : * subquery; is that worth doing?)
882 : */
883 3416 : foreach(l, clause_list)
884 : {
885 1690 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
886 : Oid op;
887 : Var *var;
888 :
889 : /*
890 : * Get the equality operator we need uniqueness according to.
891 : * (This might be a cross-type operator and thus not exactly the
892 : * same operator the subquery would consider; that's all right
893 : * since query_is_distinct_for can resolve such cases.) The
894 : * caller's mergejoinability test should have selected only
895 : * OpExprs.
896 : */
897 1690 : op = castNode(OpExpr, rinfo->clause)->opno;
898 :
899 : /* caller identified the inner side for us */
900 1690 : if (rinfo->outer_is_left)
901 1376 : var = (Var *) get_rightop(rinfo->clause);
902 : else
903 314 : var = (Var *) get_leftop(rinfo->clause);
904 :
905 : /*
906 : * We may ignore any RelabelType node above the operand. (There
907 : * won't be more than one, since eval_const_expressions() has been
908 : * applied already.)
909 : */
910 1690 : if (var && IsA(var, RelabelType))
911 612 : var = (Var *) ((RelabelType *) var)->arg;
912 :
913 : /*
914 : * If inner side isn't a Var referencing a subquery output column,
915 : * this clause doesn't help us.
916 : */
917 1690 : if (!var || !IsA(var, Var) ||
918 1678 : var->varno != relid || var->varlevelsup != 0)
919 12 : continue;
920 :
921 1678 : colnos = lappend_int(colnos, var->varattno);
922 1678 : opids = lappend_oid(opids, op);
923 : }
924 :
925 1726 : if (query_is_distinct_for(subquery, colnos, opids))
926 200 : return true;
927 : }
928 66186 : return false;
929 : }
930 :
931 :
932 : /*
933 : * query_supports_distinctness - could the query possibly be proven distinct
934 : * on some set of output columns?
935 : *
936 : * This is effectively a pre-checking function for query_is_distinct_for().
937 : * It must return true if query_is_distinct_for() could possibly return true
938 : * with this query, but it should not expend a lot of cycles. The idea is
939 : * that callers can avoid doing possibly-expensive processing to compute
940 : * query_is_distinct_for()'s argument lists if the call could not possibly
941 : * succeed.
942 : */
943 : bool
944 4692 : query_supports_distinctness(Query *query)
945 : {
946 : /* SRFs break distinctness except with DISTINCT, see below */
947 4692 : if (query->hasTargetSRFs && query->distinctClause == NIL)
948 908 : return false;
949 :
950 : /* check for features we can prove distinctness with */
951 3784 : if (query->distinctClause != NIL ||
952 3640 : query->groupClause != NIL ||
953 3434 : query->groupingSets != NIL ||
954 3434 : query->hasAggs ||
955 3162 : query->havingQual ||
956 3162 : query->setOperations)
957 3268 : return true;
958 :
959 516 : return false;
960 : }
961 :
962 : /*
963 : * query_is_distinct_for - does query never return duplicates of the
964 : * specified columns?
965 : *
966 : * query is a not-yet-planned subquery (in current usage, it's always from
967 : * a subquery RTE, which the planner avoids scribbling on).
968 : *
969 : * colnos is an integer list of output column numbers (resno's). We are
970 : * interested in whether rows consisting of just these columns are certain
971 : * to be distinct. "Distinctness" is defined according to whether the
972 : * corresponding upper-level equality operators listed in opids would think
973 : * the values are distinct. (Note: the opids entries could be cross-type
974 : * operators, and thus not exactly the equality operators that the subquery
975 : * would use itself. We use equality_ops_are_compatible() to check
976 : * compatibility. That looks at btree or hash opfamily membership, and so
977 : * should give trustworthy answers for all operators that we might need
978 : * to deal with here.)
979 : */
980 : bool
981 1848 : query_is_distinct_for(Query *query, List *colnos, List *opids)
982 : {
983 : ListCell *l;
984 : Oid opid;
985 :
986 : Assert(list_length(colnos) == list_length(opids));
987 :
988 : /*
989 : * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
990 : * columns in the DISTINCT clause appear in colnos and operator semantics
991 : * match. This is true even if there are SRFs in the DISTINCT columns or
992 : * elsewhere in the tlist.
993 : */
994 1848 : if (query->distinctClause)
995 : {
996 150 : foreach(l, query->distinctClause)
997 : {
998 120 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
999 120 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
1000 : query->targetList);
1001 :
1002 120 : opid = distinct_col_search(tle->resno, colnos, opids);
1003 120 : if (!OidIsValid(opid) ||
1004 48 : !equality_ops_are_compatible(opid, sgc->eqop))
1005 : break; /* exit early if no match */
1006 : }
1007 102 : if (l == NULL) /* had matches for all? */
1008 30 : return true;
1009 : }
1010 :
1011 : /*
1012 : * Otherwise, a set-returning function in the query's targetlist can
1013 : * result in returning duplicate rows, despite any grouping that might
1014 : * occur before tlist evaluation. (If all tlist SRFs are within GROUP BY
1015 : * columns, it would be safe because they'd be expanded before grouping.
1016 : * But it doesn't currently seem worth the effort to check for that.)
1017 : */
1018 1818 : if (query->hasTargetSRFs)
1019 0 : return false;
1020 :
1021 : /*
1022 : * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
1023 : * the grouped columns appear in colnos and operator semantics match.
1024 : */
1025 1818 : if (query->groupClause && !query->groupingSets)
1026 : {
1027 258 : foreach(l, query->groupClause)
1028 : {
1029 200 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
1030 200 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
1031 : query->targetList);
1032 :
1033 200 : opid = distinct_col_search(tle->resno, colnos, opids);
1034 200 : if (!OidIsValid(opid) ||
1035 124 : !equality_ops_are_compatible(opid, sgc->eqop))
1036 : break; /* exit early if no match */
1037 : }
1038 134 : if (l == NULL) /* had matches for all? */
1039 58 : return true;
1040 : }
1041 1684 : else if (query->groupingSets)
1042 : {
1043 : /*
1044 : * If we have grouping sets with expressions, we probably don't have
1045 : * uniqueness and analysis would be hard. Punt.
1046 : */
1047 0 : if (query->groupClause)
1048 0 : return false;
1049 :
1050 : /*
1051 : * If we have no groupClause (therefore no grouping expressions), we
1052 : * might have one or many empty grouping sets. If there's just one,
1053 : * then we're returning only one row and are certainly unique. But
1054 : * otherwise, we know we're certainly not unique.
1055 : */
1056 0 : if (list_length(query->groupingSets) == 1 &&
1057 0 : ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
1058 0 : return true;
1059 : else
1060 0 : return false;
1061 : }
1062 : else
1063 : {
1064 : /*
1065 : * If we have no GROUP BY, but do have aggregates or HAVING, then the
1066 : * result is at most one row so it's surely unique, for any operators.
1067 : */
1068 1684 : if (query->hasAggs || query->havingQual)
1069 100 : return true;
1070 : }
1071 :
1072 : /*
1073 : * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
1074 : * except with ALL.
1075 : */
1076 1660 : if (query->setOperations)
1077 : {
1078 1512 : SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations);
1079 :
1080 : Assert(topop->op != SETOP_NONE);
1081 :
1082 1512 : if (!topop->all)
1083 : {
1084 : ListCell *lg;
1085 :
1086 : /* We're good if all the nonjunk output columns are in colnos */
1087 72 : lg = list_head(topop->groupClauses);
1088 90 : foreach(l, query->targetList)
1089 : {
1090 78 : TargetEntry *tle = (TargetEntry *) lfirst(l);
1091 : SortGroupClause *sgc;
1092 :
1093 78 : if (tle->resjunk)
1094 0 : continue; /* ignore resjunk columns */
1095 :
1096 : /* non-resjunk columns should have grouping clauses */
1097 : Assert(lg != NULL);
1098 78 : sgc = (SortGroupClause *) lfirst(lg);
1099 78 : lg = lnext(topop->groupClauses, lg);
1100 :
1101 78 : opid = distinct_col_search(tle->resno, colnos, opids);
1102 78 : if (!OidIsValid(opid) ||
1103 18 : !equality_ops_are_compatible(opid, sgc->eqop))
1104 : break; /* exit early if no match */
1105 : }
1106 72 : if (l == NULL) /* had matches for all? */
1107 12 : return true;
1108 : }
1109 : }
1110 :
1111 : /*
1112 : * XXX Are there any other cases in which we can easily see the result
1113 : * must be distinct?
1114 : *
1115 : * If you do add more smarts to this function, be sure to update
1116 : * query_supports_distinctness() to match.
1117 : */
1118 :
1119 1648 : return false;
1120 : }
1121 :
1122 : /*
1123 : * distinct_col_search - subroutine for query_is_distinct_for
1124 : *
1125 : * If colno is in colnos, return the corresponding element of opids,
1126 : * else return InvalidOid. (Ordinarily colnos would not contain duplicates,
1127 : * but if it does, we arbitrarily select the first match.)
1128 : */
1129 : static Oid
1130 398 : distinct_col_search(int colno, List *colnos, List *opids)
1131 : {
1132 : ListCell *lc1,
1133 : *lc2;
1134 :
1135 634 : forboth(lc1, colnos, lc2, opids)
1136 : {
1137 426 : if (colno == lfirst_int(lc1))
1138 190 : return lfirst_oid(lc2);
1139 : }
1140 208 : return InvalidOid;
1141 : }
1142 :
1143 :
1144 : /*
1145 : * innerrel_is_unique
1146 : * Check if the innerrel provably contains at most one tuple matching any
1147 : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1148 : *
1149 : * We need an actual RelOptInfo for the innerrel, but it's sufficient to
1150 : * identify the outerrel by its Relids. This asymmetry supports use of this
1151 : * function before joinrels have been built. (The caller is expected to
1152 : * also supply the joinrelids, just to save recalculating that.)
1153 : *
1154 : * The proof must be made based only on clauses that will be "joinquals"
1155 : * rather than "otherquals" at execution. For an inner join there's no
1156 : * difference; but if the join is outer, we must ignore pushed-down quals,
1157 : * as those will become "otherquals". Note that this means the answer might
1158 : * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
1159 : * answer without regard to that, callers must take care not to call this
1160 : * with jointypes that would be classified differently by IS_OUTER_JOIN().
1161 : *
1162 : * The actual proof is undertaken by is_innerrel_unique_for(); this function
1163 : * is a frontend that is mainly concerned with caching the answers.
1164 : * In particular, the force_cache argument allows overriding the internal
1165 : * heuristic about whether to cache negative answers; it should be "true"
1166 : * if making an inquiry that is not part of the normal bottom-up join search
1167 : * sequence.
1168 : */
1169 : bool
1170 556510 : innerrel_is_unique(PlannerInfo *root,
1171 : Relids joinrelids,
1172 : Relids outerrelids,
1173 : RelOptInfo *innerrel,
1174 : JoinType jointype,
1175 : List *restrictlist,
1176 : bool force_cache)
1177 : {
1178 : MemoryContext old_context;
1179 : ListCell *lc;
1180 :
1181 : /* Certainly can't prove uniqueness when there are no joinclauses */
1182 556510 : if (restrictlist == NIL)
1183 96940 : return false;
1184 :
1185 : /*
1186 : * Make a quick check to eliminate cases in which we will surely be unable
1187 : * to prove uniqueness of the innerrel.
1188 : */
1189 459570 : if (!rel_supports_distinctness(root, innerrel))
1190 225902 : return false;
1191 :
1192 : /*
1193 : * Query the cache to see if we've managed to prove that innerrel is
1194 : * unique for any subset of this outerrel. We don't need an exact match,
1195 : * as extra outerrels can't make the innerrel any less unique (or more
1196 : * formally, the restrictlist for a join to a superset outerrel must be a
1197 : * superset of the conditions we successfully used before).
1198 : */
1199 256012 : foreach(lc, innerrel->unique_for_rels)
1200 : {
1201 96906 : Relids unique_for_rels = (Relids) lfirst(lc);
1202 :
1203 96906 : if (bms_is_subset(unique_for_rels, outerrelids))
1204 74562 : return true; /* Success! */
1205 : }
1206 :
1207 : /*
1208 : * Conversely, we may have already determined that this outerrel, or some
1209 : * superset thereof, cannot prove this innerrel to be unique.
1210 : */
1211 159106 : foreach(lc, innerrel->non_unique_for_rels)
1212 : {
1213 0 : Relids unique_for_rels = (Relids) lfirst(lc);
1214 :
1215 0 : if (bms_is_subset(outerrelids, unique_for_rels))
1216 0 : return false;
1217 : }
1218 :
1219 : /* No cached information, so try to make the proof. */
1220 159106 : if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
1221 : jointype, restrictlist))
1222 : {
1223 : /*
1224 : * Cache the positive result for future probes, being sure to keep it
1225 : * in the planner_cxt even if we are working in GEQO.
1226 : *
1227 : * Note: one might consider trying to isolate the minimal subset of
1228 : * the outerrels that proved the innerrel unique. But it's not worth
1229 : * the trouble, because the planner builds up joinrels incrementally
1230 : * and so we'll see the minimally sufficient outerrels before any
1231 : * supersets of them anyway.
1232 : */
1233 93016 : old_context = MemoryContextSwitchTo(root->planner_cxt);
1234 93016 : innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
1235 93016 : bms_copy(outerrelids));
1236 93016 : MemoryContextSwitchTo(old_context);
1237 :
1238 93016 : return true; /* Success! */
1239 : }
1240 : else
1241 : {
1242 : /*
1243 : * None of the join conditions for outerrel proved innerrel unique, so
1244 : * we can safely reject this outerrel or any subset of it in future
1245 : * checks.
1246 : *
1247 : * However, in normal planning mode, caching this knowledge is totally
1248 : * pointless; it won't be queried again, because we build up joinrels
1249 : * from smaller to larger. It is useful in GEQO mode, where the
1250 : * knowledge can be carried across successive planning attempts; and
1251 : * it's likely to be useful when using join-search plugins, too. Hence
1252 : * cache when join_search_private is non-NULL. (Yeah, that's a hack,
1253 : * but it seems reasonable.)
1254 : *
1255 : * Also, allow callers to override that heuristic and force caching;
1256 : * that's useful for reduce_unique_semijoins, which calls here before
1257 : * the normal join search starts.
1258 : */
1259 66090 : if (force_cache || root->join_search_private)
1260 : {
1261 768 : old_context = MemoryContextSwitchTo(root->planner_cxt);
1262 768 : innerrel->non_unique_for_rels =
1263 768 : lappend(innerrel->non_unique_for_rels,
1264 768 : bms_copy(outerrelids));
1265 768 : MemoryContextSwitchTo(old_context);
1266 : }
1267 :
1268 66090 : return false;
1269 : }
1270 : }
1271 :
1272 : /*
1273 : * is_innerrel_unique_for
1274 : * Check if the innerrel provably contains at most one tuple matching any
1275 : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1276 : */
1277 : static bool
1278 159106 : is_innerrel_unique_for(PlannerInfo *root,
1279 : Relids joinrelids,
1280 : Relids outerrelids,
1281 : RelOptInfo *innerrel,
1282 : JoinType jointype,
1283 : List *restrictlist)
1284 : {
1285 159106 : List *clause_list = NIL;
1286 : ListCell *lc;
1287 :
1288 : /*
1289 : * Search for mergejoinable clauses that constrain the inner rel against
1290 : * the outer rel. If an operator is mergejoinable then it behaves like
1291 : * equality for some btree opclass, so it's what we want. The
1292 : * mergejoinability test also eliminates clauses containing volatile
1293 : * functions, which we couldn't depend on.
1294 : */
1295 347840 : foreach(lc, restrictlist)
1296 : {
1297 188734 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
1298 :
1299 : /*
1300 : * As noted above, if it's a pushed-down clause and we're at an outer
1301 : * join, we can't use it.
1302 : */
1303 188734 : if (IS_OUTER_JOIN(jointype) &&
1304 94582 : RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
1305 3370 : continue;
1306 :
1307 : /* Ignore if it's not a mergejoinable clause */
1308 185364 : if (!restrictinfo->can_join ||
1309 172434 : restrictinfo->mergeopfamilies == NIL)
1310 15102 : continue; /* not mergejoinable */
1311 :
1312 : /*
1313 : * Check if clause has the form "outer op inner" or "inner op outer",
1314 : * and if so mark which side is inner.
1315 : */
1316 170262 : if (!clause_sides_match_join(restrictinfo, outerrelids,
1317 : innerrel->relids))
1318 14 : continue; /* no good for these input relations */
1319 :
1320 : /* OK, add to list */
1321 170248 : clause_list = lappend(clause_list, restrictinfo);
1322 : }
1323 :
1324 : /* Let rel_is_distinct_for() do the hard work */
1325 159106 : return rel_is_distinct_for(root, innerrel, clause_list);
1326 : }
|