Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * analyzejoins.c
4 : * Routines for simplifying joins after initial query analysis
5 : *
6 : * While we do a great deal of join simplification in prep/prepjointree.c,
7 : * certain optimizations cannot be performed at that stage for lack of
8 : * detailed information about the query. The routines here are invoked
9 : * after initsplan.c has done its work, and can do additional join removal
10 : * and simplification steps based on the information extracted. The penalty
11 : * is that we have to work harder to clean up after ourselves when we modify
12 : * the query, since the derived data structures have to be updated too.
13 : *
14 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
15 : * Portions Copyright (c) 1994, Regents of the University of California
16 : *
17 : *
18 : * IDENTIFICATION
19 : * src/backend/optimizer/plan/analyzejoins.c
20 : *
21 : *-------------------------------------------------------------------------
22 : */
23 : #include "postgres.h"
24 :
25 : #include "catalog/pg_class.h"
26 : #include "nodes/nodeFuncs.h"
27 : #include "optimizer/joininfo.h"
28 : #include "optimizer/optimizer.h"
29 : #include "optimizer/pathnode.h"
30 : #include "optimizer/paths.h"
31 : #include "optimizer/placeholder.h"
32 : #include "optimizer/planmain.h"
33 : #include "optimizer/restrictinfo.h"
34 : #include "rewrite/rewriteManip.h"
35 : #include "utils/lsyscache.h"
36 :
37 : /*
38 : * Utility structure. A sorting procedure is needed to simplify the search
39 : * of SJE-candidate baserels referencing the same database relation. Having
40 : * collected all baserels from the query jointree, the planner sorts them
41 : * according to the reloid value, groups them with the next pass and attempts
42 : * to remove self-joins.
43 : *
44 : * Preliminary sorting prevents quadratic behavior that can be harmful in the
45 : * case of numerous joins.
46 : */
47 : typedef struct
48 : {
49 : int relid;
50 : Oid reloid;
51 : } SelfJoinCandidate;
52 :
53 : bool enable_self_join_elimination;
54 :
55 : /* local functions */
56 : static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
57 : static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
58 : SpecialJoinInfo *sjinfo);
59 : static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
60 : int relid, int ojrelid);
61 : static void remove_rel_from_eclass(EquivalenceClass *ec,
62 : SpecialJoinInfo *sjinfo,
63 : int relid, int subst);
64 : static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
65 : static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
66 : static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
67 : List *clause_list, List **extra_clauses);
68 : static Oid distinct_col_search(int colno, List *colnos, List *opids);
69 : static bool is_innerrel_unique_for(PlannerInfo *root,
70 : Relids joinrelids,
71 : Relids outerrelids,
72 : RelOptInfo *innerrel,
73 : JoinType jointype,
74 : List *restrictlist,
75 : List **extra_clauses);
76 : static int self_join_candidates_cmp(const void *a, const void *b);
77 : static bool replace_relid_callback(Node *node,
78 : ChangeVarNodes_context *context);
79 :
80 :
81 : /*
82 : * remove_useless_joins
83 : * Check for relations that don't actually need to be joined at all,
84 : * and remove them from the query.
85 : *
86 : * We are passed the current joinlist and return the updated list. Other
87 : * data structures that have to be updated are accessible via "root".
88 : */
89 : List *
90 342304 : remove_useless_joins(PlannerInfo *root, List *joinlist)
91 : {
92 : ListCell *lc;
93 :
94 : /*
95 : * We are only interested in relations that are left-joined to, so we can
96 : * scan the join_info_list to find them easily.
97 : */
98 342304 : restart:
99 386324 : foreach(lc, root->join_info_list)
100 : {
101 54704 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
102 : int innerrelid;
103 : int nremoved;
104 :
105 : /* Skip if not removable */
106 54704 : if (!join_is_removable(root, sjinfo))
107 44020 : continue;
108 :
109 : /*
110 : * Currently, join_is_removable can only succeed when the sjinfo's
111 : * righthand is a single baserel. Remove that rel from the query and
112 : * joinlist.
113 : */
114 10684 : innerrelid = bms_singleton_member(sjinfo->min_righthand);
115 :
116 10684 : remove_leftjoinrel_from_query(root, innerrelid, sjinfo);
117 :
118 : /* We verify that exactly one reference gets removed from joinlist */
119 10684 : nremoved = 0;
120 10684 : joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
121 10684 : if (nremoved != 1)
122 0 : elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
123 :
124 : /*
125 : * We can delete this SpecialJoinInfo from the list too, since it's no
126 : * longer of interest. (Since we'll restart the foreach loop
127 : * immediately, we don't bother with foreach_delete_current.)
128 : */
129 10684 : root->join_info_list = list_delete_cell(root->join_info_list, lc);
130 :
131 : /*
132 : * Restart the scan. This is necessary to ensure we find all
133 : * removable joins independently of ordering of the join_info_list
134 : * (note that removal of attr_needed bits may make a join appear
135 : * removable that did not before).
136 : */
137 10684 : goto restart;
138 : }
139 :
140 331620 : return joinlist;
141 : }
142 :
143 : /*
144 : * join_is_removable
145 : * Check whether we need not perform this special join at all, because
146 : * it will just duplicate its left input.
147 : *
148 : * This is true for a left join for which the join condition cannot match
149 : * more than one inner-side row. (There are other possibly interesting
150 : * cases, but we don't have the infrastructure to prove them.) We also
151 : * have to check that the inner side doesn't generate any variables needed
152 : * above the join.
153 : */
154 : static bool
155 54704 : join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
156 : {
157 : int innerrelid;
158 : RelOptInfo *innerrel;
159 : Relids inputrelids;
160 : Relids joinrelids;
161 54704 : List *clause_list = NIL;
162 : ListCell *l;
163 : int attroff;
164 :
165 : /*
166 : * Must be a left join to a single baserel, else we aren't going to be
167 : * able to do anything with it.
168 : */
169 54704 : if (sjinfo->jointype != JOIN_LEFT)
170 9940 : return false;
171 :
172 44764 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
173 1298 : return false;
174 :
175 : /*
176 : * Never try to eliminate a left join to the query result rel. Although
177 : * the case is syntactically impossible in standard SQL, MERGE will build
178 : * a join tree that looks exactly like that.
179 : */
180 43466 : if (innerrelid == root->parse->resultRelation)
181 758 : return false;
182 :
183 42708 : innerrel = find_base_rel(root, innerrelid);
184 :
185 : /*
186 : * Before we go to the effort of checking whether any innerrel variables
187 : * are needed above the join, make a quick check to eliminate cases in
188 : * which we will surely be unable to prove uniqueness of the innerrel.
189 : */
190 42708 : if (!rel_supports_distinctness(root, innerrel))
191 3116 : return false;
192 :
193 : /* Compute the relid set for the join we are considering */
194 39592 : inputrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
195 : Assert(sjinfo->ojrelid != 0);
196 39592 : joinrelids = bms_copy(inputrelids);
197 39592 : joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
198 :
199 : /*
200 : * We can't remove the join if any inner-rel attributes are used above the
201 : * join. Here, "above" the join includes pushed-down conditions, so we
202 : * should reject if attr_needed includes the OJ's own relid; therefore,
203 : * compare to inputrelids not joinrelids.
204 : *
205 : * As a micro-optimization, it seems better to start with max_attr and
206 : * count down rather than starting with min_attr and counting up, on the
207 : * theory that the system attributes are somewhat less likely to be wanted
208 : * and should be tested last.
209 : */
210 351474 : for (attroff = innerrel->max_attr - innerrel->min_attr;
211 : attroff >= 0;
212 311882 : attroff--)
213 : {
214 340628 : if (!bms_is_subset(innerrel->attr_needed[attroff], inputrelids))
215 28746 : return false;
216 : }
217 :
218 : /*
219 : * Similarly check that the inner rel isn't needed by any PlaceHolderVars
220 : * that will be used above the join. The PHV case is a little bit more
221 : * complicated, because PHVs may have been assigned a ph_eval_at location
222 : * that includes the innerrel, yet their contained expression might not
223 : * actually reference the innerrel (it could be just a constant, for
224 : * instance). If such a PHV is due to be evaluated above the join then it
225 : * needn't prevent join removal.
226 : */
227 11038 : foreach(l, root->placeholder_list)
228 : {
229 228 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
230 :
231 228 : if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
232 36 : return false; /* it references innerrel laterally */
233 228 : if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
234 54 : continue; /* it definitely doesn't reference innerrel */
235 174 : if (bms_is_subset(phinfo->ph_needed, inputrelids))
236 6 : continue; /* PHV is not used above the join */
237 168 : if (!bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
238 30 : return false; /* it has to be evaluated below the join */
239 :
240 : /*
241 : * We need to be sure there will still be a place to evaluate the PHV
242 : * if we remove the join, ie that ph_eval_at wouldn't become empty.
243 : */
244 138 : if (!bms_overlap(sjinfo->min_lefthand, phinfo->ph_eval_at))
245 6 : return false; /* there isn't any other place to eval PHV */
246 : /* Check contained expression last, since this is a bit expensive */
247 132 : if (bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
248 132 : innerrel->relids))
249 0 : return false; /* contained expression references innerrel */
250 : }
251 :
252 : /*
253 : * Search for mergejoinable clauses that constrain the inner rel against
254 : * either the outer rel or a pseudoconstant. If an operator is
255 : * mergejoinable then it behaves like equality for some btree opclass, so
256 : * it's what we want. The mergejoinability test also eliminates clauses
257 : * containing volatile functions, which we couldn't depend on.
258 : */
259 21930 : foreach(l, innerrel->joininfo)
260 : {
261 11120 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
262 :
263 : /*
264 : * If the current join commutes with some other outer join(s) via
265 : * outer join identity 3, there will be multiple clones of its join
266 : * clauses in the joininfo list. We want to consider only the
267 : * has_clone form of such clauses. Processing more than one form
268 : * would be wasteful, and also some of the others would confuse the
269 : * RINFO_IS_PUSHED_DOWN test below.
270 : */
271 11120 : if (restrictinfo->is_clone)
272 100 : continue; /* ignore it */
273 :
274 : /*
275 : * If it's not a join clause for this outer join, we can't use it.
276 : * Note that if the clause is pushed-down, then it is logically from
277 : * above the outer join, even if it references no other rels (it might
278 : * be from WHERE, for example).
279 : */
280 11020 : if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
281 120 : continue; /* ignore; not useful here */
282 :
283 : /* Ignore if it's not a mergejoinable clause */
284 10900 : if (!restrictinfo->can_join ||
285 10826 : restrictinfo->mergeopfamilies == NIL)
286 74 : continue; /* not mergejoinable */
287 :
288 : /*
289 : * Check if the clause has the form "outer op inner" or "inner op
290 : * outer", and if so mark which side is inner.
291 : */
292 10826 : if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
293 : innerrel->relids))
294 6 : continue; /* no good for these input relations */
295 :
296 : /* OK, add to list */
297 10820 : clause_list = lappend(clause_list, restrictinfo);
298 : }
299 :
300 : /*
301 : * Now that we have the relevant equality join clauses, try to prove the
302 : * innerrel distinct.
303 : */
304 10810 : if (rel_is_distinct_for(root, innerrel, clause_list, NULL))
305 10684 : return true;
306 :
307 : /*
308 : * Some day it would be nice to check for other methods of establishing
309 : * distinctness.
310 : */
311 126 : return false;
312 : }
313 :
314 :
315 : /*
316 : * Remove the target rel->relid and references to the target join from the
317 : * planner's data structures, having determined that there is no need
318 : * to include them in the query. Optionally replace them with subst if subst
319 : * is non-negative.
320 : *
321 : * This function updates only parts needed for both left-join removal and
322 : * self-join removal.
323 : */
324 : static void
325 11284 : remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel,
326 : int subst, SpecialJoinInfo *sjinfo,
327 : Relids joinrelids)
328 : {
329 11284 : int relid = rel->relid;
330 : Index rti;
331 : ListCell *l;
332 :
333 : /*
334 : * Update all_baserels and related relid sets.
335 : */
336 11284 : root->all_baserels = adjust_relid_set(root->all_baserels, relid, subst);
337 11284 : root->all_query_rels = adjust_relid_set(root->all_query_rels, relid, subst);
338 :
339 11284 : if (sjinfo != NULL)
340 : {
341 21368 : root->outer_join_rels = bms_del_member(root->outer_join_rels,
342 10684 : sjinfo->ojrelid);
343 10684 : root->all_query_rels = bms_del_member(root->all_query_rels,
344 10684 : sjinfo->ojrelid);
345 : }
346 :
347 : /*
348 : * Likewise remove references from SpecialJoinInfo data structures.
349 : *
350 : * This is relevant in case the outer join we're deleting is nested inside
351 : * other outer joins: the upper joins' relid sets have to be adjusted. The
352 : * RHS of the target outer join will be made empty here, but that's OK
353 : * since caller will delete that SpecialJoinInfo entirely.
354 : */
355 25786 : foreach(l, root->join_info_list)
356 : {
357 14502 : SpecialJoinInfo *sjinf = (SpecialJoinInfo *) lfirst(l);
358 :
359 : /*
360 : * initsplan.c is fairly cavalier about allowing SpecialJoinInfos'
361 : * lefthand/righthand relid sets to be shared with other data
362 : * structures. Ensure that we don't modify the original relid sets.
363 : * (The commute_xxx sets are always per-SpecialJoinInfo though.)
364 : */
365 14502 : sjinf->min_lefthand = bms_copy(sjinf->min_lefthand);
366 14502 : sjinf->min_righthand = bms_copy(sjinf->min_righthand);
367 14502 : sjinf->syn_lefthand = bms_copy(sjinf->syn_lefthand);
368 14502 : sjinf->syn_righthand = bms_copy(sjinf->syn_righthand);
369 : /* Now remove relid from the sets: */
370 14502 : sjinf->min_lefthand = adjust_relid_set(sjinf->min_lefthand, relid, subst);
371 14502 : sjinf->min_righthand = adjust_relid_set(sjinf->min_righthand, relid, subst);
372 14502 : sjinf->syn_lefthand = adjust_relid_set(sjinf->syn_lefthand, relid, subst);
373 14502 : sjinf->syn_righthand = adjust_relid_set(sjinf->syn_righthand, relid, subst);
374 :
375 14502 : if (sjinfo != NULL)
376 : {
377 : Assert(subst <= 0);
378 :
379 : /* Remove sjinfo->ojrelid bits from the sets: */
380 28800 : sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand,
381 14400 : sjinfo->ojrelid);
382 28800 : sjinf->min_righthand = bms_del_member(sjinf->min_righthand,
383 14400 : sjinfo->ojrelid);
384 28800 : sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand,
385 14400 : sjinfo->ojrelid);
386 28800 : sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand,
387 14400 : sjinfo->ojrelid);
388 : /* relid cannot appear in these fields, but ojrelid can: */
389 28800 : sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l,
390 14400 : sjinfo->ojrelid);
391 28800 : sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r,
392 14400 : sjinfo->ojrelid);
393 28800 : sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l,
394 14400 : sjinfo->ojrelid);
395 14400 : sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r,
396 14400 : sjinfo->ojrelid);
397 : }
398 : else
399 : {
400 : Assert(subst > 0);
401 :
402 102 : ChangeVarNodesExtended((Node *) sjinf->semi_rhs_exprs, relid, subst,
403 : 0, replace_relid_callback);
404 : }
405 : }
406 :
407 : /*
408 : * Likewise remove references from PlaceHolderVar data structures,
409 : * removing any no-longer-needed placeholders entirely. We remove PHV
410 : * only for left-join removal. With self-join elimination, PHVs already
411 : * get moved to the remaining relation, where they might still be needed.
412 : * It might also happen that we skip the removal of some PHVs that could
413 : * be removed. However, the overhead of extra PHVs is small compared to
414 : * the complexity of analysis needed to remove them.
415 : *
416 : * Removal is a bit trickier than it might seem: we can remove PHVs that
417 : * are used at the target rel and/or in the join qual, but not those that
418 : * are used at join partner rels or above the join. It's not that easy to
419 : * distinguish PHVs used at partner rels from those used in the join qual,
420 : * since they will both have ph_needed sets that are subsets of
421 : * joinrelids. However, a PHV used at a partner rel could not have the
422 : * target rel in ph_eval_at, so we check that while deciding whether to
423 : * remove or just update the PHV. There is no corresponding test in
424 : * join_is_removable because it doesn't need to distinguish those cases.
425 : */
426 11500 : foreach(l, root->placeholder_list)
427 : {
428 216 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
429 :
430 : Assert(sjinfo == NULL || !bms_is_member(relid, phinfo->ph_lateral));
431 390 : if (sjinfo != NULL &&
432 204 : bms_is_subset(phinfo->ph_needed, joinrelids) &&
433 30 : bms_is_member(relid, phinfo->ph_eval_at) &&
434 12 : !bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
435 : {
436 : /*
437 : * This code shouldn't be executed if one relation is substituted
438 : * with another: in this case, the placeholder may be employed in
439 : * a filter inside the scan node the SJE removes.
440 : */
441 6 : root->placeholder_list = foreach_delete_current(root->placeholder_list,
442 : l);
443 6 : root->placeholder_array[phinfo->phid] = NULL;
444 : }
445 : else
446 : {
447 210 : PlaceHolderVar *phv = phinfo->ph_var;
448 :
449 210 : phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at, relid, subst);
450 210 : if (sjinfo != NULL)
451 168 : phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at,
452 168 : sjinfo->ojrelid, subst);
453 : Assert(!bms_is_empty(phinfo->ph_eval_at)); /* checked previously */
454 : /* Reduce ph_needed to contain only "relation 0"; see below */
455 210 : if (bms_is_member(0, phinfo->ph_needed))
456 102 : phinfo->ph_needed = bms_make_singleton(0);
457 : else
458 108 : phinfo->ph_needed = NULL;
459 :
460 210 : phinfo->ph_lateral = adjust_relid_set(phinfo->ph_lateral, relid, subst);
461 :
462 : /*
463 : * ph_lateral might contain rels mentioned in ph_eval_at after the
464 : * replacement, remove them.
465 : */
466 210 : phinfo->ph_lateral = bms_difference(phinfo->ph_lateral, phinfo->ph_eval_at);
467 : /* ph_lateral might or might not be empty */
468 :
469 210 : phv->phrels = adjust_relid_set(phv->phrels, relid, subst);
470 210 : if (sjinfo != NULL)
471 168 : phv->phrels = adjust_relid_set(phv->phrels,
472 168 : sjinfo->ojrelid, subst);
473 : Assert(!bms_is_empty(phv->phrels));
474 :
475 210 : ChangeVarNodesExtended((Node *) phv->phexpr, relid, subst, 0,
476 : replace_relid_callback);
477 :
478 : Assert(phv->phnullingrels == NULL); /* no need to adjust */
479 : }
480 : }
481 :
482 : /*
483 : * Likewise remove references from EquivalenceClasses.
484 : */
485 59610 : foreach(l, root->eq_classes)
486 : {
487 48326 : EquivalenceClass *ec = (EquivalenceClass *) lfirst(l);
488 :
489 48326 : if (bms_is_member(relid, ec->ec_relids) ||
490 31822 : (sjinfo == NULL || bms_is_member(sjinfo->ojrelid, ec->ec_relids)))
491 16504 : remove_rel_from_eclass(ec, sjinfo, relid, subst);
492 : }
493 :
494 : /*
495 : * Finally, we must recompute per-Var attr_needed and per-PlaceHolderVar
496 : * ph_needed relid sets. These have to be known accurately, else we may
497 : * fail to remove other now-removable outer joins. And our removal of the
498 : * join clause(s) for this outer join may mean that Vars that were
499 : * formerly needed no longer are. So we have to do this honestly by
500 : * repeating the construction of those relid sets. We can cheat to one
501 : * small extent: we can avoid re-examining the targetlist and HAVING qual
502 : * by preserving "relation 0" bits from the existing relid sets. This is
503 : * safe because we'd never remove such references.
504 : *
505 : * So, start by removing all other bits from attr_needed sets and
506 : * lateral_vars lists. (We already did this above for ph_needed.)
507 : */
508 70108 : for (rti = 1; rti < root->simple_rel_array_size; rti++)
509 : {
510 58824 : RelOptInfo *otherrel = root->simple_rel_array[rti];
511 : int attroff;
512 :
513 : /* there may be empty slots corresponding to non-baserel RTEs */
514 58824 : if (otherrel == NULL)
515 28644 : continue;
516 :
517 : Assert(otherrel->relid == rti); /* sanity check on array */
518 :
519 669070 : for (attroff = otherrel->max_attr - otherrel->min_attr;
520 : attroff >= 0;
521 638890 : attroff--)
522 : {
523 638890 : if (bms_is_member(0, otherrel->attr_needed[attroff]))
524 46280 : otherrel->attr_needed[attroff] = bms_make_singleton(0);
525 : else
526 592610 : otherrel->attr_needed[attroff] = NULL;
527 : }
528 :
529 30180 : if (subst > 0)
530 1496 : ChangeVarNodesExtended((Node *) otherrel->lateral_vars, relid,
531 : subst, 0, replace_relid_callback);
532 : }
533 11284 : }
534 :
535 : /*
536 : * Remove the target relid and references to the target join from the
537 : * planner's data structures, having determined that there is no need
538 : * to include them in the query.
539 : *
540 : * We are not terribly thorough here. We only bother to update parts of
541 : * the planner's data structures that will actually be consulted later.
542 : */
543 : static void
544 10684 : remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
545 : SpecialJoinInfo *sjinfo)
546 : {
547 10684 : RelOptInfo *rel = find_base_rel(root, relid);
548 10684 : int ojrelid = sjinfo->ojrelid;
549 : Relids joinrelids;
550 : Relids join_plus_commute;
551 : List *joininfos;
552 : ListCell *l;
553 :
554 : /* Compute the relid set for the join we are considering */
555 10684 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
556 : Assert(ojrelid != 0);
557 10684 : joinrelids = bms_add_member(joinrelids, ojrelid);
558 :
559 10684 : remove_rel_from_query(root, rel, -1, sjinfo, joinrelids);
560 :
561 : /*
562 : * Remove any joinquals referencing the rel from the joininfo lists.
563 : *
564 : * In some cases, a joinqual has to be put back after deleting its
565 : * reference to the target rel. This can occur for pseudoconstant and
566 : * outerjoin-delayed quals, which can get marked as requiring the rel in
567 : * order to force them to be evaluated at or above the join. We can't
568 : * just discard them, though. Only quals that logically belonged to the
569 : * outer join being discarded should be removed from the query.
570 : *
571 : * We might encounter a qual that is a clone of a deletable qual with some
572 : * outer-join relids added (see deconstruct_distribute_oj_quals). To
573 : * ensure we get rid of such clones as well, add the relids of all OJs
574 : * commutable with this one to the set we test against for
575 : * pushed-down-ness.
576 : */
577 10684 : join_plus_commute = bms_union(joinrelids,
578 10684 : sjinfo->commute_above_r);
579 10684 : join_plus_commute = bms_add_members(join_plus_commute,
580 10684 : sjinfo->commute_below_l);
581 :
582 : /*
583 : * We must make a copy of the rel's old joininfo list before starting the
584 : * loop, because otherwise remove_join_clause_from_rels would destroy the
585 : * list while we're scanning it.
586 : */
587 10684 : joininfos = list_copy(rel->joininfo);
588 21702 : foreach(l, joininfos)
589 : {
590 11018 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
591 :
592 11018 : remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
593 :
594 11018 : if (RINFO_IS_PUSHED_DOWN(rinfo, join_plus_commute))
595 : {
596 : /*
597 : * There might be references to relid or ojrelid in the
598 : * RestrictInfo's relid sets, as a consequence of PHVs having had
599 : * ph_eval_at sets that include those. We already checked above
600 : * that any such PHV is safe (and updated its ph_eval_at), so we
601 : * can just drop those references.
602 : */
603 120 : remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
604 :
605 : /*
606 : * Cross-check that the clause itself does not reference the
607 : * target rel or join.
608 : */
609 : #ifdef USE_ASSERT_CHECKING
610 : {
611 : Relids clause_varnos = pull_varnos(root,
612 : (Node *) rinfo->clause);
613 :
614 : Assert(!bms_is_member(relid, clause_varnos));
615 : Assert(!bms_is_member(ojrelid, clause_varnos));
616 : }
617 : #endif
618 : /* Now throw it back into the joininfo lists */
619 120 : distribute_restrictinfo_to_rels(root, rinfo);
620 : }
621 : }
622 :
623 : /*
624 : * There may be references to the rel in root->fkey_list, but if so,
625 : * match_foreign_keys_to_quals() will get rid of them.
626 : */
627 :
628 : /*
629 : * Now remove the rel from the baserel array to prevent it from being
630 : * referenced again. (We can't do this earlier because
631 : * remove_join_clause_from_rels will touch it.)
632 : */
633 10684 : root->simple_rel_array[relid] = NULL;
634 :
635 : /* And nuke the RelOptInfo, just in case there's another access path */
636 10684 : pfree(rel);
637 :
638 : /*
639 : * Now repeat construction of attr_needed bits coming from all other
640 : * sources.
641 : */
642 10684 : rebuild_placeholder_attr_needed(root);
643 10684 : rebuild_joinclause_attr_needed(root);
644 10684 : rebuild_eclass_attr_needed(root);
645 10684 : rebuild_lateral_attr_needed(root);
646 10684 : }
647 :
648 : /*
649 : * Remove any references to relid or ojrelid from the RestrictInfo.
650 : *
651 : * We only bother to clean out bits in clause_relids and required_relids,
652 : * not nullingrel bits in contained Vars and PHVs. (This might have to be
653 : * improved sometime.) However, if the RestrictInfo contains an OR clause
654 : * we have to also clean up the sub-clauses.
655 : */
656 : static void
657 4790 : remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid)
658 : {
659 : /*
660 : * initsplan.c is fairly cavalier about allowing RestrictInfos to share
661 : * relid sets with other RestrictInfos, and SpecialJoinInfos too. Make
662 : * sure this RestrictInfo has its own relid sets before we modify them.
663 : * (In present usage, clause_relids is probably not shared, but
664 : * required_relids could be; let's not assume anything.)
665 : */
666 4790 : rinfo->clause_relids = bms_copy(rinfo->clause_relids);
667 4790 : rinfo->clause_relids = bms_del_member(rinfo->clause_relids, relid);
668 4790 : rinfo->clause_relids = bms_del_member(rinfo->clause_relids, ojrelid);
669 : /* Likewise for required_relids */
670 4790 : rinfo->required_relids = bms_copy(rinfo->required_relids);
671 4790 : rinfo->required_relids = bms_del_member(rinfo->required_relids, relid);
672 4790 : rinfo->required_relids = bms_del_member(rinfo->required_relids, ojrelid);
673 :
674 : /* If it's an OR, recurse to clean up sub-clauses */
675 4790 : if (restriction_is_or_clause(rinfo))
676 : {
677 : ListCell *lc;
678 :
679 : Assert(is_orclause(rinfo->orclause));
680 18 : foreach(lc, ((BoolExpr *) rinfo->orclause)->args)
681 : {
682 12 : Node *orarg = (Node *) lfirst(lc);
683 :
684 : /* OR arguments should be ANDs or sub-RestrictInfos */
685 12 : if (is_andclause(orarg))
686 : {
687 0 : List *andargs = ((BoolExpr *) orarg)->args;
688 : ListCell *lc2;
689 :
690 0 : foreach(lc2, andargs)
691 : {
692 0 : RestrictInfo *rinfo2 = lfirst_node(RestrictInfo, lc2);
693 :
694 0 : remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
695 : }
696 : }
697 : else
698 : {
699 12 : RestrictInfo *rinfo2 = castNode(RestrictInfo, orarg);
700 :
701 12 : remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
702 : }
703 : }
704 : }
705 4790 : }
706 :
707 : /*
708 : * Remove any references to relid or sjinfo->ojrelid (if sjinfo != NULL)
709 : * from the EquivalenceClass.
710 : *
711 : * Like remove_rel_from_restrictinfo, we don't worry about cleaning out
712 : * any nullingrel bits in contained Vars and PHVs. (This might have to be
713 : * improved sometime.) We do need to fix the EC and EM relid sets to ensure
714 : * that implied join equalities will be generated at the appropriate join
715 : * level(s).
716 : */
717 : static void
718 16504 : remove_rel_from_eclass(EquivalenceClass *ec, SpecialJoinInfo *sjinfo,
719 : int relid, int subst)
720 : {
721 : ListCell *lc;
722 :
723 : /* Fix up the EC's overall relids */
724 16504 : ec->ec_relids = adjust_relid_set(ec->ec_relids, relid, subst);
725 16504 : if (sjinfo != NULL)
726 15436 : ec->ec_relids = adjust_relid_set(ec->ec_relids,
727 15436 : sjinfo->ojrelid, subst);
728 :
729 : /*
730 : * We don't expect any EC child members to exist at this point. Ensure
731 : * that's the case, otherwise, we might be getting asked to do something
732 : * this function hasn't been coded for.
733 : */
734 : Assert(ec->ec_childmembers == NULL);
735 :
736 : /*
737 : * Fix up the member expressions. Any non-const member that ends with
738 : * empty em_relids must be a Var or PHV of the removed relation. We don't
739 : * need it anymore, so we can drop it.
740 : */
741 38448 : foreach(lc, ec->ec_members)
742 : {
743 21944 : EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
744 :
745 21944 : if (bms_is_member(relid, cur_em->em_relids) ||
746 4658 : (sjinfo != NULL && bms_is_member(sjinfo->ojrelid,
747 4658 : cur_em->em_relids)))
748 : {
749 : Assert(!cur_em->em_is_const);
750 15436 : cur_em->em_relids = adjust_relid_set(cur_em->em_relids, relid, subst);
751 15436 : if (sjinfo != NULL)
752 15436 : cur_em->em_relids = adjust_relid_set(cur_em->em_relids,
753 15436 : sjinfo->ojrelid, subst);
754 15436 : if (bms_is_empty(cur_em->em_relids))
755 15424 : ec->ec_members = foreach_delete_current(ec->ec_members, lc);
756 : }
757 : }
758 :
759 : /* Fix up the source clauses, in case we can re-use them later */
760 22218 : foreach(lc, ec->ec_sources)
761 : {
762 5714 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
763 :
764 5714 : if (sjinfo == NULL)
765 1056 : ChangeVarNodesExtended((Node *) rinfo, relid, subst, 0,
766 : replace_relid_callback);
767 : else
768 4658 : remove_rel_from_restrictinfo(rinfo, relid, sjinfo->ojrelid);
769 : }
770 :
771 : /*
772 : * Rather than expend code on fixing up any already-derived clauses, just
773 : * drop them. (At this point, any such clauses would be base restriction
774 : * clauses, which we'd not need anymore anyway.)
775 : */
776 16504 : ec_clear_derived_clauses(ec);
777 16504 : }
778 :
779 : /*
780 : * Remove any occurrences of the target relid from a joinlist structure.
781 : *
782 : * It's easiest to build a whole new list structure, so we handle it that
783 : * way. Efficiency is not a big deal here.
784 : *
785 : * *nremoved is incremented by the number of occurrences removed (there
786 : * should be exactly one, but the caller checks that).
787 : */
788 : static List *
789 11548 : remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
790 : {
791 11548 : List *result = NIL;
792 : ListCell *jl;
793 :
794 41992 : foreach(jl, joinlist)
795 : {
796 30444 : Node *jlnode = (Node *) lfirst(jl);
797 :
798 30444 : if (IsA(jlnode, RangeTblRef))
799 : {
800 30180 : int varno = ((RangeTblRef *) jlnode)->rtindex;
801 :
802 30180 : if (varno == relid)
803 11284 : (*nremoved)++;
804 : else
805 18896 : result = lappend(result, jlnode);
806 : }
807 264 : else if (IsA(jlnode, List))
808 : {
809 : /* Recurse to handle subproblem */
810 : List *sublist;
811 :
812 264 : sublist = remove_rel_from_joinlist((List *) jlnode,
813 : relid, nremoved);
814 : /* Avoid including empty sub-lists in the result */
815 264 : if (sublist)
816 264 : result = lappend(result, sublist);
817 : }
818 : else
819 : {
820 0 : elog(ERROR, "unrecognized joinlist node type: %d",
821 : (int) nodeTag(jlnode));
822 : }
823 : }
824 :
825 11548 : return result;
826 : }
827 :
828 :
829 : /*
830 : * reduce_unique_semijoins
831 : * Check for semijoins that can be simplified to plain inner joins
832 : * because the inner relation is provably unique for the join clauses.
833 : *
834 : * Ideally this would happen during reduce_outer_joins, but we don't have
835 : * enough information at that point.
836 : *
837 : * To perform the strength reduction when applicable, we need only delete
838 : * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't
839 : * bother fixing the join type attributed to it in the query jointree,
840 : * since that won't be consulted again.)
841 : */
842 : void
843 331620 : reduce_unique_semijoins(PlannerInfo *root)
844 : {
845 : ListCell *lc;
846 :
847 : /*
848 : * Scan the join_info_list to find semijoins.
849 : */
850 375414 : foreach(lc, root->join_info_list)
851 : {
852 43794 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
853 : int innerrelid;
854 : RelOptInfo *innerrel;
855 : Relids joinrelids;
856 : List *restrictlist;
857 :
858 : /*
859 : * Must be a semijoin to a single baserel, else we aren't going to be
860 : * able to do anything with it.
861 : */
862 43794 : if (sjinfo->jointype != JOIN_SEMI)
863 43484 : continue;
864 :
865 4812 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
866 164 : continue;
867 :
868 4648 : innerrel = find_base_rel(root, innerrelid);
869 :
870 : /*
871 : * Before we trouble to run generate_join_implied_equalities, make a
872 : * quick check to eliminate cases in which we will surely be unable to
873 : * prove uniqueness of the innerrel.
874 : */
875 4648 : if (!rel_supports_distinctness(root, innerrel))
876 944 : continue;
877 :
878 : /* Compute the relid set for the join we are considering */
879 3704 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
880 : Assert(sjinfo->ojrelid == 0); /* SEMI joins don't have RT indexes */
881 :
882 : /*
883 : * Since we're only considering a single-rel RHS, any join clauses it
884 : * has must be clauses linking it to the semijoin's min_lefthand. We
885 : * can also consider EC-derived join clauses.
886 : */
887 : restrictlist =
888 3704 : list_concat(generate_join_implied_equalities(root,
889 : joinrelids,
890 : sjinfo->min_lefthand,
891 : innerrel,
892 : NULL),
893 3704 : innerrel->joininfo);
894 :
895 : /* Test whether the innerrel is unique for those clauses. */
896 3704 : if (!innerrel_is_unique(root,
897 : joinrelids, sjinfo->min_lefthand, innerrel,
898 : JOIN_SEMI, restrictlist, true))
899 3394 : continue;
900 :
901 : /* OK, remove the SpecialJoinInfo from the list. */
902 310 : root->join_info_list = foreach_delete_current(root->join_info_list, lc);
903 : }
904 331620 : }
905 :
906 :
907 : /*
908 : * rel_supports_distinctness
909 : * Could the relation possibly be proven distinct on some set of columns?
910 : *
911 : * This is effectively a pre-checking function for rel_is_distinct_for().
912 : * It must return true if rel_is_distinct_for() could possibly return true
913 : * with this rel, but it should not expend a lot of cycles. The idea is
914 : * that callers can avoid doing possibly-expensive processing to compute
915 : * rel_is_distinct_for()'s argument lists if the call could not possibly
916 : * succeed.
917 : */
918 : static bool
919 616298 : rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
920 : {
921 : /* We only know about baserels ... */
922 616298 : if (rel->reloptkind != RELOPT_BASEREL)
923 202120 : return false;
924 414178 : if (rel->rtekind == RTE_RELATION)
925 : {
926 : /*
927 : * For a plain relation, we only know how to prove uniqueness by
928 : * reference to unique indexes. Make sure there's at least one
929 : * suitable unique index. It must be immediately enforced, and not a
930 : * partial index. (Keep these conditions in sync with
931 : * relation_has_unique_index_for!)
932 : */
933 : ListCell *lc;
934 :
935 525112 : foreach(lc, rel->indexlist)
936 : {
937 471790 : IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
938 :
939 471790 : if (ind->unique && ind->immediate && ind->indpred == NIL)
940 323562 : return true;
941 : }
942 : }
943 37294 : else if (rel->rtekind == RTE_SUBQUERY)
944 : {
945 10194 : Query *subquery = root->simple_rte_array[rel->relid]->subquery;
946 :
947 : /* Check if the subquery has any qualities that support distinctness */
948 10194 : if (query_supports_distinctness(subquery))
949 8212 : return true;
950 : }
951 : /* We have no proof rules for any other rtekinds. */
952 82404 : return false;
953 : }
954 :
955 : /*
956 : * rel_is_distinct_for
957 : * Does the relation return only distinct rows according to clause_list?
958 : *
959 : * clause_list is a list of join restriction clauses involving this rel and
960 : * some other one. Return true if no two rows emitted by this rel could
961 : * possibly join to the same row of the other rel.
962 : *
963 : * The caller must have already determined that each condition is a
964 : * mergejoinable equality with an expression in this relation on one side, and
965 : * an expression not involving this relation on the other. The transient
966 : * outer_is_left flag is used to identify which side references this relation:
967 : * left side if outer_is_left is false, right side if it is true.
968 : *
969 : * Note that the passed-in clause_list may be destructively modified! This
970 : * is OK for current uses, because the clause_list is built by the caller for
971 : * the sole purpose of passing to this function.
972 : *
973 : * (*extra_clauses) to be set to the right sides of baserestrictinfo clauses,
974 : * looking like "x = const" if distinctness is derived from such clauses, not
975 : * joininfo clauses. Pass NULL to the extra_clauses if this value is not
976 : * needed.
977 : */
978 : static bool
979 209824 : rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list,
980 : List **extra_clauses)
981 : {
982 : /*
983 : * We could skip a couple of tests here if we assume all callers checked
984 : * rel_supports_distinctness first, but it doesn't seem worth taking any
985 : * risk for.
986 : */
987 209824 : if (rel->reloptkind != RELOPT_BASEREL)
988 0 : return false;
989 209824 : if (rel->rtekind == RTE_RELATION)
990 : {
991 : /*
992 : * Examine the indexes to see if we have a matching unique index.
993 : * relation_has_unique_index_ext automatically adds any usable
994 : * restriction clauses for the rel, so we needn't do that here.
995 : */
996 205152 : if (relation_has_unique_index_ext(root, rel, clause_list, NIL, NIL,
997 : extra_clauses))
998 117206 : return true;
999 : }
1000 4672 : else if (rel->rtekind == RTE_SUBQUERY)
1001 : {
1002 4672 : Index relid = rel->relid;
1003 4672 : Query *subquery = root->simple_rte_array[relid]->subquery;
1004 4672 : List *colnos = NIL;
1005 4672 : List *opids = NIL;
1006 : ListCell *l;
1007 :
1008 : /*
1009 : * Build the argument lists for query_is_distinct_for: a list of
1010 : * output column numbers that the query needs to be distinct over, and
1011 : * a list of equality operators that the output columns need to be
1012 : * distinct according to.
1013 : *
1014 : * (XXX we are not considering restriction clauses attached to the
1015 : * subquery; is that worth doing?)
1016 : */
1017 9308 : foreach(l, clause_list)
1018 : {
1019 4636 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
1020 : Oid op;
1021 : Var *var;
1022 :
1023 : /*
1024 : * Get the equality operator we need uniqueness according to.
1025 : * (This might be a cross-type operator and thus not exactly the
1026 : * same operator the subquery would consider; that's all right
1027 : * since query_is_distinct_for can resolve such cases.) The
1028 : * caller's mergejoinability test should have selected only
1029 : * OpExprs.
1030 : */
1031 4636 : op = castNode(OpExpr, rinfo->clause)->opno;
1032 :
1033 : /* caller identified the inner side for us */
1034 4636 : if (rinfo->outer_is_left)
1035 4298 : var = (Var *) get_rightop(rinfo->clause);
1036 : else
1037 338 : var = (Var *) get_leftop(rinfo->clause);
1038 :
1039 : /*
1040 : * We may ignore any RelabelType node above the operand. (There
1041 : * won't be more than one, since eval_const_expressions() has been
1042 : * applied already.)
1043 : */
1044 4636 : if (var && IsA(var, RelabelType))
1045 3176 : var = (Var *) ((RelabelType *) var)->arg;
1046 :
1047 : /*
1048 : * If inner side isn't a Var referencing a subquery output column,
1049 : * this clause doesn't help us.
1050 : */
1051 4636 : if (!var || !IsA(var, Var) ||
1052 4624 : var->varno != relid || var->varlevelsup != 0)
1053 12 : continue;
1054 :
1055 4624 : colnos = lappend_int(colnos, var->varattno);
1056 4624 : opids = lappend_oid(opids, op);
1057 : }
1058 :
1059 4672 : if (query_is_distinct_for(subquery, colnos, opids))
1060 212 : return true;
1061 : }
1062 92406 : return false;
1063 : }
1064 :
1065 :
1066 : /*
1067 : * query_supports_distinctness - could the query possibly be proven distinct
1068 : * on some set of output columns?
1069 : *
1070 : * This is effectively a pre-checking function for query_is_distinct_for().
1071 : * It must return true if query_is_distinct_for() could possibly return true
1072 : * with this query, but it should not expend a lot of cycles. The idea is
1073 : * that callers can avoid doing possibly-expensive processing to compute
1074 : * query_is_distinct_for()'s argument lists if the call could not possibly
1075 : * succeed.
1076 : */
1077 : bool
1078 13496 : query_supports_distinctness(Query *query)
1079 : {
1080 : /* SRFs break distinctness except with DISTINCT, see below */
1081 13496 : if (query->hasTargetSRFs && query->distinctClause == NIL)
1082 1008 : return false;
1083 :
1084 : /* check for features we can prove distinctness with */
1085 12488 : if (query->distinctClause != NIL ||
1086 12344 : query->groupClause != NIL ||
1087 12156 : query->groupingSets != NIL ||
1088 12156 : query->hasAggs ||
1089 11884 : query->havingQual ||
1090 11884 : query->setOperations)
1091 11460 : return true;
1092 :
1093 1028 : return false;
1094 : }
1095 :
1096 : /*
1097 : * query_is_distinct_for - does query never return duplicates of the
1098 : * specified columns?
1099 : *
1100 : * query is a not-yet-planned subquery (in current usage, it's always from
1101 : * a subquery RTE, which the planner avoids scribbling on).
1102 : *
1103 : * colnos is an integer list of output column numbers (resno's). We are
1104 : * interested in whether rows consisting of just these columns are certain
1105 : * to be distinct. "Distinctness" is defined according to whether the
1106 : * corresponding upper-level equality operators listed in opids would think
1107 : * the values are distinct. (Note: the opids entries could be cross-type
1108 : * operators, and thus not exactly the equality operators that the subquery
1109 : * would use itself. We use equality_ops_are_compatible() to check
1110 : * compatibility. That looks at opfamily membership for index AMs that have
1111 : * declared that they support consistent equality semantics within an
1112 : * opfamily, and so should give trustworthy answers for all operators that we
1113 : * might need to deal with here.)
1114 : */
1115 : bool
1116 4868 : query_is_distinct_for(Query *query, List *colnos, List *opids)
1117 : {
1118 : ListCell *l;
1119 : Oid opid;
1120 :
1121 : Assert(list_length(colnos) == list_length(opids));
1122 :
1123 : /*
1124 : * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
1125 : * columns in the DISTINCT clause appear in colnos and operator semantics
1126 : * match. This is true even if there are SRFs in the DISTINCT columns or
1127 : * elsewhere in the tlist.
1128 : */
1129 4868 : if (query->distinctClause)
1130 : {
1131 150 : foreach(l, query->distinctClause)
1132 : {
1133 120 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
1134 120 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
1135 : query->targetList);
1136 :
1137 120 : opid = distinct_col_search(tle->resno, colnos, opids);
1138 120 : if (!OidIsValid(opid) ||
1139 48 : !equality_ops_are_compatible(opid, sgc->eqop))
1140 : break; /* exit early if no match */
1141 : }
1142 102 : if (l == NULL) /* had matches for all? */
1143 30 : return true;
1144 : }
1145 :
1146 : /*
1147 : * Otherwise, a set-returning function in the query's targetlist can
1148 : * result in returning duplicate rows, despite any grouping that might
1149 : * occur before tlist evaluation. (If all tlist SRFs are within GROUP BY
1150 : * columns, it would be safe because they'd be expanded before grouping.
1151 : * But it doesn't currently seem worth the effort to check for that.)
1152 : */
1153 4838 : if (query->hasTargetSRFs)
1154 0 : return false;
1155 :
1156 : /*
1157 : * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
1158 : * the grouped columns appear in colnos and operator semantics match.
1159 : */
1160 4838 : if (query->groupClause && !query->groupingSets)
1161 : {
1162 234 : foreach(l, query->groupClause)
1163 : {
1164 164 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
1165 164 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
1166 : query->targetList);
1167 :
1168 164 : opid = distinct_col_search(tle->resno, colnos, opids);
1169 164 : if (!OidIsValid(opid) ||
1170 112 : !equality_ops_are_compatible(opid, sgc->eqop))
1171 : break; /* exit early if no match */
1172 : }
1173 122 : if (l == NULL) /* had matches for all? */
1174 70 : return true;
1175 : }
1176 4716 : else if (query->groupingSets)
1177 : {
1178 : /*
1179 : * If we have grouping sets with expressions, we probably don't have
1180 : * uniqueness and analysis would be hard. Punt.
1181 : */
1182 0 : if (query->groupClause)
1183 0 : return false;
1184 :
1185 : /*
1186 : * If we have no groupClause (therefore no grouping expressions), we
1187 : * might have one or many empty grouping sets. If there's just one,
1188 : * then we're returning only one row and are certainly unique. But
1189 : * otherwise, we know we're certainly not unique.
1190 : */
1191 0 : if (list_length(query->groupingSets) == 1 &&
1192 0 : ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
1193 0 : return true;
1194 : else
1195 0 : return false;
1196 : }
1197 : else
1198 : {
1199 : /*
1200 : * If we have no GROUP BY, but do have aggregates or HAVING, then the
1201 : * result is at most one row so it's surely unique, for any operators.
1202 : */
1203 4716 : if (query->hasAggs || query->havingQual)
1204 100 : return true;
1205 : }
1206 :
1207 : /*
1208 : * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
1209 : * except with ALL.
1210 : */
1211 4668 : if (query->setOperations)
1212 : {
1213 4544 : SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations);
1214 :
1215 : Assert(topop->op != SETOP_NONE);
1216 :
1217 4544 : if (!topop->all)
1218 : {
1219 : ListCell *lg;
1220 :
1221 : /* We're good if all the nonjunk output columns are in colnos */
1222 72 : lg = list_head(topop->groupClauses);
1223 90 : foreach(l, query->targetList)
1224 : {
1225 78 : TargetEntry *tle = (TargetEntry *) lfirst(l);
1226 : SortGroupClause *sgc;
1227 :
1228 78 : if (tle->resjunk)
1229 0 : continue; /* ignore resjunk columns */
1230 :
1231 : /* non-resjunk columns should have grouping clauses */
1232 : Assert(lg != NULL);
1233 78 : sgc = (SortGroupClause *) lfirst(lg);
1234 78 : lg = lnext(topop->groupClauses, lg);
1235 :
1236 78 : opid = distinct_col_search(tle->resno, colnos, opids);
1237 78 : if (!OidIsValid(opid) ||
1238 18 : !equality_ops_are_compatible(opid, sgc->eqop))
1239 : break; /* exit early if no match */
1240 : }
1241 72 : if (l == NULL) /* had matches for all? */
1242 12 : return true;
1243 : }
1244 : }
1245 :
1246 : /*
1247 : * XXX Are there any other cases in which we can easily see the result
1248 : * must be distinct?
1249 : *
1250 : * If you do add more smarts to this function, be sure to update
1251 : * query_supports_distinctness() to match.
1252 : */
1253 :
1254 4656 : return false;
1255 : }
1256 :
1257 : /*
1258 : * distinct_col_search - subroutine for query_is_distinct_for
1259 : *
1260 : * If colno is in colnos, return the corresponding element of opids,
1261 : * else return InvalidOid. (Ordinarily colnos would not contain duplicates,
1262 : * but if it does, we arbitrarily select the first match.)
1263 : */
1264 : static Oid
1265 362 : distinct_col_search(int colno, List *colnos, List *opids)
1266 : {
1267 : ListCell *lc1,
1268 : *lc2;
1269 :
1270 574 : forboth(lc1, colnos, lc2, opids)
1271 : {
1272 390 : if (colno == lfirst_int(lc1))
1273 178 : return lfirst_oid(lc2);
1274 : }
1275 184 : return InvalidOid;
1276 : }
1277 :
1278 :
1279 : /*
1280 : * innerrel_is_unique
1281 : * Check if the innerrel provably contains at most one tuple matching any
1282 : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1283 : *
1284 : * We need an actual RelOptInfo for the innerrel, but it's sufficient to
1285 : * identify the outerrel by its Relids. This asymmetry supports use of this
1286 : * function before joinrels have been built. (The caller is expected to
1287 : * also supply the joinrelids, just to save recalculating that.)
1288 : *
1289 : * The proof must be made based only on clauses that will be "joinquals"
1290 : * rather than "otherquals" at execution. For an inner join there's no
1291 : * difference; but if the join is outer, we must ignore pushed-down quals,
1292 : * as those will become "otherquals". Note that this means the answer might
1293 : * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
1294 : * answer without regard to that, callers must take care not to call this
1295 : * with jointypes that would be classified differently by IS_OUTER_JOIN().
1296 : *
1297 : * The actual proof is undertaken by is_innerrel_unique_for(); this function
1298 : * is a frontend that is mainly concerned with caching the answers.
1299 : * In particular, the force_cache argument allows overriding the internal
1300 : * heuristic about whether to cache negative answers; it should be "true"
1301 : * if making an inquiry that is not part of the normal bottom-up join search
1302 : * sequence.
1303 : */
1304 : bool
1305 669392 : innerrel_is_unique(PlannerInfo *root,
1306 : Relids joinrelids,
1307 : Relids outerrelids,
1308 : RelOptInfo *innerrel,
1309 : JoinType jointype,
1310 : List *restrictlist,
1311 : bool force_cache)
1312 : {
1313 669392 : return innerrel_is_unique_ext(root, joinrelids, outerrelids, innerrel,
1314 : jointype, restrictlist, force_cache, NULL);
1315 : }
1316 :
1317 : /*
1318 : * innerrel_is_unique_ext
1319 : * Do the same as innerrel_is_unique(), but also set to (*extra_clauses)
1320 : * additional clauses from a baserestrictinfo list used to prove the
1321 : * uniqueness.
1322 : *
1323 : * A non-NULL extra_clauses indicates that we're checking for self-join and
1324 : * correspondingly dealing with filtered clauses.
1325 : */
1326 : bool
1327 671392 : innerrel_is_unique_ext(PlannerInfo *root,
1328 : Relids joinrelids,
1329 : Relids outerrelids,
1330 : RelOptInfo *innerrel,
1331 : JoinType jointype,
1332 : List *restrictlist,
1333 : bool force_cache,
1334 : List **extra_clauses)
1335 : {
1336 : MemoryContext old_context;
1337 : ListCell *lc;
1338 : UniqueRelInfo *uniqueRelInfo;
1339 671392 : List *outer_exprs = NIL;
1340 671392 : bool self_join = (extra_clauses != NULL);
1341 :
1342 : /* Certainly can't prove uniqueness when there are no joinclauses */
1343 671392 : if (restrictlist == NIL)
1344 102450 : return false;
1345 :
1346 : /*
1347 : * Make a quick check to eliminate cases in which we will surely be unable
1348 : * to prove uniqueness of the innerrel.
1349 : */
1350 568942 : if (!rel_supports_distinctness(root, innerrel))
1351 280464 : return false;
1352 :
1353 : /*
1354 : * Query the cache to see if we've managed to prove that innerrel is
1355 : * unique for any subset of this outerrel. For non-self-join search, we
1356 : * don't need an exact match, as extra outerrels can't make the innerrel
1357 : * any less unique (or more formally, the restrictlist for a join to a
1358 : * superset outerrel must be a superset of the conditions we successfully
1359 : * used before). For self-join search, we require an exact match of
1360 : * outerrels because we need extra clauses to be valid for our case. Also,
1361 : * for self-join checking we've filtered the clauses list. Thus, we can
1362 : * match only the result cached for a self-join search for another
1363 : * self-join check.
1364 : */
1365 319236 : foreach(lc, innerrel->unique_for_rels)
1366 : {
1367 119898 : uniqueRelInfo = (UniqueRelInfo *) lfirst(lc);
1368 :
1369 119898 : if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) ||
1370 74 : (self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) &&
1371 56 : uniqueRelInfo->self_join))
1372 : {
1373 89140 : if (extra_clauses)
1374 12 : *extra_clauses = uniqueRelInfo->extra_clauses;
1375 89140 : return true; /* Success! */
1376 : }
1377 : }
1378 :
1379 : /*
1380 : * Conversely, we may have already determined that this outerrel, or some
1381 : * superset thereof, cannot prove this innerrel to be unique.
1382 : */
1383 199822 : foreach(lc, innerrel->non_unique_for_rels)
1384 : {
1385 808 : Relids unique_for_rels = (Relids) lfirst(lc);
1386 :
1387 808 : if (bms_is_subset(outerrelids, unique_for_rels))
1388 324 : return false;
1389 : }
1390 :
1391 : /* No cached information, so try to make the proof. */
1392 199014 : if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
1393 : jointype, restrictlist,
1394 : self_join ? &outer_exprs : NULL))
1395 : {
1396 : /*
1397 : * Cache the positive result for future probes, being sure to keep it
1398 : * in the planner_cxt even if we are working in GEQO.
1399 : *
1400 : * Note: one might consider trying to isolate the minimal subset of
1401 : * the outerrels that proved the innerrel unique. But it's not worth
1402 : * the trouble, because the planner builds up joinrels incrementally
1403 : * and so we'll see the minimally sufficient outerrels before any
1404 : * supersets of them anyway.
1405 : */
1406 106734 : old_context = MemoryContextSwitchTo(root->planner_cxt);
1407 106734 : uniqueRelInfo = makeNode(UniqueRelInfo);
1408 106734 : uniqueRelInfo->outerrelids = bms_copy(outerrelids);
1409 106734 : uniqueRelInfo->self_join = self_join;
1410 106734 : uniqueRelInfo->extra_clauses = outer_exprs;
1411 106734 : innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
1412 : uniqueRelInfo);
1413 106734 : MemoryContextSwitchTo(old_context);
1414 :
1415 106734 : if (extra_clauses)
1416 654 : *extra_clauses = outer_exprs;
1417 106734 : return true; /* Success! */
1418 : }
1419 : else
1420 : {
1421 : /*
1422 : * None of the join conditions for outerrel proved innerrel unique, so
1423 : * we can safely reject this outerrel or any subset of it in future
1424 : * checks.
1425 : *
1426 : * However, in normal planning mode, caching this knowledge is totally
1427 : * pointless; it won't be queried again, because we build up joinrels
1428 : * from smaller to larger. It is useful in GEQO mode, where the
1429 : * knowledge can be carried across successive planning attempts; and
1430 : * it's likely to be useful when using join-search plugins, too. Hence
1431 : * cache when join_search_private is non-NULL. (Yeah, that's a hack,
1432 : * but it seems reasonable.)
1433 : *
1434 : * Also, allow callers to override that heuristic and force caching;
1435 : * that's useful for reduce_unique_semijoins, which calls here before
1436 : * the normal join search starts.
1437 : */
1438 92280 : if (force_cache || root->join_search_private)
1439 : {
1440 3718 : old_context = MemoryContextSwitchTo(root->planner_cxt);
1441 3718 : innerrel->non_unique_for_rels =
1442 3718 : lappend(innerrel->non_unique_for_rels,
1443 3718 : bms_copy(outerrelids));
1444 3718 : MemoryContextSwitchTo(old_context);
1445 : }
1446 :
1447 92280 : return false;
1448 : }
1449 : }
1450 :
1451 : /*
1452 : * is_innerrel_unique_for
1453 : * Check if the innerrel provably contains at most one tuple matching any
1454 : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1455 : */
1456 : static bool
1457 199014 : is_innerrel_unique_for(PlannerInfo *root,
1458 : Relids joinrelids,
1459 : Relids outerrelids,
1460 : RelOptInfo *innerrel,
1461 : JoinType jointype,
1462 : List *restrictlist,
1463 : List **extra_clauses)
1464 : {
1465 199014 : List *clause_list = NIL;
1466 : ListCell *lc;
1467 :
1468 : /*
1469 : * Search for mergejoinable clauses that constrain the inner rel against
1470 : * the outer rel. If an operator is mergejoinable then it behaves like
1471 : * equality for some btree opclass, so it's what we want. The
1472 : * mergejoinability test also eliminates clauses containing volatile
1473 : * functions, which we couldn't depend on.
1474 : */
1475 438866 : foreach(lc, restrictlist)
1476 : {
1477 239852 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
1478 :
1479 : /*
1480 : * As noted above, if it's a pushed-down clause and we're at an outer
1481 : * join, we can't use it.
1482 : */
1483 239852 : if (IS_OUTER_JOIN(jointype) &&
1484 106558 : RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
1485 4958 : continue;
1486 :
1487 : /* Ignore if it's not a mergejoinable clause */
1488 234894 : if (!restrictinfo->can_join ||
1489 216868 : restrictinfo->mergeopfamilies == NIL)
1490 18950 : continue; /* not mergejoinable */
1491 :
1492 : /*
1493 : * Check if the clause has the form "outer op inner" or "inner op
1494 : * outer", and if so mark which side is inner.
1495 : */
1496 215944 : if (!clause_sides_match_join(restrictinfo, outerrelids,
1497 : innerrel->relids))
1498 40 : continue; /* no good for these input relations */
1499 :
1500 : /* OK, add to the list */
1501 215904 : clause_list = lappend(clause_list, restrictinfo);
1502 : }
1503 :
1504 : /* Let rel_is_distinct_for() do the hard work */
1505 199014 : return rel_is_distinct_for(root, innerrel, clause_list, extra_clauses);
1506 : }
1507 :
1508 : /*
1509 : * Update EC members to point to the remaining relation instead of the removed
1510 : * one, removing duplicates.
1511 : *
1512 : * Restriction clauses for base relations are already distributed to
1513 : * the respective baserestrictinfo lists (see
1514 : * generate_implied_equalities_for_column). The above code has already processed
1515 : * this list and updated these clauses to reference the remaining
1516 : * relation, so that we can skip them here based on their relids.
1517 : *
1518 : * Likewise, we have already processed the join clauses that join the
1519 : * removed relation to the remaining one.
1520 : *
1521 : * Finally, there might be join clauses tying the removed relation to
1522 : * some third relation. We can't just delete the source clauses and
1523 : * regenerate them from the EC because the corresponding equality
1524 : * operators might be missing (see the handling of ec_broken).
1525 : * Therefore, we will update the references in the source clauses.
1526 : *
1527 : * Derived clauses can be generated again, so it is simpler just to
1528 : * delete them.
1529 : */
1530 : static void
1531 924 : update_eclasses(EquivalenceClass *ec, int from, int to)
1532 : {
1533 924 : List *new_members = NIL;
1534 924 : List *new_sources = NIL;
1535 :
1536 : /*
1537 : * We don't expect any EC child members to exist at this point. Ensure
1538 : * that's the case, otherwise, we might be getting asked to do something
1539 : * this function hasn't been coded for.
1540 : */
1541 : Assert(ec->ec_childmembers == NULL);
1542 :
1543 3726 : foreach_node(EquivalenceMember, em, ec->ec_members)
1544 : {
1545 1878 : bool is_redundant = false;
1546 :
1547 1878 : if (!bms_is_member(from, em->em_relids))
1548 : {
1549 936 : new_members = lappend(new_members, em);
1550 936 : continue;
1551 : }
1552 :
1553 942 : em->em_relids = adjust_relid_set(em->em_relids, from, to);
1554 942 : em->em_jdomain->jd_relids = adjust_relid_set(em->em_jdomain->jd_relids, from, to);
1555 :
1556 : /* We only process inner joins */
1557 942 : ChangeVarNodesExtended((Node *) em->em_expr, from, to, 0,
1558 : replace_relid_callback);
1559 :
1560 1908 : foreach_node(EquivalenceMember, other, new_members)
1561 : {
1562 304 : if (!equal(em->em_relids, other->em_relids))
1563 24 : continue;
1564 :
1565 280 : if (equal(em->em_expr, other->em_expr))
1566 : {
1567 280 : is_redundant = true;
1568 280 : break;
1569 : }
1570 : }
1571 :
1572 942 : if (!is_redundant)
1573 662 : new_members = lappend(new_members, em);
1574 : }
1575 :
1576 924 : list_free(ec->ec_members);
1577 924 : ec->ec_members = new_members;
1578 :
1579 924 : ec_clear_derived_clauses(ec);
1580 :
1581 : /* Update EC source expressions */
1582 2802 : foreach_node(RestrictInfo, rinfo, ec->ec_sources)
1583 : {
1584 954 : bool is_redundant = false;
1585 :
1586 954 : if (!bms_is_member(from, rinfo->required_relids))
1587 : {
1588 118 : new_sources = lappend(new_sources, rinfo);
1589 118 : continue;
1590 : }
1591 :
1592 836 : ChangeVarNodesExtended((Node *) rinfo, from, to, 0,
1593 : replace_relid_callback);
1594 :
1595 : /*
1596 : * After switching the clause to the remaining relation, check it for
1597 : * redundancy with existing ones. We don't have to check for
1598 : * redundancy with derived clauses, because we've just deleted them.
1599 : */
1600 1696 : foreach_node(RestrictInfo, other, new_sources)
1601 : {
1602 36 : if (!equal(rinfo->clause_relids, other->clause_relids))
1603 24 : continue;
1604 :
1605 12 : if (equal(rinfo->clause, other->clause))
1606 : {
1607 12 : is_redundant = true;
1608 12 : break;
1609 : }
1610 : }
1611 :
1612 836 : if (!is_redundant)
1613 824 : new_sources = lappend(new_sources, rinfo);
1614 : }
1615 :
1616 924 : list_free(ec->ec_sources);
1617 924 : ec->ec_sources = new_sources;
1618 924 : ec->ec_relids = adjust_relid_set(ec->ec_relids, from, to);
1619 924 : }
1620 :
1621 : /*
1622 : * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field,
1623 : * which makes almost every RestrictInfo unique. This type of comparison is
1624 : * useful when removing duplicates while moving RestrictInfo's from removed
1625 : * relation to remaining relation during self-join elimination.
1626 : *
1627 : * XXX: In the future, we might remove the 'rinfo_serial' field completely and
1628 : * get rid of this function.
1629 : */
1630 : static bool
1631 512 : restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b)
1632 : {
1633 512 : int saved_rinfo_serial = a->rinfo_serial;
1634 : bool result;
1635 :
1636 512 : a->rinfo_serial = b->rinfo_serial;
1637 512 : result = equal(a, b);
1638 512 : a->rinfo_serial = saved_rinfo_serial;
1639 :
1640 512 : return result;
1641 : }
1642 :
1643 : /*
1644 : * This function adds all non-redundant clauses to the keeping relation
1645 : * during self-join elimination. That is a contradictory operation. On the
1646 : * one hand, we reduce the length of the `restrict` lists, which can
1647 : * impact planning or executing time. Additionally, we improve the
1648 : * accuracy of cardinality estimation. On the other hand, it is one more
1649 : * place that can make planning time much longer in specific cases. It
1650 : * would have been better to avoid calling the equal() function here, but
1651 : * it's the only way to detect duplicated inequality expressions.
1652 : *
1653 : * (*keep_rinfo_list) is given by pointer because it might be altered by
1654 : * distribute_restrictinfo_to_rels().
1655 : */
1656 : static void
1657 1200 : add_non_redundant_clauses(PlannerInfo *root,
1658 : List *rinfo_candidates,
1659 : List **keep_rinfo_list,
1660 : Index removed_relid)
1661 : {
1662 3328 : foreach_node(RestrictInfo, rinfo, rinfo_candidates)
1663 : {
1664 928 : bool is_redundant = false;
1665 :
1666 : Assert(!bms_is_member(removed_relid, rinfo->required_relids));
1667 :
1668 2232 : foreach_node(RestrictInfo, src, (*keep_rinfo_list))
1669 : {
1670 530 : if (!bms_equal(src->clause_relids, rinfo->clause_relids))
1671 : /* Can't compare trivially different clauses */
1672 12 : continue;
1673 :
1674 518 : if (src == rinfo ||
1675 518 : (rinfo->parent_ec != NULL &&
1676 816 : src->parent_ec == rinfo->parent_ec) ||
1677 512 : restrict_infos_logically_equal(rinfo, src))
1678 : {
1679 154 : is_redundant = true;
1680 154 : break;
1681 : }
1682 : }
1683 928 : if (!is_redundant)
1684 774 : distribute_restrictinfo_to_rels(root, rinfo);
1685 : }
1686 1200 : }
1687 :
1688 : /*
1689 : * A custom callback for ChangeVarNodesExtended() providing
1690 : * Self-join elimination (SJE) related functionality
1691 : *
1692 : * SJE needs to skip the RangeTblRef node
1693 : * type. During SJE's last step, remove_rel_from_joinlist() removes
1694 : * remaining RangeTblRefs with target relid. If ChangeVarNodes() replaces
1695 : * the target relid before, remove_rel_from_joinlist() fails to identify
1696 : * the nodes to delete.
1697 : *
1698 : * SJE also needs to change the relids within RestrictInfo's.
1699 : */
1700 : static bool
1701 41006 : replace_relid_callback(Node *node, ChangeVarNodes_context *context)
1702 : {
1703 41006 : if (IsA(node, RangeTblRef))
1704 : {
1705 1544 : return true;
1706 : }
1707 39462 : else if (IsA(node, RestrictInfo))
1708 : {
1709 2844 : RestrictInfo *rinfo = (RestrictInfo *) node;
1710 2844 : int relid = -1;
1711 2844 : bool is_req_equal =
1712 2844 : (rinfo->required_relids == rinfo->clause_relids);
1713 2844 : bool clause_relids_is_multiple =
1714 2844 : (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
1715 :
1716 : /*
1717 : * Recurse down into clauses if the target relation is present in
1718 : * clause_relids or required_relids. We must check required_relids
1719 : * because the relation not present in clause_relids might still be
1720 : * present somewhere in orclause.
1721 : */
1722 3960 : if (bms_is_member(context->rt_index, rinfo->clause_relids) ||
1723 1116 : bms_is_member(context->rt_index, rinfo->required_relids))
1724 : {
1725 : Relids new_clause_relids;
1726 :
1727 1776 : ChangeVarNodesWalkExpression((Node *) rinfo->clause, context);
1728 1776 : ChangeVarNodesWalkExpression((Node *) rinfo->orclause, context);
1729 :
1730 1776 : new_clause_relids = adjust_relid_set(rinfo->clause_relids,
1731 : context->rt_index,
1732 : context->new_index);
1733 :
1734 : /*
1735 : * Incrementally adjust num_base_rels based on the change of
1736 : * clause_relids, which could contain both base relids and
1737 : * outer-join relids. This operation is legal until we remove
1738 : * only baserels.
1739 : */
1740 1776 : rinfo->num_base_rels -= bms_num_members(rinfo->clause_relids) -
1741 1776 : bms_num_members(new_clause_relids);
1742 :
1743 1776 : rinfo->clause_relids = new_clause_relids;
1744 1776 : rinfo->left_relids =
1745 1776 : adjust_relid_set(rinfo->left_relids, context->rt_index, context->new_index);
1746 1776 : rinfo->right_relids =
1747 1776 : adjust_relid_set(rinfo->right_relids, context->rt_index, context->new_index);
1748 : }
1749 :
1750 2844 : if (is_req_equal)
1751 24 : rinfo->required_relids = rinfo->clause_relids;
1752 : else
1753 2820 : rinfo->required_relids =
1754 2820 : adjust_relid_set(rinfo->required_relids, context->rt_index, context->new_index);
1755 :
1756 2844 : rinfo->outer_relids =
1757 2844 : adjust_relid_set(rinfo->outer_relids, context->rt_index, context->new_index);
1758 2844 : rinfo->incompatible_relids =
1759 2844 : adjust_relid_set(rinfo->incompatible_relids, context->rt_index, context->new_index);
1760 :
1761 4836 : if (rinfo->mergeopfamilies &&
1762 3704 : bms_get_singleton_member(rinfo->clause_relids, &relid) &&
1763 1272 : clause_relids_is_multiple &&
1764 1272 : relid == context->new_index && IsA(rinfo->clause, OpExpr))
1765 : {
1766 : Expr *leftOp;
1767 : Expr *rightOp;
1768 :
1769 1272 : leftOp = (Expr *) get_leftop(rinfo->clause);
1770 1272 : rightOp = (Expr *) get_rightop(rinfo->clause);
1771 :
1772 : /*
1773 : * For self-join elimination, changing varnos could transform
1774 : * "t1.a = t2.a" into "t1.a = t1.a". That is always true as long
1775 : * as "t1.a" is not null. We use qual() to check for such a case,
1776 : * and then we replace the qual for a check for not null
1777 : * (NullTest).
1778 : */
1779 1272 : if (leftOp != NULL && equal(leftOp, rightOp))
1780 : {
1781 1260 : NullTest *ntest = makeNode(NullTest);
1782 :
1783 1260 : ntest->arg = leftOp;
1784 1260 : ntest->nulltesttype = IS_NOT_NULL;
1785 1260 : ntest->argisrow = false;
1786 1260 : ntest->location = -1;
1787 1260 : rinfo->clause = (Expr *) ntest;
1788 1260 : rinfo->mergeopfamilies = NIL;
1789 1260 : rinfo->left_em = NULL;
1790 1260 : rinfo->right_em = NULL;
1791 : }
1792 : Assert(rinfo->orclause == NULL);
1793 : }
1794 2844 : return true;
1795 : }
1796 :
1797 36618 : return false;
1798 : }
1799 :
1800 : /*
1801 : * Remove a relation after we have proven that it participates only in an
1802 : * unneeded unique self-join.
1803 : *
1804 : * Replace any links in planner info structures.
1805 : *
1806 : * Transfer join and restriction clauses from the removed relation to the
1807 : * remaining one. We change the Vars of the clause to point to the
1808 : * remaining relation instead of the removed one. The clauses that require
1809 : * a subset of joinrelids become restriction clauses of the remaining
1810 : * relation, and others remain join clauses. We append them to
1811 : * baserestrictinfo and joininfo, respectively, trying not to introduce
1812 : * duplicates.
1813 : *
1814 : * We also have to process the 'joinclauses' list here, because it
1815 : * contains EC-derived join clauses which must become filter clauses. It
1816 : * is not enough to just correct the ECs because the EC-derived
1817 : * restrictions are generated before join removal (see
1818 : * generate_base_implied_equalities).
1819 : *
1820 : * NOTE: Remember to keep the code in sync with PlannerInfo to be sure all
1821 : * cached relids and relid bitmapsets can be correctly cleaned during the
1822 : * self-join elimination procedure.
1823 : */
1824 : static void
1825 600 : remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark,
1826 : RelOptInfo *toKeep, RelOptInfo *toRemove,
1827 : List *restrictlist)
1828 : {
1829 : List *joininfos;
1830 : ListCell *lc;
1831 : int i;
1832 600 : List *jinfo_candidates = NIL;
1833 600 : List *binfo_candidates = NIL;
1834 :
1835 : Assert(toKeep->relid > 0);
1836 : Assert(toRemove->relid > 0);
1837 :
1838 : /*
1839 : * Replace the index of the removing table with the keeping one. The
1840 : * technique of removing/distributing restrictinfo is used here to attach
1841 : * just appeared (for keeping relation) join clauses and avoid adding
1842 : * duplicates of those that already exist in the joininfo list.
1843 : */
1844 600 : joininfos = list_copy(toRemove->joininfo);
1845 1290 : foreach_node(RestrictInfo, rinfo, joininfos)
1846 : {
1847 90 : remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
1848 90 : ChangeVarNodesExtended((Node *) rinfo, toRemove->relid, toKeep->relid,
1849 : 0, replace_relid_callback);
1850 :
1851 90 : if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
1852 72 : jinfo_candidates = lappend(jinfo_candidates, rinfo);
1853 : else
1854 18 : binfo_candidates = lappend(binfo_candidates, rinfo);
1855 : }
1856 :
1857 : /*
1858 : * Concatenate restrictlist to the list of base restrictions of the
1859 : * removing table just to simplify the replacement procedure: all of them
1860 : * weren't connected to any keeping relations and need to be added to some
1861 : * rels.
1862 : */
1863 600 : toRemove->baserestrictinfo = list_concat(toRemove->baserestrictinfo,
1864 : restrictlist);
1865 2038 : foreach_node(RestrictInfo, rinfo, toRemove->baserestrictinfo)
1866 : {
1867 838 : ChangeVarNodesExtended((Node *) rinfo, toRemove->relid, toKeep->relid,
1868 : 0, replace_relid_callback);
1869 :
1870 838 : if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
1871 0 : jinfo_candidates = lappend(jinfo_candidates, rinfo);
1872 : else
1873 838 : binfo_candidates = lappend(binfo_candidates, rinfo);
1874 : }
1875 :
1876 : /*
1877 : * Now, add all non-redundant clauses to the keeping relation.
1878 : */
1879 600 : add_non_redundant_clauses(root, binfo_candidates,
1880 : &toKeep->baserestrictinfo, toRemove->relid);
1881 600 : add_non_redundant_clauses(root, jinfo_candidates,
1882 : &toKeep->joininfo, toRemove->relid);
1883 :
1884 600 : list_free(binfo_candidates);
1885 600 : list_free(jinfo_candidates);
1886 :
1887 : /*
1888 : * Arrange equivalence classes, mentioned removing a table, with the
1889 : * keeping one: varno of removing table should be replaced in members and
1890 : * sources lists. Also, remove duplicated elements if this replacement
1891 : * procedure created them.
1892 : */
1893 600 : i = -1;
1894 1524 : while ((i = bms_next_member(toRemove->eclass_indexes, i)) >= 0)
1895 : {
1896 924 : EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
1897 :
1898 924 : update_eclasses(ec, toRemove->relid, toKeep->relid);
1899 924 : toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i);
1900 : }
1901 :
1902 : /*
1903 : * Transfer the targetlist and attr_needed flags.
1904 : */
1905 :
1906 2396 : foreach(lc, toRemove->reltarget->exprs)
1907 : {
1908 1796 : Node *node = lfirst(lc);
1909 :
1910 1796 : ChangeVarNodesExtended(node, toRemove->relid, toKeep->relid, 0,
1911 : replace_relid_callback);
1912 1796 : if (!list_member(toKeep->reltarget->exprs, node))
1913 180 : toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node);
1914 : }
1915 :
1916 7590 : for (i = toKeep->min_attr; i <= toKeep->max_attr; i++)
1917 : {
1918 6990 : int attno = i - toKeep->min_attr;
1919 :
1920 13980 : toRemove->attr_needed[attno] = adjust_relid_set(toRemove->attr_needed[attno],
1921 6990 : toRemove->relid, toKeep->relid);
1922 6990 : toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno],
1923 6990 : toRemove->attr_needed[attno]);
1924 : }
1925 :
1926 : /*
1927 : * If the removed relation has a row mark, transfer it to the remaining
1928 : * one.
1929 : *
1930 : * If both rels have row marks, just keep the one corresponding to the
1931 : * remaining relation because we verified earlier that they have the same
1932 : * strength.
1933 : */
1934 600 : if (rmark)
1935 : {
1936 74 : if (kmark)
1937 : {
1938 : Assert(kmark->markType == rmark->markType);
1939 :
1940 74 : root->rowMarks = list_delete_ptr(root->rowMarks, rmark);
1941 : }
1942 : else
1943 : {
1944 : /* Shouldn't have inheritance children here. */
1945 : Assert(rmark->rti == rmark->prti);
1946 :
1947 0 : rmark->rti = rmark->prti = toKeep->relid;
1948 : }
1949 : }
1950 :
1951 : /*
1952 : * Replace varno in all the query structures, except nodes RangeTblRef
1953 : * otherwise later remove_rel_from_joinlist will yield errors.
1954 : */
1955 600 : ChangeVarNodesExtended((Node *) root->parse, toRemove->relid, toKeep->relid,
1956 : 0, replace_relid_callback);
1957 :
1958 : /* Replace links in the planner info */
1959 600 : remove_rel_from_query(root, toRemove, toKeep->relid, NULL, NULL);
1960 :
1961 : /* At last, replace varno in root targetlist and HAVING clause */
1962 600 : ChangeVarNodesExtended((Node *) root->processed_tlist, toRemove->relid,
1963 600 : toKeep->relid, 0, replace_relid_callback);
1964 600 : ChangeVarNodesExtended((Node *) root->processed_groupClause,
1965 600 : toRemove->relid, toKeep->relid, 0,
1966 : replace_relid_callback);
1967 :
1968 600 : adjust_relid_set(root->all_result_relids, toRemove->relid, toKeep->relid);
1969 600 : adjust_relid_set(root->leaf_result_relids, toRemove->relid, toKeep->relid);
1970 :
1971 : /*
1972 : * There may be references to the rel in root->fkey_list, but if so,
1973 : * match_foreign_keys_to_quals() will get rid of them.
1974 : */
1975 :
1976 : /*
1977 : * Finally, remove the rel from the baserel array to prevent it from being
1978 : * referenced again. (We can't do this earlier because
1979 : * remove_join_clause_from_rels will touch it.)
1980 : */
1981 600 : root->simple_rel_array[toRemove->relid] = NULL;
1982 :
1983 : /* And nuke the RelOptInfo, just in case there's another access path. */
1984 600 : pfree(toRemove);
1985 :
1986 : /*
1987 : * Now repeat construction of attr_needed bits coming from all other
1988 : * sources.
1989 : */
1990 600 : rebuild_placeholder_attr_needed(root);
1991 600 : rebuild_joinclause_attr_needed(root);
1992 600 : rebuild_eclass_attr_needed(root);
1993 600 : rebuild_lateral_attr_needed(root);
1994 600 : }
1995 :
1996 : /*
1997 : * split_selfjoin_quals
1998 : * Processes 'joinquals' by building two lists: one containing the quals
1999 : * where the columns/exprs are on either side of the join match and
2000 : * another one containing the remaining quals.
2001 : *
2002 : * 'joinquals' must only contain quals for a RTE_RELATION being joined to
2003 : * itself.
2004 : */
2005 : static void
2006 2000 : split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals,
2007 : List **otherjoinquals, int from, int to)
2008 : {
2009 2000 : List *sjoinquals = NIL;
2010 2000 : List *ojoinquals = NIL;
2011 :
2012 6134 : foreach_node(RestrictInfo, rinfo, joinquals)
2013 : {
2014 : OpExpr *expr;
2015 : Node *leftexpr;
2016 : Node *rightexpr;
2017 :
2018 : /* In general, clause looks like F(arg1) = G(arg2) */
2019 4268 : if (!rinfo->mergeopfamilies ||
2020 4268 : bms_num_members(rinfo->clause_relids) != 2 ||
2021 4268 : bms_membership(rinfo->left_relids) != BMS_SINGLETON ||
2022 2134 : bms_membership(rinfo->right_relids) != BMS_SINGLETON)
2023 : {
2024 0 : ojoinquals = lappend(ojoinquals, rinfo);
2025 0 : continue;
2026 : }
2027 :
2028 2134 : expr = (OpExpr *) rinfo->clause;
2029 :
2030 2134 : if (!IsA(expr, OpExpr) || list_length(expr->args) != 2)
2031 : {
2032 0 : ojoinquals = lappend(ojoinquals, rinfo);
2033 0 : continue;
2034 : }
2035 :
2036 2134 : leftexpr = get_leftop(rinfo->clause);
2037 2134 : rightexpr = copyObject(get_rightop(rinfo->clause));
2038 :
2039 2134 : if (leftexpr && IsA(leftexpr, RelabelType))
2040 12 : leftexpr = (Node *) ((RelabelType *) leftexpr)->arg;
2041 2134 : if (rightexpr && IsA(rightexpr, RelabelType))
2042 6 : rightexpr = (Node *) ((RelabelType *) rightexpr)->arg;
2043 :
2044 : /*
2045 : * Quite an expensive operation, narrowing the use case. For example,
2046 : * when we have cast of the same var to different (but compatible)
2047 : * types.
2048 : */
2049 2134 : ChangeVarNodesExtended(rightexpr,
2050 2134 : bms_singleton_member(rinfo->right_relids),
2051 2134 : bms_singleton_member(rinfo->left_relids), 0,
2052 : replace_relid_callback);
2053 :
2054 2134 : if (equal(leftexpr, rightexpr))
2055 1636 : sjoinquals = lappend(sjoinquals, rinfo);
2056 : else
2057 498 : ojoinquals = lappend(ojoinquals, rinfo);
2058 : }
2059 :
2060 2000 : *selfjoinquals = sjoinquals;
2061 2000 : *otherjoinquals = ojoinquals;
2062 2000 : }
2063 :
2064 : /*
2065 : * Check for a case when uniqueness is at least partly derived from a
2066 : * baserestrictinfo clause. In this case, we have a chance to return only
2067 : * one row (if such clauses on both sides of SJ are equal) or nothing (if they
2068 : * are different).
2069 : */
2070 : static bool
2071 666 : match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses,
2072 : Index relid)
2073 : {
2074 1350 : foreach_node(RestrictInfo, rinfo, uclauses)
2075 : {
2076 : Expr *clause;
2077 : Node *iclause;
2078 : Node *c1;
2079 150 : bool matched = false;
2080 :
2081 : Assert(outer->relid > 0 && relid > 0);
2082 :
2083 : /* Only filters like f(R.x1,...,R.xN) == expr we should consider. */
2084 : Assert(bms_is_empty(rinfo->left_relids) ^
2085 : bms_is_empty(rinfo->right_relids));
2086 :
2087 150 : clause = (Expr *) copyObject(rinfo->clause);
2088 150 : ChangeVarNodesExtended((Node *) clause, relid, outer->relid, 0,
2089 : replace_relid_callback);
2090 :
2091 150 : iclause = bms_is_empty(rinfo->left_relids) ? get_rightop(clause) :
2092 144 : get_leftop(clause);
2093 150 : c1 = bms_is_empty(rinfo->left_relids) ? get_leftop(clause) :
2094 144 : get_rightop(clause);
2095 :
2096 : /*
2097 : * Compare these left and right sides with the corresponding sides of
2098 : * the outer's filters. If no one is detected - return immediately.
2099 : */
2100 408 : foreach_node(RestrictInfo, orinfo, outer->baserestrictinfo)
2101 : {
2102 : Node *oclause;
2103 : Node *c2;
2104 :
2105 192 : if (orinfo->mergeopfamilies == NIL)
2106 : /* Don't consider clauses that aren't similar to 'F(X)=G(Y)' */
2107 60 : continue;
2108 :
2109 : Assert(is_opclause(orinfo->clause));
2110 :
2111 264 : oclause = bms_is_empty(orinfo->left_relids) ?
2112 132 : get_rightop(orinfo->clause) : get_leftop(orinfo->clause);
2113 264 : c2 = (bms_is_empty(orinfo->left_relids) ?
2114 132 : get_leftop(orinfo->clause) : get_rightop(orinfo->clause));
2115 :
2116 132 : if (equal(iclause, oclause) && equal(c1, c2))
2117 : {
2118 84 : matched = true;
2119 84 : break;
2120 : }
2121 : }
2122 :
2123 150 : if (!matched)
2124 66 : return false;
2125 : }
2126 :
2127 600 : return true;
2128 : }
2129 :
2130 : /*
2131 : * Find and remove unique self-joins in a group of base relations that have
2132 : * the same Oid.
2133 : *
2134 : * Returns a set of relids that were removed.
2135 : */
2136 : static Relids
2137 11454 : remove_self_joins_one_group(PlannerInfo *root, Relids relids)
2138 : {
2139 11454 : Relids result = NULL;
2140 : int k; /* Index of kept relation */
2141 11454 : int r = -1; /* Index of removed relation */
2142 :
2143 35550 : while ((r = bms_next_member(relids, r)) > 0)
2144 : {
2145 24096 : RelOptInfo *inner = root->simple_rel_array[r];
2146 :
2147 24096 : k = r;
2148 :
2149 37532 : while ((k = bms_next_member(relids, k)) > 0)
2150 : {
2151 14036 : Relids joinrelids = NULL;
2152 14036 : RelOptInfo *outer = root->simple_rel_array[k];
2153 : List *restrictlist;
2154 : List *selfjoinquals;
2155 : List *otherjoinquals;
2156 : ListCell *lc;
2157 14036 : bool jinfo_check = true;
2158 14036 : PlanRowMark *omark = NULL;
2159 14036 : PlanRowMark *imark = NULL;
2160 14036 : List *uclauses = NIL;
2161 :
2162 : /* A sanity check: the relations have the same Oid. */
2163 : Assert(root->simple_rte_array[k]->relid ==
2164 : root->simple_rte_array[r]->relid);
2165 :
2166 : /*
2167 : * It is impossible to eliminate the join of two relations if they
2168 : * belong to different rules of order. Otherwise, the planner
2169 : * can't find any variants of the correct query plan.
2170 : */
2171 17436 : foreach(lc, root->join_info_list)
2172 : {
2173 11114 : SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc);
2174 :
2175 22228 : if ((bms_is_member(k, info->syn_lefthand) ^
2176 15830 : bms_is_member(r, info->syn_lefthand)) ||
2177 4716 : (bms_is_member(k, info->syn_righthand) ^
2178 4716 : bms_is_member(r, info->syn_righthand)))
2179 : {
2180 7714 : jinfo_check = false;
2181 7714 : break;
2182 : }
2183 : }
2184 14036 : if (!jinfo_check)
2185 13436 : continue;
2186 :
2187 : /*
2188 : * Check Row Marks equivalence. We can't remove the join if the
2189 : * relations have row marks of different strength (e.g., one is
2190 : * locked FOR UPDATE, and another just has ROW_MARK_REFERENCE for
2191 : * EvalPlanQual rechecking).
2192 : */
2193 6530 : foreach(lc, root->rowMarks)
2194 : {
2195 380 : PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc);
2196 :
2197 380 : if (rowMark->rti == k)
2198 : {
2199 : Assert(imark == NULL);
2200 172 : imark = rowMark;
2201 : }
2202 208 : else if (rowMark->rti == r)
2203 : {
2204 : Assert(omark == NULL);
2205 172 : omark = rowMark;
2206 : }
2207 :
2208 380 : if (omark && imark)
2209 172 : break;
2210 : }
2211 6322 : if (omark && imark && omark->markType != imark->markType)
2212 52 : continue;
2213 :
2214 : /*
2215 : * We only deal with base rels here, so their relids bitset
2216 : * contains only one member -- their relid.
2217 : */
2218 6270 : joinrelids = bms_add_member(joinrelids, r);
2219 6270 : joinrelids = bms_add_member(joinrelids, k);
2220 :
2221 : /*
2222 : * PHVs should not impose any constraints on removing self-joins.
2223 : */
2224 :
2225 : /*
2226 : * At this stage, joininfo lists of inner and outer can contain
2227 : * only clauses required for a superior outer join that can't
2228 : * influence this optimization. So, we can avoid to call the
2229 : * build_joinrel_restrictlist() routine.
2230 : */
2231 6270 : restrictlist = generate_join_implied_equalities(root, joinrelids,
2232 : inner->relids,
2233 : outer, NULL);
2234 6270 : if (restrictlist == NIL)
2235 4270 : continue;
2236 :
2237 : /*
2238 : * Process restrictlist to separate the self-join quals from the
2239 : * other quals. e.g., "x = x" goes to selfjoinquals and "a = b" to
2240 : * otherjoinquals.
2241 : */
2242 2000 : split_selfjoin_quals(root, restrictlist, &selfjoinquals,
2243 2000 : &otherjoinquals, inner->relid, outer->relid);
2244 :
2245 : Assert(list_length(restrictlist) ==
2246 : (list_length(selfjoinquals) + list_length(otherjoinquals)));
2247 :
2248 : /*
2249 : * To enable SJE for the only degenerate case without any self
2250 : * join clauses at all, add baserestrictinfo to this list. The
2251 : * degenerate case works only if both sides have the same clause.
2252 : * So doesn't matter which side to add.
2253 : */
2254 2000 : selfjoinquals = list_concat(selfjoinquals, outer->baserestrictinfo);
2255 :
2256 : /*
2257 : * Determine if the inner table can duplicate outer rows. We must
2258 : * bypass the unique rel cache here since we're possibly using a
2259 : * subset of join quals. We can use 'force_cache' == true when all
2260 : * join quals are self-join quals. Otherwise, we could end up
2261 : * putting false negatives in the cache.
2262 : */
2263 2000 : if (!innerrel_is_unique_ext(root, joinrelids, inner->relids,
2264 : outer, JOIN_INNER, selfjoinquals,
2265 2000 : list_length(otherjoinquals) == 0,
2266 : &uclauses))
2267 1334 : continue;
2268 :
2269 : /*
2270 : * 'uclauses' is the copy of outer->baserestrictinfo that are
2271 : * associated with an index. We proved by matching selfjoinquals
2272 : * to a unique index that the outer relation has at most one
2273 : * matching row for each inner row. Sometimes that is not enough.
2274 : * e.g. "WHERE s1.b = s2.b AND s1.a = 1 AND s2.a = 2" when the
2275 : * unique index is (a,b). Having non-empty uclauses, we must
2276 : * validate that the inner baserestrictinfo contains the same
2277 : * expressions, or we won't match the same row on each side of the
2278 : * join.
2279 : */
2280 666 : if (!match_unique_clauses(root, inner, uclauses, outer->relid))
2281 66 : continue;
2282 :
2283 : /*
2284 : * We can remove either relation, so remove the inner one in order
2285 : * to simplify this loop.
2286 : */
2287 600 : remove_self_join_rel(root, omark, imark, outer, inner, restrictlist);
2288 :
2289 600 : result = bms_add_member(result, r);
2290 :
2291 : /* We have removed the outer relation, try the next one. */
2292 600 : break;
2293 : }
2294 : }
2295 :
2296 11454 : return result;
2297 : }
2298 :
2299 : /*
2300 : * Gather indexes of base relations from the joinlist and try to eliminate self
2301 : * joins.
2302 : */
2303 : static Relids
2304 100952 : remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove)
2305 : {
2306 : ListCell *jl;
2307 100952 : Relids relids = NULL;
2308 100952 : SelfJoinCandidate *candidates = NULL;
2309 : int i;
2310 : int j;
2311 : int numRels;
2312 :
2313 : /* Collect indexes of base relations of the join tree */
2314 336628 : foreach(jl, joinlist)
2315 : {
2316 235676 : Node *jlnode = (Node *) lfirst(jl);
2317 :
2318 235676 : if (IsA(jlnode, RangeTblRef))
2319 : {
2320 232310 : int varno = ((RangeTblRef *) jlnode)->rtindex;
2321 232310 : RangeTblEntry *rte = root->simple_rte_array[varno];
2322 :
2323 : /*
2324 : * We only consider ordinary relations as candidates to be
2325 : * removed, and these relations should not have TABLESAMPLE
2326 : * clauses specified. Removing a relation with TABLESAMPLE clause
2327 : * could potentially change the syntax of the query. Because of
2328 : * UPDATE/DELETE EPQ mechanism, currently Query->resultRelation or
2329 : * Query->mergeTargetRelation associated rel cannot be eliminated.
2330 : */
2331 232310 : if (rte->rtekind == RTE_RELATION &&
2332 201396 : rte->relkind == RELKIND_RELATION &&
2333 196172 : rte->tablesample == NULL &&
2334 196148 : varno != root->parse->resultRelation &&
2335 194364 : varno != root->parse->mergeTargetRelation)
2336 : {
2337 : Assert(!bms_is_member(varno, relids));
2338 194364 : relids = bms_add_member(relids, varno);
2339 : }
2340 : }
2341 3366 : else if (IsA(jlnode, List))
2342 : {
2343 : /* Recursively go inside the sub-joinlist */
2344 3366 : toRemove = remove_self_joins_recurse(root, (List *) jlnode,
2345 : toRemove);
2346 : }
2347 : else
2348 0 : elog(ERROR, "unrecognized joinlist node type: %d",
2349 : (int) nodeTag(jlnode));
2350 : }
2351 :
2352 100952 : numRels = bms_num_members(relids);
2353 :
2354 : /* Need at least two relations for the join */
2355 100952 : if (numRels < 2)
2356 30626 : return toRemove;
2357 :
2358 : /*
2359 : * In order to find relations with the same oid we first build an array of
2360 : * candidates and then sort it by oid.
2361 : */
2362 70326 : candidates = (SelfJoinCandidate *) palloc(sizeof(SelfJoinCandidate) *
2363 : numRels);
2364 70326 : i = -1;
2365 70326 : j = 0;
2366 242548 : while ((i = bms_next_member(relids, i)) >= 0)
2367 : {
2368 172222 : candidates[j].relid = i;
2369 172222 : candidates[j].reloid = root->simple_rte_array[i]->relid;
2370 172222 : j++;
2371 : }
2372 :
2373 70326 : qsort(candidates, numRels, sizeof(SelfJoinCandidate),
2374 : self_join_candidates_cmp);
2375 :
2376 : /*
2377 : * Iteratively form a group of relation indexes with the same oid and
2378 : * launch the routine that detects self-joins in this group and removes
2379 : * excessive range table entries.
2380 : *
2381 : * At the end of the iteration, exclude the group from the overall relids
2382 : * list. So each next iteration of the cycle will involve less and less
2383 : * value of relids.
2384 : */
2385 70326 : i = 0;
2386 242548 : for (j = 1; j < numRels + 1; j++)
2387 : {
2388 172222 : if (j == numRels || candidates[j].reloid != candidates[i].reloid)
2389 : {
2390 159664 : if (j - i >= 2)
2391 : {
2392 : /* Create a group of relation indexes with the same oid */
2393 11376 : Relids group = NULL;
2394 : Relids removed;
2395 :
2396 35310 : while (i < j)
2397 : {
2398 23934 : group = bms_add_member(group, candidates[i].relid);
2399 23934 : i++;
2400 : }
2401 11376 : relids = bms_del_members(relids, group);
2402 :
2403 : /*
2404 : * Try to remove self-joins from a group of identical entries.
2405 : * Make the next attempt iteratively - if something is deleted
2406 : * from a group, changes in clauses and equivalence classes
2407 : * can give us a chance to find more candidates.
2408 : */
2409 : do
2410 : {
2411 : Assert(!bms_overlap(group, toRemove));
2412 11454 : removed = remove_self_joins_one_group(root, group);
2413 11454 : toRemove = bms_add_members(toRemove, removed);
2414 11454 : group = bms_del_members(group, removed);
2415 576 : } while (!bms_is_empty(removed) &&
2416 11454 : bms_membership(group) == BMS_MULTIPLE);
2417 11376 : bms_free(removed);
2418 11376 : bms_free(group);
2419 : }
2420 : else
2421 : {
2422 : /* Single relation, just remove it from the set */
2423 148288 : relids = bms_del_member(relids, candidates[i].relid);
2424 148288 : i = j;
2425 : }
2426 : }
2427 : }
2428 :
2429 : Assert(bms_is_empty(relids));
2430 :
2431 70326 : return toRemove;
2432 : }
2433 :
2434 : /*
2435 : * Compare self-join candidates by their oids.
2436 : */
2437 : static int
2438 127480 : self_join_candidates_cmp(const void *a, const void *b)
2439 : {
2440 127480 : const SelfJoinCandidate *ca = (const SelfJoinCandidate *) a;
2441 127480 : const SelfJoinCandidate *cb = (const SelfJoinCandidate *) b;
2442 :
2443 127480 : if (ca->reloid != cb->reloid)
2444 114868 : return (ca->reloid < cb->reloid ? -1 : 1);
2445 : else
2446 12612 : return 0;
2447 : }
2448 :
2449 : /*
2450 : * Find and remove useless self joins.
2451 : *
2452 : * Search for joins where a relation is joined to itself. If the join clause
2453 : * for each tuple from one side of the join is proven to match the same
2454 : * physical row (or nothing) on the other side, that self-join can be
2455 : * eliminated from the query. Suitable join clauses are assumed to be in the
2456 : * form of X = X, and can be replaced with NOT NULL clauses.
2457 : *
2458 : * For the sake of simplicity, we don't apply this optimization to special
2459 : * joins. Here is a list of what we could do in some particular cases:
2460 : * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins,
2461 : * and then removed normally.
2462 : * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND
2463 : * (IS NULL on join columns OR NOT inner quals)'.
2464 : * 'a a1 left join a a2': could simplify to a scan like inner but without
2465 : * NOT NULL conditions on join columns.
2466 : * 'a a1 left join (a a2 join b)': can't simplify this, because join to b
2467 : * can both remove rows and introduce duplicates.
2468 : *
2469 : * To search for removable joins, we order all the relations on their Oid,
2470 : * go over each set with the same Oid, and consider each pair of relations
2471 : * in this set.
2472 : *
2473 : * To remove the join, we mark one of the participating relations as dead
2474 : * and rewrite all references to it to point to the remaining relation.
2475 : * This includes modifying RestrictInfos, EquivalenceClasses, and
2476 : * EquivalenceMembers. We also have to modify the row marks. The join clauses
2477 : * of the removed relation become either restriction or join clauses, based on
2478 : * whether they reference any relations not participating in the removed join.
2479 : *
2480 : * 'joinlist' is the top-level joinlist of the query. If it has any
2481 : * references to the removed relations, we update them to point to the
2482 : * remaining ones.
2483 : */
2484 : List *
2485 331620 : remove_useless_self_joins(PlannerInfo *root, List *joinlist)
2486 : {
2487 331620 : Relids toRemove = NULL;
2488 331620 : int relid = -1;
2489 :
2490 663240 : if (!enable_self_join_elimination || joinlist == NIL ||
2491 566522 : (list_length(joinlist) == 1 && !IsA(linitial(joinlist), List)))
2492 234034 : return joinlist;
2493 :
2494 : /*
2495 : * Merge pairs of relations participated in self-join. Remove unnecessary
2496 : * range table entries.
2497 : */
2498 97586 : toRemove = remove_self_joins_recurse(root, joinlist, toRemove);
2499 :
2500 97586 : if (unlikely(toRemove != NULL))
2501 : {
2502 : /* At the end, remove orphaned relation links */
2503 1170 : while ((relid = bms_next_member(toRemove, relid)) >= 0)
2504 : {
2505 600 : int nremoved = 0;
2506 :
2507 600 : joinlist = remove_rel_from_joinlist(joinlist, relid, &nremoved);
2508 600 : if (nremoved != 1)
2509 0 : elog(ERROR, "failed to find relation %d in joinlist", relid);
2510 : }
2511 : }
2512 :
2513 97586 : return joinlist;
2514 : }
|