Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * analyzejoins.c
4 : * Routines for simplifying joins after initial query analysis
5 : *
6 : * While we do a great deal of join simplification in prep/prepjointree.c,
7 : * certain optimizations cannot be performed at that stage for lack of
8 : * detailed information about the query. The routines here are invoked
9 : * after initsplan.c has done its work, and can do additional join removal
10 : * and simplification steps based on the information extracted. The penalty
11 : * is that we have to work harder to clean up after ourselves when we modify
12 : * the query, since the derived data structures have to be updated too.
13 : *
14 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
15 : * Portions Copyright (c) 1994, Regents of the University of California
16 : *
17 : *
18 : * IDENTIFICATION
19 : * src/backend/optimizer/plan/analyzejoins.c
20 : *
21 : *-------------------------------------------------------------------------
22 : */
23 : #include "postgres.h"
24 :
25 : #include "catalog/pg_class.h"
26 : #include "nodes/nodeFuncs.h"
27 : #include "optimizer/joininfo.h"
28 : #include "optimizer/optimizer.h"
29 : #include "optimizer/pathnode.h"
30 : #include "optimizer/paths.h"
31 : #include "optimizer/planmain.h"
32 : #include "optimizer/restrictinfo.h"
33 : #include "utils/lsyscache.h"
34 :
35 : /*
36 : * The struct containing self-join candidate. Used to find duplicate reloids.
37 : */
38 : typedef struct
39 : {
40 : int relid;
41 : Oid reloid;
42 : } SelfJoinCandidate;
43 :
44 : bool enable_self_join_removal;
45 :
46 : /* local functions */
47 : static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
48 :
49 : static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
50 : SpecialJoinInfo *sjinfo);
51 : static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
52 : int relid, int ojrelid);
53 : static void remove_rel_from_eclass(EquivalenceClass *ec,
54 : int relid, int ojrelid);
55 : static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
56 : static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
57 : static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
58 : List *clause_list, List **extra_clauses);
59 : static Oid distinct_col_search(int colno, List *colnos, List *opids);
60 : static bool is_innerrel_unique_for(PlannerInfo *root,
61 : Relids joinrelids,
62 : Relids outerrelids,
63 : RelOptInfo *innerrel,
64 : JoinType jointype,
65 : List *restrictlist,
66 : List **extra_clauses);
67 : static void replace_varno(Node *node, int from, int to);
68 : static Bitmapset *replace_relid(Relids relids, int oldId, int newId);
69 : static int self_join_candidates_cmp(const void *a, const void *b);
70 :
71 :
72 : /*
73 : * remove_useless_joins
74 : * Check for relations that don't actually need to be joined at all,
75 : * and remove them from the query.
76 : *
77 : * We are passed the current joinlist and return the updated list. Other
78 : * data structures that have to be updated are accessible via "root".
79 : */
80 : List *
81 285668 : remove_useless_joins(PlannerInfo *root, List *joinlist)
82 : {
83 : ListCell *lc;
84 :
85 : /*
86 : * We are only interested in relations that are left-joined to, so we can
87 : * scan the join_info_list to find them easily.
88 : */
89 285668 : restart:
90 324132 : foreach(lc, root->join_info_list)
91 : {
92 47558 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
93 : int innerrelid;
94 : int nremoved;
95 :
96 : /* Skip if not removable */
97 47558 : if (!join_is_removable(root, sjinfo))
98 38464 : continue;
99 :
100 : /*
101 : * Currently, join_is_removable can only succeed when the sjinfo's
102 : * righthand is a single baserel. Remove that rel from the query and
103 : * joinlist.
104 : */
105 9094 : innerrelid = bms_singleton_member(sjinfo->min_righthand);
106 :
107 9094 : remove_leftjoinrel_from_query(root, innerrelid, sjinfo);
108 :
109 : /* We verify that exactly one reference gets removed from joinlist */
110 9094 : nremoved = 0;
111 9094 : joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
112 9094 : if (nremoved != 1)
113 0 : elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
114 :
115 : /*
116 : * We can delete this SpecialJoinInfo from the list too, since it's no
117 : * longer of interest. (Since we'll restart the foreach loop
118 : * immediately, we don't bother with foreach_delete_current.)
119 : */
120 9094 : root->join_info_list = list_delete_cell(root->join_info_list, lc);
121 :
122 : /*
123 : * Restart the scan. This is necessary to ensure we find all
124 : * removable joins independently of ordering of the join_info_list
125 : * (note that removal of attr_needed bits may make a join appear
126 : * removable that did not before).
127 : */
128 9094 : goto restart;
129 : }
130 :
131 276574 : return joinlist;
132 : }
133 :
134 : /*
135 : * clause_sides_match_join
136 : * Determine whether a join clause is of the right form to use in this join.
137 : *
138 : * We already know that the clause is a binary opclause referencing only the
139 : * rels in the current join. The point here is to check whether it has the
140 : * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
141 : * rather than mixing outer and inner vars on either side. If it matches,
142 : * we set the transient flag outer_is_left to identify which side is which.
143 : */
144 : static inline bool
145 179134 : clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
146 : Relids innerrelids)
147 : {
148 272148 : if (bms_is_subset(rinfo->left_relids, outerrelids) &&
149 93014 : bms_is_subset(rinfo->right_relids, innerrelids))
150 : {
151 : /* lefthand side is outer */
152 93008 : rinfo->outer_is_left = true;
153 93008 : return true;
154 : }
155 172232 : else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
156 86106 : bms_is_subset(rinfo->right_relids, outerrelids))
157 : {
158 : /* righthand side is outer */
159 86106 : rinfo->outer_is_left = false;
160 86106 : return true;
161 : }
162 20 : return false; /* no good for these input relations */
163 : }
164 :
165 : /*
166 : * join_is_removable
167 : * Check whether we need not perform this special join at all, because
168 : * it will just duplicate its left input.
169 : *
170 : * This is true for a left join for which the join condition cannot match
171 : * more than one inner-side row. (There are other possibly interesting
172 : * cases, but we don't have the infrastructure to prove them.) We also
173 : * have to check that the inner side doesn't generate any variables needed
174 : * above the join.
175 : */
176 : static bool
177 47558 : join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
178 : {
179 : int innerrelid;
180 : RelOptInfo *innerrel;
181 : Relids inputrelids;
182 : Relids joinrelids;
183 47558 : List *clause_list = NIL;
184 : ListCell *l;
185 : int attroff;
186 :
187 : /*
188 : * Must be a left join to a single baserel, else we aren't going to be
189 : * able to do anything with it.
190 : */
191 47558 : if (sjinfo->jointype != JOIN_LEFT)
192 6382 : return false;
193 :
194 41176 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
195 1154 : return false;
196 :
197 : /*
198 : * Never try to eliminate a left join to the query result rel. Although
199 : * the case is syntactically impossible in standard SQL, MERGE will build
200 : * a join tree that looks exactly like that.
201 : */
202 40022 : if (innerrelid == root->parse->resultRelation)
203 694 : return false;
204 :
205 39328 : innerrel = find_base_rel(root, innerrelid);
206 :
207 : /*
208 : * Before we go to the effort of checking whether any innerrel variables
209 : * are needed above the join, make a quick check to eliminate cases in
210 : * which we will surely be unable to prove uniqueness of the innerrel.
211 : */
212 39328 : if (!rel_supports_distinctness(root, innerrel))
213 2746 : return false;
214 :
215 : /* Compute the relid set for the join we are considering */
216 36582 : inputrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
217 : Assert(sjinfo->ojrelid != 0);
218 36582 : joinrelids = bms_copy(inputrelids);
219 36582 : joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
220 :
221 : /*
222 : * We can't remove the join if any inner-rel attributes are used above the
223 : * join. Here, "above" the join includes pushed-down conditions, so we
224 : * should reject if attr_needed includes the OJ's own relid; therefore,
225 : * compare to inputrelids not joinrelids.
226 : *
227 : * As a micro-optimization, it seems better to start with max_attr and
228 : * count down rather than starting with min_attr and counting up, on the
229 : * theory that the system attributes are somewhat less likely to be wanted
230 : * and should be tested last.
231 : */
232 455560 : for (attroff = innerrel->max_attr - innerrel->min_attr;
233 : attroff >= 0;
234 418978 : attroff--)
235 : {
236 446340 : if (!bms_is_subset(innerrel->attr_needed[attroff], inputrelids))
237 27362 : return false;
238 : }
239 :
240 : /*
241 : * Similarly check that the inner rel isn't needed by any PlaceHolderVars
242 : * that will be used above the join. The PHV case is a little bit more
243 : * complicated, because PHVs may have been assigned a ph_eval_at location
244 : * that includes the innerrel, yet their contained expression might not
245 : * actually reference the innerrel (it could be just a constant, for
246 : * instance). If such a PHV is due to be evaluated above the join then it
247 : * needn't prevent join removal.
248 : */
249 9370 : foreach(l, root->placeholder_list)
250 : {
251 180 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
252 :
253 180 : if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
254 30 : return false; /* it references innerrel laterally */
255 180 : if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
256 54 : continue; /* it definitely doesn't reference innerrel */
257 126 : if (bms_is_subset(phinfo->ph_needed, inputrelids))
258 6 : continue; /* PHV is not used above the join */
259 120 : if (!bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
260 30 : return false; /* it has to be evaluated below the join */
261 :
262 : /*
263 : * We need to be sure there will still be a place to evaluate the PHV
264 : * if we remove the join, ie that ph_eval_at wouldn't become empty.
265 : */
266 90 : if (!bms_overlap(sjinfo->min_lefthand, phinfo->ph_eval_at))
267 0 : return false; /* there isn't any other place to eval PHV */
268 : /* Check contained expression last, since this is a bit expensive */
269 90 : if (bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
270 90 : innerrel->relids))
271 0 : return false; /* contained expression references innerrel */
272 : }
273 :
274 : /*
275 : * Search for mergejoinable clauses that constrain the inner rel against
276 : * either the outer rel or a pseudoconstant. If an operator is
277 : * mergejoinable then it behaves like equality for some btree opclass, so
278 : * it's what we want. The mergejoinability test also eliminates clauses
279 : * containing volatile functions, which we couldn't depend on.
280 : */
281 18694 : foreach(l, innerrel->joininfo)
282 : {
283 9504 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
284 :
285 : /*
286 : * If the current join commutes with some other outer join(s) via
287 : * outer join identity 3, there will be multiple clones of its join
288 : * clauses in the joininfo list. We want to consider only the
289 : * has_clone form of such clauses. Processing more than one form
290 : * would be wasteful, and also some of the others would confuse the
291 : * RINFO_IS_PUSHED_DOWN test below.
292 : */
293 9504 : if (restrictinfo->is_clone)
294 122 : continue; /* ignore it */
295 :
296 : /*
297 : * If it's not a join clause for this outer join, we can't use it.
298 : * Note that if the clause is pushed-down, then it is logically from
299 : * above the outer join, even if it references no other rels (it might
300 : * be from WHERE, for example).
301 : */
302 9382 : if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
303 102 : continue; /* ignore; not useful here */
304 :
305 : /* Ignore if it's not a mergejoinable clause */
306 9280 : if (!restrictinfo->can_join ||
307 9226 : restrictinfo->mergeopfamilies == NIL)
308 54 : continue; /* not mergejoinable */
309 :
310 : /*
311 : * Check if clause has the form "outer op inner" or "inner op outer",
312 : * and if so mark which side is inner.
313 : */
314 9226 : if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
315 : innerrel->relids))
316 6 : continue; /* no good for these input relations */
317 :
318 : /* OK, add to list */
319 9220 : clause_list = lappend(clause_list, restrictinfo);
320 : }
321 :
322 : /*
323 : * Now that we have the relevant equality join clauses, try to prove the
324 : * innerrel distinct.
325 : */
326 9190 : if (rel_is_distinct_for(root, innerrel, clause_list, NULL))
327 9094 : return true;
328 :
329 : /*
330 : * Some day it would be nice to check for other methods of establishing
331 : * distinctness.
332 : */
333 96 : return false;
334 : }
335 :
336 :
337 : /*
338 : * Remove the target rel->relid and references to the target join from the
339 : * planner's data structures, having determined that there is no need
340 : * to include them in the query. Optionally replace them with subst if subst
341 : * is non-negative.
342 : *
343 : * This function updates only parts needed for both left-join removal and
344 : * self-join removal.
345 : */
346 : static void
347 9720 : remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel,
348 : int subst, SpecialJoinInfo *sjinfo,
349 : Relids joinrelids)
350 : {
351 9720 : int relid = rel->relid;
352 9720 : int ojrelid = (sjinfo != NULL) ? sjinfo->ojrelid : -1;
353 : Index rti;
354 : ListCell *l;
355 :
356 : /*
357 : * Remove references to the rel from other baserels' attr_needed arrays
358 : * and lateral_vars lists.
359 : */
360 60574 : for (rti = 1; rti < root->simple_rel_array_size; rti++)
361 : {
362 50854 : RelOptInfo *otherrel = root->simple_rel_array[rti];
363 : int attroff;
364 :
365 : /* there may be empty slots corresponding to non-baserel RTEs */
366 50854 : if (otherrel == NULL)
367 24416 : continue;
368 :
369 : Assert(otherrel->relid == rti); /* sanity check on array */
370 :
371 : /* no point in processing target rel itself */
372 26438 : if (otherrel == rel)
373 9720 : continue;
374 :
375 382328 : for (attroff = otherrel->max_attr - otherrel->min_attr;
376 : attroff >= 0;
377 365610 : attroff--)
378 : {
379 731220 : otherrel->attr_needed[attroff] =
380 365610 : replace_relid(otherrel->attr_needed[attroff], relid, subst);
381 365610 : otherrel->attr_needed[attroff] =
382 365610 : replace_relid(otherrel->attr_needed[attroff], ojrelid, subst);
383 : }
384 :
385 : /* Update lateral_vars list. */
386 16718 : replace_varno((Node *) otherrel->lateral_vars, relid, subst);
387 : }
388 :
389 : /*
390 : * Update all_baserels and related relid sets.
391 : */
392 9720 : root->all_baserels = replace_relid(root->all_baserels, relid, subst);
393 9720 : root->outer_join_rels = replace_relid(root->outer_join_rels, ojrelid, subst);
394 9720 : root->all_query_rels = replace_relid(root->all_query_rels, relid, subst);
395 9720 : root->all_query_rels = replace_relid(root->all_query_rels, ojrelid, subst);
396 :
397 : /*
398 : * Likewise remove references from SpecialJoinInfo data structures.
399 : *
400 : * This is relevant in case the outer join we're deleting is nested inside
401 : * other outer joins: the upper joins' relid sets have to be adjusted. The
402 : * RHS of the target outer join will be made empty here, but that's OK
403 : * since caller will delete that SpecialJoinInfo entirely.
404 : */
405 22314 : foreach(l, root->join_info_list)
406 : {
407 12594 : SpecialJoinInfo *sjinf = (SpecialJoinInfo *) lfirst(l);
408 :
409 12594 : sjinf->min_lefthand = replace_relid(sjinf->min_lefthand, relid, subst);
410 12594 : sjinf->min_righthand = replace_relid(sjinf->min_righthand, relid, subst);
411 12594 : sjinf->syn_lefthand = replace_relid(sjinf->syn_lefthand, relid, subst);
412 12594 : sjinf->syn_righthand = replace_relid(sjinf->syn_righthand, relid, subst);
413 12594 : sjinf->min_lefthand = replace_relid(sjinf->min_lefthand, ojrelid, subst);
414 12594 : sjinf->min_righthand = replace_relid(sjinf->min_righthand, ojrelid, subst);
415 12594 : sjinf->syn_lefthand = replace_relid(sjinf->syn_lefthand, ojrelid, subst);
416 12594 : sjinf->syn_righthand = replace_relid(sjinf->syn_righthand, ojrelid, subst);
417 : /* relid cannot appear in these fields, but ojrelid can: */
418 12594 : sjinf->commute_above_l = replace_relid(sjinf->commute_above_l, ojrelid, subst);
419 12594 : sjinf->commute_above_r = replace_relid(sjinf->commute_above_r, ojrelid, subst);
420 12594 : sjinf->commute_below_l = replace_relid(sjinf->commute_below_l, ojrelid, subst);
421 12594 : sjinf->commute_below_r = replace_relid(sjinf->commute_below_r, ojrelid, subst);
422 :
423 12594 : replace_varno((Node *) sjinf->semi_rhs_exprs, relid, subst);
424 : }
425 :
426 : /*
427 : * Likewise remove references from PlaceHolderVar data structures,
428 : * removing any no-longer-needed placeholders entirely.
429 : *
430 : * Removal is a bit trickier than it might seem: we can remove PHVs that
431 : * are used at the target rel and/or in the join qual, but not those that
432 : * are used at join partner rels or above the join. It's not that easy to
433 : * distinguish PHVs used at partner rels from those used in the join qual,
434 : * since they will both have ph_needed sets that are subsets of
435 : * joinrelids. However, a PHV used at a partner rel could not have the
436 : * target rel in ph_eval_at, so we check that while deciding whether to
437 : * remove or just update the PHV. There is no corresponding test in
438 : * join_is_removable because it doesn't need to distinguish those cases.
439 : */
440 9882 : foreach(l, root->placeholder_list)
441 : {
442 162 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
443 :
444 : Assert(sjinfo == NULL || !bms_is_member(relid, phinfo->ph_lateral));
445 198 : if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
446 54 : bms_is_member(relid, phinfo->ph_eval_at) &&
447 12 : (sjinfo == NULL || !bms_is_member(ojrelid, phinfo->ph_eval_at)))
448 : {
449 12 : root->placeholder_list = foreach_delete_current(root->placeholder_list,
450 : l);
451 12 : root->placeholder_array[phinfo->phid] = NULL;
452 : }
453 : else
454 : {
455 150 : PlaceHolderVar *phv = phinfo->ph_var;
456 :
457 150 : phinfo->ph_eval_at = replace_relid(phinfo->ph_eval_at, relid, subst);
458 150 : phinfo->ph_eval_at = replace_relid(phinfo->ph_eval_at, ojrelid, subst);
459 : Assert(!bms_is_empty(phinfo->ph_eval_at)); /* checked previously */
460 150 : phinfo->ph_needed = replace_relid(phinfo->ph_needed, relid, subst);
461 150 : phinfo->ph_needed = replace_relid(phinfo->ph_needed, ojrelid, subst);
462 : /* ph_needed might or might not become empty */
463 150 : phinfo->ph_lateral = replace_relid(phinfo->ph_lateral, relid, subst);
464 : /* ph_lateral might or might not be empty */
465 150 : phv->phrels = replace_relid(phv->phrels, relid, subst);
466 150 : phv->phrels = replace_relid(phv->phrels, ojrelid, subst);
467 : Assert(!bms_is_empty(phv->phrels));
468 150 : replace_varno((Node *) phv->phexpr, relid, subst);
469 : Assert(phv->phnullingrels == NULL); /* no need to adjust */
470 : }
471 : }
472 9720 : }
473 :
474 : /*
475 : * Remove the target relid and references to the target join from the
476 : * planner's data structures, having determined that there is no need
477 : * to include them in the query.
478 : *
479 : * We are not terribly thorough here. We only bother to update parts of
480 : * the planner's data structures that will actually be consulted later.
481 : */
482 : static void
483 9094 : remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
484 : SpecialJoinInfo *sjinfo)
485 : {
486 : List *joininfos;
487 9094 : int ojrelid = sjinfo->ojrelid;
488 9094 : RelOptInfo *rel = find_base_rel(root, relid);
489 : Relids join_plus_commute;
490 : Relids joinrelids;
491 : ListCell *l;
492 :
493 : /* Compute the relid set for the join we are considering */
494 9094 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
495 : Assert(ojrelid != 0);
496 9094 : joinrelids = bms_add_member(joinrelids, ojrelid);
497 :
498 9094 : remove_rel_from_query(root, rel, -1, sjinfo, joinrelids);
499 :
500 : /*
501 : * Remove any joinquals referencing the rel from the joininfo lists.
502 : *
503 : * In some cases, a joinqual has to be put back after deleting its
504 : * reference to the target rel. This can occur for pseudoconstant and
505 : * outerjoin-delayed quals, which can get marked as requiring the rel in
506 : * order to force them to be evaluated at or above the join. We can't
507 : * just discard them, though. Only quals that logically belonged to the
508 : * outer join being discarded should be removed from the query.
509 : *
510 : * We might encounter a qual that is a clone of a deletable qual with some
511 : * outer-join relids added (see deconstruct_distribute_oj_quals). To
512 : * ensure we get rid of such clones as well, add the relids of all OJs
513 : * commutable with this one to the set we test against for
514 : * pushed-down-ness.
515 : */
516 9094 : join_plus_commute = bms_union(joinrelids,
517 9094 : sjinfo->commute_above_r);
518 9094 : join_plus_commute = bms_add_members(join_plus_commute,
519 9094 : sjinfo->commute_below_l);
520 :
521 : /*
522 : * We must make a copy of the rel's old joininfo list before starting the
523 : * loop, because otherwise remove_join_clause_from_rels would destroy the
524 : * list while we're scanning it.
525 : */
526 9094 : joininfos = list_copy(rel->joininfo);
527 18520 : foreach(l, joininfos)
528 : {
529 9426 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
530 :
531 9426 : remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
532 :
533 9426 : if (RINFO_IS_PUSHED_DOWN(rinfo, join_plus_commute))
534 : {
535 : /*
536 : * There might be references to relid or ojrelid in the
537 : * RestrictInfo's relid sets, as a consequence of PHVs having had
538 : * ph_eval_at sets that include those. We already checked above
539 : * that any such PHV is safe (and updated its ph_eval_at), so we
540 : * can just drop those references.
541 : */
542 102 : remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
543 :
544 : /*
545 : * Cross-check that the clause itself does not reference the
546 : * target rel or join.
547 : */
548 : #ifdef USE_ASSERT_CHECKING
549 : {
550 : Relids clause_varnos = pull_varnos(root,
551 : (Node *) rinfo->clause);
552 :
553 : Assert(!bms_is_member(relid, clause_varnos));
554 : Assert(!bms_is_member(ojrelid, clause_varnos));
555 : }
556 : #endif
557 : /* Now throw it back into the joininfo lists */
558 102 : distribute_restrictinfo_to_rels(root, rinfo);
559 : }
560 : }
561 :
562 : /*
563 : * Likewise remove references from EquivalenceClasses.
564 : */
565 49946 : foreach(l, root->eq_classes)
566 : {
567 40852 : EquivalenceClass *ec = (EquivalenceClass *) lfirst(l);
568 :
569 68690 : if (bms_is_member(relid, ec->ec_relids) ||
570 27838 : bms_is_member(ojrelid, ec->ec_relids))
571 13014 : remove_rel_from_eclass(ec, relid, ojrelid);
572 : }
573 :
574 : /*
575 : * There may be references to the rel in root->fkey_list, but if so,
576 : * match_foreign_keys_to_quals() will get rid of them.
577 : */
578 :
579 : /*
580 : * Finally, remove the rel from the baserel array to prevent it from being
581 : * referenced again. (We can't do this earlier because
582 : * remove_join_clause_from_rels will touch it.)
583 : */
584 9094 : root->simple_rel_array[relid] = NULL;
585 :
586 : /* And nuke the RelOptInfo, just in case there's another access path */
587 9094 : pfree(rel);
588 9094 : }
589 :
590 : /*
591 : * Remove any references to relid or ojrelid from the RestrictInfo.
592 : *
593 : * We only bother to clean out bits in clause_relids and required_relids,
594 : * not nullingrel bits in contained Vars and PHVs. (This might have to be
595 : * improved sometime.) However, if the RestrictInfo contains an OR clause
596 : * we have to also clean up the sub-clauses.
597 : */
598 : static void
599 3932 : remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid)
600 : {
601 : /*
602 : * The clause_relids probably aren't shared with anything else, but let's
603 : * copy them just to be sure.
604 : */
605 3932 : rinfo->clause_relids = bms_copy(rinfo->clause_relids);
606 3932 : rinfo->clause_relids = bms_del_member(rinfo->clause_relids, relid);
607 3932 : rinfo->clause_relids = bms_del_member(rinfo->clause_relids, ojrelid);
608 : /* Likewise for required_relids */
609 3932 : rinfo->required_relids = bms_copy(rinfo->required_relids);
610 3932 : rinfo->required_relids = bms_del_member(rinfo->required_relids, relid);
611 3932 : rinfo->required_relids = bms_del_member(rinfo->required_relids, ojrelid);
612 :
613 : /* If it's an OR, recurse to clean up sub-clauses */
614 3932 : if (restriction_is_or_clause(rinfo))
615 : {
616 : ListCell *lc;
617 :
618 : Assert(is_orclause(rinfo->orclause));
619 18 : foreach(lc, ((BoolExpr *) rinfo->orclause)->args)
620 : {
621 12 : Node *orarg = (Node *) lfirst(lc);
622 :
623 : /* OR arguments should be ANDs or sub-RestrictInfos */
624 12 : if (is_andclause(orarg))
625 : {
626 0 : List *andargs = ((BoolExpr *) orarg)->args;
627 : ListCell *lc2;
628 :
629 0 : foreach(lc2, andargs)
630 : {
631 0 : RestrictInfo *rinfo2 = lfirst_node(RestrictInfo, lc2);
632 :
633 0 : remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
634 : }
635 : }
636 : else
637 : {
638 12 : RestrictInfo *rinfo2 = castNode(RestrictInfo, orarg);
639 :
640 12 : remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
641 : }
642 : }
643 : }
644 3932 : }
645 :
646 : /*
647 : * Remove any references to relid or ojrelid from the EquivalenceClass.
648 : *
649 : * Like remove_rel_from_restrictinfo, we don't worry about cleaning out
650 : * any nullingrel bits in contained Vars and PHVs. (This might have to be
651 : * improved sometime.) We do need to fix the EC and EM relid sets to ensure
652 : * that implied join equalities will be generated at the appropriate join
653 : * level(s).
654 : */
655 : static void
656 13014 : remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid)
657 : {
658 : ListCell *lc;
659 :
660 : /* Fix up the EC's overall relids */
661 13014 : ec->ec_relids = bms_del_member(ec->ec_relids, relid);
662 13014 : ec->ec_relids = bms_del_member(ec->ec_relids, ojrelid);
663 :
664 : /*
665 : * Fix up the member expressions. Any non-const member that ends with
666 : * empty em_relids must be a Var or PHV of the removed relation. We don't
667 : * need it anymore, so we can drop it.
668 : */
669 29846 : foreach(lc, ec->ec_members)
670 : {
671 16832 : EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
672 :
673 20650 : if (bms_is_member(relid, cur_em->em_relids) ||
674 3818 : bms_is_member(ojrelid, cur_em->em_relids))
675 : {
676 : Assert(!cur_em->em_is_const);
677 13014 : cur_em->em_relids = bms_del_member(cur_em->em_relids, relid);
678 13014 : cur_em->em_relids = bms_del_member(cur_em->em_relids, ojrelid);
679 13014 : if (bms_is_empty(cur_em->em_relids))
680 13002 : ec->ec_members = foreach_delete_current(ec->ec_members, lc);
681 : }
682 : }
683 :
684 : /* Fix up the source clauses, in case we can re-use them later */
685 16832 : foreach(lc, ec->ec_sources)
686 : {
687 3818 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
688 :
689 3818 : remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
690 : }
691 :
692 : /*
693 : * Rather than expend code on fixing up any already-derived clauses, just
694 : * drop them. (At this point, any such clauses would be base restriction
695 : * clauses, which we'd not need anymore anyway.)
696 : */
697 13014 : ec->ec_derives = NIL;
698 13014 : }
699 :
700 : /*
701 : * Remove any occurrences of the target relid from a joinlist structure.
702 : *
703 : * It's easiest to build a whole new list structure, so we handle it that
704 : * way. Efficiency is not a big deal here.
705 : *
706 : * *nremoved is incremented by the number of occurrences removed (there
707 : * should be exactly one, but the caller checks that).
708 : */
709 : static List *
710 9984 : remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
711 : {
712 9984 : List *result = NIL;
713 : ListCell *jl;
714 :
715 36686 : foreach(jl, joinlist)
716 : {
717 26702 : Node *jlnode = (Node *) lfirst(jl);
718 :
719 26702 : if (IsA(jlnode, RangeTblRef))
720 : {
721 26438 : int varno = ((RangeTblRef *) jlnode)->rtindex;
722 :
723 26438 : if (varno == relid)
724 9720 : (*nremoved)++;
725 : else
726 16718 : result = lappend(result, jlnode);
727 : }
728 264 : else if (IsA(jlnode, List))
729 : {
730 : /* Recurse to handle subproblem */
731 : List *sublist;
732 :
733 264 : sublist = remove_rel_from_joinlist((List *) jlnode,
734 : relid, nremoved);
735 : /* Avoid including empty sub-lists in the result */
736 264 : if (sublist)
737 264 : result = lappend(result, sublist);
738 : }
739 : else
740 : {
741 0 : elog(ERROR, "unrecognized joinlist node type: %d",
742 : (int) nodeTag(jlnode));
743 : }
744 : }
745 :
746 9984 : return result;
747 : }
748 :
749 :
750 : /*
751 : * reduce_unique_semijoins
752 : * Check for semijoins that can be simplified to plain inner joins
753 : * because the inner relation is provably unique for the join clauses.
754 : *
755 : * Ideally this would happen during reduce_outer_joins, but we don't have
756 : * enough information at that point.
757 : *
758 : * To perform the strength reduction when applicable, we need only delete
759 : * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't
760 : * bother fixing the join type attributed to it in the query jointree,
761 : * since that won't be consulted again.)
762 : */
763 : void
764 276574 : reduce_unique_semijoins(PlannerInfo *root)
765 : {
766 : ListCell *lc;
767 :
768 : /*
769 : * Scan the join_info_list to find semijoins.
770 : */
771 314826 : foreach(lc, root->join_info_list)
772 : {
773 38252 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
774 : int innerrelid;
775 : RelOptInfo *innerrel;
776 : Relids joinrelids;
777 : List *restrictlist;
778 :
779 : /*
780 : * Must be a semijoin to a single baserel, else we aren't going to be
781 : * able to do anything with it.
782 : */
783 38252 : if (sjinfo->jointype != JOIN_SEMI)
784 37998 : continue;
785 :
786 1952 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
787 164 : continue;
788 :
789 1788 : innerrel = find_base_rel(root, innerrelid);
790 :
791 : /*
792 : * Before we trouble to run generate_join_implied_equalities, make a
793 : * quick check to eliminate cases in which we will surely be unable to
794 : * prove uniqueness of the innerrel.
795 : */
796 1788 : if (!rel_supports_distinctness(root, innerrel))
797 778 : continue;
798 :
799 : /* Compute the relid set for the join we are considering */
800 1010 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
801 : Assert(sjinfo->ojrelid == 0); /* SEMI joins don't have RT indexes */
802 :
803 : /*
804 : * Since we're only considering a single-rel RHS, any join clauses it
805 : * has must be clauses linking it to the semijoin's min_lefthand. We
806 : * can also consider EC-derived join clauses.
807 : */
808 : restrictlist =
809 1010 : list_concat(generate_join_implied_equalities(root,
810 : joinrelids,
811 : sjinfo->min_lefthand,
812 : innerrel,
813 : NULL),
814 1010 : innerrel->joininfo);
815 :
816 : /* Test whether the innerrel is unique for those clauses. */
817 1010 : if (!innerrel_is_unique(root,
818 : joinrelids, sjinfo->min_lefthand, innerrel,
819 : JOIN_SEMI, restrictlist, true))
820 756 : continue;
821 :
822 : /* OK, remove the SpecialJoinInfo from the list. */
823 254 : root->join_info_list = foreach_delete_current(root->join_info_list, lc);
824 : }
825 276574 : }
826 :
827 :
828 : /*
829 : * rel_supports_distinctness
830 : * Could the relation possibly be proven distinct on some set of columns?
831 : *
832 : * This is effectively a pre-checking function for rel_is_distinct_for().
833 : * It must return true if rel_is_distinct_for() could possibly return true
834 : * with this rel, but it should not expend a lot of cycles. The idea is
835 : * that callers can avoid doing possibly-expensive processing to compute
836 : * rel_is_distinct_for()'s argument lists if the call could not possibly
837 : * succeed.
838 : */
839 : static bool
840 495198 : rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
841 : {
842 : /* We only know about baserels ... */
843 495198 : if (rel->reloptkind != RELOPT_BASEREL)
844 162222 : return false;
845 332976 : if (rel->rtekind == RTE_RELATION)
846 : {
847 : /*
848 : * For a plain relation, we only know how to prove uniqueness by
849 : * reference to unique indexes. Make sure there's at least one
850 : * suitable unique index. It must be immediately enforced, and not a
851 : * partial index. (Keep these conditions in sync with
852 : * relation_has_unique_index_for!)
853 : */
854 : ListCell *lc;
855 :
856 442198 : foreach(lc, rel->indexlist)
857 : {
858 401528 : IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
859 :
860 401528 : if (ind->unique && ind->immediate && ind->indpred == NIL)
861 268804 : return true;
862 : }
863 : }
864 23502 : else if (rel->rtekind == RTE_SUBQUERY)
865 : {
866 3914 : Query *subquery = root->simple_rte_array[rel->relid]->subquery;
867 :
868 : /* Check if the subquery has any qualities that support distinctness */
869 3914 : if (query_supports_distinctness(subquery))
870 2526 : return true;
871 : }
872 : /* We have no proof rules for any other rtekinds. */
873 61646 : return false;
874 : }
875 :
876 : /*
877 : * rel_is_distinct_for
878 : * Does the relation return only distinct rows according to clause_list?
879 : *
880 : * clause_list is a list of join restriction clauses involving this rel and
881 : * some other one. Return true if no two rows emitted by this rel could
882 : * possibly join to the same row of the other rel.
883 : *
884 : * The caller must have already determined that each condition is a
885 : * mergejoinable equality with an expression in this relation on one side, and
886 : * an expression not involving this relation on the other. The transient
887 : * outer_is_left flag is used to identify which side references this relation:
888 : * left side if outer_is_left is false, right side if it is true.
889 : *
890 : * Note that the passed-in clause_list may be destructively modified! This
891 : * is OK for current uses, because the clause_list is built by the caller for
892 : * the sole purpose of passing to this function.
893 : *
894 : * outer_exprs contains the right sides of baserestrictinfo clauses looking
895 : * like x = const if distinctness is derived from such clauses, not joininfo
896 : * clause. Pass NULL to the outer_exprs, if its value is not needed.
897 : */
898 : static bool
899 168648 : rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list,
900 : List **extra_clauses)
901 : {
902 : /*
903 : * We could skip a couple of tests here if we assume all callers checked
904 : * rel_supports_distinctness first, but it doesn't seem worth taking any
905 : * risk for.
906 : */
907 168648 : if (rel->reloptkind != RELOPT_BASEREL)
908 0 : return false;
909 168648 : if (rel->rtekind == RTE_RELATION)
910 : {
911 : /*
912 : * Examine the indexes to see if we have a matching unique index.
913 : * relation_has_unique_index_ext automatically adds any usable
914 : * restriction clauses for the rel, so we needn't do that here.
915 : */
916 166946 : if (relation_has_unique_index_ext(root, rel, clause_list, NIL, NIL,
917 : extra_clauses))
918 102018 : return true;
919 : }
920 1702 : else if (rel->rtekind == RTE_SUBQUERY)
921 : {
922 1702 : Index relid = rel->relid;
923 1702 : Query *subquery = root->simple_rte_array[relid]->subquery;
924 1702 : List *colnos = NIL;
925 1702 : List *opids = NIL;
926 : ListCell *l;
927 :
928 : /*
929 : * Build the argument lists for query_is_distinct_for: a list of
930 : * output column numbers that the query needs to be distinct over, and
931 : * a list of equality operators that the output columns need to be
932 : * distinct according to.
933 : *
934 : * (XXX we are not considering restriction clauses attached to the
935 : * subquery; is that worth doing?)
936 : */
937 3380 : foreach(l, clause_list)
938 : {
939 1678 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
940 : Oid op;
941 : Var *var;
942 :
943 : /*
944 : * Get the equality operator we need uniqueness according to.
945 : * (This might be a cross-type operator and thus not exactly the
946 : * same operator the subquery would consider; that's all right
947 : * since query_is_distinct_for can resolve such cases.) The
948 : * caller's mergejoinability test should have selected only
949 : * OpExprs.
950 : */
951 1678 : op = castNode(OpExpr, rinfo->clause)->opno;
952 :
953 : /* caller identified the inner side for us */
954 1678 : if (rinfo->outer_is_left)
955 1364 : var = (Var *) get_rightop(rinfo->clause);
956 : else
957 314 : var = (Var *) get_leftop(rinfo->clause);
958 :
959 : /*
960 : * We may ignore any RelabelType node above the operand. (There
961 : * won't be more than one, since eval_const_expressions() has been
962 : * applied already.)
963 : */
964 1678 : if (var && IsA(var, RelabelType))
965 600 : var = (Var *) ((RelabelType *) var)->arg;
966 :
967 : /*
968 : * If inner side isn't a Var referencing a subquery output column,
969 : * this clause doesn't help us.
970 : */
971 1678 : if (!var || !IsA(var, Var) ||
972 1666 : var->varno != relid || var->varlevelsup != 0)
973 12 : continue;
974 :
975 1666 : colnos = lappend_int(colnos, var->varattno);
976 1666 : opids = lappend_oid(opids, op);
977 : }
978 :
979 1702 : if (query_is_distinct_for(subquery, colnos, opids))
980 188 : return true;
981 : }
982 66442 : return false;
983 : }
984 :
985 :
986 : /*
987 : * query_supports_distinctness - could the query possibly be proven distinct
988 : * on some set of output columns?
989 : *
990 : * This is effectively a pre-checking function for query_is_distinct_for().
991 : * It must return true if query_is_distinct_for() could possibly return true
992 : * with this query, but it should not expend a lot of cycles. The idea is
993 : * that callers can avoid doing possibly-expensive processing to compute
994 : * query_is_distinct_for()'s argument lists if the call could not possibly
995 : * succeed.
996 : */
997 : bool
998 4560 : query_supports_distinctness(Query *query)
999 : {
1000 : /* SRFs break distinctness except with DISTINCT, see below */
1001 4560 : if (query->hasTargetSRFs && query->distinctClause == NIL)
1002 908 : return false;
1003 :
1004 : /* check for features we can prove distinctness with */
1005 3652 : if (query->distinctClause != NIL ||
1006 3508 : query->groupClause != NIL ||
1007 3302 : query->groupingSets != NIL ||
1008 3302 : query->hasAggs ||
1009 3126 : query->havingQual ||
1010 3126 : query->setOperations)
1011 3136 : return true;
1012 :
1013 516 : return false;
1014 : }
1015 :
1016 : /*
1017 : * query_is_distinct_for - does query never return duplicates of the
1018 : * specified columns?
1019 : *
1020 : * query is a not-yet-planned subquery (in current usage, it's always from
1021 : * a subquery RTE, which the planner avoids scribbling on).
1022 : *
1023 : * colnos is an integer list of output column numbers (resno's). We are
1024 : * interested in whether rows consisting of just these columns are certain
1025 : * to be distinct. "Distinctness" is defined according to whether the
1026 : * corresponding upper-level equality operators listed in opids would think
1027 : * the values are distinct. (Note: the opids entries could be cross-type
1028 : * operators, and thus not exactly the equality operators that the subquery
1029 : * would use itself. We use equality_ops_are_compatible() to check
1030 : * compatibility. That looks at btree or hash opfamily membership, and so
1031 : * should give trustworthy answers for all operators that we might need
1032 : * to deal with here.)
1033 : */
1034 : bool
1035 1824 : query_is_distinct_for(Query *query, List *colnos, List *opids)
1036 : {
1037 : ListCell *l;
1038 : Oid opid;
1039 :
1040 : Assert(list_length(colnos) == list_length(opids));
1041 :
1042 : /*
1043 : * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
1044 : * columns in the DISTINCT clause appear in colnos and operator semantics
1045 : * match. This is true even if there are SRFs in the DISTINCT columns or
1046 : * elsewhere in the tlist.
1047 : */
1048 1824 : if (query->distinctClause)
1049 : {
1050 150 : foreach(l, query->distinctClause)
1051 : {
1052 120 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
1053 120 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
1054 : query->targetList);
1055 :
1056 120 : opid = distinct_col_search(tle->resno, colnos, opids);
1057 120 : if (!OidIsValid(opid) ||
1058 48 : !equality_ops_are_compatible(opid, sgc->eqop))
1059 : break; /* exit early if no match */
1060 : }
1061 102 : if (l == NULL) /* had matches for all? */
1062 30 : return true;
1063 : }
1064 :
1065 : /*
1066 : * Otherwise, a set-returning function in the query's targetlist can
1067 : * result in returning duplicate rows, despite any grouping that might
1068 : * occur before tlist evaluation. (If all tlist SRFs are within GROUP BY
1069 : * columns, it would be safe because they'd be expanded before grouping.
1070 : * But it doesn't currently seem worth the effort to check for that.)
1071 : */
1072 1794 : if (query->hasTargetSRFs)
1073 0 : return false;
1074 :
1075 : /*
1076 : * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
1077 : * the grouped columns appear in colnos and operator semantics match.
1078 : */
1079 1794 : if (query->groupClause && !query->groupingSets)
1080 : {
1081 258 : foreach(l, query->groupClause)
1082 : {
1083 200 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
1084 200 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
1085 : query->targetList);
1086 :
1087 200 : opid = distinct_col_search(tle->resno, colnos, opids);
1088 200 : if (!OidIsValid(opid) ||
1089 124 : !equality_ops_are_compatible(opid, sgc->eqop))
1090 : break; /* exit early if no match */
1091 : }
1092 134 : if (l == NULL) /* had matches for all? */
1093 58 : return true;
1094 : }
1095 1660 : else if (query->groupingSets)
1096 : {
1097 : /*
1098 : * If we have grouping sets with expressions, we probably don't have
1099 : * uniqueness and analysis would be hard. Punt.
1100 : */
1101 0 : if (query->groupClause)
1102 0 : return false;
1103 :
1104 : /*
1105 : * If we have no groupClause (therefore no grouping expressions), we
1106 : * might have one or many empty grouping sets. If there's just one,
1107 : * then we're returning only one row and are certainly unique. But
1108 : * otherwise, we know we're certainly not unique.
1109 : */
1110 0 : if (list_length(query->groupingSets) == 1 &&
1111 0 : ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
1112 0 : return true;
1113 : else
1114 0 : return false;
1115 : }
1116 : else
1117 : {
1118 : /*
1119 : * If we have no GROUP BY, but do have aggregates or HAVING, then the
1120 : * result is at most one row so it's surely unique, for any operators.
1121 : */
1122 1660 : if (query->hasAggs || query->havingQual)
1123 88 : return true;
1124 : }
1125 :
1126 : /*
1127 : * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
1128 : * except with ALL.
1129 : */
1130 1648 : if (query->setOperations)
1131 : {
1132 1500 : SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations);
1133 :
1134 : Assert(topop->op != SETOP_NONE);
1135 :
1136 1500 : if (!topop->all)
1137 : {
1138 : ListCell *lg;
1139 :
1140 : /* We're good if all the nonjunk output columns are in colnos */
1141 72 : lg = list_head(topop->groupClauses);
1142 90 : foreach(l, query->targetList)
1143 : {
1144 78 : TargetEntry *tle = (TargetEntry *) lfirst(l);
1145 : SortGroupClause *sgc;
1146 :
1147 78 : if (tle->resjunk)
1148 0 : continue; /* ignore resjunk columns */
1149 :
1150 : /* non-resjunk columns should have grouping clauses */
1151 : Assert(lg != NULL);
1152 78 : sgc = (SortGroupClause *) lfirst(lg);
1153 78 : lg = lnext(topop->groupClauses, lg);
1154 :
1155 78 : opid = distinct_col_search(tle->resno, colnos, opids);
1156 78 : if (!OidIsValid(opid) ||
1157 18 : !equality_ops_are_compatible(opid, sgc->eqop))
1158 : break; /* exit early if no match */
1159 : }
1160 72 : if (l == NULL) /* had matches for all? */
1161 12 : return true;
1162 : }
1163 : }
1164 :
1165 : /*
1166 : * XXX Are there any other cases in which we can easily see the result
1167 : * must be distinct?
1168 : *
1169 : * If you do add more smarts to this function, be sure to update
1170 : * query_supports_distinctness() to match.
1171 : */
1172 :
1173 1636 : return false;
1174 : }
1175 :
1176 : /*
1177 : * distinct_col_search - subroutine for query_is_distinct_for
1178 : *
1179 : * If colno is in colnos, return the corresponding element of opids,
1180 : * else return InvalidOid. (Ordinarily colnos would not contain duplicates,
1181 : * but if it does, we arbitrarily select the first match.)
1182 : */
1183 : static Oid
1184 398 : distinct_col_search(int colno, List *colnos, List *opids)
1185 : {
1186 : ListCell *lc1,
1187 : *lc2;
1188 :
1189 634 : forboth(lc1, colnos, lc2, opids)
1190 : {
1191 426 : if (colno == lfirst_int(lc1))
1192 190 : return lfirst_oid(lc2);
1193 : }
1194 208 : return InvalidOid;
1195 : }
1196 :
1197 :
1198 : /*
1199 : * innerrel_is_unique
1200 : * Check if the innerrel provably contains at most one tuple matching any
1201 : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1202 : *
1203 : * We need an actual RelOptInfo for the innerrel, but it's sufficient to
1204 : * identify the outerrel by its Relids. This asymmetry supports use of this
1205 : * function before joinrels have been built. (The caller is expected to
1206 : * also supply the joinrelids, just to save recalculating that.)
1207 : *
1208 : * The proof must be made based only on clauses that will be "joinquals"
1209 : * rather than "otherquals" at execution. For an inner join there's no
1210 : * difference; but if the join is outer, we must ignore pushed-down quals,
1211 : * as those will become "otherquals". Note that this means the answer might
1212 : * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
1213 : * answer without regard to that, callers must take care not to call this
1214 : * with jointypes that would be classified differently by IS_OUTER_JOIN().
1215 : *
1216 : * The actual proof is undertaken by is_innerrel_unique_for(); this function
1217 : * is a frontend that is mainly concerned with caching the answers.
1218 : * In particular, the force_cache argument allows overriding the internal
1219 : * heuristic about whether to cache negative answers; it should be "true"
1220 : * if making an inquiry that is not part of the normal bottom-up join search
1221 : * sequence.
1222 : */
1223 : bool
1224 547080 : innerrel_is_unique(PlannerInfo *root,
1225 : Relids joinrelids,
1226 : Relids outerrelids,
1227 : RelOptInfo *innerrel,
1228 : JoinType jointype,
1229 : List *restrictlist,
1230 : bool force_cache)
1231 : {
1232 547080 : return innerrel_is_unique_ext(root, joinrelids, outerrelids, innerrel,
1233 : jointype, restrictlist, force_cache, NULL);
1234 : }
1235 :
1236 : /*
1237 : * innerrel_is_unique_ext
1238 : * Do the same as innerrel_is_unique(), but also set to '*extra_clauses'
1239 : * additional clauses from a baserestrictinfo list that were used to prove
1240 : * uniqueness. A non NULL 'extra_clauses' indicates that we're checking
1241 : * for self-join and correspondingly dealing with filtered clauses.
1242 : */
1243 : bool
1244 552718 : innerrel_is_unique_ext(PlannerInfo *root,
1245 : Relids joinrelids,
1246 : Relids outerrelids,
1247 : RelOptInfo *innerrel,
1248 : JoinType jointype,
1249 : List *restrictlist,
1250 : bool force_cache,
1251 : List **extra_clauses)
1252 : {
1253 : MemoryContext old_context;
1254 : ListCell *lc;
1255 : UniqueRelInfo *uniqueRelInfo;
1256 552718 : List *outer_exprs = NIL;
1257 552718 : bool self_join = (extra_clauses != NULL);
1258 :
1259 : /* Certainly can't prove uniqueness when there are no joinclauses */
1260 552718 : if (restrictlist == NIL)
1261 98636 : return false;
1262 :
1263 : /*
1264 : * Make a quick check to eliminate cases in which we will surely be unable
1265 : * to prove uniqueness of the innerrel.
1266 : */
1267 454082 : if (!rel_supports_distinctness(root, innerrel))
1268 220344 : return false;
1269 :
1270 : /*
1271 : * Query the cache to see if we've managed to prove that innerrel is
1272 : * unique for any subset of this outerrel. For non self-join search, we
1273 : * don't need an exact match, as extra outerrels can't make the innerrel
1274 : * any less unique (or more formally, the restrictlist for a join to a
1275 : * superset outerrel must be a superset of the conditions we successfully
1276 : * used before). For self-join search, we require an exact match of
1277 : * outerrels, because we need extra clauses to be valid for our case.
1278 : * Also, for self-join checking we've filtered the clauses list. Thus,
1279 : * for a self-join search, we can match only the result cached for another
1280 : * self-join check.
1281 : */
1282 256240 : foreach(lc, innerrel->unique_for_rels)
1283 : {
1284 96464 : uniqueRelInfo = (UniqueRelInfo *) lfirst(lc);
1285 :
1286 96464 : if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) ||
1287 74 : (self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) &&
1288 56 : uniqueRelInfo->self_join))
1289 : {
1290 73962 : if (extra_clauses)
1291 12 : *extra_clauses = uniqueRelInfo->extra_clauses;
1292 73962 : return true; /* Success! */
1293 : }
1294 : }
1295 :
1296 : /*
1297 : * Conversely, we may have already determined that this outerrel, or some
1298 : * superset thereof, cannot prove this innerrel to be unique.
1299 : */
1300 160736 : foreach(lc, innerrel->non_unique_for_rels)
1301 : {
1302 1278 : Relids unique_for_rels = (Relids) lfirst(lc);
1303 :
1304 1278 : if (bms_is_subset(outerrelids, unique_for_rels))
1305 318 : return false;
1306 : }
1307 :
1308 : /* No cached information, so try to make the proof. */
1309 159458 : if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
1310 : jointype, restrictlist,
1311 : self_join ? &outer_exprs : NULL))
1312 : {
1313 : /*
1314 : * Cache the positive result for future probes, being sure to keep it
1315 : * in the planner_cxt even if we are working in GEQO.
1316 : *
1317 : * Note: one might consider trying to isolate the minimal subset of
1318 : * the outerrels that proved the innerrel unique. But it's not worth
1319 : * the trouble, because the planner builds up joinrels incrementally
1320 : * and so we'll see the minimally sufficient outerrels before any
1321 : * supersets of them anyway.
1322 : */
1323 93112 : old_context = MemoryContextSwitchTo(root->planner_cxt);
1324 93112 : uniqueRelInfo = makeNode(UniqueRelInfo);
1325 93112 : uniqueRelInfo->outerrelids = bms_copy(outerrelids);
1326 93112 : uniqueRelInfo->self_join = self_join;
1327 93112 : uniqueRelInfo->extra_clauses = outer_exprs;
1328 93112 : innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
1329 : uniqueRelInfo);
1330 93112 : MemoryContextSwitchTo(old_context);
1331 :
1332 93112 : if (extra_clauses)
1333 694 : *extra_clauses = outer_exprs;
1334 93112 : return true; /* Success! */
1335 : }
1336 : else
1337 : {
1338 : /*
1339 : * None of the join conditions for outerrel proved innerrel unique, so
1340 : * we can safely reject this outerrel or any subset of it in future
1341 : * checks.
1342 : *
1343 : * However, in normal planning mode, caching this knowledge is totally
1344 : * pointless; it won't be queried again, because we build up joinrels
1345 : * from smaller to larger. It is useful in GEQO mode, where the
1346 : * knowledge can be carried across successive planning attempts; and
1347 : * it's likely to be useful when using join-search plugins, too. Hence
1348 : * cache when join_search_private is non-NULL. (Yeah, that's a hack,
1349 : * but it seems reasonable.)
1350 : *
1351 : * Also, allow callers to override that heuristic and force caching;
1352 : * that's useful for reduce_unique_semijoins, which calls here before
1353 : * the normal join search starts.
1354 : */
1355 66346 : if (force_cache || root->join_search_private)
1356 : {
1357 1600 : old_context = MemoryContextSwitchTo(root->planner_cxt);
1358 1600 : innerrel->non_unique_for_rels =
1359 1600 : lappend(innerrel->non_unique_for_rels,
1360 1600 : bms_copy(outerrelids));
1361 1600 : MemoryContextSwitchTo(old_context);
1362 : }
1363 :
1364 66346 : return false;
1365 : }
1366 : }
1367 :
1368 : /*
1369 : * is_innerrel_unique_for
1370 : * Check if the innerrel provably contains at most one tuple matching any
1371 : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1372 : */
1373 : static bool
1374 159458 : is_innerrel_unique_for(PlannerInfo *root,
1375 : Relids joinrelids,
1376 : Relids outerrelids,
1377 : RelOptInfo *innerrel,
1378 : JoinType jointype,
1379 : List *restrictlist,
1380 : List **extra_clauses)
1381 : {
1382 159458 : List *clause_list = NIL;
1383 : ListCell *lc;
1384 :
1385 : /*
1386 : * Search for mergejoinable clauses that constrain the inner rel against
1387 : * the outer rel. If an operator is mergejoinable then it behaves like
1388 : * equality for some btree opclass, so it's what we want. The
1389 : * mergejoinability test also eliminates clauses containing volatile
1390 : * functions, which we couldn't depend on.
1391 : */
1392 348732 : foreach(lc, restrictlist)
1393 : {
1394 189274 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
1395 :
1396 : /*
1397 : * As noted above, if it's a pushed-down clause and we're at an outer
1398 : * join, we can't use it.
1399 : */
1400 189274 : if (IS_OUTER_JOIN(jointype) &&
1401 94314 : RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
1402 3358 : continue;
1403 :
1404 : /* Ignore if it's not a mergejoinable clause */
1405 185916 : if (!restrictinfo->can_join ||
1406 171934 : restrictinfo->mergeopfamilies == NIL)
1407 16008 : continue; /* not mergejoinable */
1408 :
1409 : /*
1410 : * Check if the clause has the form "outer op inner" or "inner op
1411 : * outer", and if so mark which side is inner.
1412 : */
1413 169908 : if (!clause_sides_match_join(restrictinfo, outerrelids,
1414 : innerrel->relids))
1415 14 : continue; /* no good for these input relations */
1416 :
1417 : /* OK, add to the list */
1418 169894 : clause_list = lappend(clause_list, restrictinfo);
1419 : }
1420 :
1421 : /* Let rel_is_distinct_for() do the hard work */
1422 159458 : return rel_is_distinct_for(root, innerrel, clause_list, extra_clauses);
1423 : }
1424 :
1425 : /*
1426 : * replace_varno - find in the given tree any Vars, PlaceHolderVar, and Relids
1427 : * that reference the removing relid, and change them to the reference to
1428 : * the replacement relid.
1429 : *
1430 : * NOTE: although this has the form of a walker, we cheat and modify the
1431 : * nodes in-place.
1432 : */
1433 :
1434 : typedef struct
1435 : {
1436 : int from;
1437 : int to;
1438 : int sublevels_up;
1439 : } ReplaceVarnoContext;
1440 :
1441 : static bool
1442 61084 : replace_varno_walker(Node *node, ReplaceVarnoContext *ctx)
1443 : {
1444 61084 : if (node == NULL)
1445 18160 : return false;
1446 42924 : if (IsA(node, Var))
1447 : {
1448 19960 : Var *var = (Var *) node;
1449 :
1450 19960 : if (var->varno == ctx->from &&
1451 8284 : var->varlevelsup == ctx->sublevels_up)
1452 : {
1453 8272 : var->varno = ctx->to;
1454 8272 : var->varnosyn = ctx->to;
1455 : }
1456 19960 : return false;
1457 : }
1458 22964 : else if (IsA(node, PlaceHolderVar))
1459 : {
1460 66 : PlaceHolderVar *phv = (PlaceHolderVar *) node;
1461 :
1462 66 : if (phv->phlevelsup == ctx->sublevels_up)
1463 : {
1464 66 : phv->phrels =
1465 66 : replace_relid(phv->phrels, ctx->from, ctx->to);
1466 66 : phv->phnullingrels =
1467 66 : replace_relid(phv->phnullingrels, ctx->from, ctx->to);
1468 : }
1469 :
1470 : /* fall through to recurse into the placeholder's expression */
1471 : }
1472 22898 : else if (IsA(node, Query))
1473 : {
1474 : /* Recurse into subselects */
1475 : bool result;
1476 :
1477 30 : ctx->sublevels_up++;
1478 30 : result = query_tree_walker((Query *) node,
1479 : replace_varno_walker,
1480 : (void *) ctx,
1481 : QTW_EXAMINE_SORTGROUP);
1482 30 : ctx->sublevels_up--;
1483 30 : return result;
1484 : }
1485 22868 : else if (IsA(node, RestrictInfo))
1486 : {
1487 1852 : RestrictInfo *rinfo = (RestrictInfo *) node;
1488 1852 : int relid = -1;
1489 1852 : bool is_req_equal =
1490 1852 : (rinfo->required_relids == rinfo->clause_relids);
1491 :
1492 1852 : if (bms_is_member(ctx->from, rinfo->clause_relids))
1493 : {
1494 1798 : replace_varno((Node *) rinfo->clause, ctx->from, ctx->to);
1495 1798 : replace_varno((Node *) rinfo->orclause, ctx->from, ctx->to);
1496 1798 : rinfo->clause_relids =
1497 1798 : replace_relid(rinfo->clause_relids, ctx->from, ctx->to);
1498 1798 : rinfo->left_relids =
1499 1798 : replace_relid(rinfo->left_relids, ctx->from, ctx->to);
1500 1798 : rinfo->right_relids =
1501 1798 : replace_relid(rinfo->right_relids, ctx->from, ctx->to);
1502 : }
1503 :
1504 1852 : if (is_req_equal)
1505 12 : rinfo->required_relids = rinfo->clause_relids;
1506 : else
1507 1840 : rinfo->required_relids =
1508 1840 : replace_relid(rinfo->required_relids, ctx->from, ctx->to);
1509 :
1510 1852 : rinfo->outer_relids =
1511 1852 : replace_relid(rinfo->outer_relids, ctx->from, ctx->to);
1512 1852 : rinfo->incompatible_relids =
1513 1852 : replace_relid(rinfo->incompatible_relids, ctx->from, ctx->to);
1514 :
1515 3518 : if (rinfo->mergeopfamilies &&
1516 1666 : bms_get_singleton_member(rinfo->clause_relids, &relid) &&
1517 1550 : relid == ctx->to && IsA(rinfo->clause, OpExpr))
1518 : {
1519 : Expr *leftOp;
1520 : Expr *rightOp;
1521 :
1522 1550 : leftOp = (Expr *) get_leftop(rinfo->clause);
1523 1550 : rightOp = (Expr *) get_rightop(rinfo->clause);
1524 :
1525 1550 : if (leftOp != NULL && equal(leftOp, rightOp))
1526 : {
1527 1242 : NullTest *ntest = makeNode(NullTest);
1528 :
1529 1242 : ntest->arg = leftOp;
1530 1242 : ntest->nulltesttype = IS_NOT_NULL;
1531 1242 : ntest->argisrow = false;
1532 1242 : ntest->location = -1;
1533 1242 : rinfo->clause = (Expr *) ntest;
1534 1242 : rinfo->mergeopfamilies = NIL;
1535 : }
1536 : Assert(rinfo->orclause == NULL);
1537 : }
1538 :
1539 1852 : return false;
1540 : }
1541 :
1542 21082 : return expression_tree_walker(node, replace_varno_walker,
1543 : (void *) ctx);
1544 : }
1545 :
1546 : static void
1547 41800 : replace_varno(Node *node, int from, int to)
1548 : {
1549 : ReplaceVarnoContext ctx;
1550 :
1551 41800 : if (to <= 0)
1552 28426 : return;
1553 :
1554 13374 : ctx.from = from;
1555 13374 : ctx.to = to;
1556 13374 : ctx.sublevels_up = 0;
1557 :
1558 : /*
1559 : * Must be prepared to start with a Query or a bare expression tree.
1560 : */
1561 13374 : query_or_expression_tree_walker(node,
1562 : replace_varno_walker,
1563 : (void *) &ctx,
1564 : QTW_EXAMINE_SORTGROUP);
1565 : }
1566 :
1567 : /*
1568 : * Substitute newId by oldId in relids.
1569 : *
1570 : * We must make a copy of the original Bitmapset before making any
1571 : * modifications, because the same pointer to it might be shared among
1572 : * different places.
1573 : */
1574 : static Bitmapset *
1575 944992 : replace_relid(Relids relids, int oldId, int newId)
1576 : {
1577 944992 : if (oldId < 0)
1578 12066 : return relids;
1579 :
1580 : /* Delete relid without substitution. */
1581 932926 : if (newId < 0)
1582 898506 : return bms_del_member(bms_copy(relids), oldId);
1583 :
1584 : /* Substitute newId for oldId. */
1585 34420 : if (bms_is_member(oldId, relids))
1586 10928 : return bms_add_member(bms_del_member(bms_copy(relids), oldId), newId);
1587 :
1588 23492 : return relids;
1589 : }
1590 :
1591 : /*
1592 : * Update EC members to point to the remaining relation instead of the removed
1593 : * one, removing duplicates.
1594 : *
1595 : * Restriction clauses for base relations are already distributed to
1596 : * the respective baserestrictinfo lists (see
1597 : * generate_implied_equalities_for_column). The above code has already processed
1598 : * this list, and updated these clauses to reference the remaining
1599 : * relation, so we can skip them here based on their relids.
1600 : *
1601 : * Likewise, we have already processed the join clauses that join the
1602 : * removed relation to the remaining one.
1603 : *
1604 : * Finally, there are join clauses that join the removed relation to
1605 : * some third relation. We can't just delete the source clauses and
1606 : * regenerate them from the EC because the corresponding equality
1607 : * operators might be missing (see the handling of ec_broken).
1608 : * Therefore, we will update the references in the source clauses.
1609 : *
1610 : * Derived clauses can be generated again, so it is simpler to just
1611 : * delete them.
1612 : */
1613 : static void
1614 932 : update_eclasses(EquivalenceClass *ec, int from, int to)
1615 : {
1616 932 : List *new_members = NIL;
1617 932 : List *new_sources = NIL;
1618 : ListCell *lc;
1619 : ListCell *lc1;
1620 :
1621 2894 : foreach(lc, ec->ec_members)
1622 : {
1623 1962 : EquivalenceMember *em = lfirst_node(EquivalenceMember, lc);
1624 1962 : bool is_redundant = false;
1625 :
1626 1962 : if (!bms_is_member(from, em->em_relids))
1627 : {
1628 1012 : new_members = lappend(new_members, em);
1629 1012 : continue;
1630 : }
1631 :
1632 950 : em->em_relids = replace_relid(em->em_relids, from, to);
1633 950 : em->em_jdomain->jd_relids = replace_relid(em->em_jdomain->jd_relids, from, to);
1634 :
1635 : /* We only process inner joins */
1636 950 : replace_varno((Node *) em->em_expr, from, to);
1637 :
1638 974 : foreach(lc1, new_members)
1639 : {
1640 324 : EquivalenceMember *other = lfirst_node(EquivalenceMember, lc1);
1641 :
1642 324 : if (!equal(em->em_relids, other->em_relids))
1643 24 : continue;
1644 :
1645 300 : if (equal(em->em_expr, other->em_expr))
1646 : {
1647 300 : is_redundant = true;
1648 300 : break;
1649 : }
1650 : }
1651 :
1652 950 : if (!is_redundant)
1653 650 : new_members = lappend(new_members, em);
1654 : }
1655 :
1656 932 : list_free(ec->ec_members);
1657 932 : ec->ec_members = new_members;
1658 :
1659 932 : list_free(ec->ec_derives);
1660 932 : ec->ec_derives = NULL;
1661 :
1662 : /* Update EC source expressions */
1663 1962 : foreach(lc, ec->ec_sources)
1664 : {
1665 1030 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1666 1030 : bool is_redundant = false;
1667 :
1668 1030 : if (!bms_is_member(from, rinfo->required_relids))
1669 : {
1670 144 : new_sources = lappend(new_sources, rinfo);
1671 144 : continue;
1672 : }
1673 :
1674 886 : replace_varno((Node *) rinfo, from, to);
1675 :
1676 : /*
1677 : * After switching the clause to the remaining relation, check it for
1678 : * redundancy with existing ones. We don't have to check for
1679 : * redundancy with derived clauses, because we've just deleted them.
1680 : */
1681 952 : foreach(lc1, new_sources)
1682 : {
1683 104 : RestrictInfo *other = lfirst_node(RestrictInfo, lc1);
1684 :
1685 104 : if (!equal(rinfo->clause_relids, other->clause_relids))
1686 24 : continue;
1687 :
1688 80 : if (equal(rinfo->clause, other->clause))
1689 : {
1690 38 : is_redundant = true;
1691 38 : break;
1692 : }
1693 : }
1694 :
1695 886 : if (!is_redundant)
1696 848 : new_sources = lappend(new_sources, rinfo);
1697 : }
1698 :
1699 932 : list_free(ec->ec_sources);
1700 932 : ec->ec_sources = new_sources;
1701 932 : ec->ec_relids = replace_relid(ec->ec_relids, from, to);
1702 932 : }
1703 :
1704 : /*
1705 : * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field,
1706 : * which makes almost every RestrictInfo unique. This type of comparison is
1707 : * useful when removing duplicates while moving RestrictInfo's from removed
1708 : * relation to remaining relation during self-join elimination.
1709 : *
1710 : * XXX: In the future, we might remove the 'rinfo_serial' field completely and
1711 : * get rid of this function.
1712 : */
1713 : static bool
1714 556 : restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b)
1715 : {
1716 556 : int saved_rinfo_serial = a->rinfo_serial;
1717 : bool result;
1718 :
1719 556 : a->rinfo_serial = b->rinfo_serial;
1720 556 : result = equal(a, b);
1721 556 : a->rinfo_serial = saved_rinfo_serial;
1722 :
1723 556 : return result;
1724 : }
1725 :
1726 : /*
1727 : * Remove a relation after we have proven that it participates only in an
1728 : * unneeded unique self join.
1729 : *
1730 : * Replace any links in planner info structures.
1731 : *
1732 : * Transfer join and restriction clauses from the removed relation to the
1733 : * remaining one. We change the Vars of the clause to point to the
1734 : * remaining relation instead of the removed one. The clauses that require
1735 : * a subset of joinrelids become restriction clauses of the remaining
1736 : * relation, and others remain join clauses. We append them to
1737 : * baserestrictinfo and joininfo respectively, trying not to introduce
1738 : * duplicates.
1739 : *
1740 : * We also have to process the 'joinclauses' list here, because it
1741 : * contains EC-derived join clauses which must become filter clauses. It
1742 : * is not enough to just correct the ECs because the EC-derived
1743 : * restrictions are generated before join removal (see
1744 : * generate_base_implied_equalities).
1745 : */
1746 : static void
1747 626 : remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark,
1748 : RelOptInfo *toKeep, RelOptInfo *toRemove,
1749 : List *restrictlist)
1750 : {
1751 : List *joininfos;
1752 : ListCell *lc;
1753 : int i;
1754 626 : List *jinfo_candidates = NIL;
1755 626 : List *binfo_candidates = NIL;
1756 :
1757 : Assert(toKeep->relid != -1);
1758 :
1759 : /*
1760 : * Replace the index of the removing table with the keeping one. The
1761 : * technique of removing/distributing restrictinfo is used here to attach
1762 : * just appeared (for keeping relation) join clauses and avoid adding
1763 : * duplicates of those that already exist in the joininfo list.
1764 : */
1765 626 : joininfos = list_copy(toRemove->joininfo);
1766 716 : foreach(lc, joininfos)
1767 : {
1768 90 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1769 :
1770 90 : remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
1771 90 : replace_varno((Node *) rinfo, toRemove->relid, toKeep->relid);
1772 :
1773 90 : if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
1774 60 : jinfo_candidates = lappend(jinfo_candidates, rinfo);
1775 : else
1776 30 : binfo_candidates = lappend(binfo_candidates, rinfo);
1777 : }
1778 :
1779 : /*
1780 : * Concatenate restrictlist to the list of base restrictions of the
1781 : * removing table just to simplify the replacement procedure: all of them
1782 : * weren't connected to any keeping relations and need to be added to some
1783 : * rels.
1784 : */
1785 626 : toRemove->baserestrictinfo = list_concat(toRemove->baserestrictinfo,
1786 : restrictlist);
1787 1490 : foreach(lc, toRemove->baserestrictinfo)
1788 : {
1789 864 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1790 :
1791 864 : replace_varno((Node *) rinfo, toRemove->relid, toKeep->relid);
1792 :
1793 864 : if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
1794 0 : jinfo_candidates = lappend(jinfo_candidates, rinfo);
1795 : else
1796 864 : binfo_candidates = lappend(binfo_candidates, rinfo);
1797 : }
1798 :
1799 : /*
1800 : * Now, add all non-redundant clauses to the keeping relation.
1801 : * Contradictory operation. On the one side, we reduce the length of
1802 : * restrict lists that can impact planning or executing time.
1803 : * Additionally, we improve the accuracy of cardinality estimation. On the
1804 : * other side, it is one more place that can make planning time much
1805 : * longer in specific cases. It would have been better to avoid calling
1806 : * the equal() function here, but it's the only way to detect duplicated
1807 : * inequality expressions.
1808 : */
1809 1520 : foreach(lc, binfo_candidates)
1810 : {
1811 894 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1812 894 : ListCell *olc = NULL;
1813 894 : bool is_redundant = false;
1814 :
1815 : Assert(!bms_is_member(toRemove->relid, rinfo->required_relids));
1816 :
1817 1258 : foreach(olc, toKeep->baserestrictinfo)
1818 : {
1819 562 : RestrictInfo *src = lfirst_node(RestrictInfo, olc);
1820 :
1821 562 : if (!bms_equal(src->clause_relids, rinfo->clause_relids))
1822 : /* Const and non-const expressions can't be equal */
1823 18 : continue;
1824 :
1825 544 : if (src == rinfo ||
1826 544 : (rinfo->parent_ec != NULL
1827 304 : && src->parent_ec == rinfo->parent_ec)
1828 538 : || restrict_infos_logically_equal(rinfo, src))
1829 : {
1830 198 : is_redundant = true;
1831 198 : break;
1832 : }
1833 : }
1834 894 : if (!is_redundant)
1835 696 : distribute_restrictinfo_to_rels(root, rinfo);
1836 : }
1837 686 : foreach(lc, jinfo_candidates)
1838 : {
1839 60 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1840 60 : ListCell *olc = NULL;
1841 60 : bool is_redundant = false;
1842 :
1843 : Assert(!bms_is_member(toRemove->relid, rinfo->required_relids));
1844 :
1845 78 : foreach(olc, toKeep->joininfo)
1846 : {
1847 18 : RestrictInfo *src = lfirst_node(RestrictInfo, olc);
1848 :
1849 18 : if (!bms_equal(src->clause_relids, rinfo->clause_relids))
1850 : /* Can't compare trivially different clauses */
1851 0 : continue;
1852 :
1853 18 : if (src == rinfo ||
1854 18 : (rinfo->parent_ec != NULL
1855 0 : && src->parent_ec == rinfo->parent_ec)
1856 18 : || restrict_infos_logically_equal(rinfo, src))
1857 : {
1858 0 : is_redundant = true;
1859 0 : break;
1860 : }
1861 : }
1862 60 : if (!is_redundant)
1863 60 : distribute_restrictinfo_to_rels(root, rinfo);
1864 : }
1865 626 : list_free(binfo_candidates);
1866 626 : list_free(jinfo_candidates);
1867 :
1868 : /*
1869 : * Arrange equivalence classes, mentioned removing a table, with the
1870 : * keeping one: varno of removing table should be replaced in members and
1871 : * sources lists. Also, remove duplicated elements if this replacement
1872 : * procedure created them.
1873 : */
1874 626 : i = -1;
1875 1558 : while ((i = bms_next_member(toRemove->eclass_indexes, i)) >= 0)
1876 : {
1877 932 : EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
1878 :
1879 932 : update_eclasses(ec, toRemove->relid, toKeep->relid);
1880 932 : toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i);
1881 : }
1882 :
1883 : /*
1884 : * Transfer the targetlist and attr_needed flags.
1885 : */
1886 :
1887 2484 : foreach(lc, toRemove->reltarget->exprs)
1888 : {
1889 1858 : Node *node = lfirst(lc);
1890 :
1891 1858 : replace_varno(node, toRemove->relid, toKeep->relid);
1892 1858 : if (!list_member(toKeep->reltarget->exprs, node))
1893 226 : toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node);
1894 : }
1895 :
1896 8186 : for (i = toKeep->min_attr; i <= toKeep->max_attr; i++)
1897 : {
1898 7560 : int attno = i - toKeep->min_attr;
1899 :
1900 15120 : toRemove->attr_needed[attno] = replace_relid(toRemove->attr_needed[attno],
1901 7560 : toRemove->relid, toKeep->relid);
1902 7560 : toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno],
1903 7560 : toRemove->attr_needed[attno]);
1904 : }
1905 :
1906 : /*
1907 : * If the removed relation has a row mark, transfer it to the remaining
1908 : * one.
1909 : *
1910 : * If both rels have row marks, just keep the one corresponding to the
1911 : * remaining relation, because we verified earlier that they have the same
1912 : * strength.
1913 : */
1914 626 : if (rmark)
1915 : {
1916 80 : if (kmark)
1917 : {
1918 : Assert(kmark->markType == rmark->markType);
1919 :
1920 80 : root->rowMarks = list_delete_ptr(root->rowMarks, rmark);
1921 : }
1922 : else
1923 : {
1924 : /* Shouldn't have inheritance children here. */
1925 : Assert(rmark->rti == rmark->prti);
1926 :
1927 0 : rmark->rti = rmark->prti = toKeep->relid;
1928 : }
1929 : }
1930 :
1931 : /* Replace varno in all the query structures */
1932 626 : replace_varno((Node *) root->parse, toRemove->relid, toKeep->relid);
1933 :
1934 : /* See remove_self_joins_one_group() */
1935 : Assert(root->parse->resultRelation != toRemove->relid);
1936 : Assert(root->parse->resultRelation != toKeep->relid);
1937 :
1938 : /* Replace links in the planner info */
1939 626 : remove_rel_from_query(root, toRemove, toKeep->relid, NULL, NULL);
1940 :
1941 : /* At last, replace varno in root targetlist and HAVING clause */
1942 626 : replace_varno((Node *) root->processed_tlist,
1943 626 : toRemove->relid, toKeep->relid);
1944 626 : replace_varno((Node *) root->processed_groupClause,
1945 626 : toRemove->relid, toKeep->relid);
1946 626 : replace_relid(root->all_result_relids, toRemove->relid, toKeep->relid);
1947 626 : replace_relid(root->leaf_result_relids, toRemove->relid, toKeep->relid);
1948 :
1949 :
1950 : /*
1951 : * There may be references to the rel in root->fkey_list, but if so,
1952 : * match_foreign_keys_to_quals() will get rid of them.
1953 : */
1954 :
1955 : /*
1956 : * Finally, remove the rel from the baserel array to prevent it from being
1957 : * referenced again. (We can't do this earlier because
1958 : * remove_join_clause_from_rels will touch it.)
1959 : */
1960 626 : root->simple_rel_array[toRemove->relid] = NULL;
1961 :
1962 : /* And nuke the RelOptInfo, just in case there's another access path */
1963 626 : pfree(toRemove);
1964 626 : }
1965 :
1966 : /*
1967 : * split_selfjoin_quals
1968 : * Processes 'joinquals' by building two lists: one containing the quals
1969 : * where the columns/exprs are on either side of the join match, and
1970 : * another one containing the remaining quals.
1971 : *
1972 : * 'joinquals' must only contain quals for a RTE_RELATION being joined to
1973 : * itself.
1974 : */
1975 : static void
1976 5638 : split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals,
1977 : List **otherjoinquals, int from, int to)
1978 : {
1979 : ListCell *lc;
1980 5638 : List *sjoinquals = NIL;
1981 5638 : List *ojoinquals = NIL;
1982 :
1983 7646 : foreach(lc, joinquals)
1984 : {
1985 2008 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1986 : OpExpr *expr;
1987 : Node *leftexpr;
1988 : Node *rightexpr;
1989 :
1990 : /* In general, clause looks like F(arg1) = G(arg2) */
1991 4016 : if (!rinfo->mergeopfamilies ||
1992 4016 : bms_num_members(rinfo->clause_relids) != 2 ||
1993 4016 : bms_membership(rinfo->left_relids) != BMS_SINGLETON ||
1994 2008 : bms_membership(rinfo->right_relids) != BMS_SINGLETON)
1995 : {
1996 0 : ojoinquals = lappend(ojoinquals, rinfo);
1997 0 : continue;
1998 : }
1999 :
2000 2008 : expr = (OpExpr *) rinfo->clause;
2001 :
2002 2008 : if (!IsA(expr, OpExpr) || list_length(expr->args) != 2)
2003 : {
2004 0 : ojoinquals = lappend(ojoinquals, rinfo);
2005 0 : continue;
2006 : }
2007 :
2008 2008 : leftexpr = get_leftop(rinfo->clause);
2009 2008 : rightexpr = copyObject(get_rightop(rinfo->clause));
2010 :
2011 2008 : if (leftexpr && IsA(leftexpr, RelabelType))
2012 12 : leftexpr = (Node *) ((RelabelType *) leftexpr)->arg;
2013 2008 : if (rightexpr && IsA(rightexpr, RelabelType))
2014 6 : rightexpr = (Node *) ((RelabelType *) rightexpr)->arg;
2015 :
2016 : /*
2017 : * Quite an expensive operation, narrowing the use case. For example,
2018 : * when we have cast of the same var to different (but compatible)
2019 : * types.
2020 : */
2021 2008 : replace_varno(rightexpr, bms_singleton_member(rinfo->right_relids),
2022 2008 : bms_singleton_member(rinfo->left_relids));
2023 :
2024 2008 : if (equal(leftexpr, rightexpr))
2025 1546 : sjoinquals = lappend(sjoinquals, rinfo);
2026 : else
2027 462 : ojoinquals = lappend(ojoinquals, rinfo);
2028 : }
2029 :
2030 5638 : *selfjoinquals = sjoinquals;
2031 5638 : *otherjoinquals = ojoinquals;
2032 5638 : }
2033 :
2034 : /*
2035 : * Check for a case when uniqueness is at least partly derived from a
2036 : * baserestrictinfo clause. In this case, we have a chance to return only
2037 : * one row (if such clauses on both sides of SJ are equal) or nothing (if they
2038 : * are different).
2039 : */
2040 : static bool
2041 706 : match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses,
2042 : Index relid)
2043 : {
2044 : ListCell *lc;
2045 :
2046 834 : foreach(lc, uclauses)
2047 : {
2048 208 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
2049 : Expr *clause;
2050 : Node *iclause;
2051 : Node *c1;
2052 208 : bool matched = false;
2053 : ListCell *olc;
2054 :
2055 : Assert(outer->relid > 0 && relid > 0);
2056 :
2057 : /* Only filters like f(R.x1,...,R.xN) == expr we should consider. */
2058 : Assert(bms_is_empty(rinfo->left_relids) ^
2059 : bms_is_empty(rinfo->right_relids));
2060 :
2061 208 : clause = (Expr *) copyObject(rinfo->clause);
2062 208 : replace_varno((Node *) clause, relid, outer->relid);
2063 :
2064 208 : iclause = bms_is_empty(rinfo->left_relids) ? get_rightop(clause) :
2065 202 : get_leftop(clause);
2066 208 : c1 = bms_is_empty(rinfo->left_relids) ? get_leftop(clause) :
2067 202 : get_rightop(clause);
2068 :
2069 : /*
2070 : * Compare these left and right sides with the corresponding sides of
2071 : * the outer's filters. If no one is detected - return immediately.
2072 : */
2073 318 : foreach(olc, outer->baserestrictinfo)
2074 : {
2075 238 : RestrictInfo *orinfo = lfirst_node(RestrictInfo, olc);
2076 : Node *oclause;
2077 : Node *c2;
2078 :
2079 238 : if (orinfo->mergeopfamilies == NIL)
2080 : /* Don't consider clauses which aren't similar to 'F(X)=G(Y)' */
2081 60 : continue;
2082 :
2083 : Assert(is_opclause(orinfo->clause));
2084 :
2085 356 : oclause = bms_is_empty(orinfo->left_relids) ?
2086 178 : get_rightop(orinfo->clause) : get_leftop(orinfo->clause);
2087 356 : c2 = (bms_is_empty(orinfo->left_relids) ?
2088 178 : get_leftop(orinfo->clause) : get_rightop(orinfo->clause));
2089 :
2090 178 : if (equal(iclause, oclause) && equal(c1, c2))
2091 : {
2092 128 : matched = true;
2093 128 : break;
2094 : }
2095 : }
2096 :
2097 208 : if (!matched)
2098 80 : return false;
2099 : }
2100 :
2101 626 : return true;
2102 : }
2103 :
2104 : /*
2105 : * Find and remove unique self joins in a group of base relations that have
2106 : * the same Oid.
2107 : *
2108 : * Returns a set of relids that were removed.
2109 : */
2110 : static Relids
2111 12614 : remove_self_joins_one_group(PlannerInfo *root, Relids relids)
2112 : {
2113 12614 : Relids result = NULL;
2114 : int k; /* Index of kept relation */
2115 12614 : int r = -1; /* Index of removed relation */
2116 :
2117 41062 : while ((r = bms_next_member(relids, r)) > 0)
2118 : {
2119 28448 : RelOptInfo *inner = root->simple_rel_array[r];
2120 :
2121 : /*
2122 : * We don't accept result relation as either source or target relation
2123 : * of SJE, because result relation has different behavior in
2124 : * EvalPlanQual() and RETURNING clause.
2125 : */
2126 28448 : if (root->parse->resultRelation == r)
2127 252 : continue;
2128 :
2129 28196 : k = r;
2130 :
2131 46564 : while ((k = bms_next_member(relids, k)) > 0)
2132 : {
2133 18994 : Relids joinrelids = NULL;
2134 18994 : RelOptInfo *outer = root->simple_rel_array[k];
2135 : List *restrictlist;
2136 : List *selfjoinquals;
2137 : List *otherjoinquals;
2138 : ListCell *lc;
2139 18994 : bool jinfo_check = true;
2140 18994 : PlanRowMark *omark = NULL;
2141 18994 : PlanRowMark *imark = NULL;
2142 18994 : List *uclauses = NIL;
2143 :
2144 18994 : if (root->parse->resultRelation == k)
2145 18368 : continue;
2146 :
2147 : /* A sanity check: the relations have the same Oid. */
2148 : Assert(root->simple_rte_array[k]->relid ==
2149 : root->simple_rte_array[r]->relid);
2150 :
2151 : /*
2152 : * It is impossible to eliminate join of two relations if they
2153 : * belong to different rules of order. Otherwise planner can't be
2154 : * able to find any variants of correct query plan.
2155 : */
2156 24336 : foreach(lc, root->join_info_list)
2157 : {
2158 18646 : SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc);
2159 :
2160 37292 : if ((bms_is_member(k, info->syn_lefthand) ^
2161 27590 : bms_is_member(r, info->syn_lefthand)) ||
2162 8944 : (bms_is_member(k, info->syn_righthand) ^
2163 8944 : bms_is_member(r, info->syn_righthand)))
2164 : {
2165 13286 : jinfo_check = false;
2166 13286 : break;
2167 : }
2168 : }
2169 18976 : if (!jinfo_check)
2170 13286 : continue;
2171 :
2172 : /*
2173 : * Check Row Marks equivalence. We can't remove the join if the
2174 : * relations have row marks of different strength (e.g. one is
2175 : * locked FOR UPDATE and another just has ROW_MARK_REFERENCE for
2176 : * EvalPlanQual rechecking).
2177 : */
2178 5874 : foreach(lc, root->rowMarks)
2179 : {
2180 344 : PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc);
2181 :
2182 344 : if (rowMark->rti == k)
2183 : {
2184 : Assert(imark == NULL);
2185 160 : imark = rowMark;
2186 : }
2187 184 : else if (rowMark->rti == r)
2188 : {
2189 : Assert(omark == NULL);
2190 160 : omark = rowMark;
2191 : }
2192 :
2193 344 : if (omark && imark)
2194 160 : break;
2195 : }
2196 5690 : if (omark && imark && omark->markType != imark->markType)
2197 52 : continue;
2198 :
2199 : /*
2200 : * We only deal with base rels here, so their relids bitset
2201 : * contains only one member -- their relid.
2202 : */
2203 5638 : joinrelids = bms_add_member(joinrelids, r);
2204 5638 : joinrelids = bms_add_member(joinrelids, k);
2205 :
2206 : /*
2207 : * PHVs should not impose any constraints on removing self joins.
2208 : */
2209 :
2210 : /*
2211 : * At this stage, joininfo lists of inner and outer can contain
2212 : * only clauses, required for a superior outer join that can't
2213 : * influence this optimization. So, we can avoid to call the
2214 : * build_joinrel_restrictlist() routine.
2215 : */
2216 5638 : restrictlist = generate_join_implied_equalities(root, joinrelids,
2217 : inner->relids,
2218 : outer, NULL);
2219 :
2220 : /*
2221 : * Process restrictlist to separate the self join quals out of the
2222 : * other quals. e.g x = x goes to selfjoinquals and a = b to
2223 : * otherjoinquals.
2224 : */
2225 5638 : split_selfjoin_quals(root, restrictlist, &selfjoinquals,
2226 5638 : &otherjoinquals, inner->relid, outer->relid);
2227 :
2228 : /*
2229 : * To enable SJE for the only degenerate case without any self
2230 : * join clauses at all, add baserestrictinfo to this list. The
2231 : * degenerate case works only if both sides have the same clause.
2232 : * So doesn't matter which side to add.
2233 : */
2234 5638 : selfjoinquals = list_concat(selfjoinquals, outer->baserestrictinfo);
2235 :
2236 : /*
2237 : * Determine if the inner table can duplicate outer rows. We must
2238 : * bypass the unique rel cache here since we're possibly using a
2239 : * subset of join quals. We can use 'force_cache' == true when all
2240 : * join quals are self-join quals. Otherwise, we could end up
2241 : * putting false negatives in the cache.
2242 : */
2243 5638 : if (!innerrel_is_unique_ext(root, joinrelids, inner->relids,
2244 : outer, JOIN_INNER, selfjoinquals,
2245 5638 : list_length(otherjoinquals) == 0,
2246 : &uclauses))
2247 4932 : continue;
2248 :
2249 : /*
2250 : * We have proven that for both relations, the same unique index
2251 : * guarantees that there is at most one row where columns equal
2252 : * given values. These values must be the same for both relations,
2253 : * or else we won't match the same row on each side of the join.
2254 : */
2255 706 : if (!match_unique_clauses(root, inner, uclauses, outer->relid))
2256 80 : continue;
2257 :
2258 : /*
2259 : * We can remove either relation, so remove the inner one in order
2260 : * to simplify this loop.
2261 : */
2262 626 : remove_self_join_rel(root, omark, imark, outer, inner, restrictlist);
2263 :
2264 626 : result = bms_add_member(result, r);
2265 :
2266 : /* We have removed the outer relation, try the next one. */
2267 626 : break;
2268 : }
2269 : }
2270 :
2271 12614 : return result;
2272 : }
2273 :
2274 : /*
2275 : * Gather indexes of base relations from the joinlist and try to eliminate self
2276 : * joins.
2277 : */
2278 : static Relids
2279 85044 : remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove)
2280 : {
2281 : ListCell *jl;
2282 85044 : Relids relids = NULL;
2283 85044 : SelfJoinCandidate *candidates = NULL;
2284 : int i;
2285 : int j;
2286 : int numRels;
2287 :
2288 : /* Collect indexes of base relations of the join tree */
2289 284068 : foreach(jl, joinlist)
2290 : {
2291 199024 : Node *jlnode = (Node *) lfirst(jl);
2292 :
2293 199024 : if (IsA(jlnode, RangeTblRef))
2294 : {
2295 195742 : RangeTblRef *ref = (RangeTblRef *) jlnode;
2296 195742 : RangeTblEntry *rte = root->simple_rte_array[ref->rtindex];
2297 :
2298 : /*
2299 : * We only care about base relations from which we select
2300 : * something.
2301 : */
2302 195742 : if (rte->rtekind == RTE_RELATION &&
2303 172232 : rte->relkind == RELKIND_RELATION &&
2304 167474 : root->simple_rel_array[ref->rtindex] != NULL)
2305 : {
2306 : Assert(!bms_is_member(ref->rtindex, relids));
2307 167474 : relids = bms_add_member(relids, ref->rtindex);
2308 : }
2309 : }
2310 3282 : else if (IsA(jlnode, List))
2311 : /* Recursively go inside the sub-joinlist */
2312 3282 : toRemove = remove_self_joins_recurse(root, (List *) jlnode,
2313 : toRemove);
2314 : else
2315 0 : elog(ERROR, "unrecognized joinlist node type: %d",
2316 : (int) nodeTag(jlnode));
2317 : }
2318 :
2319 85044 : numRels = bms_num_members(relids);
2320 :
2321 : /* Need at least two relations for the join */
2322 85044 : if (numRels < 2)
2323 23498 : return toRemove;
2324 :
2325 : /*
2326 : * In order to find relations with the same oid we first build an array of
2327 : * candidates and then sort it by oid.
2328 : */
2329 61546 : candidates = (SelfJoinCandidate *) palloc(sizeof(SelfJoinCandidate) *
2330 : numRels);
2331 61546 : i = -1;
2332 61546 : j = 0;
2333 212484 : while ((i = bms_next_member(relids, i)) >= 0)
2334 : {
2335 150938 : candidates[j].relid = i;
2336 150938 : candidates[j].reloid = root->simple_rte_array[i]->relid;
2337 150938 : j++;
2338 : }
2339 :
2340 61546 : qsort(candidates, numRels, sizeof(SelfJoinCandidate),
2341 : self_join_candidates_cmp);
2342 :
2343 : /*
2344 : * Iteratively form a group of relation indexes with the same oid and
2345 : * launch the routine that detects self-joins in this group and removes
2346 : * excessive range table entries.
2347 : *
2348 : * At the end of the iteration, exclude the group from the overall relids
2349 : * list. So each next iteration of the cycle will involve less and less
2350 : * value of relids.
2351 : */
2352 61546 : i = 0;
2353 212484 : for (j = 1; j < numRels + 1; j++)
2354 : {
2355 150938 : if (j == numRels || candidates[j].reloid != candidates[i].reloid)
2356 : {
2357 135170 : if (j - i >= 2)
2358 : {
2359 : /* Create a group of relation indexes with the same oid */
2360 12554 : Relids group = NULL;
2361 : Relids removed;
2362 :
2363 40876 : while (i < j)
2364 : {
2365 28322 : group = bms_add_member(group, candidates[i].relid);
2366 28322 : i++;
2367 : }
2368 :
2369 12554 : relids = bms_del_members(relids, group);
2370 :
2371 : /*
2372 : * Try to remove self-joins from a group of identical entries.
2373 : * Make the next attempt iteratively - if something is deleted
2374 : * from a group, changes in clauses and equivalence classes
2375 : * can give us a chance to find more candidates.
2376 : */
2377 : do
2378 : {
2379 : Assert(!bms_overlap(group, toRemove));
2380 12614 : removed = remove_self_joins_one_group(root, group);
2381 12614 : toRemove = bms_add_members(toRemove, removed);
2382 12614 : group = bms_del_members(group, removed);
2383 602 : } while (!bms_is_empty(removed) &&
2384 12614 : bms_membership(group) == BMS_MULTIPLE);
2385 12554 : bms_free(removed);
2386 12554 : bms_free(group);
2387 : }
2388 : else
2389 : {
2390 : /* Single relation, just remove it from the set */
2391 122616 : relids = bms_del_member(relids, candidates[i].relid);
2392 122616 : i = j;
2393 : }
2394 : }
2395 : }
2396 :
2397 : Assert(bms_is_empty(relids));
2398 :
2399 61546 : return toRemove;
2400 : }
2401 :
2402 : /*
2403 : * Compare self-join candidates by their oids.
2404 : */
2405 : static int
2406 112738 : self_join_candidates_cmp(const void *a, const void *b)
2407 : {
2408 112738 : const SelfJoinCandidate *ca = (const SelfJoinCandidate *) a;
2409 112738 : const SelfJoinCandidate *cb = (const SelfJoinCandidate *) b;
2410 :
2411 112738 : if (ca->reloid != cb->reloid)
2412 96910 : return (ca->reloid < cb->reloid ? -1 : 1);
2413 : else
2414 15828 : return 0;
2415 : }
2416 :
2417 : /*
2418 : * Find and remove useless self joins.
2419 : *
2420 : * Search for joins where a relation is joined to itself. If the join clause
2421 : * for each tuple from one side of the join is proven to match the same
2422 : * physical row (or nothing) on the other side, that self-join can be
2423 : * eliminated from the query. Suitable join clauses are assumed to be in the
2424 : * form of X = X, and can be replaced with NOT NULL clauses.
2425 : *
2426 : * For the sake of simplicity, we don't apply this optimization to special
2427 : * joins. Here is a list of what we could do in some particular cases:
2428 : * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins,
2429 : * and then removed normally.
2430 : * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND
2431 : * (IS NULL on join columns OR NOT inner quals)'.
2432 : * 'a a1 left join a a2': could simplify to a scan like inner but without
2433 : * NOT NULL conditions on join columns.
2434 : * 'a a1 left join (a a2 join b)': can't simplify this, because join to b
2435 : * can both remove rows and introduce duplicates.
2436 : *
2437 : * To search for removable joins, we order all the relations on their Oid,
2438 : * go over each set with the same Oid, and consider each pair of relations
2439 : * in this set.
2440 : *
2441 : * To remove the join, we mark one of the participating relations as dead
2442 : * and rewrite all references to it to point to the remaining relation.
2443 : * This includes modifying RestrictInfos, EquivalenceClasses, and
2444 : * EquivalenceMembers. We also have to modify the row marks. The join clauses
2445 : * of the removed relation become either restriction or join clauses, based on
2446 : * whether they reference any relations not participating in the removed join.
2447 : *
2448 : * 'targetlist' is the top-level targetlist of the query. If it has any
2449 : * references to the removed relations, we update them to point to the
2450 : * remaining ones.
2451 : */
2452 : List *
2453 276574 : remove_useless_self_joins(PlannerInfo *root, List *joinlist)
2454 : {
2455 276574 : Relids toRemove = NULL;
2456 276574 : int relid = -1;
2457 :
2458 553148 : if (!enable_self_join_removal || joinlist == NIL ||
2459 472242 : (list_length(joinlist) == 1 && !IsA(linitial(joinlist), List)))
2460 194812 : return joinlist;
2461 :
2462 : /*
2463 : * Merge pairs of relations participated in self-join. Remove unnecessary
2464 : * range table entries.
2465 : */
2466 81762 : toRemove = remove_self_joins_recurse(root, joinlist, toRemove);
2467 :
2468 81762 : if (unlikely(toRemove != NULL))
2469 : {
2470 590 : int nremoved = 0;
2471 :
2472 : /* At the end, remove orphaned relation links */
2473 1216 : while ((relid = bms_next_member(toRemove, relid)) >= 0)
2474 626 : joinlist = remove_rel_from_joinlist(joinlist, relid, &nremoved);
2475 : }
2476 :
2477 81762 : return joinlist;
2478 : }
|