Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * analyzejoins.c
4 : * Routines for simplifying joins after initial query analysis
5 : *
6 : * While we do a great deal of join simplification in prep/prepjointree.c,
7 : * certain optimizations cannot be performed at that stage for lack of
8 : * detailed information about the query. The routines here are invoked
9 : * after initsplan.c has done its work, and can do additional join removal
10 : * and simplification steps based on the information extracted. The penalty
11 : * is that we have to work harder to clean up after ourselves when we modify
12 : * the query, since the derived data structures have to be updated too.
13 : *
14 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
15 : * Portions Copyright (c) 1994, Regents of the University of California
16 : *
17 : *
18 : * IDENTIFICATION
19 : * src/backend/optimizer/plan/analyzejoins.c
20 : *
21 : *-------------------------------------------------------------------------
22 : */
23 : #include "postgres.h"
24 :
25 : #include "catalog/pg_class.h"
26 : #include "nodes/nodeFuncs.h"
27 : #include "optimizer/joininfo.h"
28 : #include "optimizer/optimizer.h"
29 : #include "optimizer/pathnode.h"
30 : #include "optimizer/paths.h"
31 : #include "optimizer/placeholder.h"
32 : #include "optimizer/planmain.h"
33 : #include "optimizer/restrictinfo.h"
34 : #include "parser/parse_agg.h"
35 : #include "rewrite/rewriteManip.h"
36 : #include "utils/lsyscache.h"
37 :
38 : /*
39 : * Utility structure. A sorting procedure is needed to simplify the search
40 : * of SJE-candidate baserels referencing the same database relation. Having
41 : * collected all baserels from the query jointree, the planner sorts them
42 : * according to the reloid value, groups them with the next pass and attempts
43 : * to remove self-joins.
44 : *
45 : * Preliminary sorting prevents quadratic behavior that can be harmful in the
46 : * case of numerous joins.
47 : */
48 : typedef struct
49 : {
50 : int relid;
51 : Oid reloid;
52 : } SelfJoinCandidate;
53 :
54 : bool enable_self_join_elimination;
55 :
56 : /* local functions */
57 : static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
58 : static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
59 : SpecialJoinInfo *sjinfo);
60 : static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
61 : int relid, int ojrelid);
62 : static void remove_rel_from_eclass(EquivalenceClass *ec,
63 : SpecialJoinInfo *sjinfo,
64 : int relid, int subst);
65 : static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
66 : static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
67 : static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
68 : List *clause_list, List **extra_clauses);
69 : static Oid distinct_col_search(int colno, List *colnos, List *opids);
70 : static bool is_innerrel_unique_for(PlannerInfo *root,
71 : Relids joinrelids,
72 : Relids outerrelids,
73 : RelOptInfo *innerrel,
74 : JoinType jointype,
75 : List *restrictlist,
76 : List **extra_clauses);
77 : static int self_join_candidates_cmp(const void *a, const void *b);
78 : static bool replace_relid_callback(Node *node,
79 : ChangeVarNodes_context *context);
80 :
81 :
82 : /*
83 : * remove_useless_joins
84 : * Check for relations that don't actually need to be joined at all,
85 : * and remove them from the query.
86 : *
87 : * We are passed the current joinlist and return the updated list. Other
88 : * data structures that have to be updated are accessible via "root".
89 : */
90 : List *
91 341504 : remove_useless_joins(PlannerInfo *root, List *joinlist)
92 : {
93 : ListCell *lc;
94 :
95 : /*
96 : * We are only interested in relations that are left-joined to, so we can
97 : * scan the join_info_list to find them easily.
98 : */
99 352912 : restart:
100 396760 : foreach(lc, root->join_info_list)
101 : {
102 55256 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
103 : int innerrelid;
104 : int nremoved;
105 :
106 : /* Skip if not removable */
107 55256 : if (!join_is_removable(root, sjinfo))
108 43848 : continue;
109 :
110 : /*
111 : * Currently, join_is_removable can only succeed when the sjinfo's
112 : * righthand is a single baserel. Remove that rel from the query and
113 : * joinlist.
114 : */
115 11408 : innerrelid = bms_singleton_member(sjinfo->min_righthand);
116 :
117 11408 : remove_leftjoinrel_from_query(root, innerrelid, sjinfo);
118 :
119 : /* We verify that exactly one reference gets removed from joinlist */
120 11408 : nremoved = 0;
121 11408 : joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
122 11408 : if (nremoved != 1)
123 0 : elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
124 :
125 : /*
126 : * We can delete this SpecialJoinInfo from the list too, since it's no
127 : * longer of interest. (Since we'll restart the foreach loop
128 : * immediately, we don't bother with foreach_delete_current.)
129 : */
130 11408 : root->join_info_list = list_delete_cell(root->join_info_list, lc);
131 :
132 : /*
133 : * Restart the scan. This is necessary to ensure we find all
134 : * removable joins independently of ordering of the join_info_list
135 : * (note that removal of attr_needed bits may make a join appear
136 : * removable that did not before).
137 : */
138 11408 : goto restart;
139 : }
140 :
141 341504 : return joinlist;
142 : }
143 :
144 : /*
145 : * join_is_removable
146 : * Check whether we need not perform this special join at all, because
147 : * it will just duplicate its left input.
148 : *
149 : * This is true for a left join for which the join condition cannot match
150 : * more than one inner-side row. (There are other possibly interesting
151 : * cases, but we don't have the infrastructure to prove them.) We also
152 : * have to check that the inner side doesn't generate any variables needed
153 : * above the join.
154 : */
155 : static bool
156 55256 : join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
157 : {
158 : int innerrelid;
159 : RelOptInfo *innerrel;
160 : Relids inputrelids;
161 : Relids joinrelids;
162 55256 : List *clause_list = NIL;
163 : ListCell *l;
164 : int attroff;
165 :
166 : /*
167 : * Must be a left join to a single baserel, else we aren't going to be
168 : * able to do anything with it.
169 : */
170 55256 : if (sjinfo->jointype != JOIN_LEFT)
171 10104 : return false;
172 :
173 45152 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
174 1308 : return false;
175 :
176 : /*
177 : * Never try to eliminate a left join to the query result rel. Although
178 : * the case is syntactically impossible in standard SQL, MERGE will build
179 : * a join tree that looks exactly like that.
180 : */
181 43844 : if (innerrelid == root->parse->resultRelation)
182 800 : return false;
183 :
184 43044 : innerrel = find_base_rel(root, innerrelid);
185 :
186 : /*
187 : * Before we go to the effort of checking whether any innerrel variables
188 : * are needed above the join, make a quick check to eliminate cases in
189 : * which we will surely be unable to prove uniqueness of the innerrel.
190 : */
191 43044 : if (!rel_supports_distinctness(root, innerrel))
192 3078 : return false;
193 :
194 : /* Compute the relid set for the join we are considering */
195 39966 : inputrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
196 : Assert(sjinfo->ojrelid != 0);
197 39966 : joinrelids = bms_copy(inputrelids);
198 39966 : joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
199 :
200 : /*
201 : * We can't remove the join if any inner-rel attributes are used above the
202 : * join. Here, "above" the join includes pushed-down conditions, so we
203 : * should reject if attr_needed includes the OJ's own relid; therefore,
204 : * compare to inputrelids not joinrelids.
205 : *
206 : * As a micro-optimization, it seems better to start with max_attr and
207 : * count down rather than starting with min_attr and counting up, on the
208 : * theory that the system attributes are somewhat less likely to be wanted
209 : * and should be tested last.
210 : */
211 39966 : for (attroff = innerrel->max_attr - innerrel->min_attr;
212 375334 : attroff >= 0;
213 335368 : attroff--)
214 : {
215 363636 : if (!bms_is_subset(innerrel->attr_needed[attroff], inputrelids))
216 28268 : return false;
217 : }
218 :
219 : /*
220 : * Similarly check that the inner rel isn't needed by any PlaceHolderVars
221 : * that will be used above the join. The PHV case is a little bit more
222 : * complicated, because PHVs may have been assigned a ph_eval_at location
223 : * that includes the innerrel, yet their contained expression might not
224 : * actually reference the innerrel (it could be just a constant, for
225 : * instance). If such a PHV is due to be evaluated above the join then it
226 : * needn't prevent join removal.
227 : */
228 11902 : foreach(l, root->placeholder_list)
229 : {
230 240 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
231 :
232 240 : if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
233 36 : return false; /* it references innerrel laterally */
234 240 : if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
235 54 : continue; /* it definitely doesn't reference innerrel */
236 186 : if (bms_is_subset(phinfo->ph_needed, inputrelids))
237 6 : continue; /* PHV is not used above the join */
238 180 : if (!bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
239 30 : return false; /* it has to be evaluated below the join */
240 :
241 : /*
242 : * We need to be sure there will still be a place to evaluate the PHV
243 : * if we remove the join, ie that ph_eval_at wouldn't become empty.
244 : */
245 150 : if (!bms_overlap(sjinfo->min_lefthand, phinfo->ph_eval_at))
246 6 : return false; /* there isn't any other place to eval PHV */
247 : /* Check contained expression last, since this is a bit expensive */
248 144 : if (bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
249 144 : innerrel->relids))
250 0 : return false; /* contained expression references innerrel */
251 : }
252 :
253 : /*
254 : * Search for mergejoinable clauses that constrain the inner rel against
255 : * either the outer rel or a pseudoconstant. If an operator is
256 : * mergejoinable then it behaves like equality for some btree opclass, so
257 : * it's what we want. The mergejoinability test also eliminates clauses
258 : * containing volatile functions, which we couldn't depend on.
259 : */
260 23496 : foreach(l, innerrel->joininfo)
261 : {
262 11834 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
263 :
264 : /*
265 : * If the current join commutes with some other outer join(s) via
266 : * outer join identity 3, there will be multiple clones of its join
267 : * clauses in the joininfo list. We want to consider only the
268 : * has_clone form of such clauses. Processing more than one form
269 : * would be wasteful, and also some of the others would confuse the
270 : * RINFO_IS_PUSHED_DOWN test below.
271 : */
272 11834 : if (restrictinfo->is_clone)
273 100 : continue; /* ignore it */
274 :
275 : /*
276 : * If it's not a join clause for this outer join, we can't use it.
277 : * Note that if the clause is pushed-down, then it is logically from
278 : * above the outer join, even if it references no other rels (it might
279 : * be from WHERE, for example).
280 : */
281 11734 : if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
282 126 : continue; /* ignore; not useful here */
283 :
284 : /* Ignore if it's not a mergejoinable clause */
285 11608 : if (!restrictinfo->can_join ||
286 11490 : restrictinfo->mergeopfamilies == NIL)
287 118 : continue; /* not mergejoinable */
288 :
289 : /*
290 : * Check if the clause has the form "outer op inner" or "inner op
291 : * outer", and if so mark which side is inner.
292 : */
293 11490 : if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
294 : innerrel->relids))
295 6 : continue; /* no good for these input relations */
296 :
297 : /* OK, add to list */
298 11484 : clause_list = lappend(clause_list, restrictinfo);
299 : }
300 :
301 : /*
302 : * Now that we have the relevant equality join clauses, try to prove the
303 : * innerrel distinct.
304 : */
305 11662 : if (rel_is_distinct_for(root, innerrel, clause_list, NULL))
306 11408 : return true;
307 :
308 : /*
309 : * Some day it would be nice to check for other methods of establishing
310 : * distinctness.
311 : */
312 254 : return false;
313 : }
314 :
315 :
316 : /*
317 : * Remove the target rel->relid and references to the target join from the
318 : * planner's data structures, having determined that there is no need
319 : * to include them in the query. Optionally replace them with subst if subst
320 : * is non-negative.
321 : *
322 : * This function updates only parts needed for both left-join removal and
323 : * self-join removal.
324 : */
325 : static void
326 12008 : remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel,
327 : int subst, SpecialJoinInfo *sjinfo,
328 : Relids joinrelids)
329 : {
330 12008 : int relid = rel->relid;
331 : Index rti;
332 : ListCell *l;
333 :
334 : /*
335 : * Update all_baserels and related relid sets.
336 : */
337 12008 : root->all_baserels = adjust_relid_set(root->all_baserels, relid, subst);
338 12008 : root->all_query_rels = adjust_relid_set(root->all_query_rels, relid, subst);
339 :
340 12008 : if (sjinfo != NULL)
341 : {
342 22816 : root->outer_join_rels = bms_del_member(root->outer_join_rels,
343 11408 : sjinfo->ojrelid);
344 11408 : root->all_query_rels = bms_del_member(root->all_query_rels,
345 11408 : sjinfo->ojrelid);
346 : }
347 :
348 : /*
349 : * Likewise remove references from SpecialJoinInfo data structures.
350 : *
351 : * This is relevant in case the outer join we're deleting is nested inside
352 : * other outer joins: the upper joins' relid sets have to be adjusted. The
353 : * RHS of the target outer join will be made empty here, but that's OK
354 : * since caller will delete that SpecialJoinInfo entirely.
355 : */
356 27736 : foreach(l, root->join_info_list)
357 : {
358 15728 : SpecialJoinInfo *sjinf = (SpecialJoinInfo *) lfirst(l);
359 :
360 : /*
361 : * initsplan.c is fairly cavalier about allowing SpecialJoinInfos'
362 : * lefthand/righthand relid sets to be shared with other data
363 : * structures. Ensure that we don't modify the original relid sets.
364 : * (The commute_xxx sets are always per-SpecialJoinInfo though.)
365 : */
366 15728 : sjinf->min_lefthand = bms_copy(sjinf->min_lefthand);
367 15728 : sjinf->min_righthand = bms_copy(sjinf->min_righthand);
368 15728 : sjinf->syn_lefthand = bms_copy(sjinf->syn_lefthand);
369 15728 : sjinf->syn_righthand = bms_copy(sjinf->syn_righthand);
370 : /* Now remove relid from the sets: */
371 15728 : sjinf->min_lefthand = adjust_relid_set(sjinf->min_lefthand, relid, subst);
372 15728 : sjinf->min_righthand = adjust_relid_set(sjinf->min_righthand, relid, subst);
373 15728 : sjinf->syn_lefthand = adjust_relid_set(sjinf->syn_lefthand, relid, subst);
374 15728 : sjinf->syn_righthand = adjust_relid_set(sjinf->syn_righthand, relid, subst);
375 :
376 15728 : if (sjinfo != NULL)
377 : {
378 : Assert(subst <= 0);
379 :
380 : /* Remove sjinfo->ojrelid bits from the sets: */
381 31252 : sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand,
382 15626 : sjinfo->ojrelid);
383 31252 : sjinf->min_righthand = bms_del_member(sjinf->min_righthand,
384 15626 : sjinfo->ojrelid);
385 31252 : sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand,
386 15626 : sjinfo->ojrelid);
387 31252 : sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand,
388 15626 : sjinfo->ojrelid);
389 : /* relid cannot appear in these fields, but ojrelid can: */
390 31252 : sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l,
391 15626 : sjinfo->ojrelid);
392 31252 : sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r,
393 15626 : sjinfo->ojrelid);
394 31252 : sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l,
395 15626 : sjinfo->ojrelid);
396 15626 : sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r,
397 15626 : sjinfo->ojrelid);
398 : }
399 : else
400 : {
401 : Assert(subst > 0);
402 :
403 102 : ChangeVarNodesExtended((Node *) sjinf->semi_rhs_exprs, relid, subst,
404 : 0, replace_relid_callback);
405 : }
406 : }
407 :
408 : /*
409 : * Likewise remove references from PlaceHolderVar data structures,
410 : * removing any no-longer-needed placeholders entirely. We remove PHV
411 : * only for left-join removal. With self-join elimination, PHVs already
412 : * get moved to the remaining relation, where they might still be needed.
413 : * It might also happen that we skip the removal of some PHVs that could
414 : * be removed. However, the overhead of extra PHVs is small compared to
415 : * the complexity of analysis needed to remove them.
416 : *
417 : * Removal is a bit trickier than it might seem: we can remove PHVs that
418 : * are used at the target rel and/or in the join qual, but not those that
419 : * are used at join partner rels or above the join. It's not that easy to
420 : * distinguish PHVs used at partner rels from those used in the join qual,
421 : * since they will both have ph_needed sets that are subsets of
422 : * joinrelids. However, a PHV used at a partner rel could not have the
423 : * target rel in ph_eval_at, so we check that while deciding whether to
424 : * remove or just update the PHV. There is no corresponding test in
425 : * join_is_removable because it doesn't need to distinguish those cases.
426 : */
427 12236 : foreach(l, root->placeholder_list)
428 : {
429 228 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
430 :
431 : Assert(sjinfo == NULL || !bms_is_member(relid, phinfo->ph_lateral));
432 414 : if (sjinfo != NULL &&
433 216 : bms_is_subset(phinfo->ph_needed, joinrelids) &&
434 30 : bms_is_member(relid, phinfo->ph_eval_at) &&
435 12 : !bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
436 : {
437 : /*
438 : * This code shouldn't be executed if one relation is substituted
439 : * with another: in this case, the placeholder may be employed in
440 : * a filter inside the scan node the SJE removes.
441 : */
442 6 : root->placeholder_list = foreach_delete_current(root->placeholder_list,
443 : l);
444 6 : root->placeholder_array[phinfo->phid] = NULL;
445 : }
446 : else
447 : {
448 222 : PlaceHolderVar *phv = phinfo->ph_var;
449 :
450 222 : phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at, relid, subst);
451 222 : if (sjinfo != NULL)
452 180 : phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at,
453 180 : sjinfo->ojrelid, subst);
454 : Assert(!bms_is_empty(phinfo->ph_eval_at)); /* checked previously */
455 : /* Reduce ph_needed to contain only "relation 0"; see below */
456 222 : if (bms_is_member(0, phinfo->ph_needed))
457 108 : phinfo->ph_needed = bms_make_singleton(0);
458 : else
459 114 : phinfo->ph_needed = NULL;
460 :
461 222 : phinfo->ph_lateral = adjust_relid_set(phinfo->ph_lateral, relid, subst);
462 :
463 : /*
464 : * ph_lateral might contain rels mentioned in ph_eval_at after the
465 : * replacement, remove them.
466 : */
467 222 : phinfo->ph_lateral = bms_difference(phinfo->ph_lateral, phinfo->ph_eval_at);
468 : /* ph_lateral might or might not be empty */
469 :
470 222 : phv->phrels = adjust_relid_set(phv->phrels, relid, subst);
471 222 : if (sjinfo != NULL)
472 180 : phv->phrels = adjust_relid_set(phv->phrels,
473 180 : sjinfo->ojrelid, subst);
474 : Assert(!bms_is_empty(phv->phrels));
475 :
476 222 : ChangeVarNodesExtended((Node *) phv->phexpr, relid, subst, 0,
477 : replace_relid_callback);
478 :
479 : Assert(phv->phnullingrels == NULL); /* no need to adjust */
480 : }
481 : }
482 :
483 : /*
484 : * Likewise remove references from EquivalenceClasses.
485 : */
486 63332 : foreach(l, root->eq_classes)
487 : {
488 51324 : EquivalenceClass *ec = (EquivalenceClass *) lfirst(l);
489 :
490 51324 : if (bms_is_member(relid, ec->ec_relids) ||
491 34442 : (sjinfo == NULL || bms_is_member(sjinfo->ojrelid, ec->ec_relids)))
492 16882 : remove_rel_from_eclass(ec, sjinfo, relid, subst);
493 : }
494 :
495 : /*
496 : * Finally, we must recompute per-Var attr_needed and per-PlaceHolderVar
497 : * ph_needed relid sets. These have to be known accurately, else we may
498 : * fail to remove other now-removable outer joins. And our removal of the
499 : * join clause(s) for this outer join may mean that Vars that were
500 : * formerly needed no longer are. So we have to do this honestly by
501 : * repeating the construction of those relid sets. We can cheat to one
502 : * small extent: we can avoid re-examining the targetlist and HAVING qual
503 : * by preserving "relation 0" bits from the existing relid sets. This is
504 : * safe because we'd never remove such references.
505 : *
506 : * So, start by removing all other bits from attr_needed sets and
507 : * lateral_vars lists. (We already did this above for ph_needed.)
508 : */
509 76666 : for (rti = 1; rti < root->simple_rel_array_size; rti++)
510 : {
511 64658 : RelOptInfo *otherrel = root->simple_rel_array[rti];
512 : int attroff;
513 :
514 : /* there may be empty slots corresponding to non-baserel RTEs */
515 64658 : if (otherrel == NULL)
516 32028 : continue;
517 :
518 : Assert(otherrel->relid == rti); /* sanity check on array */
519 :
520 32630 : for (attroff = otherrel->max_attr - otherrel->min_attr;
521 730224 : attroff >= 0;
522 697594 : attroff--)
523 : {
524 697594 : if (bms_is_member(0, otherrel->attr_needed[attroff]))
525 47314 : otherrel->attr_needed[attroff] = bms_make_singleton(0);
526 : else
527 650280 : otherrel->attr_needed[attroff] = NULL;
528 : }
529 :
530 32630 : if (subst > 0)
531 1496 : ChangeVarNodesExtended((Node *) otherrel->lateral_vars, relid,
532 : subst, 0, replace_relid_callback);
533 : }
534 12008 : }
535 :
536 : /*
537 : * Remove the target relid and references to the target join from the
538 : * planner's data structures, having determined that there is no need
539 : * to include them in the query.
540 : *
541 : * We are not terribly thorough here. We only bother to update parts of
542 : * the planner's data structures that will actually be consulted later.
543 : */
544 : static void
545 11408 : remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
546 : SpecialJoinInfo *sjinfo)
547 : {
548 11408 : RelOptInfo *rel = find_base_rel(root, relid);
549 11408 : int ojrelid = sjinfo->ojrelid;
550 : Relids joinrelids;
551 : Relids join_plus_commute;
552 : List *joininfos;
553 : ListCell *l;
554 :
555 : /* Compute the relid set for the join we are considering */
556 11408 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
557 : Assert(ojrelid != 0);
558 11408 : joinrelids = bms_add_member(joinrelids, ojrelid);
559 :
560 11408 : remove_rel_from_query(root, rel, -1, sjinfo, joinrelids);
561 :
562 : /*
563 : * Remove any joinquals referencing the rel from the joininfo lists.
564 : *
565 : * In some cases, a joinqual has to be put back after deleting its
566 : * reference to the target rel. This can occur for pseudoconstant and
567 : * outerjoin-delayed quals, which can get marked as requiring the rel in
568 : * order to force them to be evaluated at or above the join. We can't
569 : * just discard them, though. Only quals that logically belonged to the
570 : * outer join being discarded should be removed from the query.
571 : *
572 : * We might encounter a qual that is a clone of a deletable qual with some
573 : * outer-join relids added (see deconstruct_distribute_oj_quals). To
574 : * ensure we get rid of such clones as well, add the relids of all OJs
575 : * commutable with this one to the set we test against for
576 : * pushed-down-ness.
577 : */
578 11408 : join_plus_commute = bms_union(joinrelids,
579 11408 : sjinfo->commute_above_r);
580 11408 : join_plus_commute = bms_add_members(join_plus_commute,
581 11408 : sjinfo->commute_below_l);
582 :
583 : /*
584 : * We must make a copy of the rel's old joininfo list before starting the
585 : * loop, because otherwise remove_join_clause_from_rels would destroy the
586 : * list while we're scanning it.
587 : */
588 11408 : joininfos = list_copy(rel->joininfo);
589 23012 : foreach(l, joininfos)
590 : {
591 11604 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
592 :
593 11604 : remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
594 :
595 11604 : if (RINFO_IS_PUSHED_DOWN(rinfo, join_plus_commute))
596 : {
597 : /*
598 : * There might be references to relid or ojrelid in the
599 : * RestrictInfo's relid sets, as a consequence of PHVs having had
600 : * ph_eval_at sets that include those. We already checked above
601 : * that any such PHV is safe (and updated its ph_eval_at), so we
602 : * can just drop those references.
603 : */
604 126 : remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
605 :
606 : /*
607 : * Cross-check that the clause itself does not reference the
608 : * target rel or join.
609 : */
610 : #ifdef USE_ASSERT_CHECKING
611 : {
612 : Relids clause_varnos = pull_varnos(root,
613 : (Node *) rinfo->clause);
614 :
615 : Assert(!bms_is_member(relid, clause_varnos));
616 : Assert(!bms_is_member(ojrelid, clause_varnos));
617 : }
618 : #endif
619 : /* Now throw it back into the joininfo lists */
620 126 : distribute_restrictinfo_to_rels(root, rinfo);
621 : }
622 : }
623 :
624 : /*
625 : * There may be references to the rel in root->fkey_list, but if so,
626 : * match_foreign_keys_to_quals() will get rid of them.
627 : */
628 :
629 : /*
630 : * Now remove the rel from the baserel array to prevent it from being
631 : * referenced again. (We can't do this earlier because
632 : * remove_join_clause_from_rels will touch it.)
633 : */
634 11408 : root->simple_rel_array[relid] = NULL;
635 11408 : root->simple_rte_array[relid] = NULL;
636 :
637 : /* And nuke the RelOptInfo, just in case there's another access path */
638 11408 : pfree(rel);
639 :
640 : /*
641 : * Now repeat construction of attr_needed bits coming from all other
642 : * sources.
643 : */
644 11408 : rebuild_placeholder_attr_needed(root);
645 11408 : rebuild_joinclause_attr_needed(root);
646 11408 : rebuild_eclass_attr_needed(root);
647 11408 : rebuild_lateral_attr_needed(root);
648 11408 : }
649 :
650 : /*
651 : * Remove any references to relid or ojrelid from the RestrictInfo.
652 : *
653 : * We only bother to clean out bits in clause_relids and required_relids,
654 : * not nullingrel bits in contained Vars and PHVs. (This might have to be
655 : * improved sometime.) However, if the RestrictInfo contains an OR clause
656 : * we have to also clean up the sub-clauses.
657 : */
658 : static void
659 4632 : remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid)
660 : {
661 : /*
662 : * initsplan.c is fairly cavalier about allowing RestrictInfos to share
663 : * relid sets with other RestrictInfos, and SpecialJoinInfos too. Make
664 : * sure this RestrictInfo has its own relid sets before we modify them.
665 : * (In present usage, clause_relids is probably not shared, but
666 : * required_relids could be; let's not assume anything.)
667 : */
668 4632 : rinfo->clause_relids = bms_copy(rinfo->clause_relids);
669 4632 : rinfo->clause_relids = bms_del_member(rinfo->clause_relids, relid);
670 4632 : rinfo->clause_relids = bms_del_member(rinfo->clause_relids, ojrelid);
671 : /* Likewise for required_relids */
672 4632 : rinfo->required_relids = bms_copy(rinfo->required_relids);
673 4632 : rinfo->required_relids = bms_del_member(rinfo->required_relids, relid);
674 4632 : rinfo->required_relids = bms_del_member(rinfo->required_relids, ojrelid);
675 :
676 : /* If it's an OR, recurse to clean up sub-clauses */
677 4632 : if (restriction_is_or_clause(rinfo))
678 : {
679 : ListCell *lc;
680 :
681 : Assert(is_orclause(rinfo->orclause));
682 18 : foreach(lc, ((BoolExpr *) rinfo->orclause)->args)
683 : {
684 12 : Node *orarg = (Node *) lfirst(lc);
685 :
686 : /* OR arguments should be ANDs or sub-RestrictInfos */
687 12 : if (is_andclause(orarg))
688 : {
689 0 : List *andargs = ((BoolExpr *) orarg)->args;
690 : ListCell *lc2;
691 :
692 0 : foreach(lc2, andargs)
693 : {
694 0 : RestrictInfo *rinfo2 = lfirst_node(RestrictInfo, lc2);
695 :
696 0 : remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
697 : }
698 : }
699 : else
700 : {
701 12 : RestrictInfo *rinfo2 = castNode(RestrictInfo, orarg);
702 :
703 12 : remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
704 : }
705 : }
706 : }
707 4632 : }
708 :
709 : /*
710 : * Remove any references to relid or sjinfo->ojrelid (if sjinfo != NULL)
711 : * from the EquivalenceClass.
712 : *
713 : * Like remove_rel_from_restrictinfo, we don't worry about cleaning out
714 : * any nullingrel bits in contained Vars and PHVs. (This might have to be
715 : * improved sometime.) We do need to fix the EC and EM relid sets to ensure
716 : * that implied join equalities will be generated at the appropriate join
717 : * level(s).
718 : */
719 : static void
720 16882 : remove_rel_from_eclass(EquivalenceClass *ec, SpecialJoinInfo *sjinfo,
721 : int relid, int subst)
722 : {
723 : ListCell *lc;
724 :
725 : /* Fix up the EC's overall relids */
726 16882 : ec->ec_relids = adjust_relid_set(ec->ec_relids, relid, subst);
727 16882 : if (sjinfo != NULL)
728 15814 : ec->ec_relids = adjust_relid_set(ec->ec_relids,
729 15814 : sjinfo->ojrelid, subst);
730 :
731 : /*
732 : * We don't expect any EC child members to exist at this point. Ensure
733 : * that's the case, otherwise, we might be getting asked to do something
734 : * this function hasn't been coded for.
735 : */
736 : Assert(ec->ec_childmembers == NULL);
737 :
738 : /*
739 : * Fix up the member expressions. Any non-const member that ends with
740 : * empty em_relids must be a Var or PHV of the removed relation. We don't
741 : * need it anymore, so we can drop it.
742 : */
743 39040 : foreach(lc, ec->ec_members)
744 : {
745 22158 : EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
746 :
747 22158 : if (bms_is_member(relid, cur_em->em_relids) ||
748 4494 : (sjinfo != NULL && bms_is_member(sjinfo->ojrelid,
749 4494 : cur_em->em_relids)))
750 : {
751 : Assert(!cur_em->em_is_const);
752 15814 : cur_em->em_relids = adjust_relid_set(cur_em->em_relids, relid, subst);
753 15814 : if (sjinfo != NULL)
754 15814 : cur_em->em_relids = adjust_relid_set(cur_em->em_relids,
755 15814 : sjinfo->ojrelid, subst);
756 15814 : if (bms_is_empty(cur_em->em_relids))
757 15802 : ec->ec_members = foreach_delete_current(ec->ec_members, lc);
758 : }
759 : }
760 :
761 : /* Fix up the source clauses, in case we can re-use them later */
762 22432 : foreach(lc, ec->ec_sources)
763 : {
764 5550 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
765 :
766 5550 : if (sjinfo == NULL)
767 1056 : ChangeVarNodesExtended((Node *) rinfo, relid, subst, 0,
768 : replace_relid_callback);
769 : else
770 4494 : remove_rel_from_restrictinfo(rinfo, relid, sjinfo->ojrelid);
771 : }
772 :
773 : /*
774 : * Rather than expend code on fixing up any already-derived clauses, just
775 : * drop them. (At this point, any such clauses would be base restriction
776 : * clauses, which we'd not need anymore anyway.)
777 : */
778 16882 : ec_clear_derived_clauses(ec);
779 16882 : }
780 :
781 : /*
782 : * Remove any occurrences of the target relid from a joinlist structure.
783 : *
784 : * It's easiest to build a whole new list structure, so we handle it that
785 : * way. Efficiency is not a big deal here.
786 : *
787 : * *nremoved is incremented by the number of occurrences removed (there
788 : * should be exactly one, but the caller checks that).
789 : */
790 : static List *
791 12272 : remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
792 : {
793 12272 : List *result = NIL;
794 : ListCell *jl;
795 :
796 45166 : foreach(jl, joinlist)
797 : {
798 32894 : Node *jlnode = (Node *) lfirst(jl);
799 :
800 32894 : if (IsA(jlnode, RangeTblRef))
801 : {
802 32630 : int varno = ((RangeTblRef *) jlnode)->rtindex;
803 :
804 32630 : if (varno == relid)
805 12008 : (*nremoved)++;
806 : else
807 20622 : result = lappend(result, jlnode);
808 : }
809 264 : else if (IsA(jlnode, List))
810 : {
811 : /* Recurse to handle subproblem */
812 : List *sublist;
813 :
814 264 : sublist = remove_rel_from_joinlist((List *) jlnode,
815 : relid, nremoved);
816 : /* Avoid including empty sub-lists in the result */
817 264 : if (sublist)
818 264 : result = lappend(result, sublist);
819 : }
820 : else
821 : {
822 0 : elog(ERROR, "unrecognized joinlist node type: %d",
823 : (int) nodeTag(jlnode));
824 : }
825 : }
826 :
827 12272 : return result;
828 : }
829 :
830 :
831 : /*
832 : * reduce_unique_semijoins
833 : * Check for semijoins that can be simplified to plain inner joins
834 : * because the inner relation is provably unique for the join clauses.
835 : *
836 : * Ideally this would happen during reduce_outer_joins, but we don't have
837 : * enough information at that point.
838 : *
839 : * To perform the strength reduction when applicable, we need only delete
840 : * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't
841 : * bother fixing the join type attributed to it in the query jointree,
842 : * since that won't be consulted again.)
843 : */
844 : void
845 341504 : reduce_unique_semijoins(PlannerInfo *root)
846 : {
847 : ListCell *lc;
848 :
849 : /*
850 : * Scan the join_info_list to find semijoins.
851 : */
852 384908 : foreach(lc, root->join_info_list)
853 : {
854 43404 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
855 : int innerrelid;
856 : RelOptInfo *innerrel;
857 : Relids joinrelids;
858 : List *restrictlist;
859 :
860 : /*
861 : * Must be a semijoin to a single baserel, else we aren't going to be
862 : * able to do anything with it.
863 : */
864 43404 : if (sjinfo->jointype != JOIN_SEMI)
865 43018 : continue;
866 :
867 5142 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
868 182 : continue;
869 :
870 4960 : innerrel = find_base_rel(root, innerrelid);
871 :
872 : /*
873 : * Before we trouble to run generate_join_implied_equalities, make a
874 : * quick check to eliminate cases in which we will surely be unable to
875 : * prove uniqueness of the innerrel.
876 : */
877 4960 : if (!rel_supports_distinctness(root, innerrel))
878 1070 : continue;
879 :
880 : /* Compute the relid set for the join we are considering */
881 3890 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
882 : Assert(sjinfo->ojrelid == 0); /* SEMI joins don't have RT indexes */
883 :
884 : /*
885 : * Since we're only considering a single-rel RHS, any join clauses it
886 : * has must be clauses linking it to the semijoin's min_lefthand. We
887 : * can also consider EC-derived join clauses.
888 : */
889 : restrictlist =
890 3890 : list_concat(generate_join_implied_equalities(root,
891 : joinrelids,
892 : sjinfo->min_lefthand,
893 : innerrel,
894 : NULL),
895 3890 : innerrel->joininfo);
896 :
897 : /* Test whether the innerrel is unique for those clauses. */
898 3890 : if (!innerrel_is_unique(root,
899 : joinrelids, sjinfo->min_lefthand, innerrel,
900 : JOIN_SEMI, restrictlist, true))
901 3504 : continue;
902 :
903 : /* OK, remove the SpecialJoinInfo from the list. */
904 386 : root->join_info_list = foreach_delete_current(root->join_info_list, lc);
905 : }
906 341504 : }
907 :
908 :
909 : /*
910 : * rel_supports_distinctness
911 : * Could the relation possibly be proven distinct on some set of columns?
912 : *
913 : * This is effectively a pre-checking function for rel_is_distinct_for().
914 : * It must return true if rel_is_distinct_for() could possibly return true
915 : * with this rel, but it should not expend a lot of cycles. The idea is
916 : * that callers can avoid doing possibly-expensive processing to compute
917 : * rel_is_distinct_for()'s argument lists if the call could not possibly
918 : * succeed.
919 : */
920 : static bool
921 693684 : rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
922 : {
923 : /* We only know about baserels ... */
924 693684 : if (rel->reloptkind != RELOPT_BASEREL)
925 252120 : return false;
926 441564 : if (rel->rtekind == RTE_RELATION)
927 : {
928 : /*
929 : * For a plain relation, we only know how to prove uniqueness by
930 : * reference to unique indexes. Make sure there's at least one
931 : * suitable unique index. It must be immediately enforced, and not a
932 : * partial index. (Keep these conditions in sync with
933 : * relation_has_unique_index_for!)
934 : */
935 : ListCell *lc;
936 :
937 546440 : foreach(lc, rel->indexlist)
938 : {
939 479252 : IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
940 :
941 479252 : if (ind->unique && ind->immediate && ind->indpred == NIL)
942 336796 : return true;
943 : }
944 : }
945 37580 : else if (rel->rtekind == RTE_SUBQUERY)
946 : {
947 10852 : Query *subquery = root->simple_rte_array[rel->relid]->subquery;
948 :
949 : /* Check if the subquery has any qualities that support distinctness */
950 10852 : if (query_supports_distinctness(subquery))
951 9020 : return true;
952 : }
953 : /* We have no proof rules for any other rtekinds. */
954 95748 : return false;
955 : }
956 :
957 : /*
958 : * rel_is_distinct_for
959 : * Does the relation return only distinct rows according to clause_list?
960 : *
961 : * clause_list is a list of join restriction clauses involving this rel and
962 : * some other one. Return true if no two rows emitted by this rel could
963 : * possibly join to the same row of the other rel.
964 : *
965 : * The caller must have already determined that each condition is a
966 : * mergejoinable equality with an expression in this relation on one side, and
967 : * an expression not involving this relation on the other. The transient
968 : * outer_is_left flag is used to identify which side references this relation:
969 : * left side if outer_is_left is false, right side if it is true.
970 : *
971 : * Note that the passed-in clause_list may be destructively modified! This
972 : * is OK for current uses, because the clause_list is built by the caller for
973 : * the sole purpose of passing to this function.
974 : *
975 : * (*extra_clauses) to be set to the right sides of baserestrictinfo clauses,
976 : * looking like "x = const" if distinctness is derived from such clauses, not
977 : * joininfo clauses. Pass NULL to the extra_clauses if this value is not
978 : * needed.
979 : */
980 : static bool
981 226954 : rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list,
982 : List **extra_clauses)
983 : {
984 : /*
985 : * We could skip a couple of tests here if we assume all callers checked
986 : * rel_supports_distinctness first, but it doesn't seem worth taking any
987 : * risk for.
988 : */
989 226954 : if (rel->reloptkind != RELOPT_BASEREL)
990 0 : return false;
991 226954 : if (rel->rtekind == RTE_RELATION)
992 : {
993 : /*
994 : * Examine the indexes to see if we have a matching unique index.
995 : * relation_has_unique_index_for automatically adds any usable
996 : * restriction clauses for the rel, so we needn't do that here.
997 : */
998 221728 : if (relation_has_unique_index_for(root, rel, clause_list, extra_clauses))
999 125964 : return true;
1000 : }
1001 5226 : else if (rel->rtekind == RTE_SUBQUERY)
1002 : {
1003 5226 : Index relid = rel->relid;
1004 5226 : Query *subquery = root->simple_rte_array[relid]->subquery;
1005 5226 : List *colnos = NIL;
1006 5226 : List *opids = NIL;
1007 : ListCell *l;
1008 :
1009 : /*
1010 : * Build the argument lists for query_is_distinct_for: a list of
1011 : * output column numbers that the query needs to be distinct over, and
1012 : * a list of equality operators that the output columns need to be
1013 : * distinct according to.
1014 : *
1015 : * (XXX we are not considering restriction clauses attached to the
1016 : * subquery; is that worth doing?)
1017 : */
1018 10272 : foreach(l, clause_list)
1019 : {
1020 5046 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
1021 : Oid op;
1022 : Var *var;
1023 :
1024 : /*
1025 : * Get the equality operator we need uniqueness according to.
1026 : * (This might be a cross-type operator and thus not exactly the
1027 : * same operator the subquery would consider; that's all right
1028 : * since query_is_distinct_for can resolve such cases.) The
1029 : * caller's mergejoinability test should have selected only
1030 : * OpExprs.
1031 : */
1032 5046 : op = castNode(OpExpr, rinfo->clause)->opno;
1033 :
1034 : /* caller identified the inner side for us */
1035 5046 : if (rinfo->outer_is_left)
1036 4696 : var = (Var *) get_rightop(rinfo->clause);
1037 : else
1038 350 : var = (Var *) get_leftop(rinfo->clause);
1039 :
1040 : /*
1041 : * We may ignore any RelabelType node above the operand. (There
1042 : * won't be more than one, since eval_const_expressions() has been
1043 : * applied already.)
1044 : */
1045 5046 : if (var && IsA(var, RelabelType))
1046 3236 : var = (Var *) ((RelabelType *) var)->arg;
1047 :
1048 : /*
1049 : * If inner side isn't a Var referencing a subquery output column,
1050 : * this clause doesn't help us.
1051 : */
1052 5046 : if (!var || !IsA(var, Var) ||
1053 5034 : var->varno != relid || var->varlevelsup != 0)
1054 12 : continue;
1055 :
1056 5034 : colnos = lappend_int(colnos, var->varattno);
1057 5034 : opids = lappend_oid(opids, op);
1058 : }
1059 :
1060 5226 : if (query_is_distinct_for(subquery, colnos, opids))
1061 468 : return true;
1062 : }
1063 100522 : return false;
1064 : }
1065 :
1066 :
1067 : /*
1068 : * query_supports_distinctness - could the query possibly be proven distinct
1069 : * on some set of output columns?
1070 : *
1071 : * This is effectively a pre-checking function for query_is_distinct_for().
1072 : * It must return true if query_is_distinct_for() could possibly return true
1073 : * with this query, but it should not expend a lot of cycles. The idea is
1074 : * that callers can avoid doing possibly-expensive processing to compute
1075 : * query_is_distinct_for()'s argument lists if the call could not possibly
1076 : * succeed.
1077 : */
1078 : bool
1079 10852 : query_supports_distinctness(Query *query)
1080 : {
1081 : /* SRFs break distinctness except with DISTINCT, see below */
1082 10852 : if (query->hasTargetSRFs && query->distinctClause == NIL)
1083 1036 : return false;
1084 :
1085 : /* check for features we can prove distinctness with */
1086 9816 : if (query->distinctClause != NIL ||
1087 9666 : query->groupClause != NIL ||
1088 9468 : query->groupingSets != NIL ||
1089 9420 : query->hasAggs ||
1090 9004 : query->havingQual ||
1091 9004 : query->setOperations)
1092 9020 : return true;
1093 :
1094 796 : return false;
1095 : }
1096 :
1097 : /*
1098 : * query_is_distinct_for - does query never return duplicates of the
1099 : * specified columns?
1100 : *
1101 : * query is a not-yet-planned subquery (in current usage, it's always from
1102 : * a subquery RTE, which the planner avoids scribbling on).
1103 : *
1104 : * colnos is an integer list of output column numbers (resno's). We are
1105 : * interested in whether rows consisting of just these columns are certain
1106 : * to be distinct. "Distinctness" is defined according to whether the
1107 : * corresponding upper-level equality operators listed in opids would think
1108 : * the values are distinct. (Note: the opids entries could be cross-type
1109 : * operators, and thus not exactly the equality operators that the subquery
1110 : * would use itself. We use equality_ops_are_compatible() to check
1111 : * compatibility. That looks at opfamily membership for index AMs that have
1112 : * declared that they support consistent equality semantics within an
1113 : * opfamily, and so should give trustworthy answers for all operators that we
1114 : * might need to deal with here.)
1115 : */
1116 : bool
1117 5226 : query_is_distinct_for(Query *query, List *colnos, List *opids)
1118 : {
1119 : ListCell *l;
1120 : Oid opid;
1121 :
1122 : Assert(list_length(colnos) == list_length(opids));
1123 :
1124 : /*
1125 : * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
1126 : * columns in the DISTINCT clause appear in colnos and operator semantics
1127 : * match. This is true even if there are SRFs in the DISTINCT columns or
1128 : * elsewhere in the tlist.
1129 : */
1130 5226 : if (query->distinctClause)
1131 : {
1132 168 : foreach(l, query->distinctClause)
1133 : {
1134 126 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
1135 126 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
1136 : query->targetList);
1137 :
1138 126 : opid = distinct_col_search(tle->resno, colnos, opids);
1139 126 : if (!OidIsValid(opid) ||
1140 60 : !equality_ops_are_compatible(opid, sgc->eqop))
1141 : break; /* exit early if no match */
1142 : }
1143 108 : if (l == NULL) /* had matches for all? */
1144 42 : return true;
1145 : }
1146 :
1147 : /*
1148 : * Otherwise, a set-returning function in the query's targetlist can
1149 : * result in returning duplicate rows, despite any grouping that might
1150 : * occur before tlist evaluation. (If all tlist SRFs are within GROUP BY
1151 : * columns, it would be safe because they'd be expanded before grouping.
1152 : * But it doesn't currently seem worth the effort to check for that.)
1153 : */
1154 5184 : if (query->hasTargetSRFs)
1155 0 : return false;
1156 :
1157 : /*
1158 : * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
1159 : * the grouped columns appear in colnos and operator semantics match.
1160 : */
1161 5184 : if (query->groupClause && !query->groupingSets)
1162 : {
1163 228 : foreach(l, query->groupClause)
1164 : {
1165 158 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
1166 158 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
1167 : query->targetList);
1168 :
1169 158 : opid = distinct_col_search(tle->resno, colnos, opids);
1170 158 : if (!OidIsValid(opid) ||
1171 112 : !equality_ops_are_compatible(opid, sgc->eqop))
1172 : break; /* exit early if no match */
1173 : }
1174 116 : if (l == NULL) /* had matches for all? */
1175 70 : return true;
1176 : }
1177 5068 : else if (query->groupingSets)
1178 : {
1179 : List *gsets;
1180 :
1181 : /*
1182 : * If we have grouping sets with expressions, we probably don't have
1183 : * uniqueness and analysis would be hard. Punt.
1184 : */
1185 60 : if (query->groupClause)
1186 12 : return false;
1187 :
1188 : /*
1189 : * If we have no groupClause (therefore no grouping expressions), we
1190 : * might have one or many empty grouping sets. If there's just one,
1191 : * or if the DISTINCT clause is used on the GROUP BY, then we're
1192 : * returning only one row and are certainly unique. But otherwise, we
1193 : * know we're certainly not unique.
1194 : */
1195 48 : if (query->groupDistinct)
1196 6 : return true;
1197 :
1198 42 : gsets = expand_grouping_sets(query->groupingSets, false, -1);
1199 :
1200 42 : return (list_length(gsets) == 1);
1201 : }
1202 : else
1203 : {
1204 : /*
1205 : * If we have no GROUP BY, but do have aggregates or HAVING, then the
1206 : * result is at most one row so it's surely unique, for any operators.
1207 : */
1208 5008 : if (query->hasAggs || query->havingQual)
1209 244 : return true;
1210 : }
1211 :
1212 : /*
1213 : * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
1214 : * except with ALL.
1215 : */
1216 4810 : if (query->setOperations)
1217 : {
1218 4698 : SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations);
1219 :
1220 : Assert(topop->op != SETOP_NONE);
1221 :
1222 4698 : if (!topop->all)
1223 : {
1224 : ListCell *lg;
1225 :
1226 : /* We're good if all the nonjunk output columns are in colnos */
1227 142 : lg = list_head(topop->groupClauses);
1228 236 : foreach(l, query->targetList)
1229 : {
1230 148 : TargetEntry *tle = (TargetEntry *) lfirst(l);
1231 : SortGroupClause *sgc;
1232 :
1233 148 : if (tle->resjunk)
1234 0 : continue; /* ignore resjunk columns */
1235 :
1236 : /* non-resjunk columns should have grouping clauses */
1237 : Assert(lg != NULL);
1238 148 : sgc = (SortGroupClause *) lfirst(lg);
1239 148 : lg = lnext(topop->groupClauses, lg);
1240 :
1241 148 : opid = distinct_col_search(tle->resno, colnos, opids);
1242 148 : if (!OidIsValid(opid) ||
1243 94 : !equality_ops_are_compatible(opid, sgc->eqop))
1244 : break; /* exit early if no match */
1245 : }
1246 142 : if (l == NULL) /* had matches for all? */
1247 88 : return true;
1248 : }
1249 : }
1250 :
1251 : /*
1252 : * XXX Are there any other cases in which we can easily see the result
1253 : * must be distinct?
1254 : *
1255 : * If you do add more smarts to this function, be sure to update
1256 : * query_supports_distinctness() to match.
1257 : */
1258 :
1259 4722 : return false;
1260 : }
1261 :
1262 : /*
1263 : * distinct_col_search - subroutine for query_is_distinct_for
1264 : *
1265 : * If colno is in colnos, return the corresponding element of opids,
1266 : * else return InvalidOid. (Ordinarily colnos would not contain duplicates,
1267 : * but if it does, we arbitrarily select the first match.)
1268 : */
1269 : static Oid
1270 432 : distinct_col_search(int colno, List *colnos, List *opids)
1271 : {
1272 : ListCell *lc1,
1273 : *lc2;
1274 :
1275 626 : forboth(lc1, colnos, lc2, opids)
1276 : {
1277 460 : if (colno == lfirst_int(lc1))
1278 266 : return lfirst_oid(lc2);
1279 : }
1280 166 : return InvalidOid;
1281 : }
1282 :
1283 :
1284 : /*
1285 : * innerrel_is_unique
1286 : * Check if the innerrel provably contains at most one tuple matching any
1287 : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1288 : *
1289 : * We need an actual RelOptInfo for the innerrel, but it's sufficient to
1290 : * identify the outerrel by its Relids. This asymmetry supports use of this
1291 : * function before joinrels have been built. (The caller is expected to
1292 : * also supply the joinrelids, just to save recalculating that.)
1293 : *
1294 : * The proof must be made based only on clauses that will be "joinquals"
1295 : * rather than "otherquals" at execution. For an inner join there's no
1296 : * difference; but if the join is outer, we must ignore pushed-down quals,
1297 : * as those will become "otherquals". Note that this means the answer might
1298 : * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
1299 : * answer without regard to that, callers must take care not to call this
1300 : * with jointypes that would be classified differently by IS_OUTER_JOIN().
1301 : *
1302 : * The actual proof is undertaken by is_innerrel_unique_for(); this function
1303 : * is a frontend that is mainly concerned with caching the answers.
1304 : * In particular, the force_cache argument allows overriding the internal
1305 : * heuristic about whether to cache negative answers; it should be "true"
1306 : * if making an inquiry that is not part of the normal bottom-up join search
1307 : * sequence.
1308 : */
1309 : bool
1310 750262 : innerrel_is_unique(PlannerInfo *root,
1311 : Relids joinrelids,
1312 : Relids outerrelids,
1313 : RelOptInfo *innerrel,
1314 : JoinType jointype,
1315 : List *restrictlist,
1316 : bool force_cache)
1317 : {
1318 750262 : return innerrel_is_unique_ext(root, joinrelids, outerrelids, innerrel,
1319 : jointype, restrictlist, force_cache, NULL);
1320 : }
1321 :
1322 : /*
1323 : * innerrel_is_unique_ext
1324 : * Do the same as innerrel_is_unique(), but also set to (*extra_clauses)
1325 : * additional clauses from a baserestrictinfo list used to prove the
1326 : * uniqueness.
1327 : *
1328 : * A non-NULL extra_clauses indicates that we're checking for self-join and
1329 : * correspondingly dealing with filtered clauses.
1330 : */
1331 : bool
1332 752304 : innerrel_is_unique_ext(PlannerInfo *root,
1333 : Relids joinrelids,
1334 : Relids outerrelids,
1335 : RelOptInfo *innerrel,
1336 : JoinType jointype,
1337 : List *restrictlist,
1338 : bool force_cache,
1339 : List **extra_clauses)
1340 : {
1341 : MemoryContext old_context;
1342 : ListCell *lc;
1343 : UniqueRelInfo *uniqueRelInfo;
1344 752304 : List *outer_exprs = NIL;
1345 752304 : bool self_join = (extra_clauses != NULL);
1346 :
1347 : /* Certainly can't prove uniqueness when there are no joinclauses */
1348 752304 : if (restrictlist == NIL)
1349 106624 : return false;
1350 :
1351 : /*
1352 : * Make a quick check to eliminate cases in which we will surely be unable
1353 : * to prove uniqueness of the innerrel.
1354 : */
1355 645680 : if (!rel_supports_distinctness(root, innerrel))
1356 343720 : return false;
1357 :
1358 : /*
1359 : * Query the cache to see if we've managed to prove that innerrel is
1360 : * unique for any subset of this outerrel. For non-self-join search, we
1361 : * don't need an exact match, as extra outerrels can't make the innerrel
1362 : * any less unique (or more formally, the restrictlist for a join to a
1363 : * superset outerrel must be a superset of the conditions we successfully
1364 : * used before). For self-join search, we require an exact match of
1365 : * outerrels because we need extra clauses to be valid for our case. Also,
1366 : * for self-join checking we've filtered the clauses list. Thus, we can
1367 : * match only the result cached for a self-join search for another
1368 : * self-join check.
1369 : */
1370 333888 : foreach(lc, innerrel->unique_for_rels)
1371 : {
1372 118236 : uniqueRelInfo = (UniqueRelInfo *) lfirst(lc);
1373 :
1374 118236 : if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) ||
1375 74 : (self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) &&
1376 56 : uniqueRelInfo->self_join))
1377 : {
1378 86308 : if (extra_clauses)
1379 12 : *extra_clauses = uniqueRelInfo->extra_clauses;
1380 86308 : return true; /* Success! */
1381 : }
1382 : }
1383 :
1384 : /*
1385 : * Conversely, we may have already determined that this outerrel, or some
1386 : * superset thereof, cannot prove this innerrel to be unique.
1387 : */
1388 216208 : foreach(lc, innerrel->non_unique_for_rels)
1389 : {
1390 916 : Relids unique_for_rels = (Relids) lfirst(lc);
1391 :
1392 916 : if (bms_is_subset(outerrelids, unique_for_rels))
1393 360 : return false;
1394 : }
1395 :
1396 : /* No cached information, so try to make the proof. */
1397 215292 : if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
1398 : jointype, restrictlist,
1399 : self_join ? &outer_exprs : NULL))
1400 : {
1401 : /*
1402 : * Cache the positive result for future probes, being sure to keep it
1403 : * in the planner_cxt even if we are working in GEQO.
1404 : *
1405 : * Note: one might consider trying to isolate the minimal subset of
1406 : * the outerrels that proved the innerrel unique. But it's not worth
1407 : * the trouble, because the planner builds up joinrels incrementally
1408 : * and so we'll see the minimally sufficient outerrels before any
1409 : * supersets of them anyway.
1410 : */
1411 115024 : old_context = MemoryContextSwitchTo(root->planner_cxt);
1412 115024 : uniqueRelInfo = makeNode(UniqueRelInfo);
1413 115024 : uniqueRelInfo->outerrelids = bms_copy(outerrelids);
1414 115024 : uniqueRelInfo->self_join = self_join;
1415 115024 : uniqueRelInfo->extra_clauses = outer_exprs;
1416 115024 : innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
1417 : uniqueRelInfo);
1418 115024 : MemoryContextSwitchTo(old_context);
1419 :
1420 115024 : if (extra_clauses)
1421 654 : *extra_clauses = outer_exprs;
1422 115024 : return true; /* Success! */
1423 : }
1424 : else
1425 : {
1426 : /*
1427 : * None of the join conditions for outerrel proved innerrel unique, so
1428 : * we can safely reject this outerrel or any subset of it in future
1429 : * checks.
1430 : *
1431 : * However, in normal planning mode, caching this knowledge is totally
1432 : * pointless; it won't be queried again, because we build up joinrels
1433 : * from smaller to larger. It's only useful when using GEQO or
1434 : * another planner extension that attempts planning multiple times.
1435 : *
1436 : * Also, allow callers to override that heuristic and force caching;
1437 : * that's useful for reduce_unique_semijoins, which calls here before
1438 : * the normal join search starts.
1439 : */
1440 100268 : if (force_cache || root->assumeReplanning)
1441 : {
1442 3864 : old_context = MemoryContextSwitchTo(root->planner_cxt);
1443 3864 : innerrel->non_unique_for_rels =
1444 3864 : lappend(innerrel->non_unique_for_rels,
1445 3864 : bms_copy(outerrelids));
1446 3864 : MemoryContextSwitchTo(old_context);
1447 : }
1448 :
1449 100268 : return false;
1450 : }
1451 : }
1452 :
1453 : /*
1454 : * is_innerrel_unique_for
1455 : * Check if the innerrel provably contains at most one tuple matching any
1456 : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1457 : */
1458 : static bool
1459 215292 : is_innerrel_unique_for(PlannerInfo *root,
1460 : Relids joinrelids,
1461 : Relids outerrelids,
1462 : RelOptInfo *innerrel,
1463 : JoinType jointype,
1464 : List *restrictlist,
1465 : List **extra_clauses)
1466 : {
1467 215292 : List *clause_list = NIL;
1468 : ListCell *lc;
1469 :
1470 : /*
1471 : * Search for mergejoinable clauses that constrain the inner rel against
1472 : * the outer rel. If an operator is mergejoinable then it behaves like
1473 : * equality for some btree opclass, so it's what we want. The
1474 : * mergejoinability test also eliminates clauses containing volatile
1475 : * functions, which we couldn't depend on.
1476 : */
1477 482462 : foreach(lc, restrictlist)
1478 : {
1479 267170 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
1480 :
1481 : /*
1482 : * As noted above, if it's a pushed-down clause and we're at an outer
1483 : * join, we can't use it.
1484 : */
1485 267170 : if (IS_OUTER_JOIN(jointype) &&
1486 108178 : RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
1487 4324 : continue;
1488 :
1489 : /* Ignore if it's not a mergejoinable clause */
1490 262846 : if (!restrictinfo->can_join ||
1491 246206 : restrictinfo->mergeopfamilies == NIL)
1492 17628 : continue; /* not mergejoinable */
1493 :
1494 : /*
1495 : * Check if the clause has the form "outer op inner" or "inner op
1496 : * outer", and if so mark which side is inner.
1497 : */
1498 245218 : if (!clause_sides_match_join(restrictinfo, outerrelids,
1499 : innerrel->relids))
1500 40 : continue; /* no good for these input relations */
1501 :
1502 : /* OK, add to the list */
1503 245178 : clause_list = lappend(clause_list, restrictinfo);
1504 : }
1505 :
1506 : /* Let rel_is_distinct_for() do the hard work */
1507 215292 : return rel_is_distinct_for(root, innerrel, clause_list, extra_clauses);
1508 : }
1509 :
1510 : /*
1511 : * Update EC members to point to the remaining relation instead of the removed
1512 : * one, removing duplicates.
1513 : *
1514 : * Restriction clauses for base relations are already distributed to
1515 : * the respective baserestrictinfo lists (see
1516 : * generate_implied_equalities_for_column). The above code has already processed
1517 : * this list and updated these clauses to reference the remaining
1518 : * relation, so that we can skip them here based on their relids.
1519 : *
1520 : * Likewise, we have already processed the join clauses that join the
1521 : * removed relation to the remaining one.
1522 : *
1523 : * Finally, there might be join clauses tying the removed relation to
1524 : * some third relation. We can't just delete the source clauses and
1525 : * regenerate them from the EC because the corresponding equality
1526 : * operators might be missing (see the handling of ec_broken).
1527 : * Therefore, we will update the references in the source clauses.
1528 : *
1529 : * Derived clauses can be generated again, so it is simpler just to
1530 : * delete them.
1531 : */
1532 : static void
1533 924 : update_eclasses(EquivalenceClass *ec, int from, int to)
1534 : {
1535 924 : List *new_members = NIL;
1536 924 : List *new_sources = NIL;
1537 :
1538 : /*
1539 : * We don't expect any EC child members to exist at this point. Ensure
1540 : * that's the case, otherwise, we might be getting asked to do something
1541 : * this function hasn't been coded for.
1542 : */
1543 : Assert(ec->ec_childmembers == NULL);
1544 :
1545 3726 : foreach_node(EquivalenceMember, em, ec->ec_members)
1546 : {
1547 1878 : bool is_redundant = false;
1548 :
1549 1878 : if (!bms_is_member(from, em->em_relids))
1550 : {
1551 936 : new_members = lappend(new_members, em);
1552 936 : continue;
1553 : }
1554 :
1555 942 : em->em_relids = adjust_relid_set(em->em_relids, from, to);
1556 942 : em->em_jdomain->jd_relids = adjust_relid_set(em->em_jdomain->jd_relids, from, to);
1557 :
1558 : /* We only process inner joins */
1559 942 : ChangeVarNodesExtended((Node *) em->em_expr, from, to, 0,
1560 : replace_relid_callback);
1561 :
1562 1908 : foreach_node(EquivalenceMember, other, new_members)
1563 : {
1564 304 : if (!equal(em->em_relids, other->em_relids))
1565 24 : continue;
1566 :
1567 280 : if (equal(em->em_expr, other->em_expr))
1568 : {
1569 280 : is_redundant = true;
1570 280 : break;
1571 : }
1572 : }
1573 :
1574 942 : if (!is_redundant)
1575 662 : new_members = lappend(new_members, em);
1576 : }
1577 :
1578 924 : list_free(ec->ec_members);
1579 924 : ec->ec_members = new_members;
1580 :
1581 924 : ec_clear_derived_clauses(ec);
1582 :
1583 : /* Update EC source expressions */
1584 2802 : foreach_node(RestrictInfo, rinfo, ec->ec_sources)
1585 : {
1586 954 : bool is_redundant = false;
1587 :
1588 954 : if (!bms_is_member(from, rinfo->required_relids))
1589 : {
1590 118 : new_sources = lappend(new_sources, rinfo);
1591 118 : continue;
1592 : }
1593 :
1594 836 : ChangeVarNodesExtended((Node *) rinfo, from, to, 0,
1595 : replace_relid_callback);
1596 :
1597 : /*
1598 : * After switching the clause to the remaining relation, check it for
1599 : * redundancy with existing ones. We don't have to check for
1600 : * redundancy with derived clauses, because we've just deleted them.
1601 : */
1602 1696 : foreach_node(RestrictInfo, other, new_sources)
1603 : {
1604 36 : if (!equal(rinfo->clause_relids, other->clause_relids))
1605 24 : continue;
1606 :
1607 12 : if (equal(rinfo->clause, other->clause))
1608 : {
1609 12 : is_redundant = true;
1610 12 : break;
1611 : }
1612 : }
1613 :
1614 836 : if (!is_redundant)
1615 824 : new_sources = lappend(new_sources, rinfo);
1616 : }
1617 :
1618 924 : list_free(ec->ec_sources);
1619 924 : ec->ec_sources = new_sources;
1620 924 : ec->ec_relids = adjust_relid_set(ec->ec_relids, from, to);
1621 924 : }
1622 :
1623 : /*
1624 : * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field,
1625 : * which makes almost every RestrictInfo unique. This type of comparison is
1626 : * useful when removing duplicates while moving RestrictInfo's from removed
1627 : * relation to remaining relation during self-join elimination.
1628 : *
1629 : * XXX: In the future, we might remove the 'rinfo_serial' field completely and
1630 : * get rid of this function.
1631 : */
1632 : static bool
1633 512 : restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b)
1634 : {
1635 512 : int saved_rinfo_serial = a->rinfo_serial;
1636 : bool result;
1637 :
1638 512 : a->rinfo_serial = b->rinfo_serial;
1639 512 : result = equal(a, b);
1640 512 : a->rinfo_serial = saved_rinfo_serial;
1641 :
1642 512 : return result;
1643 : }
1644 :
1645 : /*
1646 : * This function adds all non-redundant clauses to the keeping relation
1647 : * during self-join elimination. That is a contradictory operation. On the
1648 : * one hand, we reduce the length of the `restrict` lists, which can
1649 : * impact planning or executing time. Additionally, we improve the
1650 : * accuracy of cardinality estimation. On the other hand, it is one more
1651 : * place that can make planning time much longer in specific cases. It
1652 : * would have been better to avoid calling the equal() function here, but
1653 : * it's the only way to detect duplicated inequality expressions.
1654 : *
1655 : * (*keep_rinfo_list) is given by pointer because it might be altered by
1656 : * distribute_restrictinfo_to_rels().
1657 : */
1658 : static void
1659 1200 : add_non_redundant_clauses(PlannerInfo *root,
1660 : List *rinfo_candidates,
1661 : List **keep_rinfo_list,
1662 : Index removed_relid)
1663 : {
1664 3328 : foreach_node(RestrictInfo, rinfo, rinfo_candidates)
1665 : {
1666 928 : bool is_redundant = false;
1667 :
1668 : Assert(!bms_is_member(removed_relid, rinfo->required_relids));
1669 :
1670 2232 : foreach_node(RestrictInfo, src, (*keep_rinfo_list))
1671 : {
1672 530 : if (!bms_equal(src->clause_relids, rinfo->clause_relids))
1673 : /* Can't compare trivially different clauses */
1674 12 : continue;
1675 :
1676 518 : if (src == rinfo ||
1677 518 : (rinfo->parent_ec != NULL &&
1678 816 : src->parent_ec == rinfo->parent_ec) ||
1679 512 : restrict_infos_logically_equal(rinfo, src))
1680 : {
1681 154 : is_redundant = true;
1682 154 : break;
1683 : }
1684 : }
1685 928 : if (!is_redundant)
1686 774 : distribute_restrictinfo_to_rels(root, rinfo);
1687 : }
1688 1200 : }
1689 :
1690 : /*
1691 : * A custom callback for ChangeVarNodesExtended() providing Self-join
1692 : * elimination (SJE) related functionality
1693 : *
1694 : * SJE needs to skip the RangeTblRef node type. During SJE's last
1695 : * step, remove_rel_from_joinlist() removes remaining RangeTblRefs
1696 : * with target relid. If ChangeVarNodes() replaces the target relid
1697 : * before, remove_rel_from_joinlist() would fail to identify the nodes
1698 : * to delete.
1699 : *
1700 : * SJE also needs to change the relids within RestrictInfo's.
1701 : */
1702 : static bool
1703 41060 : replace_relid_callback(Node *node, ChangeVarNodes_context *context)
1704 : {
1705 41060 : if (IsA(node, RangeTblRef))
1706 : {
1707 1544 : return true;
1708 : }
1709 39516 : else if (IsA(node, RestrictInfo))
1710 : {
1711 2844 : RestrictInfo *rinfo = (RestrictInfo *) node;
1712 2844 : int relid = -1;
1713 2844 : bool is_req_equal =
1714 2844 : (rinfo->required_relids == rinfo->clause_relids);
1715 2844 : bool clause_relids_is_multiple =
1716 2844 : (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
1717 :
1718 : /*
1719 : * Recurse down into clauses if the target relation is present in
1720 : * clause_relids or required_relids. We must check required_relids
1721 : * because the relation not present in clause_relids might still be
1722 : * present somewhere in orclause.
1723 : */
1724 3960 : if (bms_is_member(context->rt_index, rinfo->clause_relids) ||
1725 1116 : bms_is_member(context->rt_index, rinfo->required_relids))
1726 : {
1727 : Relids new_clause_relids;
1728 :
1729 1776 : ChangeVarNodesWalkExpression((Node *) rinfo->clause, context);
1730 1776 : ChangeVarNodesWalkExpression((Node *) rinfo->orclause, context);
1731 :
1732 1776 : new_clause_relids = adjust_relid_set(rinfo->clause_relids,
1733 : context->rt_index,
1734 : context->new_index);
1735 :
1736 : /*
1737 : * Incrementally adjust num_base_rels based on the change of
1738 : * clause_relids, which could contain both base relids and
1739 : * outer-join relids. This operation is legal until we remove
1740 : * only baserels.
1741 : */
1742 1776 : rinfo->num_base_rels -= bms_num_members(rinfo->clause_relids) -
1743 1776 : bms_num_members(new_clause_relids);
1744 :
1745 1776 : rinfo->clause_relids = new_clause_relids;
1746 1776 : rinfo->left_relids =
1747 1776 : adjust_relid_set(rinfo->left_relids, context->rt_index, context->new_index);
1748 1776 : rinfo->right_relids =
1749 1776 : adjust_relid_set(rinfo->right_relids, context->rt_index, context->new_index);
1750 : }
1751 :
1752 2844 : if (is_req_equal)
1753 24 : rinfo->required_relids = rinfo->clause_relids;
1754 : else
1755 2820 : rinfo->required_relids =
1756 2820 : adjust_relid_set(rinfo->required_relids, context->rt_index, context->new_index);
1757 :
1758 2844 : rinfo->outer_relids =
1759 2844 : adjust_relid_set(rinfo->outer_relids, context->rt_index, context->new_index);
1760 2844 : rinfo->incompatible_relids =
1761 2844 : adjust_relid_set(rinfo->incompatible_relids, context->rt_index, context->new_index);
1762 :
1763 4836 : if (rinfo->mergeopfamilies &&
1764 3704 : bms_get_singleton_member(rinfo->clause_relids, &relid) &&
1765 1272 : clause_relids_is_multiple &&
1766 1272 : relid == context->new_index && IsA(rinfo->clause, OpExpr))
1767 : {
1768 : Expr *leftOp;
1769 : Expr *rightOp;
1770 :
1771 1272 : leftOp = (Expr *) get_leftop(rinfo->clause);
1772 1272 : rightOp = (Expr *) get_rightop(rinfo->clause);
1773 :
1774 : /*
1775 : * For self-join elimination, changing varnos could transform
1776 : * "t1.a = t2.a" into "t1.a = t1.a". That is always true as long
1777 : * as "t1.a" is not null. We use equal() to check for such a
1778 : * case, and then we replace the qual with a check for not null
1779 : * (NullTest).
1780 : */
1781 1272 : if (leftOp != NULL && equal(leftOp, rightOp))
1782 : {
1783 1260 : NullTest *ntest = makeNode(NullTest);
1784 :
1785 1260 : ntest->arg = leftOp;
1786 1260 : ntest->nulltesttype = IS_NOT_NULL;
1787 1260 : ntest->argisrow = false;
1788 1260 : ntest->location = -1;
1789 1260 : rinfo->clause = (Expr *) ntest;
1790 1260 : rinfo->mergeopfamilies = NIL;
1791 1260 : rinfo->left_em = NULL;
1792 1260 : rinfo->right_em = NULL;
1793 : }
1794 : Assert(rinfo->orclause == NULL);
1795 : }
1796 2844 : return true;
1797 : }
1798 :
1799 36672 : return false;
1800 : }
1801 :
1802 : /*
1803 : * Remove a relation after we have proven that it participates only in an
1804 : * unneeded unique self-join.
1805 : *
1806 : * Replace any links in planner info structures.
1807 : *
1808 : * Transfer join and restriction clauses from the removed relation to the
1809 : * remaining one. We change the Vars of the clause to point to the
1810 : * remaining relation instead of the removed one. The clauses that require
1811 : * a subset of joinrelids become restriction clauses of the remaining
1812 : * relation, and others remain join clauses. We append them to
1813 : * baserestrictinfo and joininfo, respectively, trying not to introduce
1814 : * duplicates.
1815 : *
1816 : * We also have to process the 'joinclauses' list here, because it
1817 : * contains EC-derived join clauses which must become filter clauses. It
1818 : * is not enough to just correct the ECs because the EC-derived
1819 : * restrictions are generated before join removal (see
1820 : * generate_base_implied_equalities).
1821 : *
1822 : * NOTE: Remember to keep the code in sync with PlannerInfo to be sure all
1823 : * cached relids and relid bitmapsets can be correctly cleaned during the
1824 : * self-join elimination procedure.
1825 : */
1826 : static void
1827 600 : remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark,
1828 : RelOptInfo *toKeep, RelOptInfo *toRemove,
1829 : List *restrictlist)
1830 : {
1831 : List *joininfos;
1832 : ListCell *lc;
1833 : int i;
1834 600 : List *jinfo_candidates = NIL;
1835 600 : List *binfo_candidates = NIL;
1836 :
1837 : Assert(toKeep->relid > 0);
1838 : Assert(toRemove->relid > 0);
1839 :
1840 : /*
1841 : * Replace the index of the removing table with the keeping one. The
1842 : * technique of removing/distributing restrictinfo is used here to attach
1843 : * just appeared (for keeping relation) join clauses and avoid adding
1844 : * duplicates of those that already exist in the joininfo list.
1845 : */
1846 600 : joininfos = list_copy(toRemove->joininfo);
1847 1290 : foreach_node(RestrictInfo, rinfo, joininfos)
1848 : {
1849 90 : remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
1850 90 : ChangeVarNodesExtended((Node *) rinfo, toRemove->relid, toKeep->relid,
1851 : 0, replace_relid_callback);
1852 :
1853 90 : if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
1854 72 : jinfo_candidates = lappend(jinfo_candidates, rinfo);
1855 : else
1856 18 : binfo_candidates = lappend(binfo_candidates, rinfo);
1857 : }
1858 :
1859 : /*
1860 : * Concatenate restrictlist to the list of base restrictions of the
1861 : * removing table just to simplify the replacement procedure: all of them
1862 : * weren't connected to any keeping relations and need to be added to some
1863 : * rels.
1864 : */
1865 600 : toRemove->baserestrictinfo = list_concat(toRemove->baserestrictinfo,
1866 : restrictlist);
1867 2038 : foreach_node(RestrictInfo, rinfo, toRemove->baserestrictinfo)
1868 : {
1869 838 : ChangeVarNodesExtended((Node *) rinfo, toRemove->relid, toKeep->relid,
1870 : 0, replace_relid_callback);
1871 :
1872 838 : if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
1873 0 : jinfo_candidates = lappend(jinfo_candidates, rinfo);
1874 : else
1875 838 : binfo_candidates = lappend(binfo_candidates, rinfo);
1876 : }
1877 :
1878 : /*
1879 : * Now, add all non-redundant clauses to the keeping relation.
1880 : */
1881 600 : add_non_redundant_clauses(root, binfo_candidates,
1882 : &toKeep->baserestrictinfo, toRemove->relid);
1883 600 : add_non_redundant_clauses(root, jinfo_candidates,
1884 : &toKeep->joininfo, toRemove->relid);
1885 :
1886 600 : list_free(binfo_candidates);
1887 600 : list_free(jinfo_candidates);
1888 :
1889 : /*
1890 : * Arrange equivalence classes, mentioned removing a table, with the
1891 : * keeping one: varno of removing table should be replaced in members and
1892 : * sources lists. Also, remove duplicated elements if this replacement
1893 : * procedure created them.
1894 : */
1895 600 : i = -1;
1896 1524 : while ((i = bms_next_member(toRemove->eclass_indexes, i)) >= 0)
1897 : {
1898 924 : EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
1899 :
1900 924 : update_eclasses(ec, toRemove->relid, toKeep->relid);
1901 924 : toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i);
1902 : }
1903 :
1904 : /*
1905 : * Transfer the targetlist and attr_needed flags.
1906 : */
1907 :
1908 2396 : foreach(lc, toRemove->reltarget->exprs)
1909 : {
1910 1796 : Node *node = lfirst(lc);
1911 :
1912 1796 : ChangeVarNodesExtended(node, toRemove->relid, toKeep->relid, 0,
1913 : replace_relid_callback);
1914 1796 : if (!list_member(toKeep->reltarget->exprs, node))
1915 180 : toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node);
1916 : }
1917 :
1918 7590 : for (i = toKeep->min_attr; i <= toKeep->max_attr; i++)
1919 : {
1920 6990 : int attno = i - toKeep->min_attr;
1921 :
1922 13980 : toRemove->attr_needed[attno] = adjust_relid_set(toRemove->attr_needed[attno],
1923 6990 : toRemove->relid, toKeep->relid);
1924 6990 : toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno],
1925 6990 : toRemove->attr_needed[attno]);
1926 : }
1927 :
1928 : /*
1929 : * If the removed relation has a row mark, transfer it to the remaining
1930 : * one.
1931 : *
1932 : * If both rels have row marks, just keep the one corresponding to the
1933 : * remaining relation because we verified earlier that they have the same
1934 : * strength.
1935 : */
1936 600 : if (rmark)
1937 : {
1938 74 : if (kmark)
1939 : {
1940 : Assert(kmark->markType == rmark->markType);
1941 :
1942 74 : root->rowMarks = list_delete_ptr(root->rowMarks, rmark);
1943 : }
1944 : else
1945 : {
1946 : /* Shouldn't have inheritance children here. */
1947 : Assert(rmark->rti == rmark->prti);
1948 :
1949 0 : rmark->rti = rmark->prti = toKeep->relid;
1950 : }
1951 : }
1952 :
1953 : /*
1954 : * Replace varno in all the query structures, except nodes RangeTblRef
1955 : * otherwise later remove_rel_from_joinlist will yield errors.
1956 : */
1957 600 : ChangeVarNodesExtended((Node *) root->parse, toRemove->relid, toKeep->relid,
1958 : 0, replace_relid_callback);
1959 :
1960 : /* Replace links in the planner info */
1961 600 : remove_rel_from_query(root, toRemove, toKeep->relid, NULL, NULL);
1962 :
1963 : /* At last, replace varno in root targetlist and HAVING clause */
1964 600 : ChangeVarNodesExtended((Node *) root->processed_tlist, toRemove->relid,
1965 600 : toKeep->relid, 0, replace_relid_callback);
1966 600 : ChangeVarNodesExtended((Node *) root->processed_groupClause,
1967 600 : toRemove->relid, toKeep->relid, 0,
1968 : replace_relid_callback);
1969 :
1970 600 : adjust_relid_set(root->all_result_relids, toRemove->relid, toKeep->relid);
1971 600 : adjust_relid_set(root->leaf_result_relids, toRemove->relid, toKeep->relid);
1972 :
1973 : /*
1974 : * There may be references to the rel in root->fkey_list, but if so,
1975 : * match_foreign_keys_to_quals() will get rid of them.
1976 : */
1977 :
1978 : /*
1979 : * Finally, remove the rel from the baserel array to prevent it from being
1980 : * referenced again. (We can't do this earlier because
1981 : * remove_join_clause_from_rels will touch it.)
1982 : */
1983 600 : root->simple_rel_array[toRemove->relid] = NULL;
1984 600 : root->simple_rte_array[toRemove->relid] = NULL;
1985 :
1986 : /* And nuke the RelOptInfo, just in case there's another access path. */
1987 600 : pfree(toRemove);
1988 :
1989 :
1990 : /*
1991 : * Now repeat construction of attr_needed bits coming from all other
1992 : * sources.
1993 : */
1994 600 : rebuild_placeholder_attr_needed(root);
1995 600 : rebuild_joinclause_attr_needed(root);
1996 600 : rebuild_eclass_attr_needed(root);
1997 600 : rebuild_lateral_attr_needed(root);
1998 600 : }
1999 :
2000 : /*
2001 : * split_selfjoin_quals
2002 : * Processes 'joinquals' by building two lists: one containing the quals
2003 : * where the columns/exprs are on either side of the join match and
2004 : * another one containing the remaining quals.
2005 : *
2006 : * 'joinquals' must only contain quals for a RTE_RELATION being joined to
2007 : * itself.
2008 : */
2009 : static void
2010 2042 : split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals,
2011 : List **otherjoinquals, int from, int to)
2012 : {
2013 2042 : List *sjoinquals = NIL;
2014 2042 : List *ojoinquals = NIL;
2015 :
2016 6260 : foreach_node(RestrictInfo, rinfo, joinquals)
2017 : {
2018 : OpExpr *expr;
2019 : Node *leftexpr;
2020 : Node *rightexpr;
2021 :
2022 : /* In general, clause looks like F(arg1) = G(arg2) */
2023 4352 : if (!rinfo->mergeopfamilies ||
2024 4352 : bms_num_members(rinfo->clause_relids) != 2 ||
2025 4352 : bms_membership(rinfo->left_relids) != BMS_SINGLETON ||
2026 2176 : bms_membership(rinfo->right_relids) != BMS_SINGLETON)
2027 : {
2028 0 : ojoinquals = lappend(ojoinquals, rinfo);
2029 0 : continue;
2030 : }
2031 :
2032 2176 : expr = (OpExpr *) rinfo->clause;
2033 :
2034 2176 : if (!IsA(expr, OpExpr) || list_length(expr->args) != 2)
2035 : {
2036 0 : ojoinquals = lappend(ojoinquals, rinfo);
2037 0 : continue;
2038 : }
2039 :
2040 2176 : leftexpr = get_leftop(rinfo->clause);
2041 2176 : rightexpr = copyObject(get_rightop(rinfo->clause));
2042 :
2043 2176 : if (leftexpr && IsA(leftexpr, RelabelType))
2044 12 : leftexpr = (Node *) ((RelabelType *) leftexpr)->arg;
2045 2176 : if (rightexpr && IsA(rightexpr, RelabelType))
2046 6 : rightexpr = (Node *) ((RelabelType *) rightexpr)->arg;
2047 :
2048 : /*
2049 : * Quite an expensive operation, narrowing the use case. For example,
2050 : * when we have cast of the same var to different (but compatible)
2051 : * types.
2052 : */
2053 2176 : ChangeVarNodesExtended(rightexpr,
2054 2176 : bms_singleton_member(rinfo->right_relids),
2055 2176 : bms_singleton_member(rinfo->left_relids), 0,
2056 : replace_relid_callback);
2057 :
2058 2176 : if (equal(leftexpr, rightexpr))
2059 1672 : sjoinquals = lappend(sjoinquals, rinfo);
2060 : else
2061 504 : ojoinquals = lappend(ojoinquals, rinfo);
2062 : }
2063 :
2064 2042 : *selfjoinquals = sjoinquals;
2065 2042 : *otherjoinquals = ojoinquals;
2066 2042 : }
2067 :
2068 : /*
2069 : * Check for a case when uniqueness is at least partly derived from a
2070 : * baserestrictinfo clause. In this case, we have a chance to return only
2071 : * one row (if such clauses on both sides of SJ are equal) or nothing (if they
2072 : * are different).
2073 : */
2074 : static bool
2075 666 : match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses,
2076 : Index relid)
2077 : {
2078 1350 : foreach_node(RestrictInfo, rinfo, uclauses)
2079 : {
2080 : Expr *clause;
2081 : Node *iclause;
2082 : Node *c1;
2083 150 : bool matched = false;
2084 :
2085 : Assert(outer->relid > 0 && relid > 0);
2086 :
2087 : /* Only filters like f(R.x1,...,R.xN) == expr we should consider. */
2088 : Assert(bms_is_empty(rinfo->left_relids) ^
2089 : bms_is_empty(rinfo->right_relids));
2090 :
2091 150 : clause = (Expr *) copyObject(rinfo->clause);
2092 150 : ChangeVarNodesExtended((Node *) clause, relid, outer->relid, 0,
2093 : replace_relid_callback);
2094 :
2095 150 : iclause = bms_is_empty(rinfo->left_relids) ? get_rightop(clause) :
2096 144 : get_leftop(clause);
2097 150 : c1 = bms_is_empty(rinfo->left_relids) ? get_leftop(clause) :
2098 144 : get_rightop(clause);
2099 :
2100 : /*
2101 : * Compare these left and right sides with the corresponding sides of
2102 : * the outer's filters. If no one is detected - return immediately.
2103 : */
2104 408 : foreach_node(RestrictInfo, orinfo, outer->baserestrictinfo)
2105 : {
2106 : Node *oclause;
2107 : Node *c2;
2108 :
2109 192 : if (orinfo->mergeopfamilies == NIL)
2110 : /* Don't consider clauses that aren't similar to 'F(X)=G(Y)' */
2111 60 : continue;
2112 :
2113 : Assert(is_opclause(orinfo->clause));
2114 :
2115 264 : oclause = bms_is_empty(orinfo->left_relids) ?
2116 132 : get_rightop(orinfo->clause) : get_leftop(orinfo->clause);
2117 264 : c2 = (bms_is_empty(orinfo->left_relids) ?
2118 132 : get_leftop(orinfo->clause) : get_rightop(orinfo->clause));
2119 :
2120 132 : if (equal(iclause, oclause) && equal(c1, c2))
2121 : {
2122 84 : matched = true;
2123 84 : break;
2124 : }
2125 : }
2126 :
2127 150 : if (!matched)
2128 66 : return false;
2129 : }
2130 :
2131 600 : return true;
2132 : }
2133 :
2134 : /*
2135 : * Find and remove unique self-joins in a group of base relations that have
2136 : * the same Oid.
2137 : *
2138 : * Returns a set of relids that were removed.
2139 : */
2140 : static Relids
2141 11132 : remove_self_joins_one_group(PlannerInfo *root, Relids relids)
2142 : {
2143 11132 : Relids result = NULL;
2144 : int k; /* Index of kept relation */
2145 11132 : int r = -1; /* Index of removed relation */
2146 :
2147 34706 : while ((r = bms_next_member(relids, r)) > 0)
2148 : {
2149 23574 : RelOptInfo *rrel = root->simple_rel_array[r];
2150 :
2151 23574 : k = r;
2152 :
2153 36992 : while ((k = bms_next_member(relids, k)) > 0)
2154 : {
2155 14018 : Relids joinrelids = NULL;
2156 14018 : RelOptInfo *krel = root->simple_rel_array[k];
2157 : List *restrictlist;
2158 : List *selfjoinquals;
2159 : List *otherjoinquals;
2160 : ListCell *lc;
2161 14018 : bool jinfo_check = true;
2162 14018 : PlanRowMark *kmark = NULL;
2163 14018 : PlanRowMark *rmark = NULL;
2164 14018 : List *uclauses = NIL;
2165 :
2166 : /* A sanity check: the relations have the same Oid. */
2167 : Assert(root->simple_rte_array[k]->relid ==
2168 : root->simple_rte_array[r]->relid);
2169 :
2170 : /*
2171 : * It is impossible to eliminate the join of two relations if they
2172 : * belong to different rules of order. Otherwise, the planner
2173 : * can't find any variants of the correct query plan.
2174 : */
2175 17358 : foreach(lc, root->join_info_list)
2176 : {
2177 11090 : SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc);
2178 :
2179 22180 : if ((bms_is_member(k, info->syn_lefthand) ^
2180 15746 : bms_is_member(r, info->syn_lefthand)) ||
2181 4656 : (bms_is_member(k, info->syn_righthand) ^
2182 4656 : bms_is_member(r, info->syn_righthand)))
2183 : {
2184 7750 : jinfo_check = false;
2185 7750 : break;
2186 : }
2187 : }
2188 14018 : if (!jinfo_check)
2189 13418 : continue;
2190 :
2191 : /*
2192 : * Check Row Marks equivalence. We can't remove the join if the
2193 : * relations have row marks of different strength (e.g., one is
2194 : * locked FOR UPDATE, and another just has ROW_MARK_REFERENCE for
2195 : * EvalPlanQual rechecking).
2196 : */
2197 6476 : foreach(lc, root->rowMarks)
2198 : {
2199 380 : PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc);
2200 :
2201 380 : if (rowMark->rti == r)
2202 : {
2203 : Assert(rmark == NULL);
2204 172 : rmark = rowMark;
2205 : }
2206 208 : else if (rowMark->rti == k)
2207 : {
2208 : Assert(kmark == NULL);
2209 172 : kmark = rowMark;
2210 : }
2211 :
2212 380 : if (kmark && rmark)
2213 172 : break;
2214 : }
2215 6268 : if (kmark && rmark && kmark->markType != rmark->markType)
2216 52 : continue;
2217 :
2218 : /*
2219 : * We only deal with base rels here, so their relids bitset
2220 : * contains only one member -- their relid.
2221 : */
2222 6216 : joinrelids = bms_add_member(joinrelids, r);
2223 6216 : joinrelids = bms_add_member(joinrelids, k);
2224 :
2225 : /*
2226 : * PHVs should not impose any constraints on removing self-joins.
2227 : */
2228 :
2229 : /*
2230 : * At this stage, joininfo lists of inner and outer can contain
2231 : * only clauses required for a superior outer join that can't
2232 : * influence this optimization. So, we can avoid to call the
2233 : * build_joinrel_restrictlist() routine.
2234 : */
2235 6216 : restrictlist = generate_join_implied_equalities(root, joinrelids,
2236 : rrel->relids,
2237 : krel, NULL);
2238 6216 : if (restrictlist == NIL)
2239 4174 : continue;
2240 :
2241 : /*
2242 : * Process restrictlist to separate the self-join quals from the
2243 : * other quals. e.g., "x = x" goes to selfjoinquals and "a = b" to
2244 : * otherjoinquals.
2245 : */
2246 2042 : split_selfjoin_quals(root, restrictlist, &selfjoinquals,
2247 2042 : &otherjoinquals, rrel->relid, krel->relid);
2248 :
2249 : Assert(list_length(restrictlist) ==
2250 : (list_length(selfjoinquals) + list_length(otherjoinquals)));
2251 :
2252 : /*
2253 : * To enable SJE for the only degenerate case without any self
2254 : * join clauses at all, add baserestrictinfo to this list. The
2255 : * degenerate case works only if both sides have the same clause.
2256 : * So doesn't matter which side to add.
2257 : */
2258 2042 : selfjoinquals = list_concat(selfjoinquals, krel->baserestrictinfo);
2259 :
2260 : /*
2261 : * Determine if the rrel can duplicate outer rows. We must bypass
2262 : * the unique rel cache here since we're possibly using a subset
2263 : * of join quals. We can use 'force_cache' == true when all join
2264 : * quals are self-join quals. Otherwise, we could end up putting
2265 : * false negatives in the cache.
2266 : */
2267 2042 : if (!innerrel_is_unique_ext(root, joinrelids, rrel->relids,
2268 : krel, JOIN_INNER, selfjoinquals,
2269 2042 : list_length(otherjoinquals) == 0,
2270 : &uclauses))
2271 1376 : continue;
2272 :
2273 : /*
2274 : * 'uclauses' is the copy of outer->baserestrictinfo that are
2275 : * associated with an index. We proved by matching selfjoinquals
2276 : * to a unique index that the outer relation has at most one
2277 : * matching row for each inner row. Sometimes that is not enough.
2278 : * e.g. "WHERE s1.b = s2.b AND s1.a = 1 AND s2.a = 2" when the
2279 : * unique index is (a,b). Having non-empty uclauses, we must
2280 : * validate that the inner baserestrictinfo contains the same
2281 : * expressions, or we won't match the same row on each side of the
2282 : * join.
2283 : */
2284 666 : if (!match_unique_clauses(root, rrel, uclauses, krel->relid))
2285 66 : continue;
2286 :
2287 : /*
2288 : * Remove rrel ReloptInfo from the planner structures and the
2289 : * corresponding row mark.
2290 : */
2291 600 : remove_self_join_rel(root, kmark, rmark, krel, rrel, restrictlist);
2292 :
2293 600 : result = bms_add_member(result, r);
2294 :
2295 : /* We have removed the outer relation, try the next one. */
2296 600 : break;
2297 : }
2298 : }
2299 :
2300 11132 : return result;
2301 : }
2302 :
2303 : /*
2304 : * Gather indexes of base relations from the joinlist and try to eliminate self
2305 : * joins.
2306 : */
2307 : static Relids
2308 105850 : remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove)
2309 : {
2310 : ListCell *jl;
2311 105850 : Relids relids = NULL;
2312 105850 : SelfJoinCandidate *candidates = NULL;
2313 : int i;
2314 : int j;
2315 : int numRels;
2316 :
2317 : /* Collect indexes of base relations of the join tree */
2318 352952 : foreach(jl, joinlist)
2319 : {
2320 247102 : Node *jlnode = (Node *) lfirst(jl);
2321 :
2322 247102 : if (IsA(jlnode, RangeTblRef))
2323 : {
2324 243706 : int varno = ((RangeTblRef *) jlnode)->rtindex;
2325 243706 : RangeTblEntry *rte = root->simple_rte_array[varno];
2326 :
2327 : /*
2328 : * We only consider ordinary relations as candidates to be
2329 : * removed, and these relations should not have TABLESAMPLE
2330 : * clauses specified. Removing a relation with TABLESAMPLE clause
2331 : * could potentially change the syntax of the query. Because of
2332 : * UPDATE/DELETE EPQ mechanism, currently Query->resultRelation or
2333 : * Query->mergeTargetRelation associated rel cannot be eliminated.
2334 : */
2335 243706 : if (rte->rtekind == RTE_RELATION &&
2336 212142 : rte->relkind == RELKIND_RELATION &&
2337 206450 : rte->tablesample == NULL &&
2338 206426 : varno != root->parse->resultRelation &&
2339 204550 : varno != root->parse->mergeTargetRelation)
2340 : {
2341 : Assert(!bms_is_member(varno, relids));
2342 204550 : relids = bms_add_member(relids, varno);
2343 : }
2344 : }
2345 3396 : else if (IsA(jlnode, List))
2346 : {
2347 : /* Recursively go inside the sub-joinlist */
2348 3396 : toRemove = remove_self_joins_recurse(root, (List *) jlnode,
2349 : toRemove);
2350 : }
2351 : else
2352 0 : elog(ERROR, "unrecognized joinlist node type: %d",
2353 : (int) nodeTag(jlnode));
2354 : }
2355 :
2356 105850 : numRels = bms_num_members(relids);
2357 :
2358 : /* Need at least two relations for the join */
2359 105850 : if (numRels < 2)
2360 31246 : return toRemove;
2361 :
2362 : /*
2363 : * In order to find relations with the same oid we first build an array of
2364 : * candidates and then sort it by oid.
2365 : */
2366 74604 : candidates = palloc_array(SelfJoinCandidate, numRels);
2367 74604 : i = -1;
2368 74604 : j = 0;
2369 256774 : while ((i = bms_next_member(relids, i)) >= 0)
2370 : {
2371 182170 : candidates[j].relid = i;
2372 182170 : candidates[j].reloid = root->simple_rte_array[i]->relid;
2373 182170 : j++;
2374 : }
2375 :
2376 74604 : qsort(candidates, numRels, sizeof(SelfJoinCandidate),
2377 : self_join_candidates_cmp);
2378 :
2379 : /*
2380 : * Iteratively form a group of relation indexes with the same oid and
2381 : * launch the routine that detects self-joins in this group and removes
2382 : * excessive range table entries.
2383 : *
2384 : * At the end of the iteration, exclude the group from the overall relids
2385 : * list. So each next iteration of the cycle will involve less and less
2386 : * value of relids.
2387 : */
2388 74604 : i = 0;
2389 256774 : for (j = 1; j < numRels + 1; j++)
2390 : {
2391 182170 : if (j == numRels || candidates[j].reloid != candidates[i].reloid)
2392 : {
2393 169812 : if (j - i >= 2)
2394 : {
2395 : /* Create a group of relation indexes with the same oid */
2396 11054 : Relids group = NULL;
2397 : Relids removed;
2398 :
2399 34466 : while (i < j)
2400 : {
2401 23412 : group = bms_add_member(group, candidates[i].relid);
2402 23412 : i++;
2403 : }
2404 11054 : relids = bms_del_members(relids, group);
2405 :
2406 : /*
2407 : * Try to remove self-joins from a group of identical entries.
2408 : * Make the next attempt iteratively - if something is deleted
2409 : * from a group, changes in clauses and equivalence classes
2410 : * can give us a chance to find more candidates.
2411 : */
2412 : do
2413 : {
2414 : Assert(!bms_overlap(group, toRemove));
2415 11132 : removed = remove_self_joins_one_group(root, group);
2416 11132 : toRemove = bms_add_members(toRemove, removed);
2417 11132 : group = bms_del_members(group, removed);
2418 11708 : } while (!bms_is_empty(removed) &&
2419 576 : bms_membership(group) == BMS_MULTIPLE);
2420 11054 : bms_free(removed);
2421 11054 : bms_free(group);
2422 : }
2423 : else
2424 : {
2425 : /* Single relation, just remove it from the set */
2426 158758 : relids = bms_del_member(relids, candidates[i].relid);
2427 158758 : i = j;
2428 : }
2429 : }
2430 : }
2431 :
2432 : Assert(bms_is_empty(relids));
2433 :
2434 74604 : return toRemove;
2435 : }
2436 :
2437 : /*
2438 : * Compare self-join candidates by their oids.
2439 : */
2440 : static int
2441 133986 : self_join_candidates_cmp(const void *a, const void *b)
2442 : {
2443 133986 : const SelfJoinCandidate *ca = (const SelfJoinCandidate *) a;
2444 133986 : const SelfJoinCandidate *cb = (const SelfJoinCandidate *) b;
2445 :
2446 133986 : if (ca->reloid != cb->reloid)
2447 121574 : return (ca->reloid < cb->reloid ? -1 : 1);
2448 : else
2449 12412 : return 0;
2450 : }
2451 :
2452 : /*
2453 : * Find and remove useless self joins.
2454 : *
2455 : * Search for joins where a relation is joined to itself. If the join clause
2456 : * for each tuple from one side of the join is proven to match the same
2457 : * physical row (or nothing) on the other side, that self-join can be
2458 : * eliminated from the query. Suitable join clauses are assumed to be in the
2459 : * form of X = X, and can be replaced with NOT NULL clauses.
2460 : *
2461 : * For the sake of simplicity, we don't apply this optimization to special
2462 : * joins. Here is a list of what we could do in some particular cases:
2463 : * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins,
2464 : * and then removed normally.
2465 : * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND
2466 : * (IS NULL on join columns OR NOT inner quals)'.
2467 : * 'a a1 left join a a2': could simplify to a scan like inner but without
2468 : * NOT NULL conditions on join columns.
2469 : * 'a a1 left join (a a2 join b)': can't simplify this, because join to b
2470 : * can both remove rows and introduce duplicates.
2471 : *
2472 : * To search for removable joins, we order all the relations on their Oid,
2473 : * go over each set with the same Oid, and consider each pair of relations
2474 : * in this set.
2475 : *
2476 : * To remove the join, we mark one of the participating relations as dead
2477 : * and rewrite all references to it to point to the remaining relation.
2478 : * This includes modifying RestrictInfos, EquivalenceClasses, and
2479 : * EquivalenceMembers. We also have to modify the row marks. The join clauses
2480 : * of the removed relation become either restriction or join clauses, based on
2481 : * whether they reference any relations not participating in the removed join.
2482 : *
2483 : * 'joinlist' is the top-level joinlist of the query. If it has any
2484 : * references to the removed relations, we update them to point to the
2485 : * remaining ones.
2486 : */
2487 : List *
2488 341504 : remove_useless_self_joins(PlannerInfo *root, List *joinlist)
2489 : {
2490 341504 : Relids toRemove = NULL;
2491 341504 : int relid = -1;
2492 :
2493 683008 : if (!enable_self_join_elimination || joinlist == NIL ||
2494 581434 : (list_length(joinlist) == 1 && !IsA(linitial(joinlist), List)))
2495 239050 : return joinlist;
2496 :
2497 : /*
2498 : * Merge pairs of relations participated in self-join. Remove unnecessary
2499 : * range table entries.
2500 : */
2501 102454 : toRemove = remove_self_joins_recurse(root, joinlist, toRemove);
2502 :
2503 102454 : if (unlikely(toRemove != NULL))
2504 : {
2505 : /* At the end, remove orphaned relation links */
2506 1170 : while ((relid = bms_next_member(toRemove, relid)) >= 0)
2507 : {
2508 600 : int nremoved = 0;
2509 :
2510 600 : joinlist = remove_rel_from_joinlist(joinlist, relid, &nremoved);
2511 600 : if (nremoved != 1)
2512 0 : elog(ERROR, "failed to find relation %d in joinlist", relid);
2513 : }
2514 : }
2515 :
2516 102454 : return joinlist;
2517 : }
|