Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * analyzejoins.c
4 : * Routines for simplifying joins after initial query analysis
5 : *
6 : * While we do a great deal of join simplification in prep/prepjointree.c,
7 : * certain optimizations cannot be performed at that stage for lack of
8 : * detailed information about the query. The routines here are invoked
9 : * after initsplan.c has done its work, and can do additional join removal
10 : * and simplification steps based on the information extracted. The penalty
11 : * is that we have to work harder to clean up after ourselves when we modify
12 : * the query, since the derived data structures have to be updated too.
13 : *
14 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
15 : * Portions Copyright (c) 1994, Regents of the University of California
16 : *
17 : *
18 : * IDENTIFICATION
19 : * src/backend/optimizer/plan/analyzejoins.c
20 : *
21 : *-------------------------------------------------------------------------
22 : */
23 : #include "postgres.h"
24 :
25 : #include "catalog/pg_class.h"
26 : #include "nodes/nodeFuncs.h"
27 : #include "optimizer/joininfo.h"
28 : #include "optimizer/optimizer.h"
29 : #include "optimizer/pathnode.h"
30 : #include "optimizer/paths.h"
31 : #include "optimizer/placeholder.h"
32 : #include "optimizer/planmain.h"
33 : #include "optimizer/restrictinfo.h"
34 : #include "rewrite/rewriteManip.h"
35 : #include "utils/lsyscache.h"
36 :
37 : /*
38 : * Utility structure. A sorting procedure is needed to simplify the search
39 : * of SJE-candidate baserels referencing the same database relation. Having
40 : * collected all baserels from the query jointree, the planner sorts them
41 : * according to the reloid value, groups them with the next pass and attempts
42 : * to remove self-joins.
43 : *
44 : * Preliminary sorting prevents quadratic behavior that can be harmful in the
45 : * case of numerous joins.
46 : */
47 : typedef struct
48 : {
49 : int relid;
50 : Oid reloid;
51 : } SelfJoinCandidate;
52 :
53 : bool enable_self_join_elimination;
54 :
55 : /* local functions */
56 : static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
57 : static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
58 : SpecialJoinInfo *sjinfo);
59 : static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
60 : int relid, int ojrelid);
61 : static void remove_rel_from_eclass(EquivalenceClass *ec,
62 : int relid, int ojrelid, int subst);
63 : static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
64 : static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
65 : static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
66 : List *clause_list, List **extra_clauses);
67 : static Oid distinct_col_search(int colno, List *colnos, List *opids);
68 : static bool is_innerrel_unique_for(PlannerInfo *root,
69 : Relids joinrelids,
70 : Relids outerrelids,
71 : RelOptInfo *innerrel,
72 : JoinType jointype,
73 : List *restrictlist,
74 : List **extra_clauses);
75 : static int self_join_candidates_cmp(const void *a, const void *b);
76 :
77 :
78 : /*
79 : * remove_useless_joins
80 : * Check for relations that don't actually need to be joined at all,
81 : * and remove them from the query.
82 : *
83 : * We are passed the current joinlist and return the updated list. Other
84 : * data structures that have to be updated are accessible via "root".
85 : */
86 : List *
87 346038 : remove_useless_joins(PlannerInfo *root, List *joinlist)
88 : {
89 : ListCell *lc;
90 :
91 : /*
92 : * We are only interested in relations that are left-joined to, so we can
93 : * scan the join_info_list to find them easily.
94 : */
95 346038 : restart:
96 383394 : foreach(lc, root->join_info_list)
97 : {
98 47264 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
99 : int innerrelid;
100 : int nremoved;
101 :
102 : /* Skip if not removable */
103 47264 : if (!join_is_removable(root, sjinfo))
104 37356 : continue;
105 :
106 : /*
107 : * Currently, join_is_removable can only succeed when the sjinfo's
108 : * righthand is a single baserel. Remove that rel from the query and
109 : * joinlist.
110 : */
111 9908 : innerrelid = bms_singleton_member(sjinfo->min_righthand);
112 :
113 9908 : remove_leftjoinrel_from_query(root, innerrelid, sjinfo);
114 :
115 : /* We verify that exactly one reference gets removed from joinlist */
116 9908 : nremoved = 0;
117 9908 : joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
118 9908 : if (nremoved != 1)
119 0 : elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
120 :
121 : /*
122 : * We can delete this SpecialJoinInfo from the list too, since it's no
123 : * longer of interest. (Since we'll restart the foreach loop
124 : * immediately, we don't bother with foreach_delete_current.)
125 : */
126 9908 : root->join_info_list = list_delete_cell(root->join_info_list, lc);
127 :
128 : /*
129 : * Restart the scan. This is necessary to ensure we find all
130 : * removable joins independently of ordering of the join_info_list
131 : * (note that removal of attr_needed bits may make a join appear
132 : * removable that did not before).
133 : */
134 9908 : goto restart;
135 : }
136 :
137 336130 : return joinlist;
138 : }
139 :
140 : /*
141 : * join_is_removable
142 : * Check whether we need not perform this special join at all, because
143 : * it will just duplicate its left input.
144 : *
145 : * This is true for a left join for which the join condition cannot match
146 : * more than one inner-side row. (There are other possibly interesting
147 : * cases, but we don't have the infrastructure to prove them.) We also
148 : * have to check that the inner side doesn't generate any variables needed
149 : * above the join.
150 : */
151 : static bool
152 47264 : join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
153 : {
154 : int innerrelid;
155 : RelOptInfo *innerrel;
156 : Relids inputrelids;
157 : Relids joinrelids;
158 47264 : List *clause_list = NIL;
159 : ListCell *l;
160 : int attroff;
161 :
162 : /*
163 : * Must be a left join to a single baserel, else we aren't going to be
164 : * able to do anything with it.
165 : */
166 47264 : if (sjinfo->jointype != JOIN_LEFT)
167 6608 : return false;
168 :
169 40656 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
170 1266 : return false;
171 :
172 : /*
173 : * Never try to eliminate a left join to the query result rel. Although
174 : * the case is syntactically impossible in standard SQL, MERGE will build
175 : * a join tree that looks exactly like that.
176 : */
177 39390 : if (innerrelid == root->parse->resultRelation)
178 716 : return false;
179 :
180 38674 : innerrel = find_base_rel(root, innerrelid);
181 :
182 : /*
183 : * Before we go to the effort of checking whether any innerrel variables
184 : * are needed above the join, make a quick check to eliminate cases in
185 : * which we will surely be unable to prove uniqueness of the innerrel.
186 : */
187 38674 : if (!rel_supports_distinctness(root, innerrel))
188 2898 : return false;
189 :
190 : /* Compute the relid set for the join we are considering */
191 35776 : inputrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
192 : Assert(sjinfo->ojrelid != 0);
193 35776 : joinrelids = bms_copy(inputrelids);
194 35776 : joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
195 :
196 : /*
197 : * We can't remove the join if any inner-rel attributes are used above the
198 : * join. Here, "above" the join includes pushed-down conditions, so we
199 : * should reject if attr_needed includes the OJ's own relid; therefore,
200 : * compare to inputrelids not joinrelids.
201 : *
202 : * As a micro-optimization, it seems better to start with max_attr and
203 : * count down rather than starting with min_attr and counting up, on the
204 : * theory that the system attributes are somewhat less likely to be wanted
205 : * and should be tested last.
206 : */
207 319270 : for (attroff = innerrel->max_attr - innerrel->min_attr;
208 : attroff >= 0;
209 283494 : attroff--)
210 : {
211 309188 : if (!bms_is_subset(innerrel->attr_needed[attroff], inputrelids))
212 25694 : return false;
213 : }
214 :
215 : /*
216 : * Similarly check that the inner rel isn't needed by any PlaceHolderVars
217 : * that will be used above the join. The PHV case is a little bit more
218 : * complicated, because PHVs may have been assigned a ph_eval_at location
219 : * that includes the innerrel, yet their contained expression might not
220 : * actually reference the innerrel (it could be just a constant, for
221 : * instance). If such a PHV is due to be evaluated above the join then it
222 : * needn't prevent join removal.
223 : */
224 10238 : foreach(l, root->placeholder_list)
225 : {
226 192 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
227 :
228 192 : if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
229 36 : return false; /* it references innerrel laterally */
230 192 : if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
231 54 : continue; /* it definitely doesn't reference innerrel */
232 138 : if (bms_is_subset(phinfo->ph_needed, inputrelids))
233 6 : continue; /* PHV is not used above the join */
234 132 : if (!bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
235 30 : return false; /* it has to be evaluated below the join */
236 :
237 : /*
238 : * We need to be sure there will still be a place to evaluate the PHV
239 : * if we remove the join, ie that ph_eval_at wouldn't become empty.
240 : */
241 102 : if (!bms_overlap(sjinfo->min_lefthand, phinfo->ph_eval_at))
242 6 : return false; /* there isn't any other place to eval PHV */
243 : /* Check contained expression last, since this is a bit expensive */
244 96 : if (bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
245 96 : innerrel->relids))
246 0 : return false; /* contained expression references innerrel */
247 : }
248 :
249 : /*
250 : * Search for mergejoinable clauses that constrain the inner rel against
251 : * either the outer rel or a pseudoconstant. If an operator is
252 : * mergejoinable then it behaves like equality for some btree opclass, so
253 : * it's what we want. The mergejoinability test also eliminates clauses
254 : * containing volatile functions, which we couldn't depend on.
255 : */
256 20432 : foreach(l, innerrel->joininfo)
257 : {
258 10386 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
259 :
260 : /*
261 : * If the current join commutes with some other outer join(s) via
262 : * outer join identity 3, there will be multiple clones of its join
263 : * clauses in the joininfo list. We want to consider only the
264 : * has_clone form of such clauses. Processing more than one form
265 : * would be wasteful, and also some of the others would confuse the
266 : * RINFO_IS_PUSHED_DOWN test below.
267 : */
268 10386 : if (restrictinfo->is_clone)
269 136 : continue; /* ignore it */
270 :
271 : /*
272 : * If it's not a join clause for this outer join, we can't use it.
273 : * Note that if the clause is pushed-down, then it is logically from
274 : * above the outer join, even if it references no other rels (it might
275 : * be from WHERE, for example).
276 : */
277 10250 : if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
278 114 : continue; /* ignore; not useful here */
279 :
280 : /* Ignore if it's not a mergejoinable clause */
281 10136 : if (!restrictinfo->can_join ||
282 10058 : restrictinfo->mergeopfamilies == NIL)
283 78 : continue; /* not mergejoinable */
284 :
285 : /*
286 : * Check if the clause has the form "outer op inner" or "inner op
287 : * outer", and if so mark which side is inner.
288 : */
289 10058 : if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
290 : innerrel->relids))
291 6 : continue; /* no good for these input relations */
292 :
293 : /* OK, add to list */
294 10052 : clause_list = lappend(clause_list, restrictinfo);
295 : }
296 :
297 : /*
298 : * Now that we have the relevant equality join clauses, try to prove the
299 : * innerrel distinct.
300 : */
301 10046 : if (rel_is_distinct_for(root, innerrel, clause_list, NULL))
302 9908 : return true;
303 :
304 : /*
305 : * Some day it would be nice to check for other methods of establishing
306 : * distinctness.
307 : */
308 138 : return false;
309 : }
310 :
311 :
312 : /*
313 : * Remove the target rel->relid and references to the target join from the
314 : * planner's data structures, having determined that there is no need
315 : * to include them in the query. Optionally replace them with subst if subst
316 : * is non-negative.
317 : *
318 : * This function updates only parts needed for both left-join removal and
319 : * self-join removal.
320 : */
321 : static void
322 10496 : remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel,
323 : int subst, SpecialJoinInfo *sjinfo,
324 : Relids joinrelids)
325 : {
326 10496 : int relid = rel->relid;
327 10496 : int ojrelid = (sjinfo != NULL) ? sjinfo->ojrelid : -1;
328 : Index rti;
329 : ListCell *l;
330 :
331 : /*
332 : * Update all_baserels and related relid sets.
333 : */
334 10496 : root->all_baserels = adjust_relid_set(root->all_baserels, relid, subst);
335 10496 : root->outer_join_rels = adjust_relid_set(root->outer_join_rels, ojrelid, subst);
336 10496 : root->all_query_rels = adjust_relid_set(root->all_query_rels, relid, subst);
337 10496 : root->all_query_rels = adjust_relid_set(root->all_query_rels, ojrelid, subst);
338 :
339 : /*
340 : * Likewise remove references from SpecialJoinInfo data structures.
341 : *
342 : * This is relevant in case the outer join we're deleting is nested inside
343 : * other outer joins: the upper joins' relid sets have to be adjusted. The
344 : * RHS of the target outer join will be made empty here, but that's OK
345 : * since caller will delete that SpecialJoinInfo entirely.
346 : */
347 24182 : foreach(l, root->join_info_list)
348 : {
349 13686 : SpecialJoinInfo *sjinf = (SpecialJoinInfo *) lfirst(l);
350 :
351 : /*
352 : * initsplan.c is fairly cavalier about allowing SpecialJoinInfos'
353 : * lefthand/righthand relid sets to be shared with other data
354 : * structures. Ensure that we don't modify the original relid sets.
355 : * (The commute_xxx sets are always per-SpecialJoinInfo though.)
356 : */
357 13686 : sjinf->min_lefthand = bms_copy(sjinf->min_lefthand);
358 13686 : sjinf->min_righthand = bms_copy(sjinf->min_righthand);
359 13686 : sjinf->syn_lefthand = bms_copy(sjinf->syn_lefthand);
360 13686 : sjinf->syn_righthand = bms_copy(sjinf->syn_righthand);
361 : /* Now remove relid from the sets: */
362 13686 : sjinf->min_lefthand = adjust_relid_set(sjinf->min_lefthand, relid, subst);
363 13686 : sjinf->min_righthand = adjust_relid_set(sjinf->min_righthand, relid, subst);
364 13686 : sjinf->syn_lefthand = adjust_relid_set(sjinf->syn_lefthand, relid, subst);
365 13686 : sjinf->syn_righthand = adjust_relid_set(sjinf->syn_righthand, relid, subst);
366 :
367 13686 : if (sjinfo != NULL)
368 : {
369 : Assert(subst <= 0 && ojrelid > 0);
370 :
371 : /* Remove ojrelid bits from the sets: */
372 13590 : sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, ojrelid);
373 13590 : sjinf->min_righthand = bms_del_member(sjinf->min_righthand, ojrelid);
374 13590 : sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, ojrelid);
375 13590 : sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, ojrelid);
376 : /* relid cannot appear in these fields, but ojrelid can: */
377 13590 : sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l, ojrelid);
378 13590 : sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r, ojrelid);
379 13590 : sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l, ojrelid);
380 13590 : sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r, ojrelid);
381 : }
382 : else
383 : {
384 : Assert(subst > 0 && ojrelid == -1);
385 :
386 96 : ChangeVarNodes((Node *) sjinf->semi_rhs_exprs, relid, subst, 0);
387 : }
388 : }
389 :
390 : /*
391 : * Likewise remove references from PlaceHolderVar data structures,
392 : * removing any no-longer-needed placeholders entirely.
393 : *
394 : * Removal is a bit trickier than it might seem: we can remove PHVs that
395 : * are used at the target rel and/or in the join qual, but not those that
396 : * are used at join partner rels or above the join. It's not that easy to
397 : * distinguish PHVs used at partner rels from those used in the join qual,
398 : * since they will both have ph_needed sets that are subsets of
399 : * joinrelids. However, a PHV used at a partner rel could not have the
400 : * target rel in ph_eval_at, so we check that while deciding whether to
401 : * remove or just update the PHV. There is no corresponding test in
402 : * join_is_removable because it doesn't need to distinguish those cases.
403 : */
404 10670 : foreach(l, root->placeholder_list)
405 : {
406 174 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
407 :
408 : Assert(sjinfo == NULL || !bms_is_member(relid, phinfo->ph_lateral));
409 210 : if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
410 54 : bms_is_member(relid, phinfo->ph_eval_at) &&
411 12 : (sjinfo == NULL || !bms_is_member(ojrelid, phinfo->ph_eval_at)))
412 : {
413 12 : root->placeholder_list = foreach_delete_current(root->placeholder_list,
414 : l);
415 12 : root->placeholder_array[phinfo->phid] = NULL;
416 : }
417 : else
418 : {
419 162 : PlaceHolderVar *phv = phinfo->ph_var;
420 :
421 162 : phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at, relid, subst);
422 162 : phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at, ojrelid, subst);
423 : Assert(!bms_is_empty(phinfo->ph_eval_at)); /* checked previously */
424 : /* Reduce ph_needed to contain only "relation 0"; see below */
425 162 : if (bms_is_member(0, phinfo->ph_needed))
426 84 : phinfo->ph_needed = bms_make_singleton(0);
427 : else
428 78 : phinfo->ph_needed = NULL;
429 :
430 162 : phinfo->ph_lateral = adjust_relid_set(phinfo->ph_lateral, relid, subst);
431 :
432 : /*
433 : * ph_lateral might contain rels mentioned in ph_eval_at after the
434 : * replacement, remove them.
435 : */
436 162 : phinfo->ph_lateral = bms_difference(phinfo->ph_lateral, phinfo->ph_eval_at);
437 : /* ph_lateral might or might not be empty */
438 :
439 162 : phv->phrels = adjust_relid_set(phv->phrels, relid, subst);
440 162 : phv->phrels = adjust_relid_set(phv->phrels, ojrelid, subst);
441 : Assert(!bms_is_empty(phv->phrels));
442 :
443 162 : ChangeVarNodes((Node *) phv->phexpr, relid, subst, 0);
444 :
445 : Assert(phv->phnullingrels == NULL); /* no need to adjust */
446 : }
447 : }
448 :
449 : /*
450 : * Likewise remove references from EquivalenceClasses.
451 : */
452 56036 : foreach(l, root->eq_classes)
453 : {
454 45540 : EquivalenceClass *ec = (EquivalenceClass *) lfirst(l);
455 :
456 45540 : if (bms_is_member(relid, ec->ec_relids) ||
457 30294 : (sjinfo == NULL || bms_is_member(ojrelid, ec->ec_relids)))
458 15246 : remove_rel_from_eclass(ec, relid, ojrelid, subst);
459 : }
460 :
461 : /*
462 : * Finally, we must recompute per-Var attr_needed and per-PlaceHolderVar
463 : * ph_needed relid sets. These have to be known accurately, else we may
464 : * fail to remove other now-removable outer joins. And our removal of the
465 : * join clause(s) for this outer join may mean that Vars that were
466 : * formerly needed no longer are. So we have to do this honestly by
467 : * repeating the construction of those relid sets. We can cheat to one
468 : * small extent: we can avoid re-examining the targetlist and HAVING qual
469 : * by preserving "relation 0" bits from the existing relid sets. This is
470 : * safe because we'd never remove such references.
471 : *
472 : * So, start by removing all other bits from attr_needed sets and
473 : * lateral_vars lists. (We already did this above for ph_needed.)
474 : */
475 65178 : for (rti = 1; rti < root->simple_rel_array_size; rti++)
476 : {
477 54682 : RelOptInfo *otherrel = root->simple_rel_array[rti];
478 : int attroff;
479 :
480 : /* there may be empty slots corresponding to non-baserel RTEs */
481 54682 : if (otherrel == NULL)
482 26300 : continue;
483 :
484 : Assert(otherrel->relid == rti); /* sanity check on array */
485 :
486 625790 : for (attroff = otherrel->max_attr - otherrel->min_attr;
487 : attroff >= 0;
488 597408 : attroff--)
489 : {
490 597408 : if (bms_is_member(0, otherrel->attr_needed[attroff]))
491 45194 : otherrel->attr_needed[attroff] = bms_make_singleton(0);
492 : else
493 552214 : otherrel->attr_needed[attroff] = NULL;
494 : }
495 :
496 28382 : if (subst > 0)
497 1460 : ChangeVarNodes((Node *) otherrel->lateral_vars, relid, subst, 0);
498 : }
499 10496 : }
500 :
501 : /*
502 : * Remove the target relid and references to the target join from the
503 : * planner's data structures, having determined that there is no need
504 : * to include them in the query.
505 : *
506 : * We are not terribly thorough here. We only bother to update parts of
507 : * the planner's data structures that will actually be consulted later.
508 : */
509 : static void
510 9908 : remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
511 : SpecialJoinInfo *sjinfo)
512 : {
513 9908 : RelOptInfo *rel = find_base_rel(root, relid);
514 9908 : int ojrelid = sjinfo->ojrelid;
515 : Relids joinrelids;
516 : Relids join_plus_commute;
517 : List *joininfos;
518 : ListCell *l;
519 :
520 : /* Compute the relid set for the join we are considering */
521 9908 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
522 : Assert(ojrelid != 0);
523 9908 : joinrelids = bms_add_member(joinrelids, ojrelid);
524 :
525 9908 : remove_rel_from_query(root, rel, -1, sjinfo, joinrelids);
526 :
527 : /*
528 : * Remove any joinquals referencing the rel from the joininfo lists.
529 : *
530 : * In some cases, a joinqual has to be put back after deleting its
531 : * reference to the target rel. This can occur for pseudoconstant and
532 : * outerjoin-delayed quals, which can get marked as requiring the rel in
533 : * order to force them to be evaluated at or above the join. We can't
534 : * just discard them, though. Only quals that logically belonged to the
535 : * outer join being discarded should be removed from the query.
536 : *
537 : * We might encounter a qual that is a clone of a deletable qual with some
538 : * outer-join relids added (see deconstruct_distribute_oj_quals). To
539 : * ensure we get rid of such clones as well, add the relids of all OJs
540 : * commutable with this one to the set we test against for
541 : * pushed-down-ness.
542 : */
543 9908 : join_plus_commute = bms_union(joinrelids,
544 9908 : sjinfo->commute_above_r);
545 9908 : join_plus_commute = bms_add_members(join_plus_commute,
546 9908 : sjinfo->commute_below_l);
547 :
548 : /*
549 : * We must make a copy of the rel's old joininfo list before starting the
550 : * loop, because otherwise remove_join_clause_from_rels would destroy the
551 : * list while we're scanning it.
552 : */
553 9908 : joininfos = list_copy(rel->joininfo);
554 20162 : foreach(l, joininfos)
555 : {
556 10254 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
557 :
558 10254 : remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
559 :
560 10254 : if (RINFO_IS_PUSHED_DOWN(rinfo, join_plus_commute))
561 : {
562 : /*
563 : * There might be references to relid or ojrelid in the
564 : * RestrictInfo's relid sets, as a consequence of PHVs having had
565 : * ph_eval_at sets that include those. We already checked above
566 : * that any such PHV is safe (and updated its ph_eval_at), so we
567 : * can just drop those references.
568 : */
569 108 : remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
570 :
571 : /*
572 : * Cross-check that the clause itself does not reference the
573 : * target rel or join.
574 : */
575 : #ifdef USE_ASSERT_CHECKING
576 : {
577 : Relids clause_varnos = pull_varnos(root,
578 : (Node *) rinfo->clause);
579 :
580 : Assert(!bms_is_member(relid, clause_varnos));
581 : Assert(!bms_is_member(ojrelid, clause_varnos));
582 : }
583 : #endif
584 : /* Now throw it back into the joininfo lists */
585 108 : distribute_restrictinfo_to_rels(root, rinfo);
586 : }
587 : }
588 :
589 : /*
590 : * There may be references to the rel in root->fkey_list, but if so,
591 : * match_foreign_keys_to_quals() will get rid of them.
592 : */
593 :
594 : /*
595 : * Now remove the rel from the baserel array to prevent it from being
596 : * referenced again. (We can't do this earlier because
597 : * remove_join_clause_from_rels will touch it.)
598 : */
599 9908 : root->simple_rel_array[relid] = NULL;
600 :
601 : /* And nuke the RelOptInfo, just in case there's another access path */
602 9908 : pfree(rel);
603 :
604 : /*
605 : * Now repeat construction of attr_needed bits coming from all other
606 : * sources.
607 : */
608 9908 : rebuild_placeholder_attr_needed(root);
609 9908 : rebuild_joinclause_attr_needed(root);
610 9908 : rebuild_eclass_attr_needed(root);
611 9908 : rebuild_lateral_attr_needed(root);
612 9908 : }
613 :
614 : /*
615 : * Remove any references to relid or ojrelid from the RestrictInfo.
616 : *
617 : * We only bother to clean out bits in clause_relids and required_relids,
618 : * not nullingrel bits in contained Vars and PHVs. (This might have to be
619 : * improved sometime.) However, if the RestrictInfo contains an OR clause
620 : * we have to also clean up the sub-clauses.
621 : */
622 : static void
623 4318 : remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid)
624 : {
625 : /*
626 : * initsplan.c is fairly cavalier about allowing RestrictInfos to share
627 : * relid sets with other RestrictInfos, and SpecialJoinInfos too. Make
628 : * sure this RestrictInfo has its own relid sets before we modify them.
629 : * (In present usage, clause_relids is probably not shared, but
630 : * required_relids could be; let's not assume anything.)
631 : */
632 4318 : rinfo->clause_relids = bms_copy(rinfo->clause_relids);
633 4318 : rinfo->clause_relids = bms_del_member(rinfo->clause_relids, relid);
634 4318 : rinfo->clause_relids = bms_del_member(rinfo->clause_relids, ojrelid);
635 : /* Likewise for required_relids */
636 4318 : rinfo->required_relids = bms_copy(rinfo->required_relids);
637 4318 : rinfo->required_relids = bms_del_member(rinfo->required_relids, relid);
638 4318 : rinfo->required_relids = bms_del_member(rinfo->required_relids, ojrelid);
639 :
640 : /* If it's an OR, recurse to clean up sub-clauses */
641 4318 : if (restriction_is_or_clause(rinfo))
642 : {
643 : ListCell *lc;
644 :
645 : Assert(is_orclause(rinfo->orclause));
646 18 : foreach(lc, ((BoolExpr *) rinfo->orclause)->args)
647 : {
648 12 : Node *orarg = (Node *) lfirst(lc);
649 :
650 : /* OR arguments should be ANDs or sub-RestrictInfos */
651 12 : if (is_andclause(orarg))
652 : {
653 0 : List *andargs = ((BoolExpr *) orarg)->args;
654 : ListCell *lc2;
655 :
656 0 : foreach(lc2, andargs)
657 : {
658 0 : RestrictInfo *rinfo2 = lfirst_node(RestrictInfo, lc2);
659 :
660 0 : remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
661 : }
662 : }
663 : else
664 : {
665 12 : RestrictInfo *rinfo2 = castNode(RestrictInfo, orarg);
666 :
667 12 : remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
668 : }
669 : }
670 : }
671 4318 : }
672 :
673 : /*
674 : * Remove any references to relid or ojrelid from the EquivalenceClass.
675 : *
676 : * Like remove_rel_from_restrictinfo, we don't worry about cleaning out
677 : * any nullingrel bits in contained Vars and PHVs. (This might have to be
678 : * improved sometime.) We do need to fix the EC and EM relid sets to ensure
679 : * that implied join equalities will be generated at the appropriate join
680 : * level(s).
681 : */
682 : static void
683 15246 : remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid, int subst)
684 : {
685 : ListCell *lc;
686 :
687 : /* Fix up the EC's overall relids */
688 15246 : ec->ec_relids = adjust_relid_set(ec->ec_relids, relid, subst);
689 15246 : ec->ec_relids = adjust_relid_set(ec->ec_relids, ojrelid, subst);
690 :
691 : /*
692 : * Fix up the member expressions. Any non-const member that ends with
693 : * empty em_relids must be a Var or PHV of the removed relation. We don't
694 : * need it anymore, so we can drop it.
695 : */
696 35460 : foreach(lc, ec->ec_members)
697 : {
698 20214 : EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
699 :
700 20214 : if (bms_is_member(relid, cur_em->em_relids) ||
701 4198 : (ojrelid != -1 && bms_is_member(ojrelid, cur_em->em_relids)))
702 : {
703 : Assert(!cur_em->em_is_const);
704 14208 : cur_em->em_relids = adjust_relid_set(cur_em->em_relids, relid, subst);
705 14208 : cur_em->em_relids = adjust_relid_set(cur_em->em_relids, ojrelid, subst);
706 14208 : if (bms_is_empty(cur_em->em_relids))
707 14196 : ec->ec_members = foreach_delete_current(ec->ec_members, lc);
708 : }
709 : }
710 :
711 : /* Fix up the source clauses, in case we can re-use them later */
712 20482 : foreach(lc, ec->ec_sources)
713 : {
714 5236 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
715 :
716 5236 : if (ojrelid == -1)
717 1038 : ChangeVarNodes((Node *) rinfo, relid, subst, 0);
718 : else
719 4198 : remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
720 : }
721 :
722 : /*
723 : * Rather than expend code on fixing up any already-derived clauses, just
724 : * drop them. (At this point, any such clauses would be base restriction
725 : * clauses, which we'd not need anymore anyway.)
726 : */
727 15246 : ec->ec_derives = NIL;
728 15246 : }
729 :
730 : /*
731 : * Remove any occurrences of the target relid from a joinlist structure.
732 : *
733 : * It's easiest to build a whole new list structure, so we handle it that
734 : * way. Efficiency is not a big deal here.
735 : *
736 : * *nremoved is incremented by the number of occurrences removed (there
737 : * should be exactly one, but the caller checks that).
738 : */
739 : static List *
740 10760 : remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
741 : {
742 10760 : List *result = NIL;
743 : ListCell *jl;
744 :
745 39406 : foreach(jl, joinlist)
746 : {
747 28646 : Node *jlnode = (Node *) lfirst(jl);
748 :
749 28646 : if (IsA(jlnode, RangeTblRef))
750 : {
751 28382 : int varno = ((RangeTblRef *) jlnode)->rtindex;
752 :
753 28382 : if (varno == relid)
754 10496 : (*nremoved)++;
755 : else
756 17886 : result = lappend(result, jlnode);
757 : }
758 264 : else if (IsA(jlnode, List))
759 : {
760 : /* Recurse to handle subproblem */
761 : List *sublist;
762 :
763 264 : sublist = remove_rel_from_joinlist((List *) jlnode,
764 : relid, nremoved);
765 : /* Avoid including empty sub-lists in the result */
766 264 : if (sublist)
767 264 : result = lappend(result, sublist);
768 : }
769 : else
770 : {
771 0 : elog(ERROR, "unrecognized joinlist node type: %d",
772 : (int) nodeTag(jlnode));
773 : }
774 : }
775 :
776 10760 : return result;
777 : }
778 :
779 :
780 : /*
781 : * reduce_unique_semijoins
782 : * Check for semijoins that can be simplified to plain inner joins
783 : * because the inner relation is provably unique for the join clauses.
784 : *
785 : * Ideally this would happen during reduce_outer_joins, but we don't have
786 : * enough information at that point.
787 : *
788 : * To perform the strength reduction when applicable, we need only delete
789 : * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't
790 : * bother fixing the join type attributed to it in the query jointree,
791 : * since that won't be consulted again.)
792 : */
793 : void
794 336130 : reduce_unique_semijoins(PlannerInfo *root)
795 : {
796 : ListCell *lc;
797 :
798 : /*
799 : * Scan the join_info_list to find semijoins.
800 : */
801 373260 : foreach(lc, root->join_info_list)
802 : {
803 37130 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
804 : int innerrelid;
805 : RelOptInfo *innerrel;
806 : Relids joinrelids;
807 : List *restrictlist;
808 :
809 : /*
810 : * Must be a semijoin to a single baserel, else we aren't going to be
811 : * able to do anything with it.
812 : */
813 37130 : if (sjinfo->jointype != JOIN_SEMI)
814 36876 : continue;
815 :
816 2010 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
817 164 : continue;
818 :
819 1846 : innerrel = find_base_rel(root, innerrelid);
820 :
821 : /*
822 : * Before we trouble to run generate_join_implied_equalities, make a
823 : * quick check to eliminate cases in which we will surely be unable to
824 : * prove uniqueness of the innerrel.
825 : */
826 1846 : if (!rel_supports_distinctness(root, innerrel))
827 810 : continue;
828 :
829 : /* Compute the relid set for the join we are considering */
830 1036 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
831 : Assert(sjinfo->ojrelid == 0); /* SEMI joins don't have RT indexes */
832 :
833 : /*
834 : * Since we're only considering a single-rel RHS, any join clauses it
835 : * has must be clauses linking it to the semijoin's min_lefthand. We
836 : * can also consider EC-derived join clauses.
837 : */
838 : restrictlist =
839 1036 : list_concat(generate_join_implied_equalities(root,
840 : joinrelids,
841 : sjinfo->min_lefthand,
842 : innerrel,
843 : NULL),
844 1036 : innerrel->joininfo);
845 :
846 : /* Test whether the innerrel is unique for those clauses. */
847 1036 : if (!innerrel_is_unique(root,
848 : joinrelids, sjinfo->min_lefthand, innerrel,
849 : JOIN_SEMI, restrictlist, true))
850 782 : continue;
851 :
852 : /* OK, remove the SpecialJoinInfo from the list. */
853 254 : root->join_info_list = foreach_delete_current(root->join_info_list, lc);
854 : }
855 336130 : }
856 :
857 :
858 : /*
859 : * rel_supports_distinctness
860 : * Could the relation possibly be proven distinct on some set of columns?
861 : *
862 : * This is effectively a pre-checking function for rel_is_distinct_for().
863 : * It must return true if rel_is_distinct_for() could possibly return true
864 : * with this rel, but it should not expend a lot of cycles. The idea is
865 : * that callers can avoid doing possibly-expensive processing to compute
866 : * rel_is_distinct_for()'s argument lists if the call could not possibly
867 : * succeed.
868 : */
869 : static bool
870 966076 : rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
871 : {
872 : /* We only know about baserels ... */
873 966076 : if (rel->reloptkind != RELOPT_BASEREL)
874 333092 : return false;
875 632984 : if (rel->rtekind == RTE_RELATION)
876 : {
877 : /*
878 : * For a plain relation, we only know how to prove uniqueness by
879 : * reference to unique indexes. Make sure there's at least one
880 : * suitable unique index. It must be immediately enforced, and not a
881 : * partial index. (Keep these conditions in sync with
882 : * relation_has_unique_index_for!)
883 : */
884 : ListCell *lc;
885 :
886 817092 : foreach(lc, rel->indexlist)
887 : {
888 768822 : IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
889 :
890 768822 : if (ind->unique && ind->immediate && ind->indpred == NIL)
891 559754 : return true;
892 : }
893 : }
894 24960 : else if (rel->rtekind == RTE_SUBQUERY)
895 : {
896 4116 : Query *subquery = root->simple_rte_array[rel->relid]->subquery;
897 :
898 : /* Check if the subquery has any qualities that support distinctness */
899 4116 : if (query_supports_distinctness(subquery))
900 2662 : return true;
901 : }
902 : /* We have no proof rules for any other rtekinds. */
903 70568 : return false;
904 : }
905 :
906 : /*
907 : * rel_is_distinct_for
908 : * Does the relation return only distinct rows according to clause_list?
909 : *
910 : * clause_list is a list of join restriction clauses involving this rel and
911 : * some other one. Return true if no two rows emitted by this rel could
912 : * possibly join to the same row of the other rel.
913 : *
914 : * The caller must have already determined that each condition is a
915 : * mergejoinable equality with an expression in this relation on one side, and
916 : * an expression not involving this relation on the other. The transient
917 : * outer_is_left flag is used to identify which side references this relation:
918 : * left side if outer_is_left is false, right side if it is true.
919 : *
920 : * Note that the passed-in clause_list may be destructively modified! This
921 : * is OK for current uses, because the clause_list is built by the caller for
922 : * the sole purpose of passing to this function.
923 : *
924 : * (*extra_clauses) to be set to the right sides of baserestrictinfo clauses,
925 : * looking like "x = const" if distinctness is derived from such clauses, not
926 : * joininfo clauses. Pass NULL to the extra_clauses if this value is not
927 : * needed.
928 : */
929 : static bool
930 367050 : rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list,
931 : List **extra_clauses)
932 : {
933 : /*
934 : * We could skip a couple of tests here if we assume all callers checked
935 : * rel_supports_distinctness first, but it doesn't seem worth taking any
936 : * risk for.
937 : */
938 367050 : if (rel->reloptkind != RELOPT_BASEREL)
939 0 : return false;
940 367050 : if (rel->rtekind == RTE_RELATION)
941 : {
942 : /*
943 : * Examine the indexes to see if we have a matching unique index.
944 : * relation_has_unique_index_ext automatically adds any usable
945 : * restriction clauses for the rel, so we needn't do that here.
946 : */
947 365316 : if (relation_has_unique_index_ext(root, rel, clause_list, NIL, NIL,
948 : extra_clauses))
949 213330 : return true;
950 : }
951 1734 : else if (rel->rtekind == RTE_SUBQUERY)
952 : {
953 1734 : Index relid = rel->relid;
954 1734 : Query *subquery = root->simple_rte_array[relid]->subquery;
955 1734 : List *colnos = NIL;
956 1734 : List *opids = NIL;
957 : ListCell *l;
958 :
959 : /*
960 : * Build the argument lists for query_is_distinct_for: a list of
961 : * output column numbers that the query needs to be distinct over, and
962 : * a list of equality operators that the output columns need to be
963 : * distinct according to.
964 : *
965 : * (XXX we are not considering restriction clauses attached to the
966 : * subquery; is that worth doing?)
967 : */
968 3438 : foreach(l, clause_list)
969 : {
970 1704 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
971 : Oid op;
972 : Var *var;
973 :
974 : /*
975 : * Get the equality operator we need uniqueness according to.
976 : * (This might be a cross-type operator and thus not exactly the
977 : * same operator the subquery would consider; that's all right
978 : * since query_is_distinct_for can resolve such cases.) The
979 : * caller's mergejoinability test should have selected only
980 : * OpExprs.
981 : */
982 1704 : op = castNode(OpExpr, rinfo->clause)->opno;
983 :
984 : /* caller identified the inner side for us */
985 1704 : if (rinfo->outer_is_left)
986 1390 : var = (Var *) get_rightop(rinfo->clause);
987 : else
988 314 : var = (Var *) get_leftop(rinfo->clause);
989 :
990 : /*
991 : * We may ignore any RelabelType node above the operand. (There
992 : * won't be more than one, since eval_const_expressions() has been
993 : * applied already.)
994 : */
995 1704 : if (var && IsA(var, RelabelType))
996 638 : var = (Var *) ((RelabelType *) var)->arg;
997 :
998 : /*
999 : * If inner side isn't a Var referencing a subquery output column,
1000 : * this clause doesn't help us.
1001 : */
1002 1704 : if (!var || !IsA(var, Var) ||
1003 1692 : var->varno != relid || var->varlevelsup != 0)
1004 12 : continue;
1005 :
1006 1692 : colnos = lappend_int(colnos, var->varattno);
1007 1692 : opids = lappend_oid(opids, op);
1008 : }
1009 :
1010 1734 : if (query_is_distinct_for(subquery, colnos, opids))
1011 212 : return true;
1012 : }
1013 153508 : return false;
1014 : }
1015 :
1016 :
1017 : /*
1018 : * query_supports_distinctness - could the query possibly be proven distinct
1019 : * on some set of output columns?
1020 : *
1021 : * This is effectively a pre-checking function for query_is_distinct_for().
1022 : * It must return true if query_is_distinct_for() could possibly return true
1023 : * with this query, but it should not expend a lot of cycles. The idea is
1024 : * that callers can avoid doing possibly-expensive processing to compute
1025 : * query_is_distinct_for()'s argument lists if the call could not possibly
1026 : * succeed.
1027 : */
1028 : bool
1029 4788 : query_supports_distinctness(Query *query)
1030 : {
1031 : /* SRFs break distinctness except with DISTINCT, see below */
1032 4788 : if (query->hasTargetSRFs && query->distinctClause == NIL)
1033 956 : return false;
1034 :
1035 : /* check for features we can prove distinctness with */
1036 3832 : if (query->distinctClause != NIL ||
1037 3688 : query->groupClause != NIL ||
1038 3500 : query->groupingSets != NIL ||
1039 3500 : query->hasAggs ||
1040 3228 : query->havingQual ||
1041 3228 : query->setOperations)
1042 3298 : return true;
1043 :
1044 534 : return false;
1045 : }
1046 :
1047 : /*
1048 : * query_is_distinct_for - does query never return duplicates of the
1049 : * specified columns?
1050 : *
1051 : * query is a not-yet-planned subquery (in current usage, it's always from
1052 : * a subquery RTE, which the planner avoids scribbling on).
1053 : *
1054 : * colnos is an integer list of output column numbers (resno's). We are
1055 : * interested in whether rows consisting of just these columns are certain
1056 : * to be distinct. "Distinctness" is defined according to whether the
1057 : * corresponding upper-level equality operators listed in opids would think
1058 : * the values are distinct. (Note: the opids entries could be cross-type
1059 : * operators, and thus not exactly the equality operators that the subquery
1060 : * would use itself. We use equality_ops_are_compatible() to check
1061 : * compatibility. That looks at btree or hash opfamily membership, and so
1062 : * should give trustworthy answers for all operators that we might need
1063 : * to deal with here.)
1064 : */
1065 : bool
1066 1856 : query_is_distinct_for(Query *query, List *colnos, List *opids)
1067 : {
1068 : ListCell *l;
1069 : Oid opid;
1070 :
1071 : Assert(list_length(colnos) == list_length(opids));
1072 :
1073 : /*
1074 : * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
1075 : * columns in the DISTINCT clause appear in colnos and operator semantics
1076 : * match. This is true even if there are SRFs in the DISTINCT columns or
1077 : * elsewhere in the tlist.
1078 : */
1079 1856 : if (query->distinctClause)
1080 : {
1081 150 : foreach(l, query->distinctClause)
1082 : {
1083 120 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
1084 120 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
1085 : query->targetList);
1086 :
1087 120 : opid = distinct_col_search(tle->resno, colnos, opids);
1088 120 : if (!OidIsValid(opid) ||
1089 48 : !equality_ops_are_compatible(opid, sgc->eqop))
1090 : break; /* exit early if no match */
1091 : }
1092 102 : if (l == NULL) /* had matches for all? */
1093 30 : return true;
1094 : }
1095 :
1096 : /*
1097 : * Otherwise, a set-returning function in the query's targetlist can
1098 : * result in returning duplicate rows, despite any grouping that might
1099 : * occur before tlist evaluation. (If all tlist SRFs are within GROUP BY
1100 : * columns, it would be safe because they'd be expanded before grouping.
1101 : * But it doesn't currently seem worth the effort to check for that.)
1102 : */
1103 1826 : if (query->hasTargetSRFs)
1104 0 : return false;
1105 :
1106 : /*
1107 : * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
1108 : * the grouped columns appear in colnos and operator semantics match.
1109 : */
1110 1826 : if (query->groupClause && !query->groupingSets)
1111 : {
1112 234 : foreach(l, query->groupClause)
1113 : {
1114 164 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
1115 164 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
1116 : query->targetList);
1117 :
1118 164 : opid = distinct_col_search(tle->resno, colnos, opids);
1119 164 : if (!OidIsValid(opid) ||
1120 112 : !equality_ops_are_compatible(opid, sgc->eqop))
1121 : break; /* exit early if no match */
1122 : }
1123 122 : if (l == NULL) /* had matches for all? */
1124 70 : return true;
1125 : }
1126 1704 : else if (query->groupingSets)
1127 : {
1128 : /*
1129 : * If we have grouping sets with expressions, we probably don't have
1130 : * uniqueness and analysis would be hard. Punt.
1131 : */
1132 0 : if (query->groupClause)
1133 0 : return false;
1134 :
1135 : /*
1136 : * If we have no groupClause (therefore no grouping expressions), we
1137 : * might have one or many empty grouping sets. If there's just one,
1138 : * then we're returning only one row and are certainly unique. But
1139 : * otherwise, we know we're certainly not unique.
1140 : */
1141 0 : if (list_length(query->groupingSets) == 1 &&
1142 0 : ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
1143 0 : return true;
1144 : else
1145 0 : return false;
1146 : }
1147 : else
1148 : {
1149 : /*
1150 : * If we have no GROUP BY, but do have aggregates or HAVING, then the
1151 : * result is at most one row so it's surely unique, for any operators.
1152 : */
1153 1704 : if (query->hasAggs || query->havingQual)
1154 100 : return true;
1155 : }
1156 :
1157 : /*
1158 : * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
1159 : * except with ALL.
1160 : */
1161 1656 : if (query->setOperations)
1162 : {
1163 1532 : SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations);
1164 :
1165 : Assert(topop->op != SETOP_NONE);
1166 :
1167 1532 : if (!topop->all)
1168 : {
1169 : ListCell *lg;
1170 :
1171 : /* We're good if all the nonjunk output columns are in colnos */
1172 72 : lg = list_head(topop->groupClauses);
1173 90 : foreach(l, query->targetList)
1174 : {
1175 78 : TargetEntry *tle = (TargetEntry *) lfirst(l);
1176 : SortGroupClause *sgc;
1177 :
1178 78 : if (tle->resjunk)
1179 0 : continue; /* ignore resjunk columns */
1180 :
1181 : /* non-resjunk columns should have grouping clauses */
1182 : Assert(lg != NULL);
1183 78 : sgc = (SortGroupClause *) lfirst(lg);
1184 78 : lg = lnext(topop->groupClauses, lg);
1185 :
1186 78 : opid = distinct_col_search(tle->resno, colnos, opids);
1187 78 : if (!OidIsValid(opid) ||
1188 18 : !equality_ops_are_compatible(opid, sgc->eqop))
1189 : break; /* exit early if no match */
1190 : }
1191 72 : if (l == NULL) /* had matches for all? */
1192 12 : return true;
1193 : }
1194 : }
1195 :
1196 : /*
1197 : * XXX Are there any other cases in which we can easily see the result
1198 : * must be distinct?
1199 : *
1200 : * If you do add more smarts to this function, be sure to update
1201 : * query_supports_distinctness() to match.
1202 : */
1203 :
1204 1644 : return false;
1205 : }
1206 :
1207 : /*
1208 : * distinct_col_search - subroutine for query_is_distinct_for
1209 : *
1210 : * If colno is in colnos, return the corresponding element of opids,
1211 : * else return InvalidOid. (Ordinarily colnos would not contain duplicates,
1212 : * but if it does, we arbitrarily select the first match.)
1213 : */
1214 : static Oid
1215 362 : distinct_col_search(int colno, List *colnos, List *opids)
1216 : {
1217 : ListCell *lc1,
1218 : *lc2;
1219 :
1220 574 : forboth(lc1, colnos, lc2, opids)
1221 : {
1222 390 : if (colno == lfirst_int(lc1))
1223 178 : return lfirst_oid(lc2);
1224 : }
1225 184 : return InvalidOid;
1226 : }
1227 :
1228 :
1229 : /*
1230 : * innerrel_is_unique
1231 : * Check if the innerrel provably contains at most one tuple matching any
1232 : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1233 : *
1234 : * We need an actual RelOptInfo for the innerrel, but it's sufficient to
1235 : * identify the outerrel by its Relids. This asymmetry supports use of this
1236 : * function before joinrels have been built. (The caller is expected to
1237 : * also supply the joinrelids, just to save recalculating that.)
1238 : *
1239 : * The proof must be made based only on clauses that will be "joinquals"
1240 : * rather than "otherquals" at execution. For an inner join there's no
1241 : * difference; but if the join is outer, we must ignore pushed-down quals,
1242 : * as those will become "otherquals". Note that this means the answer might
1243 : * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
1244 : * answer without regard to that, callers must take care not to call this
1245 : * with jointypes that would be classified differently by IS_OUTER_JOIN().
1246 : *
1247 : * The actual proof is undertaken by is_innerrel_unique_for(); this function
1248 : * is a frontend that is mainly concerned with caching the answers.
1249 : * In particular, the force_cache argument allows overriding the internal
1250 : * heuristic about whether to cache negative answers; it should be "true"
1251 : * if making an inquiry that is not part of the normal bottom-up join search
1252 : * sequence.
1253 : */
1254 : bool
1255 1097774 : innerrel_is_unique(PlannerInfo *root,
1256 : Relids joinrelids,
1257 : Relids outerrelids,
1258 : RelOptInfo *innerrel,
1259 : JoinType jointype,
1260 : List *restrictlist,
1261 : bool force_cache)
1262 : {
1263 1097774 : return innerrel_is_unique_ext(root, joinrelids, outerrelids, innerrel,
1264 : jointype, restrictlist, force_cache, NULL);
1265 : }
1266 :
1267 : /*
1268 : * innerrel_is_unique_ext
1269 : * Do the same as innerrel_is_unique(), but also set to (*extra_clauses)
1270 : * additional clauses from a baserestrictinfo list used to prove the
1271 : * uniqueness.
1272 : *
1273 : * A non-NULL extra_clauses indicates that we're checking for self-join and
1274 : * correspondingly dealing with filtered clauses.
1275 : */
1276 : bool
1277 1099750 : innerrel_is_unique_ext(PlannerInfo *root,
1278 : Relids joinrelids,
1279 : Relids outerrelids,
1280 : RelOptInfo *innerrel,
1281 : JoinType jointype,
1282 : List *restrictlist,
1283 : bool force_cache,
1284 : List **extra_clauses)
1285 : {
1286 : MemoryContext old_context;
1287 : ListCell *lc;
1288 : UniqueRelInfo *uniqueRelInfo;
1289 1099750 : List *outer_exprs = NIL;
1290 1099750 : bool self_join = (extra_clauses != NULL);
1291 :
1292 : /* Certainly can't prove uniqueness when there are no joinclauses */
1293 1099750 : if (restrictlist == NIL)
1294 174194 : return false;
1295 :
1296 : /*
1297 : * Make a quick check to eliminate cases in which we will surely be unable
1298 : * to prove uniqueness of the innerrel.
1299 : */
1300 925556 : if (!rel_supports_distinctness(root, innerrel))
1301 399952 : return false;
1302 :
1303 : /*
1304 : * Query the cache to see if we've managed to prove that innerrel is
1305 : * unique for any subset of this outerrel. For non-self-join search, we
1306 : * don't need an exact match, as extra outerrels can't make the innerrel
1307 : * any less unique (or more formally, the restrictlist for a join to a
1308 : * superset outerrel must be a superset of the conditions we successfully
1309 : * used before). For self-join search, we require an exact match of
1310 : * outerrels because we need extra clauses to be valid for our case. Also,
1311 : * for self-join checking we've filtered the clauses list. Thus, we can
1312 : * match only the result cached for a self-join search for another
1313 : * self-join check.
1314 : */
1315 609684 : foreach(lc, innerrel->unique_for_rels)
1316 : {
1317 252356 : uniqueRelInfo = (UniqueRelInfo *) lfirst(lc);
1318 :
1319 252356 : if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) ||
1320 68 : (self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) &&
1321 56 : uniqueRelInfo->self_join))
1322 : {
1323 168276 : if (extra_clauses)
1324 12 : *extra_clauses = uniqueRelInfo->extra_clauses;
1325 168276 : return true; /* Success! */
1326 : }
1327 : }
1328 :
1329 : /*
1330 : * Conversely, we may have already determined that this outerrel, or some
1331 : * superset thereof, cannot prove this innerrel to be unique.
1332 : */
1333 357812 : foreach(lc, innerrel->non_unique_for_rels)
1334 : {
1335 808 : Relids unique_for_rels = (Relids) lfirst(lc);
1336 :
1337 808 : if (bms_is_subset(outerrelids, unique_for_rels))
1338 324 : return false;
1339 : }
1340 :
1341 : /* No cached information, so try to make the proof. */
1342 357004 : if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
1343 : jointype, restrictlist,
1344 : self_join ? &outer_exprs : NULL))
1345 : {
1346 : /*
1347 : * Cache the positive result for future probes, being sure to keep it
1348 : * in the planner_cxt even if we are working in GEQO.
1349 : *
1350 : * Note: one might consider trying to isolate the minimal subset of
1351 : * the outerrels that proved the innerrel unique. But it's not worth
1352 : * the trouble, because the planner builds up joinrels incrementally
1353 : * and so we'll see the minimally sufficient outerrels before any
1354 : * supersets of them anyway.
1355 : */
1356 203634 : old_context = MemoryContextSwitchTo(root->planner_cxt);
1357 203634 : uniqueRelInfo = makeNode(UniqueRelInfo);
1358 203634 : uniqueRelInfo->outerrelids = bms_copy(outerrelids);
1359 203634 : uniqueRelInfo->self_join = self_join;
1360 203634 : uniqueRelInfo->extra_clauses = outer_exprs;
1361 203634 : innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
1362 : uniqueRelInfo);
1363 203634 : MemoryContextSwitchTo(old_context);
1364 :
1365 203634 : if (extra_clauses)
1366 642 : *extra_clauses = outer_exprs;
1367 203634 : return true; /* Success! */
1368 : }
1369 : else
1370 : {
1371 : /*
1372 : * None of the join conditions for outerrel proved innerrel unique, so
1373 : * we can safely reject this outerrel or any subset of it in future
1374 : * checks.
1375 : *
1376 : * However, in normal planning mode, caching this knowledge is totally
1377 : * pointless; it won't be queried again, because we build up joinrels
1378 : * from smaller to larger. It is useful in GEQO mode, where the
1379 : * knowledge can be carried across successive planning attempts; and
1380 : * it's likely to be useful when using join-search plugins, too. Hence
1381 : * cache when join_search_private is non-NULL. (Yeah, that's a hack,
1382 : * but it seems reasonable.)
1383 : *
1384 : * Also, allow callers to override that heuristic and force caching;
1385 : * that's useful for reduce_unique_semijoins, which calls here before
1386 : * the normal join search starts.
1387 : */
1388 153370 : if (force_cache || root->join_search_private)
1389 : {
1390 1106 : old_context = MemoryContextSwitchTo(root->planner_cxt);
1391 1106 : innerrel->non_unique_for_rels =
1392 1106 : lappend(innerrel->non_unique_for_rels,
1393 1106 : bms_copy(outerrelids));
1394 1106 : MemoryContextSwitchTo(old_context);
1395 : }
1396 :
1397 153370 : return false;
1398 : }
1399 : }
1400 :
1401 : /*
1402 : * is_innerrel_unique_for
1403 : * Check if the innerrel provably contains at most one tuple matching any
1404 : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1405 : */
1406 : static bool
1407 357004 : is_innerrel_unique_for(PlannerInfo *root,
1408 : Relids joinrelids,
1409 : Relids outerrelids,
1410 : RelOptInfo *innerrel,
1411 : JoinType jointype,
1412 : List *restrictlist,
1413 : List **extra_clauses)
1414 : {
1415 357004 : List *clause_list = NIL;
1416 : ListCell *lc;
1417 :
1418 : /*
1419 : * Search for mergejoinable clauses that constrain the inner rel against
1420 : * the outer rel. If an operator is mergejoinable then it behaves like
1421 : * equality for some btree opclass, so it's what we want. The
1422 : * mergejoinability test also eliminates clauses containing volatile
1423 : * functions, which we couldn't depend on.
1424 : */
1425 828182 : foreach(lc, restrictlist)
1426 : {
1427 471178 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
1428 :
1429 : /*
1430 : * As noted above, if it's a pushed-down clause and we're at an outer
1431 : * join, we can't use it.
1432 : */
1433 471178 : if (IS_OUTER_JOIN(jointype) &&
1434 89096 : RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
1435 3510 : continue;
1436 :
1437 : /* Ignore if it's not a mergejoinable clause */
1438 467668 : if (!restrictinfo->can_join ||
1439 417188 : restrictinfo->mergeopfamilies == NIL)
1440 51380 : continue; /* not mergejoinable */
1441 :
1442 : /*
1443 : * Check if the clause has the form "outer op inner" or "inner op
1444 : * outer", and if so mark which side is inner.
1445 : */
1446 416288 : if (!clause_sides_match_join(restrictinfo, outerrelids,
1447 : innerrel->relids))
1448 14 : continue; /* no good for these input relations */
1449 :
1450 : /* OK, add to the list */
1451 416274 : clause_list = lappend(clause_list, restrictinfo);
1452 : }
1453 :
1454 : /* Let rel_is_distinct_for() do the hard work */
1455 357004 : return rel_is_distinct_for(root, innerrel, clause_list, extra_clauses);
1456 : }
1457 :
1458 : /*
1459 : * Update EC members to point to the remaining relation instead of the removed
1460 : * one, removing duplicates.
1461 : *
1462 : * Restriction clauses for base relations are already distributed to
1463 : * the respective baserestrictinfo lists (see
1464 : * generate_implied_equalities_for_column). The above code has already processed
1465 : * this list and updated these clauses to reference the remaining
1466 : * relation, so that we can skip them here based on their relids.
1467 : *
1468 : * Likewise, we have already processed the join clauses that join the
1469 : * removed relation to the remaining one.
1470 : *
1471 : * Finally, there might be join clauses tying the removed relation to
1472 : * some third relation. We can't just delete the source clauses and
1473 : * regenerate them from the EC because the corresponding equality
1474 : * operators might be missing (see the handling of ec_broken).
1475 : * Therefore, we will update the references in the source clauses.
1476 : *
1477 : * Derived clauses can be generated again, so it is simpler just to
1478 : * delete them.
1479 : */
1480 : static void
1481 900 : update_eclasses(EquivalenceClass *ec, int from, int to)
1482 : {
1483 900 : List *new_members = NIL;
1484 900 : List *new_sources = NIL;
1485 :
1486 3636 : foreach_node(EquivalenceMember, em, ec->ec_members)
1487 : {
1488 1836 : bool is_redundant = false;
1489 :
1490 1836 : if (!bms_is_member(from, em->em_relids))
1491 : {
1492 918 : new_members = lappend(new_members, em);
1493 918 : continue;
1494 : }
1495 :
1496 918 : em->em_relids = adjust_relid_set(em->em_relids, from, to);
1497 918 : em->em_jdomain->jd_relids = adjust_relid_set(em->em_jdomain->jd_relids, from, to);
1498 :
1499 : /* We only process inner joins */
1500 918 : ChangeVarNodes((Node *) em->em_expr, from, to, 0);
1501 :
1502 1860 : foreach_node(EquivalenceMember, other, new_members)
1503 : {
1504 298 : if (!equal(em->em_relids, other->em_relids))
1505 24 : continue;
1506 :
1507 274 : if (equal(em->em_expr, other->em_expr))
1508 : {
1509 274 : is_redundant = true;
1510 274 : break;
1511 : }
1512 : }
1513 :
1514 918 : if (!is_redundant)
1515 644 : new_members = lappend(new_members, em);
1516 : }
1517 :
1518 900 : list_free(ec->ec_members);
1519 900 : ec->ec_members = new_members;
1520 :
1521 900 : list_free(ec->ec_derives);
1522 900 : ec->ec_derives = NULL;
1523 :
1524 : /* Update EC source expressions */
1525 2736 : foreach_node(RestrictInfo, rinfo, ec->ec_sources)
1526 : {
1527 936 : bool is_redundant = false;
1528 :
1529 936 : if (!bms_is_member(from, rinfo->required_relids))
1530 : {
1531 118 : new_sources = lappend(new_sources, rinfo);
1532 118 : continue;
1533 : }
1534 :
1535 818 : ChangeVarNodes((Node *) rinfo, from, to, 0);
1536 :
1537 : /*
1538 : * After switching the clause to the remaining relation, check it for
1539 : * redundancy with existing ones. We don't have to check for
1540 : * redundancy with derived clauses, because we've just deleted them.
1541 : */
1542 1660 : foreach_node(RestrictInfo, other, new_sources)
1543 : {
1544 36 : if (!equal(rinfo->clause_relids, other->clause_relids))
1545 24 : continue;
1546 :
1547 12 : if (equal(rinfo->clause, other->clause))
1548 : {
1549 12 : is_redundant = true;
1550 12 : break;
1551 : }
1552 : }
1553 :
1554 818 : if (!is_redundant)
1555 806 : new_sources = lappend(new_sources, rinfo);
1556 : }
1557 :
1558 900 : list_free(ec->ec_sources);
1559 900 : ec->ec_sources = new_sources;
1560 900 : ec->ec_relids = adjust_relid_set(ec->ec_relids, from, to);
1561 900 : }
1562 :
1563 : /*
1564 : * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field,
1565 : * which makes almost every RestrictInfo unique. This type of comparison is
1566 : * useful when removing duplicates while moving RestrictInfo's from removed
1567 : * relation to remaining relation during self-join elimination.
1568 : *
1569 : * XXX: In the future, we might remove the 'rinfo_serial' field completely and
1570 : * get rid of this function.
1571 : */
1572 : static bool
1573 506 : restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b)
1574 : {
1575 506 : int saved_rinfo_serial = a->rinfo_serial;
1576 : bool result;
1577 :
1578 506 : a->rinfo_serial = b->rinfo_serial;
1579 506 : result = equal(a, b);
1580 506 : a->rinfo_serial = saved_rinfo_serial;
1581 :
1582 506 : return result;
1583 : }
1584 :
1585 : /*
1586 : * This function adds all non-redundant clauses to the keeping relation
1587 : * during self-join elimination. That is a contradictory operation. On the
1588 : * one hand, we reduce the length of the `restrict` lists, which can
1589 : * impact planning or executing time. Additionally, we improve the
1590 : * accuracy of cardinality estimation. On the other hand, it is one more
1591 : * place that can make planning time much longer in specific cases. It
1592 : * would have been better to avoid calling the equal() function here, but
1593 : * it's the only way to detect duplicated inequality expressions.
1594 : *
1595 : * (*keep_rinfo_list) is given by pointer because it might be altered by
1596 : * distribute_restrictinfo_to_rels().
1597 : */
1598 : static void
1599 1176 : add_non_redundant_clauses(PlannerInfo *root,
1600 : List *rinfo_candidates,
1601 : List **keep_rinfo_list,
1602 : Index removed_relid)
1603 : {
1604 3250 : foreach_node(RestrictInfo, rinfo, rinfo_candidates)
1605 : {
1606 898 : bool is_redundant = false;
1607 :
1608 : Assert(!bms_is_member(removed_relid, rinfo->required_relids));
1609 :
1610 2160 : foreach_node(RestrictInfo, src, (*keep_rinfo_list))
1611 : {
1612 518 : if (!bms_equal(src->clause_relids, rinfo->clause_relids))
1613 : /* Can't compare trivially different clauses */
1614 6 : continue;
1615 :
1616 512 : if (src == rinfo ||
1617 512 : (rinfo->parent_ec != NULL &&
1618 804 : src->parent_ec == rinfo->parent_ec) ||
1619 506 : restrict_infos_logically_equal(rinfo, src))
1620 : {
1621 154 : is_redundant = true;
1622 154 : break;
1623 : }
1624 : }
1625 898 : if (!is_redundant)
1626 744 : distribute_restrictinfo_to_rels(root, rinfo);
1627 : }
1628 1176 : }
1629 :
1630 : /*
1631 : * Remove a relation after we have proven that it participates only in an
1632 : * unneeded unique self-join.
1633 : *
1634 : * Replace any links in planner info structures.
1635 : *
1636 : * Transfer join and restriction clauses from the removed relation to the
1637 : * remaining one. We change the Vars of the clause to point to the
1638 : * remaining relation instead of the removed one. The clauses that require
1639 : * a subset of joinrelids become restriction clauses of the remaining
1640 : * relation, and others remain join clauses. We append them to
1641 : * baserestrictinfo and joininfo, respectively, trying not to introduce
1642 : * duplicates.
1643 : *
1644 : * We also have to process the 'joinclauses' list here, because it
1645 : * contains EC-derived join clauses which must become filter clauses. It
1646 : * is not enough to just correct the ECs because the EC-derived
1647 : * restrictions are generated before join removal (see
1648 : * generate_base_implied_equalities).
1649 : *
1650 : * NOTE: Remember to keep the code in sync with PlannerInfo to be sure all
1651 : * cached relids and relid bitmapsets can be correctly cleaned during the
1652 : * self-join elimination procedure.
1653 : */
1654 : static void
1655 588 : remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark,
1656 : RelOptInfo *toKeep, RelOptInfo *toRemove,
1657 : List *restrictlist)
1658 : {
1659 : List *joininfos;
1660 : ListCell *lc;
1661 : int i;
1662 588 : List *jinfo_candidates = NIL;
1663 588 : List *binfo_candidates = NIL;
1664 :
1665 : Assert(toKeep->relid > 0);
1666 : Assert(toRemove->relid > 0);
1667 :
1668 : /*
1669 : * Replace the index of the removing table with the keeping one. The
1670 : * technique of removing/distributing restrictinfo is used here to attach
1671 : * just appeared (for keeping relation) join clauses and avoid adding
1672 : * duplicates of those that already exist in the joininfo list.
1673 : */
1674 588 : joininfos = list_copy(toRemove->joininfo);
1675 1254 : foreach_node(RestrictInfo, rinfo, joininfos)
1676 : {
1677 78 : remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
1678 78 : ChangeVarNodes((Node *) rinfo, toRemove->relid, toKeep->relid, 0);
1679 :
1680 78 : if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
1681 60 : jinfo_candidates = lappend(jinfo_candidates, rinfo);
1682 : else
1683 18 : binfo_candidates = lappend(binfo_candidates, rinfo);
1684 : }
1685 :
1686 : /*
1687 : * Concatenate restrictlist to the list of base restrictions of the
1688 : * removing table just to simplify the replacement procedure: all of them
1689 : * weren't connected to any keeping relations and need to be added to some
1690 : * rels.
1691 : */
1692 588 : toRemove->baserestrictinfo = list_concat(toRemove->baserestrictinfo,
1693 : restrictlist);
1694 1996 : foreach_node(RestrictInfo, rinfo, toRemove->baserestrictinfo)
1695 : {
1696 820 : ChangeVarNodes((Node *) rinfo, toRemove->relid, toKeep->relid, 0);
1697 :
1698 820 : if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
1699 0 : jinfo_candidates = lappend(jinfo_candidates, rinfo);
1700 : else
1701 820 : binfo_candidates = lappend(binfo_candidates, rinfo);
1702 : }
1703 :
1704 : /*
1705 : * Now, add all non-redundant clauses to the keeping relation.
1706 : */
1707 588 : add_non_redundant_clauses(root, binfo_candidates,
1708 : &toKeep->baserestrictinfo, toRemove->relid);
1709 588 : add_non_redundant_clauses(root, jinfo_candidates,
1710 : &toKeep->joininfo, toRemove->relid);
1711 :
1712 588 : list_free(binfo_candidates);
1713 588 : list_free(jinfo_candidates);
1714 :
1715 : /*
1716 : * Arrange equivalence classes, mentioned removing a table, with the
1717 : * keeping one: varno of removing table should be replaced in members and
1718 : * sources lists. Also, remove duplicated elements if this replacement
1719 : * procedure created them.
1720 : */
1721 588 : i = -1;
1722 1488 : while ((i = bms_next_member(toRemove->eclass_indexes, i)) >= 0)
1723 : {
1724 900 : EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
1725 :
1726 900 : update_eclasses(ec, toRemove->relid, toKeep->relid);
1727 900 : toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i);
1728 : }
1729 :
1730 : /*
1731 : * Transfer the targetlist and attr_needed flags.
1732 : */
1733 :
1734 2360 : foreach(lc, toRemove->reltarget->exprs)
1735 : {
1736 1772 : Node *node = lfirst(lc);
1737 :
1738 1772 : ChangeVarNodes(node, toRemove->relid, toKeep->relid, 0);
1739 1772 : if (!list_member(toKeep->reltarget->exprs, node))
1740 174 : toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node);
1741 : }
1742 :
1743 7470 : for (i = toKeep->min_attr; i <= toKeep->max_attr; i++)
1744 : {
1745 6882 : int attno = i - toKeep->min_attr;
1746 :
1747 13764 : toRemove->attr_needed[attno] = adjust_relid_set(toRemove->attr_needed[attno],
1748 6882 : toRemove->relid, toKeep->relid);
1749 6882 : toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno],
1750 6882 : toRemove->attr_needed[attno]);
1751 : }
1752 :
1753 : /*
1754 : * If the removed relation has a row mark, transfer it to the remaining
1755 : * one.
1756 : *
1757 : * If both rels have row marks, just keep the one corresponding to the
1758 : * remaining relation because we verified earlier that they have the same
1759 : * strength.
1760 : */
1761 588 : if (rmark)
1762 : {
1763 74 : if (kmark)
1764 : {
1765 : Assert(kmark->markType == rmark->markType);
1766 :
1767 74 : root->rowMarks = list_delete_ptr(root->rowMarks, rmark);
1768 : }
1769 : else
1770 : {
1771 : /* Shouldn't have inheritance children here. */
1772 : Assert(rmark->rti == rmark->prti);
1773 :
1774 0 : rmark->rti = rmark->prti = toKeep->relid;
1775 : }
1776 : }
1777 :
1778 : /*
1779 : * Replace varno in all the query structures, except nodes RangeTblRef
1780 : * otherwise later remove_rel_from_joinlist will yield errors.
1781 : */
1782 588 : ChangeVarNodesExtended((Node *) root->parse, toRemove->relid, toKeep->relid, 0, false);
1783 :
1784 : /* Replace links in the planner info */
1785 588 : remove_rel_from_query(root, toRemove, toKeep->relid, NULL, NULL);
1786 :
1787 : /* At last, replace varno in root targetlist and HAVING clause */
1788 588 : ChangeVarNodes((Node *) root->processed_tlist, toRemove->relid, toKeep->relid, 0);
1789 588 : ChangeVarNodes((Node *) root->processed_groupClause, toRemove->relid, toKeep->relid, 0);
1790 :
1791 588 : adjust_relid_set(root->all_result_relids, toRemove->relid, toKeep->relid);
1792 588 : adjust_relid_set(root->leaf_result_relids, toRemove->relid, toKeep->relid);
1793 :
1794 : /*
1795 : * There may be references to the rel in root->fkey_list, but if so,
1796 : * match_foreign_keys_to_quals() will get rid of them.
1797 : */
1798 :
1799 : /*
1800 : * Finally, remove the rel from the baserel array to prevent it from being
1801 : * referenced again. (We can't do this earlier because
1802 : * remove_join_clause_from_rels will touch it.)
1803 : */
1804 588 : root->simple_rel_array[toRemove->relid] = NULL;
1805 :
1806 : /* And nuke the RelOptInfo, just in case there's another access path. */
1807 588 : pfree(toRemove);
1808 :
1809 : /*
1810 : * Now repeat construction of attr_needed bits coming from all other
1811 : * sources.
1812 : */
1813 588 : rebuild_placeholder_attr_needed(root);
1814 588 : rebuild_joinclause_attr_needed(root);
1815 588 : rebuild_eclass_attr_needed(root);
1816 588 : rebuild_lateral_attr_needed(root);
1817 588 : }
1818 :
1819 : /*
1820 : * split_selfjoin_quals
1821 : * Processes 'joinquals' by building two lists: one containing the quals
1822 : * where the columns/exprs are on either side of the join match and
1823 : * another one containing the remaining quals.
1824 : *
1825 : * 'joinquals' must only contain quals for a RTE_RELATION being joined to
1826 : * itself.
1827 : */
1828 : static void
1829 1976 : split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals,
1830 : List **otherjoinquals, int from, int to)
1831 : {
1832 1976 : List *sjoinquals = NIL;
1833 1976 : List *ojoinquals = NIL;
1834 :
1835 6062 : foreach_node(RestrictInfo, rinfo, joinquals)
1836 : {
1837 : OpExpr *expr;
1838 : Node *leftexpr;
1839 : Node *rightexpr;
1840 :
1841 : /* In general, clause looks like F(arg1) = G(arg2) */
1842 4220 : if (!rinfo->mergeopfamilies ||
1843 4220 : bms_num_members(rinfo->clause_relids) != 2 ||
1844 4220 : bms_membership(rinfo->left_relids) != BMS_SINGLETON ||
1845 2110 : bms_membership(rinfo->right_relids) != BMS_SINGLETON)
1846 : {
1847 0 : ojoinquals = lappend(ojoinquals, rinfo);
1848 0 : continue;
1849 : }
1850 :
1851 2110 : expr = (OpExpr *) rinfo->clause;
1852 :
1853 2110 : if (!IsA(expr, OpExpr) || list_length(expr->args) != 2)
1854 : {
1855 0 : ojoinquals = lappend(ojoinquals, rinfo);
1856 0 : continue;
1857 : }
1858 :
1859 2110 : leftexpr = get_leftop(rinfo->clause);
1860 2110 : rightexpr = copyObject(get_rightop(rinfo->clause));
1861 :
1862 2110 : if (leftexpr && IsA(leftexpr, RelabelType))
1863 12 : leftexpr = (Node *) ((RelabelType *) leftexpr)->arg;
1864 2110 : if (rightexpr && IsA(rightexpr, RelabelType))
1865 6 : rightexpr = (Node *) ((RelabelType *) rightexpr)->arg;
1866 :
1867 : /*
1868 : * Quite an expensive operation, narrowing the use case. For example,
1869 : * when we have cast of the same var to different (but compatible)
1870 : * types.
1871 : */
1872 2110 : ChangeVarNodes(rightexpr, bms_singleton_member(rinfo->right_relids),
1873 2110 : bms_singleton_member(rinfo->left_relids), 0);
1874 :
1875 2110 : if (equal(leftexpr, rightexpr))
1876 1624 : sjoinquals = lappend(sjoinquals, rinfo);
1877 : else
1878 486 : ojoinquals = lappend(ojoinquals, rinfo);
1879 : }
1880 :
1881 1976 : *selfjoinquals = sjoinquals;
1882 1976 : *otherjoinquals = ojoinquals;
1883 1976 : }
1884 :
1885 : /*
1886 : * Check for a case when uniqueness is at least partly derived from a
1887 : * baserestrictinfo clause. In this case, we have a chance to return only
1888 : * one row (if such clauses on both sides of SJ are equal) or nothing (if they
1889 : * are different).
1890 : */
1891 : static bool
1892 654 : match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses,
1893 : Index relid)
1894 : {
1895 1326 : foreach_node(RestrictInfo, rinfo, uclauses)
1896 : {
1897 : Expr *clause;
1898 : Node *iclause;
1899 : Node *c1;
1900 150 : bool matched = false;
1901 :
1902 : Assert(outer->relid > 0 && relid > 0);
1903 :
1904 : /* Only filters like f(R.x1,...,R.xN) == expr we should consider. */
1905 : Assert(bms_is_empty(rinfo->left_relids) ^
1906 : bms_is_empty(rinfo->right_relids));
1907 :
1908 150 : clause = (Expr *) copyObject(rinfo->clause);
1909 150 : ChangeVarNodes((Node *) clause, relid, outer->relid, 0);
1910 :
1911 150 : iclause = bms_is_empty(rinfo->left_relids) ? get_rightop(clause) :
1912 144 : get_leftop(clause);
1913 150 : c1 = bms_is_empty(rinfo->left_relids) ? get_leftop(clause) :
1914 144 : get_rightop(clause);
1915 :
1916 : /*
1917 : * Compare these left and right sides with the corresponding sides of
1918 : * the outer's filters. If no one is detected - return immediately.
1919 : */
1920 408 : foreach_node(RestrictInfo, orinfo, outer->baserestrictinfo)
1921 : {
1922 : Node *oclause;
1923 : Node *c2;
1924 :
1925 192 : if (orinfo->mergeopfamilies == NIL)
1926 : /* Don't consider clauses that aren't similar to 'F(X)=G(Y)' */
1927 60 : continue;
1928 :
1929 : Assert(is_opclause(orinfo->clause));
1930 :
1931 264 : oclause = bms_is_empty(orinfo->left_relids) ?
1932 132 : get_rightop(orinfo->clause) : get_leftop(orinfo->clause);
1933 264 : c2 = (bms_is_empty(orinfo->left_relids) ?
1934 132 : get_leftop(orinfo->clause) : get_rightop(orinfo->clause));
1935 :
1936 132 : if (equal(iclause, oclause) && equal(c1, c2))
1937 : {
1938 84 : matched = true;
1939 84 : break;
1940 : }
1941 : }
1942 :
1943 150 : if (!matched)
1944 66 : return false;
1945 : }
1946 :
1947 588 : return true;
1948 : }
1949 :
1950 : /*
1951 : * Find and remove unique self-joins in a group of base relations that have
1952 : * the same Oid.
1953 : *
1954 : * Returns a set of relids that were removed.
1955 : */
1956 : static Relids
1957 10436 : remove_self_joins_one_group(PlannerInfo *root, Relids relids)
1958 : {
1959 10436 : Relids result = NULL;
1960 : int k; /* Index of kept relation */
1961 10436 : int r = -1; /* Index of removed relation */
1962 :
1963 32452 : while ((r = bms_next_member(relids, r)) > 0)
1964 : {
1965 22016 : RelOptInfo *inner = root->simple_rel_array[r];
1966 :
1967 22016 : k = r;
1968 :
1969 34370 : while ((k = bms_next_member(relids, k)) > 0)
1970 : {
1971 12942 : Relids joinrelids = NULL;
1972 12942 : RelOptInfo *outer = root->simple_rel_array[k];
1973 : List *restrictlist;
1974 : List *selfjoinquals;
1975 : List *otherjoinquals;
1976 : ListCell *lc;
1977 12942 : bool jinfo_check = true;
1978 12942 : PlanRowMark *omark = NULL;
1979 12942 : PlanRowMark *imark = NULL;
1980 12942 : List *uclauses = NIL;
1981 :
1982 : /* A sanity check: the relations have the same Oid. */
1983 : Assert(root->simple_rte_array[k]->relid ==
1984 : root->simple_rte_array[r]->relid);
1985 :
1986 : /*
1987 : * It is impossible to eliminate the join of two relations if they
1988 : * belong to different rules of order. Otherwise, the planner
1989 : * can't find any variants of the correct query plan.
1990 : */
1991 15980 : foreach(lc, root->join_info_list)
1992 : {
1993 10088 : SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc);
1994 :
1995 20176 : if ((bms_is_member(k, info->syn_lefthand) ^
1996 14342 : bms_is_member(r, info->syn_lefthand)) ||
1997 4254 : (bms_is_member(k, info->syn_righthand) ^
1998 4254 : bms_is_member(r, info->syn_righthand)))
1999 : {
2000 7050 : jinfo_check = false;
2001 7050 : break;
2002 : }
2003 : }
2004 12942 : if (!jinfo_check)
2005 12354 : continue;
2006 :
2007 : /*
2008 : * Check Row Marks equivalence. We can't remove the join if the
2009 : * relations have row marks of different strength (e.g., one is
2010 : * locked FOR UPDATE, and another just has ROW_MARK_REFERENCE for
2011 : * EvalPlanQual rechecking).
2012 : */
2013 6100 : foreach(lc, root->rowMarks)
2014 : {
2015 380 : PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc);
2016 :
2017 380 : if (rowMark->rti == k)
2018 : {
2019 : Assert(imark == NULL);
2020 172 : imark = rowMark;
2021 : }
2022 208 : else if (rowMark->rti == r)
2023 : {
2024 : Assert(omark == NULL);
2025 172 : omark = rowMark;
2026 : }
2027 :
2028 380 : if (omark && imark)
2029 172 : break;
2030 : }
2031 5892 : if (omark && imark && omark->markType != imark->markType)
2032 52 : continue;
2033 :
2034 : /*
2035 : * We only deal with base rels here, so their relids bitset
2036 : * contains only one member -- their relid.
2037 : */
2038 5840 : joinrelids = bms_add_member(joinrelids, r);
2039 5840 : joinrelids = bms_add_member(joinrelids, k);
2040 :
2041 : /*
2042 : * PHVs should not impose any constraints on removing self-joins.
2043 : */
2044 :
2045 : /*
2046 : * At this stage, joininfo lists of inner and outer can contain
2047 : * only clauses required for a superior outer join that can't
2048 : * influence this optimization. So, we can avoid to call the
2049 : * build_joinrel_restrictlist() routine.
2050 : */
2051 5840 : restrictlist = generate_join_implied_equalities(root, joinrelids,
2052 : inner->relids,
2053 : outer, NULL);
2054 5840 : if (restrictlist == NIL)
2055 3864 : continue;
2056 :
2057 : /*
2058 : * Process restrictlist to separate the self-join quals from the
2059 : * other quals. e.g., "x = x" goes to selfjoinquals and "a = b" to
2060 : * otherjoinquals.
2061 : */
2062 1976 : split_selfjoin_quals(root, restrictlist, &selfjoinquals,
2063 1976 : &otherjoinquals, inner->relid, outer->relid);
2064 :
2065 : Assert(list_length(restrictlist) ==
2066 : (list_length(selfjoinquals) + list_length(otherjoinquals)));
2067 :
2068 : /*
2069 : * To enable SJE for the only degenerate case without any self
2070 : * join clauses at all, add baserestrictinfo to this list. The
2071 : * degenerate case works only if both sides have the same clause.
2072 : * So doesn't matter which side to add.
2073 : */
2074 1976 : selfjoinquals = list_concat(selfjoinquals, outer->baserestrictinfo);
2075 :
2076 : /*
2077 : * Determine if the inner table can duplicate outer rows. We must
2078 : * bypass the unique rel cache here since we're possibly using a
2079 : * subset of join quals. We can use 'force_cache' == true when all
2080 : * join quals are self-join quals. Otherwise, we could end up
2081 : * putting false negatives in the cache.
2082 : */
2083 1976 : if (!innerrel_is_unique_ext(root, joinrelids, inner->relids,
2084 : outer, JOIN_INNER, selfjoinquals,
2085 1976 : list_length(otherjoinquals) == 0,
2086 : &uclauses))
2087 1322 : continue;
2088 :
2089 : /*
2090 : * 'uclauses' is the copy of outer->baserestrictinfo that are
2091 : * associated with an index. We proved by matching selfjoinquals
2092 : * to a unique index that the outer relation has at most one
2093 : * matching row for each inner row. Sometimes that is not enough.
2094 : * e.g. "WHERE s1.b = s2.b AND s1.a = 1 AND s2.a = 2" when the
2095 : * unique index is (a,b). Having non-empty uclauses, we must
2096 : * validate that the inner baserestrictinfo contains the same
2097 : * expressions, or we won't match the same row on each side of the
2098 : * join.
2099 : */
2100 654 : if (!match_unique_clauses(root, inner, uclauses, outer->relid))
2101 66 : continue;
2102 :
2103 : /*
2104 : * We can remove either relation, so remove the inner one in order
2105 : * to simplify this loop.
2106 : */
2107 588 : remove_self_join_rel(root, omark, imark, outer, inner, restrictlist);
2108 :
2109 588 : result = bms_add_member(result, r);
2110 :
2111 : /* We have removed the outer relation, try the next one. */
2112 588 : break;
2113 : }
2114 : }
2115 :
2116 10436 : return result;
2117 : }
2118 :
2119 : /*
2120 : * Gather indexes of base relations from the joinlist and try to eliminate self
2121 : * joins.
2122 : */
2123 : static Relids
2124 128114 : remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove)
2125 : {
2126 : ListCell *jl;
2127 128114 : Relids relids = NULL;
2128 128114 : SelfJoinCandidate *candidates = NULL;
2129 : int i;
2130 : int j;
2131 : int numRels;
2132 :
2133 : /* Collect indexes of base relations of the join tree */
2134 449488 : foreach(jl, joinlist)
2135 : {
2136 321374 : Node *jlnode = (Node *) lfirst(jl);
2137 :
2138 321374 : if (IsA(jlnode, RangeTblRef))
2139 : {
2140 318044 : int varno = ((RangeTblRef *) jlnode)->rtindex;
2141 318044 : RangeTblEntry *rte = root->simple_rte_array[varno];
2142 :
2143 : /*
2144 : * We only consider ordinary relations as candidates to be
2145 : * removed, and these relations should not have TABLESAMPLE
2146 : * clauses specified. Removing a relation with TABLESAMPLE clause
2147 : * could potentially change the syntax of the query. Because of
2148 : * UPDATE/DELETE EPQ mechanism, currently Query->resultRelation or
2149 : * Query->mergeTargetRelation associated rel cannot be eliminated.
2150 : */
2151 318044 : if (rte->rtekind == RTE_RELATION &&
2152 280832 : rte->relkind == RELKIND_RELATION &&
2153 275770 : rte->tablesample == NULL &&
2154 275746 : varno != root->parse->resultRelation &&
2155 273992 : varno != root->parse->mergeTargetRelation)
2156 : {
2157 : Assert(!bms_is_member(varno, relids));
2158 273992 : relids = bms_add_member(relids, varno);
2159 : }
2160 : }
2161 3330 : else if (IsA(jlnode, List))
2162 : {
2163 : /* Recursively go inside the sub-joinlist */
2164 3330 : toRemove = remove_self_joins_recurse(root, (List *) jlnode,
2165 : toRemove);
2166 : }
2167 : else
2168 0 : elog(ERROR, "unrecognized joinlist node type: %d",
2169 : (int) nodeTag(jlnode));
2170 : }
2171 :
2172 128114 : numRels = bms_num_members(relids);
2173 :
2174 : /* Need at least two relations for the join */
2175 128114 : if (numRels < 2)
2176 26250 : return toRemove;
2177 :
2178 : /*
2179 : * In order to find relations with the same oid we first build an array of
2180 : * candidates and then sort it by oid.
2181 : */
2182 101864 : candidates = (SelfJoinCandidate *) palloc(sizeof(SelfJoinCandidate) *
2183 : numRels);
2184 101864 : i = -1;
2185 101864 : j = 0;
2186 357510 : while ((i = bms_next_member(relids, i)) >= 0)
2187 : {
2188 255646 : candidates[j].relid = i;
2189 255646 : candidates[j].reloid = root->simple_rte_array[i]->relid;
2190 255646 : j++;
2191 : }
2192 :
2193 101864 : qsort(candidates, numRels, sizeof(SelfJoinCandidate),
2194 : self_join_candidates_cmp);
2195 :
2196 : /*
2197 : * Iteratively form a group of relation indexes with the same oid and
2198 : * launch the routine that detects self-joins in this group and removes
2199 : * excessive range table entries.
2200 : *
2201 : * At the end of the iteration, exclude the group from the overall relids
2202 : * list. So each next iteration of the cycle will involve less and less
2203 : * value of relids.
2204 : */
2205 101864 : i = 0;
2206 357510 : for (j = 1; j < numRels + 1; j++)
2207 : {
2208 255646 : if (j == numRels || candidates[j].reloid != candidates[i].reloid)
2209 : {
2210 244138 : if (j - i >= 2)
2211 : {
2212 : /* Create a group of relation indexes with the same oid */
2213 10370 : Relids group = NULL;
2214 : Relids removed;
2215 :
2216 32248 : while (i < j)
2217 : {
2218 21878 : group = bms_add_member(group, candidates[i].relid);
2219 21878 : i++;
2220 : }
2221 10370 : relids = bms_del_members(relids, group);
2222 :
2223 : /*
2224 : * Try to remove self-joins from a group of identical entries.
2225 : * Make the next attempt iteratively - if something is deleted
2226 : * from a group, changes in clauses and equivalence classes
2227 : * can give us a chance to find more candidates.
2228 : */
2229 : do
2230 : {
2231 : Assert(!bms_overlap(group, toRemove));
2232 10436 : removed = remove_self_joins_one_group(root, group);
2233 10436 : toRemove = bms_add_members(toRemove, removed);
2234 10436 : group = bms_del_members(group, removed);
2235 564 : } while (!bms_is_empty(removed) &&
2236 10436 : bms_membership(group) == BMS_MULTIPLE);
2237 10370 : bms_free(removed);
2238 10370 : bms_free(group);
2239 : }
2240 : else
2241 : {
2242 : /* Single relation, just remove it from the set */
2243 233768 : relids = bms_del_member(relids, candidates[i].relid);
2244 233768 : i = j;
2245 : }
2246 : }
2247 : }
2248 :
2249 : Assert(bms_is_empty(relids));
2250 :
2251 101864 : return toRemove;
2252 : }
2253 :
2254 : /*
2255 : * Compare self-join candidates by their oids.
2256 : */
2257 : static int
2258 199900 : self_join_candidates_cmp(const void *a, const void *b)
2259 : {
2260 199900 : const SelfJoinCandidate *ca = (const SelfJoinCandidate *) a;
2261 199900 : const SelfJoinCandidate *cb = (const SelfJoinCandidate *) b;
2262 :
2263 199900 : if (ca->reloid != cb->reloid)
2264 188338 : return (ca->reloid < cb->reloid ? -1 : 1);
2265 : else
2266 11562 : return 0;
2267 : }
2268 :
2269 : /*
2270 : * Find and remove useless self joins.
2271 : *
2272 : * Search for joins where a relation is joined to itself. If the join clause
2273 : * for each tuple from one side of the join is proven to match the same
2274 : * physical row (or nothing) on the other side, that self-join can be
2275 : * eliminated from the query. Suitable join clauses are assumed to be in the
2276 : * form of X = X, and can be replaced with NOT NULL clauses.
2277 : *
2278 : * For the sake of simplicity, we don't apply this optimization to special
2279 : * joins. Here is a list of what we could do in some particular cases:
2280 : * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins,
2281 : * and then removed normally.
2282 : * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND
2283 : * (IS NULL on join columns OR NOT inner quals)'.
2284 : * 'a a1 left join a a2': could simplify to a scan like inner but without
2285 : * NOT NULL conditions on join columns.
2286 : * 'a a1 left join (a a2 join b)': can't simplify this, because join to b
2287 : * can both remove rows and introduce duplicates.
2288 : *
2289 : * To search for removable joins, we order all the relations on their Oid,
2290 : * go over each set with the same Oid, and consider each pair of relations
2291 : * in this set.
2292 : *
2293 : * To remove the join, we mark one of the participating relations as dead
2294 : * and rewrite all references to it to point to the remaining relation.
2295 : * This includes modifying RestrictInfos, EquivalenceClasses, and
2296 : * EquivalenceMembers. We also have to modify the row marks. The join clauses
2297 : * of the removed relation become either restriction or join clauses, based on
2298 : * whether they reference any relations not participating in the removed join.
2299 : *
2300 : * 'joinlist' is the top-level joinlist of the query. If it has any
2301 : * references to the removed relations, we update them to point to the
2302 : * remaining ones.
2303 : */
2304 : List *
2305 336130 : remove_useless_self_joins(PlannerInfo *root, List *joinlist)
2306 : {
2307 336130 : Relids toRemove = NULL;
2308 336130 : int relid = -1;
2309 :
2310 672260 : if (!enable_self_join_elimination || joinlist == NIL ||
2311 548344 : (list_length(joinlist) == 1 && !IsA(linitial(joinlist), List)))
2312 211346 : return joinlist;
2313 :
2314 : /*
2315 : * Merge pairs of relations participated in self-join. Remove unnecessary
2316 : * range table entries.
2317 : */
2318 124784 : toRemove = remove_self_joins_recurse(root, joinlist, toRemove);
2319 :
2320 124784 : if (unlikely(toRemove != NULL))
2321 : {
2322 : /* At the end, remove orphaned relation links */
2323 1146 : while ((relid = bms_next_member(toRemove, relid)) >= 0)
2324 : {
2325 588 : int nremoved = 0;
2326 :
2327 588 : joinlist = remove_rel_from_joinlist(joinlist, relid, &nremoved);
2328 588 : if (nremoved != 1)
2329 0 : elog(ERROR, "failed to find relation %d in joinlist", relid);
2330 : }
2331 : }
2332 :
2333 124784 : return joinlist;
2334 : }
|