Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pgpa_identifier.c
4 : * create appropriate identifiers for range table entries
5 : *
6 : * The goal of this module is to be able to produce identifiers for range
7 : * table entries that are unique, understandable to human beings, and
8 : * able to be reconstructed during future planning cycles. As an
9 : * exception, we do not care about, or want to produce, identifiers for
10 : * RTE_JOIN entries. This is because (1) we would end up with a ton of
11 : * RTEs with unhelpful names like unnamed_join_17; (2) not all joins have
12 : * RTEs; and (3) we intend to refer to joins by their constituent members
13 : * rather than by reference to the join RTE.
14 : *
15 : * In general, we construct identifiers of the following form:
16 : *
17 : * alias_name#occurrence_number/child_table_name@subquery_name
18 : *
19 : * However, occurrence_number is omitted when it is the first occurrence
20 : * within the same subquery, child_table_name is omitted for relations that
21 : * are not child tables, and subquery_name is omitted for the topmost
22 : * query level. Whenever an item is omitted, the preceding punctuation mark
23 : * is also omitted. Identifier-style escaping is applied to alias_name and
24 : * subquery_name. In generated advice, child table names are always
25 : * schema-qualified, but users can supply advice where the schema name is
26 : * not mentioned. Identifier-style escaping is applied to the schema and to
27 : * the relation name separately.
28 : *
29 : * The upshot of all of these rules is that in simple cases, the relation
30 : * identifier is textually identical to the alias name, making life easier
31 : * for users. However, even in complex cases, every relation identifier
32 : * for a given query will be unique (or at least we hope so: if not, this
33 : * code is buggy and the identifier format might need to be rethought).
34 : *
35 : * A key goal of this system is that we want to be able to reconstruct the
36 : * same identifiers during a future planning cycle for the same query, so
37 : * that if a certain behavior is specified for a certain identifier, we can
38 : * properly identify the RTI for which that behavior is mandated. In order
39 : * for this to work, subquery names must be unique and known before the
40 : * subquery is planned, and the remainder of the identifier must not depend
41 : * on any part of the query outside of the current subquery level. In
42 : * particular, occurrence_number must be calculated relative to the range
43 : * table for the relevant subquery, not the final flattened range table.
44 : *
45 : * NB: All of this code must use rt_fetch(), not planner_rt_fetch()!
46 : * Join removal and self-join elimination remove rels from the arrays
47 : * that planner_rt_fetch() uses; using rt_fetch() is necessary to get
48 : * stable results.
49 : *
50 : * Copyright (c) 2016-2026, PostgreSQL Global Development Group
51 : *
52 : * contrib/pg_plan_advice/pgpa_identifier.c
53 : *
54 : *-------------------------------------------------------------------------
55 : */
56 :
57 : #include "postgres.h"
58 :
59 : #include "pgpa_identifier.h"
60 :
61 : #include "parser/parsetree.h"
62 : #include "utils/builtins.h"
63 : #include "utils/lsyscache.h"
64 :
65 : static Index *pgpa_create_top_rti_map(Index rtable_length, List *rtable,
66 : List *appinfos);
67 : static int pgpa_occurrence_number(List *rtable, Index *top_rti_map,
68 : SubPlanRTInfo *rtinfo, Index rti);
69 :
70 : /*
71 : * Create a range table identifier from scratch.
72 : *
73 : * This function leaves the caller to do all the heavy lifting, so it's
74 : * generally better to use one of the functions below instead.
75 : *
76 : * See the file header comments for more details on the format of an
77 : * identifier.
78 : */
79 : const char *
80 468 : pgpa_identifier_string(const pgpa_identifier *rid)
81 : {
82 : const char *result;
83 :
84 : Assert(rid->alias_name != NULL);
85 468 : result = quote_identifier(rid->alias_name);
86 :
87 : Assert(rid->occurrence >= 0);
88 468 : if (rid->occurrence > 1)
89 8 : result = psprintf("%s#%d", result, rid->occurrence);
90 :
91 468 : if (rid->partrel != NULL)
92 : {
93 72 : if (rid->partnsp == NULL)
94 7 : result = psprintf("%s/%s", result,
95 7 : quote_identifier(rid->partrel));
96 : else
97 65 : result = psprintf("%s/%s.%s", result,
98 65 : quote_identifier(rid->partnsp),
99 65 : quote_identifier(rid->partrel));
100 : }
101 :
102 468 : if (rid->plan_name != NULL)
103 14 : result = psprintf("%s@%s", result, quote_identifier(rid->plan_name));
104 :
105 468 : return result;
106 : }
107 :
108 : /*
109 : * Compute a relation identifier for a particular RTI.
110 : *
111 : * The caller provides root and rti, and gets the necessary details back via
112 : * the remaining parameters.
113 : */
114 : void
115 1084 : pgpa_compute_identifier_by_rti(PlannerInfo *root, Index rti,
116 : pgpa_identifier *rid)
117 : {
118 1084 : Index top_rti = rti;
119 1084 : int occurrence = 1;
120 : RangeTblEntry *rte;
121 : RangeTblEntry *top_rte;
122 1084 : char *partnsp = NULL;
123 1084 : char *partrel = NULL;
124 :
125 : /*
126 : * If this is a child RTE, find the topmost parent that is still of type
127 : * RTE_RELATION. We do this because we identify children of partitioned
128 : * tables by the name of the child table, but subqueries can also have
129 : * child rels and we don't care about those here.
130 : */
131 : for (;;)
132 227 : {
133 : AppendRelInfo *appinfo;
134 : RangeTblEntry *parent_rte;
135 :
136 : /* append_rel_array can be NULL if there are no children */
137 1311 : if (root->append_rel_array == NULL ||
138 566 : (appinfo = root->append_rel_array[top_rti]) == NULL)
139 : break;
140 :
141 227 : parent_rte = rt_fetch(appinfo->parent_relid, root->parse->rtable);
142 227 : if (parent_rte->rtekind != RTE_RELATION)
143 0 : break;
144 :
145 227 : top_rti = appinfo->parent_relid;
146 : }
147 :
148 : /* Get the range table entries for the RTI and top RTI. */
149 1084 : rte = rt_fetch(rti, root->parse->rtable);
150 1084 : top_rte = rt_fetch(top_rti, root->parse->rtable);
151 : Assert(rte->rtekind != RTE_JOIN);
152 : Assert(top_rte->rtekind != RTE_JOIN);
153 :
154 : /* Work out the correct occurrence number. */
155 2134 : for (Index prior_rti = 1; prior_rti < top_rti; ++prior_rti)
156 : {
157 : RangeTblEntry *prior_rte;
158 : AppendRelInfo *appinfo;
159 :
160 : /*
161 : * If this is a child rel of a parent that is a relation, skip it.
162 : *
163 : * Such range table entries are disambiguated by mentioning the schema
164 : * and name of the table, not by counting them as separate occurrences
165 : * of the same table.
166 : *
167 : * NB: append_rel_array can be NULL if there are no children
168 : */
169 1050 : if (root->append_rel_array != NULL &&
170 334 : (appinfo = root->append_rel_array[prior_rti]) != NULL)
171 : {
172 : RangeTblEntry *parent_rte;
173 :
174 0 : parent_rte = rt_fetch(appinfo->parent_relid, root->parse->rtable);
175 0 : if (parent_rte->rtekind == RTE_RELATION)
176 0 : continue;
177 : }
178 :
179 : /* Skip NULL entries and joins. */
180 1050 : prior_rte = rt_fetch(prior_rti, root->parse->rtable);
181 1050 : if (prior_rte == NULL || prior_rte->rtekind == RTE_JOIN)
182 108 : continue;
183 :
184 : /* Skip if the alias name differs. */
185 942 : if (strcmp(prior_rte->eref->aliasname, rte->eref->aliasname) != 0)
186 933 : continue;
187 :
188 : /* Looks like a true duplicate. */
189 9 : ++occurrence;
190 : }
191 :
192 : /* If this is a child table, get the schema and relation names. */
193 1084 : if (rti != top_rti)
194 : {
195 227 : partnsp = get_namespace_name_or_temp(get_rel_namespace(rte->relid));
196 227 : partrel = get_rel_name(rte->relid);
197 : }
198 :
199 : /* OK, we have all the answers we need. Return them to the caller. */
200 1084 : rid->alias_name = top_rte->eref->aliasname;
201 1084 : rid->occurrence = occurrence;
202 1084 : rid->partnsp = partnsp;
203 1084 : rid->partrel = partrel;
204 1084 : rid->plan_name = root->plan_name;
205 1084 : }
206 :
207 : /*
208 : * Compute a relation identifier for a set of RTIs, except for any RTE_JOIN
209 : * RTIs that may be present.
210 : *
211 : * RTE_JOIN entries are excluded because they cannot be mentioned by plan
212 : * advice.
213 : *
214 : * The caller is responsible for making sure that the tkeys array is large
215 : * enough to store the results.
216 : *
217 : * The return value is the number of identifiers computed.
218 : */
219 : int
220 718 : pgpa_compute_identifiers_by_relids(PlannerInfo *root, Bitmapset *relids,
221 : pgpa_identifier *rids)
222 : {
223 718 : int count = 0;
224 718 : int rti = -1;
225 :
226 1561 : while ((rti = bms_next_member(relids, rti)) >= 0)
227 : {
228 843 : RangeTblEntry *rte = rt_fetch(rti, root->parse->rtable);
229 :
230 843 : if (rte->rtekind == RTE_JOIN)
231 13 : continue;
232 830 : pgpa_compute_identifier_by_rti(root, rti, &rids[count++]);
233 : }
234 :
235 : Assert(count > 0);
236 718 : return count;
237 : }
238 :
239 : /*
240 : * Create an array of range table identifiers for all the non-NULL,
241 : * non-RTE_JOIN entries in the PlannedStmt's range table.
242 : */
243 : pgpa_identifier *
244 132 : pgpa_create_identifiers_for_planned_stmt(PlannedStmt *pstmt)
245 : {
246 132 : Index rtable_length = list_length(pstmt->rtable);
247 132 : pgpa_identifier *result = palloc0_array(pgpa_identifier, rtable_length);
248 : Index *top_rti_map;
249 132 : int rtinfoindex = 0;
250 132 : SubPlanRTInfo *rtinfo = NULL;
251 132 : SubPlanRTInfo *nextrtinfo = NULL;
252 :
253 : /*
254 : * Account for relations added by inheritance expansion of partitioned
255 : * tables.
256 : */
257 132 : top_rti_map = pgpa_create_top_rti_map(rtable_length, pstmt->rtable,
258 : pstmt->appendRelations);
259 :
260 : /*
261 : * When we begin iterating, we're processing the portion of the range
262 : * table that originated from the top-level PlannerInfo, so subrtinfo is
263 : * NULL. Later, subrtinfo will be the SubPlanRTInfo for the subquery whose
264 : * portion of the range table we are processing. nextrtinfo is always the
265 : * SubPlanRTInfo that follows the current one, if any, so when we're
266 : * processing the top-level query's portion of the range table, the next
267 : * SubPlanRTInfo is the very first one.
268 : */
269 132 : if (pstmt->subrtinfos != NULL)
270 8 : nextrtinfo = linitial(pstmt->subrtinfos);
271 :
272 : /* Main loop over the range table. */
273 513 : for (Index rti = 1; rti <= rtable_length; rti++)
274 : {
275 : const char *plan_name;
276 : Index top_rti;
277 : RangeTblEntry *rte;
278 : RangeTblEntry *top_rte;
279 381 : char *partnsp = NULL;
280 381 : char *partrel = NULL;
281 : int occurrence;
282 : pgpa_identifier *rid;
283 :
284 : /*
285 : * Advance to the next SubPlanRTInfo, if it's time to do that.
286 : *
287 : * This loop probably shouldn't ever iterate more than once, because
288 : * that would imply that a subquery was planned but added nothing to
289 : * the range table; but let's be defensive and assume it can happen.
290 : */
291 389 : while (nextrtinfo != NULL && rti > nextrtinfo->rtoffset)
292 : {
293 8 : rtinfo = nextrtinfo;
294 8 : if (++rtinfoindex >= list_length(pstmt->subrtinfos))
295 8 : nextrtinfo = NULL;
296 : else
297 0 : nextrtinfo = list_nth(pstmt->subrtinfos, rtinfoindex);
298 : }
299 :
300 : /* Fetch the range table entry, if any. */
301 381 : rte = rt_fetch(rti, pstmt->rtable);
302 :
303 : /*
304 : * We can't and don't need to identify null entries, and we don't want
305 : * to identify join entries.
306 : */
307 381 : if (rte == NULL || rte->rtekind == RTE_JOIN)
308 70 : continue;
309 :
310 : /*
311 : * If this is not a relation added by partitioned table expansion,
312 : * then the top RTI/RTE are just the same as this RTI/RTE. Otherwise,
313 : * we need the information for the top RTI/RTE, and must also fetch
314 : * the partition schema and name.
315 : */
316 311 : top_rti = top_rti_map[rti - 1];
317 311 : if (rti == top_rti)
318 252 : top_rte = rte;
319 : else
320 : {
321 59 : top_rte = rt_fetch(top_rti, pstmt->rtable);
322 : partnsp =
323 59 : get_namespace_name_or_temp(get_rel_namespace(rte->relid));
324 59 : partrel = get_rel_name(rte->relid);
325 : }
326 :
327 : /* Compute the correct occurrence number. */
328 311 : occurrence = pgpa_occurrence_number(pstmt->rtable, top_rti_map,
329 : rtinfo, top_rti);
330 :
331 : /* Get the name of the current plan (NULL for toplevel query). */
332 311 : plan_name = rtinfo == NULL ? NULL : rtinfo->plan_name;
333 :
334 : /* Save all the details we've derived. */
335 311 : rid = &result[rti - 1];
336 311 : rid->alias_name = top_rte->eref->aliasname;
337 311 : rid->occurrence = occurrence;
338 311 : rid->partnsp = partnsp;
339 311 : rid->partrel = partrel;
340 311 : rid->plan_name = plan_name;
341 : }
342 :
343 132 : return result;
344 : }
345 :
346 : /*
347 : * Search for a pgpa_identifier in the array of identifiers computed for the
348 : * range table. If exactly one match is found, return the matching RTI; else
349 : * return 0.
350 : */
351 : Index
352 140 : pgpa_compute_rti_from_identifier(int rtable_length,
353 : pgpa_identifier *rt_identifiers,
354 : pgpa_identifier *rid)
355 : {
356 140 : Index result = 0;
357 :
358 705 : for (Index rti = 1; rti <= rtable_length; ++rti)
359 : {
360 565 : pgpa_identifier *rti_rid = &rt_identifiers[rti - 1];
361 :
362 : /* If there's no identifier for this RTI, skip it. */
363 565 : if (rti_rid->alias_name == NULL)
364 103 : continue;
365 :
366 : /*
367 : * If it matches, return this RTI. As usual, an omitted partition
368 : * schema matches anything, but partition and plan names must either
369 : * match exactly or be omitted on both sides.
370 : */
371 462 : if (strcmp(rid->alias_name, rti_rid->alias_name) == 0 &&
372 197 : rid->occurrence == rti_rid->occurrence &&
373 193 : (rid->partnsp == NULL || rti_rid->partnsp == NULL ||
374 202 : strcmp(rid->partnsp, rti_rid->partnsp) == 0) &&
375 333 : strings_equal_or_both_null(rid->partrel, rti_rid->partrel) &&
376 140 : strings_equal_or_both_null(rid->plan_name, rti_rid->plan_name))
377 : {
378 140 : if (result != 0)
379 : {
380 : /* Multiple matches were found. */
381 0 : return 0;
382 : }
383 140 : result = rti;
384 : }
385 : }
386 :
387 140 : return result;
388 : }
389 :
390 : /*
391 : * Build a mapping from each RTI to the RTI whose alias_name will be used to
392 : * construct the range table identifier.
393 : *
394 : * For child relations, this is the topmost parent that is still of type
395 : * RTE_RELATION. For other relations, it's just the original RTI.
396 : *
397 : * Since we're eventually going to need this information for every RTI in
398 : * the range table, it's best to compute all the answers in a single pass over
399 : * the AppendRelInfo list. Otherwise, we might end up searching through that
400 : * list repeatedly for entries of interest.
401 : *
402 : * Note that the returned array is uses zero-based indexing, while RTIs use
403 : * 1-based indexing, so subtract 1 from the RTI before looking it up in the
404 : * array.
405 : */
406 : static Index *
407 132 : pgpa_create_top_rti_map(Index rtable_length, List *rtable, List *appinfos)
408 : {
409 132 : Index *top_rti_map = palloc0_array(Index, rtable_length);
410 :
411 : /* Initially, make every RTI point to itself. */
412 513 : for (Index rti = 1; rti <= rtable_length; ++rti)
413 381 : top_rti_map[rti - 1] = rti;
414 :
415 : /* Update the map for each AppendRelInfo object. */
416 323 : foreach_node(AppendRelInfo, appinfo, appinfos)
417 : {
418 59 : Index parent_rti = appinfo->parent_relid;
419 59 : RangeTblEntry *parent_rte = rt_fetch(parent_rti, rtable);
420 :
421 : /* If the parent is not RTE_RELATION, ignore this entry. */
422 59 : if (parent_rte->rtekind != RTE_RELATION)
423 0 : continue;
424 :
425 : /*
426 : * Map the child to wherever we mapped the parent. Parents always
427 : * precede their children in the AppendRelInfo list, so this should
428 : * work out.
429 : */
430 59 : top_rti_map[appinfo->child_relid - 1] = top_rti_map[parent_rti - 1];
431 : }
432 :
433 132 : return top_rti_map;
434 : }
435 :
436 : /*
437 : * Find the occurrence number of a certain relation within a certain subquery.
438 : *
439 : * The same alias name can occur multiple times within a subquery, but we want
440 : * to disambiguate by giving different occurrences different integer indexes.
441 : * However, child tables are disambiguated by including the table name rather
442 : * than by incrementing the occurrence number; and joins are not named and so
443 : * shouldn't increment the occurrence number either.
444 : */
445 : static int
446 311 : pgpa_occurrence_number(List *rtable, Index *top_rti_map,
447 : SubPlanRTInfo *rtinfo, Index rti)
448 : {
449 311 : Index rtoffset = (rtinfo == NULL) ? 0 : rtinfo->rtoffset;
450 311 : int occurrence = 1;
451 311 : RangeTblEntry *rte = rt_fetch(rti, rtable);
452 :
453 533 : for (Index prior_rti = rtoffset + 1; prior_rti < rti; ++prior_rti)
454 : {
455 : RangeTblEntry *prior_rte;
456 :
457 : /*
458 : * If this is a child rel of a parent that is a relation, skip it.
459 : *
460 : * Such range table entries are disambiguated by mentioning the schema
461 : * and name of the table, not by counting them as separate occurrences
462 : * of the same table.
463 : */
464 222 : if (top_rti_map[prior_rti - 1] != prior_rti)
465 0 : continue;
466 :
467 : /* Skip joins. */
468 222 : prior_rte = rt_fetch(prior_rti, rtable);
469 222 : if (prior_rte->rtekind == RTE_JOIN)
470 14 : continue;
471 :
472 : /* Skip if the alias name differs. */
473 208 : if (strcmp(prior_rte->eref->aliasname, rte->eref->aliasname) != 0)
474 204 : continue;
475 :
476 : /* Looks like a true duplicate. */
477 4 : ++occurrence;
478 : }
479 :
480 311 : return occurrence;
481 : }
|