Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * queryjumblefuncs.c
4 : * Query normalization and fingerprinting.
5 : *
6 : * Normalization is a process whereby similar queries, typically differing only
7 : * in their constants (though the exact rules are somewhat more subtle than
8 : * that) are recognized as equivalent, and are tracked as a single entry. This
9 : * is particularly useful for non-prepared queries.
10 : *
11 : * Normalization is implemented by fingerprinting queries, selectively
12 : * serializing those fields of each query tree's nodes that are judged to be
13 : * essential to the query. This is referred to as a query jumble. This is
14 : * distinct from a regular serialization in that various extraneous
15 : * information is ignored as irrelevant or not essential to the query, such
16 : * as the collations of Vars and, most notably, the values of constants.
17 : *
18 : * This jumble is acquired at the end of parse analysis of each query, and
19 : * a 64-bit hash of it is stored into the query's Query.queryId field.
20 : * The server then copies this value around, making it available in plan
21 : * tree(s) generated from the query. The executor can then use this value
22 : * to blame query costs on the proper queryId.
23 : *
24 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
25 : * Portions Copyright (c) 1994, Regents of the University of California
26 : *
27 : *
28 : * IDENTIFICATION
29 : * src/backend/nodes/queryjumblefuncs.c
30 : *
31 : *-------------------------------------------------------------------------
32 : */
33 : #include "postgres.h"
34 :
35 : #include "common/hashfn.h"
36 : #include "miscadmin.h"
37 : #include "nodes/queryjumble.h"
38 : #include "parser/scansup.h"
39 :
40 : #define JUMBLE_SIZE 1024 /* query serialization buffer size */
41 :
42 : /* GUC parameters */
43 : int compute_query_id = COMPUTE_QUERY_ID_AUTO;
44 :
45 : /*
46 : * True when compute_query_id is ON or AUTO, and a module requests them.
47 : *
48 : * Note that IsQueryIdEnabled() should be used instead of checking
49 : * query_id_enabled or compute_query_id directly when we want to know
50 : * whether query identifiers are computed in the core or not.
51 : */
52 : bool query_id_enabled = false;
53 :
54 : static void AppendJumble(JumbleState *jstate,
55 : const unsigned char *item, Size size);
56 : static void RecordConstLocation(JumbleState *jstate, int location);
57 : static void _jumbleNode(JumbleState *jstate, Node *node);
58 : static void _jumbleA_Const(JumbleState *jstate, Node *node);
59 : static void _jumbleList(JumbleState *jstate, Node *node);
60 : static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
61 :
62 : /*
63 : * Given a possibly multi-statement source string, confine our attention to the
64 : * relevant part of the string.
65 : */
66 : const char *
67 181378 : CleanQuerytext(const char *query, int *location, int *len)
68 : {
69 181378 : int query_location = *location;
70 181378 : int query_len = *len;
71 :
72 : /* First apply starting offset, unless it's -1 (unknown). */
73 181378 : if (query_location >= 0)
74 : {
75 : Assert(query_location <= strlen(query));
76 181004 : query += query_location;
77 : /* Length of 0 (or -1) means "rest of string" */
78 181004 : if (query_len <= 0)
79 29474 : query_len = strlen(query);
80 : else
81 : Assert(query_len <= strlen(query));
82 : }
83 : else
84 : {
85 : /* If query location is unknown, distrust query_len as well */
86 374 : query_location = 0;
87 374 : query_len = strlen(query);
88 : }
89 :
90 : /*
91 : * Discard leading and trailing whitespace, too. Use scanner_isspace()
92 : * not libc's isspace(), because we want to match the lexer's behavior.
93 : *
94 : * Note: the parser now strips leading comments and whitespace from the
95 : * reported stmt_location, so this first loop will only iterate in the
96 : * unusual case that the location didn't propagate to here. But the
97 : * statement length will extend to the end-of-string or terminating
98 : * semicolon, so the second loop often does something useful.
99 : */
100 181400 : while (query_len > 0 && scanner_isspace(query[0]))
101 22 : query++, query_location++, query_len--;
102 182366 : while (query_len > 0 && scanner_isspace(query[query_len - 1]))
103 988 : query_len--;
104 :
105 181378 : *location = query_location;
106 181378 : *len = query_len;
107 :
108 181378 : return query;
109 : }
110 :
111 : JumbleState *
112 149166 : JumbleQuery(Query *query)
113 : {
114 149166 : JumbleState *jstate = NULL;
115 :
116 : Assert(IsQueryIdEnabled());
117 :
118 149166 : jstate = (JumbleState *) palloc(sizeof(JumbleState));
119 :
120 : /* Set up workspace for query jumbling */
121 149166 : jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
122 149166 : jstate->jumble_len = 0;
123 149166 : jstate->clocations_buf_size = 32;
124 149166 : jstate->clocations = (LocationLen *)
125 149166 : palloc(jstate->clocations_buf_size * sizeof(LocationLen));
126 149166 : jstate->clocations_count = 0;
127 149166 : jstate->highest_extern_param_id = 0;
128 :
129 : /* Compute query ID and mark the Query node with it */
130 149166 : _jumbleNode(jstate, (Node *) query);
131 149166 : query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
132 149166 : jstate->jumble_len,
133 : 0));
134 :
135 : /*
136 : * If we are unlucky enough to get a hash of zero, use 1 instead for
137 : * normal statements and 2 for utility queries.
138 : */
139 149166 : if (query->queryId == UINT64CONST(0))
140 : {
141 0 : if (query->utilityStmt)
142 0 : query->queryId = UINT64CONST(2);
143 : else
144 0 : query->queryId = UINT64CONST(1);
145 : }
146 :
147 149166 : return jstate;
148 : }
149 :
150 : /*
151 : * Enables query identifier computation.
152 : *
153 : * Third-party plugins can use this function to inform core that they require
154 : * a query identifier to be computed.
155 : */
156 : void
157 14 : EnableQueryId(void)
158 : {
159 14 : if (compute_query_id != COMPUTE_QUERY_ID_OFF)
160 14 : query_id_enabled = true;
161 14 : }
162 :
163 : /*
164 : * AppendJumble: Append a value that is substantive in a given query to
165 : * the current jumble.
166 : */
167 : static void
168 8415426 : AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
169 : {
170 8415426 : unsigned char *jumble = jstate->jumble;
171 8415426 : Size jumble_len = jstate->jumble_len;
172 :
173 : /*
174 : * Whenever the jumble buffer is full, we hash the current contents and
175 : * reset the buffer to contain just that hash value, thus relying on the
176 : * hash to summarize everything so far.
177 : */
178 16833066 : while (size > 0)
179 : {
180 : Size part_size;
181 :
182 8417640 : if (jumble_len >= JUMBLE_SIZE)
183 : {
184 : uint64 start_hash;
185 :
186 2962 : start_hash = DatumGetUInt64(hash_any_extended(jumble,
187 : JUMBLE_SIZE, 0));
188 2962 : memcpy(jumble, &start_hash, sizeof(start_hash));
189 2962 : jumble_len = sizeof(start_hash);
190 : }
191 8417640 : part_size = Min(size, JUMBLE_SIZE - jumble_len);
192 8417640 : memcpy(jumble + jumble_len, item, part_size);
193 8417640 : jumble_len += part_size;
194 8417640 : item += part_size;
195 8417640 : size -= part_size;
196 : }
197 8415426 : jstate->jumble_len = jumble_len;
198 8415426 : }
199 :
200 : /*
201 : * Record location of constant within query string of query tree
202 : * that is currently being walked.
203 : */
204 : static void
205 228100 : RecordConstLocation(JumbleState *jstate, int location)
206 : {
207 : /* -1 indicates unknown or undefined location */
208 228100 : if (location >= 0)
209 : {
210 : /* enlarge array if needed */
211 215190 : if (jstate->clocations_count >= jstate->clocations_buf_size)
212 : {
213 126 : jstate->clocations_buf_size *= 2;
214 126 : jstate->clocations = (LocationLen *)
215 126 : repalloc(jstate->clocations,
216 126 : jstate->clocations_buf_size *
217 : sizeof(LocationLen));
218 : }
219 215190 : jstate->clocations[jstate->clocations_count].location = location;
220 : /* initialize lengths to -1 to simplify third-party module usage */
221 215190 : jstate->clocations[jstate->clocations_count].length = -1;
222 215190 : jstate->clocations_count++;
223 : }
224 228100 : }
225 :
226 : #define JUMBLE_NODE(item) \
227 : _jumbleNode(jstate, (Node *) expr->item)
228 : #define JUMBLE_LOCATION(location) \
229 : RecordConstLocation(jstate, expr->location)
230 : #define JUMBLE_FIELD(item) \
231 : AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
232 : #define JUMBLE_FIELD_SINGLE(item) \
233 : AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
234 : #define JUMBLE_STRING(str) \
235 : do { \
236 : if (expr->str) \
237 : AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
238 : } while(0)
239 :
240 : #include "queryjumblefuncs.funcs.c"
241 :
242 : static void
243 7189692 : _jumbleNode(JumbleState *jstate, Node *node)
244 : {
245 7189692 : Node *expr = node;
246 :
247 7189692 : if (expr == NULL)
248 4244284 : return;
249 :
250 : /* Guard against stack overflow due to overly complex expressions */
251 2945408 : check_stack_depth();
252 :
253 : /*
254 : * We always emit the node's NodeTag, then any additional fields that are
255 : * considered significant, and then we recurse to any child nodes.
256 : */
257 2945408 : JUMBLE_FIELD(type);
258 :
259 2945408 : switch (nodeTag(expr))
260 : {
261 : #include "queryjumblefuncs.switch.c"
262 :
263 703918 : case T_List:
264 : case T_IntList:
265 : case T_OidList:
266 : case T_XidList:
267 703918 : _jumbleList(jstate, expr);
268 703918 : break;
269 :
270 0 : default:
271 : /* Only a warning, since we can stumble along anyway */
272 0 : elog(WARNING, "unrecognized node type: %d",
273 : (int) nodeTag(expr));
274 0 : break;
275 : }
276 :
277 : /* Special cases to handle outside the automated code */
278 2945408 : switch (nodeTag(expr))
279 : {
280 10686 : case T_Param:
281 : {
282 10686 : Param *p = (Param *) node;
283 :
284 : /*
285 : * Update the highest Param id seen, in order to start
286 : * normalization correctly.
287 : */
288 10686 : if (p->paramkind == PARAM_EXTERN &&
289 9922 : p->paramid > jstate->highest_extern_param_id)
290 8582 : jstate->highest_extern_param_id = p->paramid;
291 : }
292 10686 : break;
293 2934722 : default:
294 2934722 : break;
295 : }
296 : }
297 :
298 : static void
299 703918 : _jumbleList(JumbleState *jstate, Node *node)
300 : {
301 703918 : List *expr = (List *) node;
302 : ListCell *l;
303 :
304 703918 : switch (expr->type)
305 : {
306 703008 : case T_List:
307 1978334 : foreach(l, expr)
308 1275326 : _jumbleNode(jstate, lfirst(l));
309 703008 : break;
310 910 : case T_IntList:
311 2036 : foreach(l, expr)
312 1126 : JUMBLE_FIELD_SINGLE(lfirst_int(l));
313 910 : break;
314 0 : case T_OidList:
315 0 : foreach(l, expr)
316 0 : JUMBLE_FIELD_SINGLE(lfirst_oid(l));
317 0 : break;
318 0 : case T_XidList:
319 0 : foreach(l, expr)
320 0 : JUMBLE_FIELD_SINGLE(lfirst_xid(l));
321 0 : break;
322 0 : default:
323 0 : elog(ERROR, "unrecognized list node type: %d",
324 : (int) expr->type);
325 : return;
326 : }
327 : }
328 :
329 : static void
330 16158 : _jumbleA_Const(JumbleState *jstate, Node *node)
331 : {
332 16158 : A_Const *expr = (A_Const *) node;
333 :
334 16158 : JUMBLE_FIELD(isnull);
335 16158 : if (!expr->isnull)
336 : {
337 16012 : JUMBLE_FIELD(val.node.type);
338 16012 : switch (nodeTag(&expr->val))
339 : {
340 7148 : case T_Integer:
341 7148 : JUMBLE_FIELD(val.ival.ival);
342 7148 : break;
343 42 : case T_Float:
344 42 : JUMBLE_STRING(val.fval.fval);
345 42 : break;
346 232 : case T_Boolean:
347 232 : JUMBLE_FIELD(val.boolval.boolval);
348 232 : break;
349 8586 : case T_String:
350 8586 : JUMBLE_STRING(val.sval.sval);
351 8586 : break;
352 4 : case T_BitString:
353 4 : JUMBLE_STRING(val.bsval.bsval);
354 4 : break;
355 0 : default:
356 0 : elog(ERROR, "unrecognized node type: %d",
357 : (int) nodeTag(&expr->val));
358 : break;
359 : }
360 146 : }
361 16158 : }
362 :
363 : static void
364 4992 : _jumbleVariableSetStmt(JumbleState *jstate, Node *node)
365 : {
366 4992 : VariableSetStmt *expr = (VariableSetStmt *) node;
367 :
368 4992 : JUMBLE_FIELD(kind);
369 4992 : JUMBLE_STRING(name);
370 :
371 : /*
372 : * Account for the list of arguments in query jumbling only if told by the
373 : * parser.
374 : */
375 4992 : if (expr->jumble_args)
376 120 : JUMBLE_NODE(args);
377 4992 : JUMBLE_FIELD(is_local);
378 4992 : JUMBLE_LOCATION(location);
379 4992 : }
|