Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * queryjumblefuncs.c
4 : * Query normalization and fingerprinting.
5 : *
6 : * Normalization is a process whereby similar queries, typically differing only
7 : * in their constants (though the exact rules are somewhat more subtle than
8 : * that) are recognized as equivalent, and are tracked as a single entry. This
9 : * is particularly useful for non-prepared queries.
10 : *
11 : * Normalization is implemented by fingerprinting queries, selectively
12 : * serializing those fields of each query tree's nodes that are judged to be
13 : * essential to the query. This is referred to as a query jumble. This is
14 : * distinct from a regular serialization in that various extraneous
15 : * information is ignored as irrelevant or not essential to the query, such
16 : * as the collations of Vars and, most notably, the values of constants.
17 : *
18 : * This jumble is acquired at the end of parse analysis of each query, and
19 : * a 64-bit hash of it is stored into the query's Query.queryId field.
20 : * The server then copies this value around, making it available in plan
21 : * tree(s) generated from the query. The executor can then use this value
22 : * to blame query costs on the proper queryId.
23 : *
24 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
25 : * Portions Copyright (c) 1994, Regents of the University of California
26 : *
27 : *
28 : * IDENTIFICATION
29 : * src/backend/nodes/queryjumblefuncs.c
30 : *
31 : *-------------------------------------------------------------------------
32 : */
33 : #include "postgres.h"
34 :
35 : #include "common/hashfn.h"
36 : #include "miscadmin.h"
37 : #include "nodes/queryjumble.h"
38 : #include "parser/scansup.h"
39 :
40 : #define JUMBLE_SIZE 1024 /* query serialization buffer size */
41 :
42 : /* GUC parameters */
43 : int compute_query_id = COMPUTE_QUERY_ID_AUTO;
44 :
45 : /*
46 : * True when compute_query_id is ON or AUTO, and a module requests them.
47 : *
48 : * Note that IsQueryIdEnabled() should be used instead of checking
49 : * query_id_enabled or compute_query_id directly when we want to know
50 : * whether query identifiers are computed in the core or not.
51 : */
52 : bool query_id_enabled = false;
53 :
54 : static void AppendJumble(JumbleState *jstate,
55 : const unsigned char *item, Size size);
56 : static void RecordConstLocation(JumbleState *jstate, int location);
57 : static void _jumbleNode(JumbleState *jstate, Node *node);
58 : static void _jumbleA_Const(JumbleState *jstate, Node *node);
59 : static void _jumbleList(JumbleState *jstate, Node *node);
60 :
61 : /*
62 : * Given a possibly multi-statement source string, confine our attention to the
63 : * relevant part of the string.
64 : */
65 : const char *
66 152528 : CleanQuerytext(const char *query, int *location, int *len)
67 : {
68 152528 : int query_location = *location;
69 152528 : int query_len = *len;
70 :
71 : /* First apply starting offset, unless it's -1 (unknown). */
72 152528 : if (query_location >= 0)
73 : {
74 : Assert(query_location <= strlen(query));
75 152160 : query += query_location;
76 : /* Length of 0 (or -1) means "rest of string" */
77 152160 : if (query_len <= 0)
78 9058 : query_len = strlen(query);
79 : else
80 : Assert(query_len <= strlen(query));
81 : }
82 : else
83 : {
84 : /* If query location is unknown, distrust query_len as well */
85 368 : query_location = 0;
86 368 : query_len = strlen(query);
87 : }
88 :
89 : /*
90 : * Discard leading and trailing whitespace, too. Use scanner_isspace()
91 : * not libc's isspace(), because we want to match the lexer's behavior.
92 : */
93 153044 : while (query_len > 0 && scanner_isspace(query[0]))
94 516 : query++, query_location++, query_len--;
95 153434 : while (query_len > 0 && scanner_isspace(query[query_len - 1]))
96 906 : query_len--;
97 :
98 152528 : *location = query_location;
99 152528 : *len = query_len;
100 :
101 152528 : return query;
102 : }
103 :
104 : JumbleState *
105 132680 : JumbleQuery(Query *query)
106 : {
107 132680 : JumbleState *jstate = NULL;
108 :
109 : Assert(IsQueryIdEnabled());
110 :
111 132680 : jstate = (JumbleState *) palloc(sizeof(JumbleState));
112 :
113 : /* Set up workspace for query jumbling */
114 132680 : jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
115 132680 : jstate->jumble_len = 0;
116 132680 : jstate->clocations_buf_size = 32;
117 132680 : jstate->clocations = (LocationLen *)
118 132680 : palloc(jstate->clocations_buf_size * sizeof(LocationLen));
119 132680 : jstate->clocations_count = 0;
120 132680 : jstate->highest_extern_param_id = 0;
121 :
122 : /* Compute query ID and mark the Query node with it */
123 132680 : _jumbleNode(jstate, (Node *) query);
124 132680 : query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
125 132680 : jstate->jumble_len,
126 : 0));
127 :
128 : /*
129 : * If we are unlucky enough to get a hash of zero, use 1 instead for
130 : * normal statements and 2 for utility queries.
131 : */
132 132680 : if (query->queryId == UINT64CONST(0))
133 : {
134 0 : if (query->utilityStmt)
135 0 : query->queryId = UINT64CONST(2);
136 : else
137 0 : query->queryId = UINT64CONST(1);
138 : }
139 :
140 132680 : return jstate;
141 : }
142 :
143 : /*
144 : * Enables query identifier computation.
145 : *
146 : * Third-party plugins can use this function to inform core that they require
147 : * a query identifier to be computed.
148 : */
149 : void
150 12 : EnableQueryId(void)
151 : {
152 12 : if (compute_query_id != COMPUTE_QUERY_ID_OFF)
153 12 : query_id_enabled = true;
154 12 : }
155 :
156 : /*
157 : * AppendJumble: Append a value that is substantive in a given query to
158 : * the current jumble.
159 : */
160 : static void
161 6745998 : AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
162 : {
163 6745998 : unsigned char *jumble = jstate->jumble;
164 6745998 : Size jumble_len = jstate->jumble_len;
165 :
166 : /*
167 : * Whenever the jumble buffer is full, we hash the current contents and
168 : * reset the buffer to contain just that hash value, thus relying on the
169 : * hash to summarize everything so far.
170 : */
171 13493834 : while (size > 0)
172 : {
173 : Size part_size;
174 :
175 6747836 : if (jumble_len >= JUMBLE_SIZE)
176 : {
177 : uint64 start_hash;
178 :
179 2248 : start_hash = DatumGetUInt64(hash_any_extended(jumble,
180 : JUMBLE_SIZE, 0));
181 2248 : memcpy(jumble, &start_hash, sizeof(start_hash));
182 2248 : jumble_len = sizeof(start_hash);
183 : }
184 6747836 : part_size = Min(size, JUMBLE_SIZE - jumble_len);
185 6747836 : memcpy(jumble + jumble_len, item, part_size);
186 6747836 : jumble_len += part_size;
187 6747836 : item += part_size;
188 6747836 : size -= part_size;
189 : }
190 6745998 : jstate->jumble_len = jumble_len;
191 6745998 : }
192 :
193 : /*
194 : * Record location of constant within query string of query tree
195 : * that is currently being walked.
196 : */
197 : static void
198 200202 : RecordConstLocation(JumbleState *jstate, int location)
199 : {
200 : /* -1 indicates unknown or undefined location */
201 200202 : if (location >= 0)
202 : {
203 : /* enlarge array if needed */
204 188384 : if (jstate->clocations_count >= jstate->clocations_buf_size)
205 : {
206 114 : jstate->clocations_buf_size *= 2;
207 114 : jstate->clocations = (LocationLen *)
208 114 : repalloc(jstate->clocations,
209 114 : jstate->clocations_buf_size *
210 : sizeof(LocationLen));
211 : }
212 188384 : jstate->clocations[jstate->clocations_count].location = location;
213 : /* initialize lengths to -1 to simplify third-party module usage */
214 188384 : jstate->clocations[jstate->clocations_count].length = -1;
215 188384 : jstate->clocations_count++;
216 : }
217 200202 : }
218 :
219 : #define JUMBLE_NODE(item) \
220 : _jumbleNode(jstate, (Node *) expr->item)
221 : #define JUMBLE_LOCATION(location) \
222 : RecordConstLocation(jstate, expr->location)
223 : #define JUMBLE_FIELD(item) \
224 : AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
225 : #define JUMBLE_FIELD_SINGLE(item) \
226 : AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
227 : #define JUMBLE_STRING(str) \
228 : do { \
229 : if (expr->str) \
230 : AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
231 : } while(0)
232 :
233 : #include "queryjumblefuncs.funcs.c"
234 :
235 : static void
236 6280388 : _jumbleNode(JumbleState *jstate, Node *node)
237 : {
238 6280388 : Node *expr = node;
239 :
240 6280388 : if (expr == NULL)
241 3762384 : return;
242 :
243 : /* Guard against stack overflow due to overly complex expressions */
244 2518004 : check_stack_depth();
245 :
246 : /*
247 : * We always emit the node's NodeTag, then any additional fields that are
248 : * considered significant, and then we recurse to any child nodes.
249 : */
250 2518004 : JUMBLE_FIELD(type);
251 :
252 2518004 : switch (nodeTag(expr))
253 : {
254 : #include "queryjumblefuncs.switch.c"
255 :
256 618144 : case T_List:
257 : case T_IntList:
258 : case T_OidList:
259 : case T_XidList:
260 618144 : _jumbleList(jstate, expr);
261 618144 : break;
262 :
263 0 : default:
264 : /* Only a warning, since we can stumble along anyway */
265 0 : elog(WARNING, "unrecognized node type: %d",
266 : (int) nodeTag(expr));
267 0 : break;
268 : }
269 :
270 : /* Special cases to handle outside the automated code */
271 2518004 : switch (nodeTag(expr))
272 : {
273 11330 : case T_Param:
274 : {
275 11330 : Param *p = (Param *) node;
276 :
277 : /*
278 : * Update the highest Param id seen, in order to start
279 : * normalization correctly.
280 : */
281 11330 : if (p->paramkind == PARAM_EXTERN &&
282 10588 : p->paramid > jstate->highest_extern_param_id)
283 8976 : jstate->highest_extern_param_id = p->paramid;
284 : }
285 11330 : break;
286 2506674 : default:
287 2506674 : break;
288 : }
289 : }
290 :
291 : static void
292 618144 : _jumbleList(JumbleState *jstate, Node *node)
293 : {
294 618144 : List *expr = (List *) node;
295 : ListCell *l;
296 :
297 618144 : switch (expr->type)
298 : {
299 617372 : case T_List:
300 1699652 : foreach(l, expr)
301 1082280 : _jumbleNode(jstate, lfirst(l));
302 617372 : break;
303 772 : case T_IntList:
304 1730 : foreach(l, expr)
305 958 : JUMBLE_FIELD_SINGLE(lfirst_int(l));
306 772 : break;
307 0 : case T_OidList:
308 0 : foreach(l, expr)
309 0 : JUMBLE_FIELD_SINGLE(lfirst_oid(l));
310 0 : break;
311 0 : case T_XidList:
312 0 : foreach(l, expr)
313 0 : JUMBLE_FIELD_SINGLE(lfirst_xid(l));
314 0 : break;
315 0 : default:
316 0 : elog(ERROR, "unrecognized list node type: %d",
317 : (int) expr->type);
318 : return;
319 : }
320 : }
321 :
322 : static void
323 16592 : _jumbleA_Const(JumbleState *jstate, Node *node)
324 : {
325 16592 : A_Const *expr = (A_Const *) node;
326 :
327 16592 : JUMBLE_FIELD(isnull);
328 16592 : if (!expr->isnull)
329 : {
330 16448 : JUMBLE_FIELD(val.node.type);
331 16448 : switch (nodeTag(&expr->val))
332 : {
333 7750 : case T_Integer:
334 7750 : JUMBLE_FIELD(val.ival.ival);
335 7750 : break;
336 84 : case T_Float:
337 84 : JUMBLE_STRING(val.fval.fval);
338 84 : break;
339 224 : case T_Boolean:
340 224 : JUMBLE_FIELD(val.boolval.boolval);
341 224 : break;
342 8386 : case T_String:
343 8386 : JUMBLE_STRING(val.sval.sval);
344 8386 : break;
345 4 : case T_BitString:
346 4 : JUMBLE_STRING(val.bsval.bsval);
347 4 : break;
348 0 : default:
349 0 : elog(ERROR, "unrecognized node type: %d",
350 : (int) nodeTag(&expr->val));
351 : break;
352 : }
353 144 : }
354 16592 : }
|