Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * queryjumblefuncs.c
4 : * Query normalization and fingerprinting.
5 : *
6 : * Normalization is a process whereby similar queries, typically differing only
7 : * in their constants (though the exact rules are somewhat more subtle than
8 : * that) are recognized as equivalent, and are tracked as a single entry. This
9 : * is particularly useful for non-prepared queries.
10 : *
11 : * Normalization is implemented by fingerprinting queries, selectively
12 : * serializing those fields of each query tree's nodes that are judged to be
13 : * essential to the query. This is referred to as a query jumble. This is
14 : * distinct from a regular serialization in that various extraneous
15 : * information is ignored as irrelevant or not essential to the query, such
16 : * as the collations of Vars and, most notably, the values of constants.
17 : *
18 : * This jumble is acquired at the end of parse analysis of each query, and
19 : * a 64-bit hash of it is stored into the query's Query.queryId field.
20 : * The server then copies this value around, making it available in plan
21 : * tree(s) generated from the query. The executor can then use this value
22 : * to blame query costs on the proper queryId.
23 : *
24 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
25 : * Portions Copyright (c) 1994, Regents of the University of California
26 : *
27 : *
28 : * IDENTIFICATION
29 : * src/backend/nodes/queryjumblefuncs.c
30 : *
31 : *-------------------------------------------------------------------------
32 : */
33 : #include "postgres.h"
34 :
35 : #include "common/hashfn.h"
36 : #include "miscadmin.h"
37 : #include "nodes/queryjumble.h"
38 : #include "parser/scansup.h"
39 :
40 : #define JUMBLE_SIZE 1024 /* query serialization buffer size */
41 :
42 : /* GUC parameters */
43 : int compute_query_id = COMPUTE_QUERY_ID_AUTO;
44 :
45 : /*
46 : * True when compute_query_id is ON or AUTO, and a module requests them.
47 : *
48 : * Note that IsQueryIdEnabled() should be used instead of checking
49 : * query_id_enabled or compute_query_id directly when we want to know
50 : * whether query identifiers are computed in the core or not.
51 : */
52 : bool query_id_enabled = false;
53 :
54 : static void AppendJumble(JumbleState *jstate,
55 : const unsigned char *item, Size size);
56 : static void RecordConstLocation(JumbleState *jstate, int location);
57 : static void _jumbleNode(JumbleState *jstate, Node *node);
58 : static void _jumbleA_Const(JumbleState *jstate, Node *node);
59 : static void _jumbleList(JumbleState *jstate, Node *node);
60 : static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);
61 :
62 : /*
63 : * Given a possibly multi-statement source string, confine our attention to the
64 : * relevant part of the string.
65 : */
66 : const char *
67 152230 : CleanQuerytext(const char *query, int *location, int *len)
68 : {
69 152230 : int query_location = *location;
70 152230 : int query_len = *len;
71 :
72 : /* First apply starting offset, unless it's -1 (unknown). */
73 152230 : if (query_location >= 0)
74 : {
75 : Assert(query_location <= strlen(query));
76 151862 : query += query_location;
77 : /* Length of 0 (or -1) means "rest of string" */
78 151862 : if (query_len <= 0)
79 9062 : query_len = strlen(query);
80 : else
81 : Assert(query_len <= strlen(query));
82 : }
83 : else
84 : {
85 : /* If query location is unknown, distrust query_len as well */
86 368 : query_location = 0;
87 368 : query_len = strlen(query);
88 : }
89 :
90 : /*
91 : * Discard leading and trailing whitespace, too. Use scanner_isspace()
92 : * not libc's isspace(), because we want to match the lexer's behavior.
93 : */
94 152746 : while (query_len > 0 && scanner_isspace(query[0]))
95 516 : query++, query_location++, query_len--;
96 153136 : while (query_len > 0 && scanner_isspace(query[query_len - 1]))
97 906 : query_len--;
98 :
99 152230 : *location = query_location;
100 152230 : *len = query_len;
101 :
102 152230 : return query;
103 : }
104 :
105 : JumbleState *
106 132696 : JumbleQuery(Query *query)
107 : {
108 132696 : JumbleState *jstate = NULL;
109 :
110 : Assert(IsQueryIdEnabled());
111 :
112 132696 : jstate = (JumbleState *) palloc(sizeof(JumbleState));
113 :
114 : /* Set up workspace for query jumbling */
115 132696 : jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
116 132696 : jstate->jumble_len = 0;
117 132696 : jstate->clocations_buf_size = 32;
118 132696 : jstate->clocations = (LocationLen *)
119 132696 : palloc(jstate->clocations_buf_size * sizeof(LocationLen));
120 132696 : jstate->clocations_count = 0;
121 132696 : jstate->highest_extern_param_id = 0;
122 :
123 : /* Compute query ID and mark the Query node with it */
124 132696 : _jumbleNode(jstate, (Node *) query);
125 132696 : query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
126 132696 : jstate->jumble_len,
127 : 0));
128 :
129 : /*
130 : * If we are unlucky enough to get a hash of zero, use 1 instead for
131 : * normal statements and 2 for utility queries.
132 : */
133 132696 : if (query->queryId == UINT64CONST(0))
134 : {
135 0 : if (query->utilityStmt)
136 0 : query->queryId = UINT64CONST(2);
137 : else
138 0 : query->queryId = UINT64CONST(1);
139 : }
140 :
141 132696 : return jstate;
142 : }
143 :
144 : /*
145 : * Enables query identifier computation.
146 : *
147 : * Third-party plugins can use this function to inform core that they require
148 : * a query identifier to be computed.
149 : */
150 : void
151 12 : EnableQueryId(void)
152 : {
153 12 : if (compute_query_id != COMPUTE_QUERY_ID_OFF)
154 12 : query_id_enabled = true;
155 12 : }
156 :
157 : /*
158 : * AppendJumble: Append a value that is substantive in a given query to
159 : * the current jumble.
160 : */
161 : static void
162 6737390 : AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
163 : {
164 6737390 : unsigned char *jumble = jstate->jumble;
165 6737390 : Size jumble_len = jstate->jumble_len;
166 :
167 : /*
168 : * Whenever the jumble buffer is full, we hash the current contents and
169 : * reset the buffer to contain just that hash value, thus relying on the
170 : * hash to summarize everything so far.
171 : */
172 13476582 : while (size > 0)
173 : {
174 : Size part_size;
175 :
176 6739192 : if (jumble_len >= JUMBLE_SIZE)
177 : {
178 : uint64 start_hash;
179 :
180 2216 : start_hash = DatumGetUInt64(hash_any_extended(jumble,
181 : JUMBLE_SIZE, 0));
182 2216 : memcpy(jumble, &start_hash, sizeof(start_hash));
183 2216 : jumble_len = sizeof(start_hash);
184 : }
185 6739192 : part_size = Min(size, JUMBLE_SIZE - jumble_len);
186 6739192 : memcpy(jumble + jumble_len, item, part_size);
187 6739192 : jumble_len += part_size;
188 6739192 : item += part_size;
189 6739192 : size -= part_size;
190 : }
191 6737390 : jstate->jumble_len = jumble_len;
192 6737390 : }
193 :
194 : /*
195 : * Record location of constant within query string of query tree
196 : * that is currently being walked.
197 : */
198 : static void
199 199316 : RecordConstLocation(JumbleState *jstate, int location)
200 : {
201 : /* -1 indicates unknown or undefined location */
202 199316 : if (location >= 0)
203 : {
204 : /* enlarge array if needed */
205 187534 : if (jstate->clocations_count >= jstate->clocations_buf_size)
206 : {
207 114 : jstate->clocations_buf_size *= 2;
208 114 : jstate->clocations = (LocationLen *)
209 114 : repalloc(jstate->clocations,
210 114 : jstate->clocations_buf_size *
211 : sizeof(LocationLen));
212 : }
213 187534 : jstate->clocations[jstate->clocations_count].location = location;
214 : /* initialize lengths to -1 to simplify third-party module usage */
215 187534 : jstate->clocations[jstate->clocations_count].length = -1;
216 187534 : jstate->clocations_count++;
217 : }
218 199316 : }
219 :
220 : #define JUMBLE_NODE(item) \
221 : _jumbleNode(jstate, (Node *) expr->item)
222 : #define JUMBLE_LOCATION(location) \
223 : RecordConstLocation(jstate, expr->location)
224 : #define JUMBLE_FIELD(item) \
225 : AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
226 : #define JUMBLE_FIELD_SINGLE(item) \
227 : AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
228 : #define JUMBLE_STRING(str) \
229 : do { \
230 : if (expr->str) \
231 : AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
232 : } while(0)
233 :
234 : #include "queryjumblefuncs.funcs.c"
235 :
236 : static void
237 6278870 : _jumbleNode(JumbleState *jstate, Node *node)
238 : {
239 6278870 : Node *expr = node;
240 :
241 6278870 : if (expr == NULL)
242 3764806 : return;
243 :
244 : /* Guard against stack overflow due to overly complex expressions */
245 2514064 : check_stack_depth();
246 :
247 : /*
248 : * We always emit the node's NodeTag, then any additional fields that are
249 : * considered significant, and then we recurse to any child nodes.
250 : */
251 2514064 : JUMBLE_FIELD(type);
252 :
253 2514064 : switch (nodeTag(expr))
254 : {
255 : #include "queryjumblefuncs.switch.c"
256 :
257 617050 : case T_List:
258 : case T_IntList:
259 : case T_OidList:
260 : case T_XidList:
261 617050 : _jumbleList(jstate, expr);
262 617050 : break;
263 :
264 0 : default:
265 : /* Only a warning, since we can stumble along anyway */
266 0 : elog(WARNING, "unrecognized node type: %d",
267 : (int) nodeTag(expr));
268 0 : break;
269 : }
270 :
271 : /* Special cases to handle outside the automated code */
272 2514064 : switch (nodeTag(expr))
273 : {
274 11064 : case T_Param:
275 : {
276 11064 : Param *p = (Param *) node;
277 :
278 : /*
279 : * Update the highest Param id seen, in order to start
280 : * normalization correctly.
281 : */
282 11064 : if (p->paramkind == PARAM_EXTERN &&
283 10318 : p->paramid > jstate->highest_extern_param_id)
284 9016 : jstate->highest_extern_param_id = p->paramid;
285 : }
286 11064 : break;
287 2503000 : default:
288 2503000 : break;
289 : }
290 : }
291 :
292 : static void
293 617050 : _jumbleList(JumbleState *jstate, Node *node)
294 : {
295 617050 : List *expr = (List *) node;
296 : ListCell *l;
297 :
298 617050 : switch (expr->type)
299 : {
300 616278 : case T_List:
301 1696504 : foreach(l, expr)
302 1080226 : _jumbleNode(jstate, lfirst(l));
303 616278 : break;
304 772 : case T_IntList:
305 1730 : foreach(l, expr)
306 958 : JUMBLE_FIELD_SINGLE(lfirst_int(l));
307 772 : break;
308 0 : case T_OidList:
309 0 : foreach(l, expr)
310 0 : JUMBLE_FIELD_SINGLE(lfirst_oid(l));
311 0 : break;
312 0 : case T_XidList:
313 0 : foreach(l, expr)
314 0 : JUMBLE_FIELD_SINGLE(lfirst_xid(l));
315 0 : break;
316 0 : default:
317 0 : elog(ERROR, "unrecognized list node type: %d",
318 : (int) expr->type);
319 : return;
320 : }
321 : }
322 :
323 : static void
324 16466 : _jumbleA_Const(JumbleState *jstate, Node *node)
325 : {
326 16466 : A_Const *expr = (A_Const *) node;
327 :
328 16466 : JUMBLE_FIELD(isnull);
329 16466 : if (!expr->isnull)
330 : {
331 16322 : JUMBLE_FIELD(val.node.type);
332 16322 : switch (nodeTag(&expr->val))
333 : {
334 7686 : case T_Integer:
335 7686 : JUMBLE_FIELD(val.ival.ival);
336 7686 : break;
337 84 : case T_Float:
338 84 : JUMBLE_STRING(val.fval.fval);
339 84 : break;
340 224 : case T_Boolean:
341 224 : JUMBLE_FIELD(val.boolval.boolval);
342 224 : break;
343 8324 : case T_String:
344 8324 : JUMBLE_STRING(val.sval.sval);
345 8324 : break;
346 4 : case T_BitString:
347 4 : JUMBLE_STRING(val.bsval.bsval);
348 4 : break;
349 0 : default:
350 0 : elog(ERROR, "unrecognized node type: %d",
351 : (int) nodeTag(&expr->val));
352 : break;
353 : }
354 144 : }
355 16466 : }
|